Employees
These are homework tasks on employees database.
Clone and init the database (requires a local PostgreSQL instance):
psql -U postgres -c 'CREATE DATABASE employees;'
git clone https://github.com/vrajmohan/pgsql-sample-data.git
psql -U postgres -d employees -f pgsql-sample-data/employee/employees.dump
Execute a PRQL query:
cd prql-compiler
cargo run compile examples/employees/average-title-salary.prql | psql -U postgres -d employees
Task 1
rank the employee titles according to the average salary for each department.
My solution:
- for each employee, find their average salary,
- join employees with their departments and titles (duplicating employees for each of their titles and departments)
- group by department and title, aggregating average salary
- join with department to get department name
PRQL
from salaries
group [emp_no] (
aggregate [emp_salary = average salary]
)
join t=titles [==emp_no]
join dept_emp side:left [==emp_no]
group [dept_emp.dept_no, t.title] (
aggregate [avg_salary = average emp_salary]
)
join departments [==dept_no]
select [dept_name, title, avg_salary]
SQL
WITH table_3 AS (
SELECT
AVG(salary) AS _expr_0,
emp_no
FROM
salaries
GROUP BY
emp_no
),
table_1 AS (
SELECT
t.title,
AVG(table_2._expr_0) AS avg_salary,
dept_emp.dept_no
FROM
table_3 AS table_2
JOIN titles AS t ON table_2.emp_no = t.emp_no
LEFT JOIN dept_emp ON table_2.emp_no = dept_emp.emp_no
GROUP BY
dept_emp.dept_no,
t.title
)
SELECT
departments.dept_name,
table_0.title,
table_0.avg_salary
FROM
table_1 AS table_0
JOIN departments ON table_0.dept_no = departments.dept_no
Task 2
Estimate distribution of salaries and gender for each department departments.
PRQL
from e=employees
join salaries [==emp_no]
group [e.emp_no, e.gender] (
aggregate [
emp_salary = average salaries.salary
]
)
join de=dept_emp [==emp_no] side:left
group [de.dept_no, gender] (
aggregate [
salary_avg = average emp_salary,
salary_sd = stddev emp_salary,
]
)
join departments [==dept_no]
select [dept_name, gender, salary_avg, salary_sd]
SQL
WITH table_3 AS (
SELECT
e.gender,
AVG(salaries.salary) AS _expr_0,
e.emp_no
FROM
employees AS e
JOIN salaries ON e.emp_no = salaries.emp_no
GROUP BY
e.emp_no,
e.gender
),
table_1 AS (
SELECT
table_2.gender,
AVG(table_2._expr_0) AS salary_avg,
STDDEV(table_2._expr_0) AS salary_sd,
de.dept_no
FROM
table_3 AS table_2
LEFT JOIN dept_emp AS de ON table_2.emp_no = de.emp_no
GROUP BY
de.dept_no,
table_2.gender
)
SELECT
departments.dept_name,
table_0.gender,
table_0.salary_avg,
table_0.salary_sd
FROM
table_1 AS table_0
JOIN departments ON table_0.dept_no = departments.dept_no
Task 3
Estimate distribution of salaries and gender for each manager.
PRQL
from e=employees
join salaries [==emp_no]
group [e.emp_no, e.gender] (
aggregate [
emp_salary = average salaries.salary
]
)
join de=dept_emp [==emp_no]
join dm=dept_manager [
(dm.dept_no == de.dept_no) and s"(de.from_date, de.to_date) OVERLAPS (dm.from_date, dm.to_date)"
]
group [dm.emp_no, gender] (
aggregate [
salary_avg = average emp_salary,
salary_sd = stddev emp_salary
]
)
derive mng_no = emp_no
join managers=employees [==emp_no]
derive mng_name = s"managers.first_name || ' ' || managers.last_name"
select [mng_name, managers.gender, salary_avg, salary_sd]
SQL
WITH table_3 AS (
SELECT
e.gender,
AVG(salaries.salary) AS _expr_0,
e.emp_no
FROM
employees AS e
JOIN salaries ON e.emp_no = salaries.emp_no
GROUP BY
e.emp_no,
e.gender
),
table_1 AS (
SELECT
AVG(table_2._expr_0) AS salary_avg,
STDDEV(table_2._expr_0) AS salary_sd,
dm.emp_no
FROM
table_3 AS table_2
JOIN dept_emp AS de ON table_2.emp_no = de.emp_no
JOIN dept_manager AS dm ON dm.dept_no = de.dept_no
AND (de.from_date, de.to_date) OVERLAPS (dm.from_date, dm.to_date)
GROUP BY
dm.emp_no,
table_2.gender
)
SELECT
managers.first_name || ' ' || managers.last_name AS mng_name,
managers.gender,
table_0.salary_avg,
table_0.salary_sd
FROM
table_1 AS table_0
JOIN employees AS managers ON table_0.emp_no = managers.emp_no
Task 4
Find distributions of titles, salaries and genders for each department.
PRQL
from de=dept_emp
join s=salaries side:left [
(s.emp_no == de.emp_no),
s"({s.from_date}, {s.to_date}) OVERLAPS ({de.from_date}, {de.to_date})"
]
group [de.emp_no, de.dept_no] (
aggregate salary = (average s.salary)
)
join employees [==emp_no]
join titles [==emp_no]
select [dept_no, salary, employees.gender, titles.title]
SQL
WITH table_1 AS (
SELECT
de.dept_no,
AVG(s.salary) AS salary,
de.emp_no
FROM
dept_emp AS de
LEFT JOIN salaries AS s ON s.emp_no = de.emp_no
AND (s.from_date, s.to_date) OVERLAPS (de.from_date, de.to_date)
GROUP BY
de.emp_no,
de.dept_no
)
SELECT
table_0.dept_no,
table_0.salary,
employees.gender,
titles.title
FROM
table_1 AS table_0
JOIN employees ON table_0.emp_no = employees.emp_no
JOIN titles ON table_0.emp_no = titles.emp_no