-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalysis_queries.sql
More file actions
299 lines (279 loc) · 11.3 KB
/
analysis_queries.sql
File metadata and controls
299 lines (279 loc) · 11.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
-- ============================================================
-- Workforce Operations Analytics
-- Analysis Queries — 10 Real Interview-Style Questions
-- Author: Tai Nguyen
-- Tools: SQL (PostgreSQL-compatible)
-- ============================================================
-- ============================================================
-- QUERY 01 — Department Headcount & Avg Hourly Rate
-- Skill: JOIN + GROUP BY + aggregation
-- Business Q: How many active employees does each department
-- have, and what is the average hourly rate?
-- ============================================================
SELECT
d.department_name,
d.location,
COUNT(e.employee_id) AS headcount,
ROUND(AVG(e.hourly_rate), 2) AS avg_hourly_rate,
MIN(e.hourly_rate) AS min_rate,
MAX(e.hourly_rate) AS max_rate
FROM departments d
LEFT JOIN employees e
ON d.department_id = e.department_id
AND e.is_active = TRUE
GROUP BY d.department_name, d.location
ORDER BY headcount DESC;
-- ============================================================
-- QUERY 02 — Attendance Rate by Employee
-- Skill: LEFT JOIN + CASE + aggregation
-- Business Q: Which employees have the highest and lowest
-- attendance rates over the tracked period?
-- ============================================================
SELECT
e.employee_id,
e.first_name || ' ' || e.last_name AS employee_name,
d.department_name,
COUNT(s.shift_id) AS total_shifts,
COUNT(a.attendance_id) AS attended_shifts,
ROUND(
COUNT(a.attendance_id) * 100.0
/ NULLIF(COUNT(s.shift_id), 0), 1
) AS attendance_rate_pct
FROM employees e
JOIN departments d ON e.department_id = d.department_id
JOIN shifts s ON e.employee_id = s.employee_id
LEFT JOIN attendance a ON s.shift_id = a.shift_id
AND a.status != 'Absent'
WHERE e.is_active = TRUE
GROUP BY e.employee_id, employee_name, d.department_name
ORDER BY attendance_rate_pct DESC;
-- ============================================================
-- QUERY 03 — Late Arrivals Analysis
-- Skill: TIMESTAMP math + CASE + filtering
-- Business Q: How often is each employee late, and by how
-- many minutes on average?
-- ============================================================
SELECT
e.first_name || ' ' || e.last_name AS employee_name,
d.department_name,
COUNT(*) AS total_late_shifts,
ROUND(
AVG(
EXTRACT(EPOCH FROM (a.clock_in - (s.shift_date + s.scheduled_start)::TIMESTAMP))
/ 60
), 1
) AS avg_minutes_late
FROM attendance a
JOIN shifts s ON a.shift_id = s.shift_id
JOIN employees e ON a.employee_id = e.employee_id
JOIN departments d ON e.department_id = d.department_id
WHERE a.status = 'Late'
GROUP BY employee_name, d.department_name
ORDER BY total_late_shifts DESC, avg_minutes_late DESC;
-- ============================================================
-- QUERY 04 — Task Completion Rate & Quality Score by Employee
-- Skill: Aggregation + calculated fields + ORDER BY
-- Business Q: Who are the top performers based on task
-- completion and quality score?
-- ============================================================
SELECT
e.first_name || ' ' || e.last_name AS employee_name,
d.department_name,
SUM(t.tasks_assigned) AS total_assigned,
SUM(t.tasks_completed) AS total_completed,
ROUND(
SUM(t.tasks_completed) * 100.0
/ NULLIF(SUM(t.tasks_assigned), 0), 1
) AS completion_rate_pct,
ROUND(AVG(t.quality_score), 2) AS avg_quality_score
FROM tasks t
JOIN employees e ON t.employee_id = e.employee_id
JOIN departments d ON e.department_id = d.department_id
GROUP BY employee_name, d.department_name
ORDER BY avg_quality_score DESC, completion_rate_pct DESC;
-- ============================================================
-- QUERY 05 — Weekly Hours Worked per Employee
-- Skill: TIMESTAMP math + GROUP BY + date truncation
-- Business Q: How many hours did each employee work this
-- week, and are any over or under target (40 hrs)?
-- ============================================================
SELECT
e.first_name || ' ' || e.last_name AS employee_name,
e.employment_type,
ROUND(
SUM(
EXTRACT(EPOCH FROM (a.clock_out - a.clock_in))
/ 3600
), 2
) AS hours_worked,
CASE
WHEN e.employment_type = 'Full-time'
AND SUM(EXTRACT(EPOCH FROM (a.clock_out - a.clock_in)) / 3600) >= 40
THEN 'On target'
WHEN e.employment_type = 'Full-time'
THEN 'Under target'
ELSE 'Part-time / Contract'
END AS hours_status
FROM attendance a
JOIN employees e ON a.employee_id = e.employee_id
WHERE a.clock_out IS NOT NULL
GROUP BY employee_name, e.employment_type
ORDER BY hours_worked DESC;
-- ============================================================
-- QUERY 06 — Running Total of Tasks Completed (Window Function)
-- Skill: Window function — SUM() OVER (ORDER BY date)
-- Business Q: What is the cumulative task output day-over-day
-- across the operation?
-- ============================================================
SELECT
task_date,
SUM(tasks_completed) AS daily_tasks_completed,
SUM(SUM(tasks_completed))
OVER (ORDER BY task_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
AS running_total
FROM tasks
GROUP BY task_date
ORDER BY task_date;
-- ============================================================
-- QUERY 07 — Employee Performance Ranking by Department (Window)
-- Skill: Window function — RANK() OVER (PARTITION BY)
-- Business Q: Who is the top performer in each department
-- based on average quality score?
-- ============================================================
WITH dept_scores AS (
SELECT
e.first_name || ' ' || e.last_name AS employee_name,
d.department_name,
ROUND(AVG(t.quality_score), 2) AS avg_quality_score
FROM tasks t
JOIN employees e ON t.employee_id = e.employee_id
JOIN departments d ON e.department_id = d.department_id
GROUP BY employee_name, d.department_name
)
SELECT
department_name,
employee_name,
avg_quality_score,
RANK() OVER (
PARTITION BY department_name
ORDER BY avg_quality_score DESC
) AS dept_rank
FROM dept_scores
ORDER BY department_name, dept_rank;
-- ============================================================
-- QUERY 08 — Employees with Below-Average Quality Score (CTE)
-- Skill: CTE + subquery + comparison to aggregate
-- Business Q: Which employees are performing below the
-- company-wide average quality score?
-- ============================================================
WITH employee_quality AS (
SELECT
e.employee_id,
e.first_name || ' ' || e.last_name AS employee_name,
d.department_name,
ROUND(AVG(t.quality_score), 2) AS avg_quality_score
FROM tasks t
JOIN employees e ON t.employee_id = e.employee_id
JOIN departments d ON e.department_id = d.department_id
GROUP BY e.employee_id, employee_name, d.department_name
),
company_avg AS (
SELECT ROUND(AVG(quality_score), 2) AS overall_avg
FROM tasks
)
SELECT
eq.employee_name,
eq.department_name,
eq.avg_quality_score,
ca.overall_avg AS company_avg,
ROUND(eq.avg_quality_score - ca.overall_avg, 2)
AS variance_from_avg
FROM employee_quality eq
CROSS JOIN company_avg ca
WHERE eq.avg_quality_score < ca.overall_avg
ORDER BY variance_from_avg ASC;
-- ============================================================
-- QUERY 09 — Shift Coverage Gaps (Absent Employees)
-- Skill: LEFT JOIN + IS NULL pattern (anti-join)
-- Business Q: Which scheduled shifts had no attendance record
-- (i.e., employee was absent or data is missing)?
-- ============================================================
SELECT
s.shift_id,
s.shift_date,
s.shift_type,
e.first_name || ' ' || e.last_name AS employee_name,
d.department_name,
s.scheduled_start,
s.scheduled_end
FROM shifts s
JOIN employees e ON s.employee_id = e.employee_id
JOIN departments d ON e.department_id = d.department_id
LEFT JOIN attendance a ON s.shift_id = a.shift_id
WHERE a.attendance_id IS NULL
ORDER BY s.shift_date, d.department_name;
-- ============================================================
-- QUERY 10 — Full Operational Dashboard View (CTE chain)
-- Skill: Multi-CTE + JOIN + CASE — analyst-level summary
-- Business Q: Give a single summary row per employee showing
-- attendance rate, avg quality, hours worked,
-- and a performance flag.
-- ============================================================
WITH attendance_summary AS (
SELECT
s.employee_id,
COUNT(s.shift_id) AS total_shifts,
COUNT(a.attendance_id) AS attended,
ROUND(
COUNT(a.attendance_id) * 100.0
/ NULLIF(COUNT(s.shift_id), 0), 1
) AS attendance_pct
FROM shifts s
LEFT JOIN attendance a
ON s.shift_id = a.shift_id
AND a.status != 'Absent'
GROUP BY s.employee_id
),
quality_summary AS (
SELECT
employee_id,
ROUND(AVG(quality_score), 2) AS avg_quality,
SUM(tasks_completed) AS total_completed
FROM tasks
GROUP BY employee_id
),
hours_summary AS (
SELECT
employee_id,
ROUND(
SUM(EXTRACT(EPOCH FROM (clock_out - clock_in)) / 3600), 1
) AS total_hours
FROM attendance
WHERE clock_out IS NOT NULL
GROUP BY employee_id
)
SELECT
e.first_name || ' ' || e.last_name AS employee_name,
d.department_name,
e.employment_type,
COALESCE(att.attendance_pct, 0) AS attendance_rate_pct,
COALESCE(qs.avg_quality, 0) AS avg_quality_score,
COALESCE(hs.total_hours, 0) AS total_hours_worked,
COALESCE(qs.total_completed, 0) AS tasks_completed,
CASE
WHEN att.attendance_pct >= 95
AND qs.avg_quality >= 95 THEN 'High performer'
WHEN att.attendance_pct >= 80
AND qs.avg_quality >= 88 THEN 'Solid performer'
WHEN att.attendance_pct < 80
OR qs.avg_quality < 88 THEN 'Needs review'
ELSE 'Insufficient data'
END AS performance_flag
FROM employees e
JOIN departments d ON e.department_id = d.department_id
LEFT JOIN attendance_summary att ON e.employee_id = att.employee_id
LEFT JOIN quality_summary qs ON e.employee_id = qs.employee_id
LEFT JOIN hours_summary hs ON e.employee_id = hs.employee_id
WHERE e.is_active = TRUE
ORDER BY avg_quality_score DESC, attendance_rate_pct DESC;