Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions calculate_largest_expensors.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
USE memory.default;

/*
First we need to find all expenses and to do that, we are selecting from "EXPENSE" table. After than, we need to join
it to "EMPLOYEE" table and finally to "EMPLOYEE" table again to get actual employess and their managers. After that, it's just
a matter of a simple sum and filter.

We use HAVING instead of WHERE because of SQL order of operations as HAVING is being evaluated after the actual aggreation, while
WHERE is evaluated before aggregation.
*/
-- Reports every employee whose total expensed amount (unit_price * quantity,
-- summed over all of their EXPENSE rows) exceeds 1000, largest total first.
-- Output columns: employee_id, employee_name, manager_id, manager_name,
-- total_expensed_amount.
SELECT
    employee.employee_id,
    CONCAT(employee.first_name, ' ', employee.last_name) AS employee_name,
    -- The manager's id is the employee's own manager_id. Selecting
    -- manager.manager_id would instead return the id of the manager's manager,
    -- which would not match manager_name below.
    employee.manager_id,
    CONCAT(manager.first_name, ' ', manager.last_name) AS manager_name,
    SUM(expense.unit_price * expense.quantity) AS total_expensed_amount
FROM
    EXPENSE expense
LEFT JOIN
    EMPLOYEE employee
    ON expense.employee_id = employee.employee_id
-- Self-join: resolve the employee's manager_id to the manager's EMPLOYEE row.
LEFT JOIN
    EMPLOYEE manager
    ON manager.employee_id = employee.manager_id
GROUP BY
    employee.employee_id,
    CONCAT(employee.first_name, ' ', employee.last_name),
    employee.manager_id,
    CONCAT(manager.first_name, ' ', manager.last_name)
-- The threshold applies to the aggregated total, so it must live in HAVING.
HAVING
    SUM(expense.unit_price * expense.quantity) > 1000
ORDER BY
    SUM(expense.unit_price * expense.quantity) DESC;
28 changes: 28 additions & 0 deletions create_employees.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
USE memory.default;

/*
In real world, creating a table would not be so trivial (perhaps only for really small or manually created use-cases).
Another option on how to load data (from .csv, .parquet, etc.) would be to connect to an external location such as S3 or GCS or even
local file system and load from there.

As I have time constraint on this task, I've decided to manually load data.
*/
-- EMPLOYEE: one row per employee; manager_id refers to another row's employee_id.
-- NOTE: CREATE TABLE IF NOT EXISTS is a no-op on re-run, but the INSERT below is
-- not guarded, so running this script twice inserts every row twice.
CREATE TABLE IF NOT EXISTS EMPLOYEE (
    employee_id TINYINT,
    first_name VARCHAR,
    last_name VARCHAR,
    job_title VARCHAR,
    manager_id TINYINT
);

-- Data as shown in hr/employee_index.csv
-- The explicit column list keeps the load correct even if the table's column
-- order ever changes.
-- NOTE(review): the rows below contain the reporting loop 1 -> 4 -> 2 -> 1;
-- presumably this mirrors the source CSV on purpose -- confirm.
INSERT INTO EMPLOYEE (
    employee_id,
    first_name,
    last_name,
    job_title,
    manager_id
)
VALUES
    (1, 'Ian', 'James', 'CEO', 4),
    (2, 'Umberto', 'Torrielli', 'CSO', 1),
    (3, 'Alex', 'Jacobson', 'MD EMEA', 2),
    (4, 'Darren', 'Poynton', 'CFO', 2),
    (5, 'Tim', 'Beard', 'MD APAC', 2),
    (6, 'Gemma', 'Dodd', 'COS', 1),
    (7, 'Lisa', 'Platten', 'CHR', 6),
    (8, 'Stefano', 'Camisaca', 'GM Activation', 2),
    (9, 'Andrea', 'Ghibaudi', 'MD NAM', 2);
20 changes: 20 additions & 0 deletions create_expenses.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
USE memory.default;

/*
Same as for create_employees.sql...
*/
-- EXPENSE: one row per receipt line; the expensed amount of a row is
-- unit_price * quantity.
-- NOTE: CREATE TABLE IF NOT EXISTS is a no-op on re-run, but the INSERT below is
-- not guarded, so running this script twice inserts every row twice.
CREATE TABLE IF NOT EXISTS EXPENSE (
    employee_id TINYINT,
    unit_price DECIMAL(8, 2),
    quantity TINYINT
);

-- Data as shown in finance/receipts_from_last_night/*.txt. Because the sample is
-- so small, the employee ids were looked up manually from the employee names.
-- The explicit column list keeps the load correct even if the table's column
-- order ever changes.
INSERT INTO EXPENSE (
    employee_id,
    unit_price,
    quantity
)
VALUES
    (3, 6.50, 14),
    (3, 11.00, 20),
    (3, 22.00, 18),
    (3, 13.00, 75),
    (9, 300.00, 1),
    (4, 40.00, 9),
    (2, 17.50, 4);
35 changes: 35 additions & 0 deletions create_invoices.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
USE memory.default;

/*
Same as for create_employees.sql...
*/
-- SUPPLIER: lookup table mapping supplier_id to the supplier's name.
-- NOTE: CREATE TABLE IF NOT EXISTS is a no-op on re-run, but the INSERT below is
-- not guarded, so running this script twice inserts every row twice.
CREATE TABLE IF NOT EXISTS SUPPLIER (
    supplier_id TINYINT,
    name VARCHAR
);

-- Data as shown in finance/invoices/*.txt
-- The explicit column list keeps the load correct even if the table's column
-- order ever changes.
INSERT INTO SUPPLIER (
    supplier_id,
    name
)
VALUES
    (1, 'Catering Plus'),
    (2, 'Dave''s Discos'),
    (3, 'Entertainment tonight'),
    (4, 'Ice Ice Baby'),
    (5, 'Party Animals');

/*
Same as for create_employees.sql...
*/
-- INVOICE: one row per supplier invoice, linked to SUPPLIER via supplier_id.
-- NOTE: CREATE TABLE IF NOT EXISTS is a no-op on re-run, but the INSERT below is
-- not guarded, so running this script twice inserts every row twice.
CREATE TABLE IF NOT EXISTS INVOICE (
    supplier_id TINYINT,
    invoice_amount DECIMAL(8, 2),
    due_date DATE
);

-- Data as shown in finance/invoices/*.txt
-- Each due date is the last day of the month N months after the day this script
-- runs, so the stored dates depend on the load date.
-- The explicit column list keeps the load correct even if the table's column
-- order ever changes.
INSERT INTO INVOICE (
    supplier_id,
    invoice_amount,
    due_date
)
VALUES
    (5, 6000.00, LAST_DAY_OF_MONTH(DATE_ADD('month', 3, CURRENT_DATE))),
    (1, 2000.00, LAST_DAY_OF_MONTH(DATE_ADD('month', 2, CURRENT_DATE))),
    (1, 1500.00, LAST_DAY_OF_MONTH(DATE_ADD('month', 3, CURRENT_DATE))),
    (2, 500.00, LAST_DAY_OF_MONTH(DATE_ADD('month', 1, CURRENT_DATE))),
    (3, 6000.00, LAST_DAY_OF_MONTH(DATE_ADD('month', 3, CURRENT_DATE))),
    (4, 4000.00, LAST_DAY_OF_MONTH(DATE_ADD('month', 6, CURRENT_DATE)));
44 changes: 44 additions & 0 deletions find_manager_cycles.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
USE memory.default;

-- Detect reporting cycles by walking each employee's manager chain with a
-- recursive CTE. Each row carries the employee the walk started from, the
-- employee reached so far, that employee's manager, and the ids visited.
WITH RECURSIVE manager_loop (start_employee, current_employee, next_manager, path) AS (
    -- Anchor: a single-element chain for every employee.
    SELECT
        emp.employee_id,
        emp.employee_id,
        emp.manager_id,
        ARRAY[emp.employee_id]
    FROM EMPLOYEE AS emp

    UNION ALL

    -- Recursive step: move up to the manager, unless that manager is already on
    -- the path (which stops the walk from circling forever).
    SELECT
        chain.start_employee,
        boss.employee_id,
        boss.manager_id,
        chain.path || boss.employee_id
    FROM manager_loop AS chain
    INNER JOIN EMPLOYEE AS boss
        ON boss.employee_id = chain.next_manager
    WHERE NOT CONTAINS(chain.path, boss.employee_id)
)

-- A chain closes into a cycle when the next manager is the employee the walk
-- started from; the starting id is appended so the printed path ends where it began.
SELECT
    closed.start_employee AS employee_id,
    ARRAY_JOIN(closed.path || closed.next_manager, ', ') AS full_cycle_path
FROM manager_loop AS closed
WHERE closed.next_manager = closed.start_employee;
63 changes: 63 additions & 0 deletions generate_supplier_payment_plans.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
USE memory.default;

-- Builds a monthly payment plan per supplier: the supplier's total invoiced
-- amount is split into equal whole-unit payments, one at the end of each month
-- from the current month up to the supplier's latest due date, with the
-- remainder added to the last payment.
-- Output columns: supplier_id, supplier_name, payment_amount,
-- balance_outstanding, payment_date.

-- Aggregate all invoices for each supplier to get a total amount and final due date.
WITH supplier_invoice AS (
    SELECT
        s.supplier_id,
        s.name AS supplier_name,
        CAST(SUM(i.invoice_amount) AS DECIMAL(10, 2)) AS sum_invoice_amt,
        MAX(i.due_date) AS latest_due_date
    FROM
        INVOICE i
    LEFT JOIN
        SUPPLIER s
        ON i.supplier_id = s.supplier_id
    GROUP BY
        s.supplier_id,
        s.name
),

-- Number of monthly payments, computed once and reused below.
-- Clamped to at least 1: when the latest due date is more than a month in the
-- past, DATE_DIFF(...) + 1 is zero or negative, which would otherwise cause a
-- division by zero or a descending SEQUENCE(0, <negative>) further down.
-- An overdue total is therefore settled in a single payment this month.
payment_count AS (
    SELECT
        supplier_id,
        supplier_name,
        sum_invoice_amt,
        GREATEST(DATE_DIFF('month', CURRENT_DATE, latest_due_date) + 1, 1) AS payment_num
    FROM
        supplier_invoice
),

-- Calculate the amount for a standard payment and the adjustment for the final one.
payment_details AS (
    SELECT
        supplier_id,
        supplier_name,
        sum_invoice_amt,
        payment_num,
        -- Whole-unit instalment; the remainder is carried by the last payment.
        CAST(FLOOR(sum_invoice_amt / payment_num) AS DECIMAL(10, 2)) AS payment_amt_monthly,
        CAST(MOD(sum_invoice_amt, payment_num) AS DECIMAL(10, 2)) AS last_payment_adjustment
    FROM
        payment_count
),

-- Generate the payment schedule rows and calculate values for each month.
payment_schedule AS (
    SELECT
        d.supplier_id,
        d.supplier_name,
        d.sum_invoice_amt,
        -- seq runs from 0 (current month) to payment_num - 1 (last payment).
        CASE
            WHEN seq = d.payment_num - 1 THEN d.payment_amt_monthly + d.last_payment_adjustment -- Last payment
            ELSE d.payment_amt_monthly -- Rest of payments
        END AS payment_amount,
        LAST_DAY_OF_MONTH(DATE_ADD('month', seq, CURRENT_DATE)) AS payment_date,
        seq
    FROM
        payment_details d
    CROSS JOIN
        UNNEST(SEQUENCE(0, d.payment_num - 1)) AS t(seq)
)

-- Calculate the running balance and display the final report.
SELECT
    supplier_id,
    supplier_name,
    payment_amount,
    -- Running total of payments so far, subtracted from the supplier's total.
    sum_invoice_amt - SUM(payment_amount) OVER (PARTITION BY supplier_id ORDER BY payment_date) AS balance_outstanding,
    payment_date
FROM
    payment_schedule
ORDER BY
    supplier_id,
    payment_date;
49 changes: 49 additions & 0 deletions tests/00_referential_integrity.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
USE memory.default;

-- Test 1: Expenses logged by non-existent employees.
-- Returns the employee_id of every EXPENSE row that has no EMPLOYEE row with
-- the same employee_id.
-- Expected result: an empty set (no orphaned expense records).
SELECT
    exp.employee_id
FROM EXPENSE AS exp
WHERE NOT EXISTS (
    SELECT 1
    FROM EMPLOYEE AS emp
    WHERE emp.employee_id = exp.employee_id
);

-- Test 2: Employees whose manager does not exist.
-- Self-referencing foreign key check: returns every employee whose manager_id
-- is set but matches no employee_id in EMPLOYEE. Rows with a NULL manager_id
-- (e.g. a top-level employee without a manager) are deliberately skipped.
-- Expected result: an empty set.
SELECT
    emp.employee_id,
    emp.manager_id
FROM EMPLOYEE AS emp
WHERE emp.manager_id IS NOT NULL
    AND NOT EXISTS (
        SELECT 1
        FROM EMPLOYEE AS mgr
        WHERE mgr.employee_id = emp.manager_id
    );

-- Test 3: Invoices from non-existent suppliers.
-- Returns the supplier_id of every INVOICE row that has no SUPPLIER row with
-- the same supplier_id.
-- Expected result: an empty set (every invoice is linked to a valid supplier).
SELECT
    inv.supplier_id
FROM INVOICE AS inv
WHERE NOT EXISTS (
    SELECT 1
    FROM SUPPLIER AS sup
    WHERE sup.supplier_id = inv.supplier_id
);
104 changes: 104 additions & 0 deletions tests/01_data_validation.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
USE memory.default;

/*
Test 1: Row Count Checks
One result row per table, giving the table's name and its total row count.
Handy for tracking table growth and for verifying that a data load ran.
*/
SELECT 'EMPLOYEE' AS table_name, COUNT(*) AS row_count
FROM EMPLOYEE

UNION ALL

SELECT 'EXPENSE' AS table_name, COUNT(*) AS row_count
FROM EXPENSE

UNION ALL

SELECT 'SUPPLIER' AS table_name, COUNT(*) AS row_count
FROM SUPPLIER

UNION ALL

SELECT 'INVOICE' AS table_name, COUNT(*) AS row_count
FROM INVOICE;

/*
Test 2: Null Value Checks for Primary Keys
Counts the rows whose key column is NULL, one result row per table.
Key columns should never be NULL, so the expected count is 0 for each table.
The result column takes its name from the first branch of the UNION ALL.
*/
SELECT 'EMPLOYEE' AS table_name, COUNT(*) AS null_employee_ids
FROM EMPLOYEE
WHERE employee_id IS NULL

UNION ALL

SELECT 'SUPPLIER' AS table_name, COUNT(*) AS null_supplier_ids
FROM SUPPLIER
WHERE supplier_id IS NULL;

/*
Test 3: Uniqueness Checks for Primary Keys
Each query lists the key values that occur more than once in its table,
together with how many times they occur.
Expected result: an empty set from both queries.
*/

-- Duplicate employee ids.
SELECT employee_id, COUNT(*)
FROM EMPLOYEE
GROUP BY employee_id
HAVING COUNT(*) > 1;

-- Duplicate supplier ids.
SELECT supplier_id, COUNT(*)
FROM SUPPLIER
GROUP BY supplier_id
HAVING COUNT(*) > 1;

/*
Test 4: Data Constraint Checks
Lists EXPENSE rows that violate logical data constraints: a unit price or a
quantity that is zero or negative.
An ideal result is an empty set.
*/
SELECT
    employee_id,
    unit_price,
    quantity
FROM
    EXPENSE
WHERE
    -- The two conditions are grouped explicitly. AND binds tighter than OR, so
    -- the previous unparenthesised "1 = 1 AND a OR b" form would silently change
    -- meaning as soon as another AND filter was added to it.
    (unit_price <= 0 OR quantity <= 0);

-- Lists invoices whose amount is zero or negative.
-- Expected result: an empty set.
SELECT
    inv.supplier_id,
    inv.invoice_amount
FROM INVOICE AS inv
WHERE inv.invoice_amount <= 0;