Skip to content
Open
30 changes: 30 additions & 0 deletions calculate_largest_expensors.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
-- ============================================================
-- REPORTING LARGEST EXPENSORS
-- ============================================================
-- This query identifies employees whose total expenses exceed 1000.
-- It includes employee details, their manager's details, and their total expensed amount.
-- Results are ordered by total_expensed_amount in descending order.

SELECT
e.employee_id AS employee_id, -- Unique identifier for the employee
CONCAT(e.first_name, ' ', e.last_name) AS employee_name, -- Full name of the employee
e.manager_id AS manager_id, -- Unique identifier for the employee's manager
CONCAT(m.first_name, ' ', m.last_name) AS manager_name, -- Full name of the manager
SUM(exp.unit_price * exp.quantity) AS total_expensed_amount -- Total expenses incurred by the employee
FROM
EMPLOYEE e -- Main employee table
LEFT JOIN
EXPENSE exp ON e.employee_id = exp.employee_id -- Join with the expense table to get expense details
LEFT JOIN
EMPLOYEE m ON e.manager_id = m.employee_id -- Self-join to get the manager's details
GROUP BY
e.employee_id, e.first_name, e.last_name, -- Group by employee details
e.manager_id, m.first_name, m.last_name -- Group by manager details to ensure aggregation
HAVING
SUM(exp.unit_price * exp.quantity) > 1000 -- Filter employees with total expenses greater than 1000
ORDER BY
total_expensed_amount DESC; -- Order the results in descending order based on total expenses

-- ============================================================
-- END OF CODE
-- ============================================================
121 changes: 121 additions & 0 deletions create_employees.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
-- ============================================================
-- SETTING UP THE SCHEMA
-- ============================================================
-- Ensure you're working within the correct catalog and schema.
USE memory.default;

-- ============================================================
-- DATA LOADING IN NORMAL CONDITIONS
-- ============================================================
-- Under normal circumstances, the employee data would be loaded directly
-- from the hr/employee_index.csv file using a supported method. This could
-- include an ETL process, COPY INTO statement, or specific file-reading mechanisms
-- supported by the database engine or your ETL pipeline.

-- Example (pseudo-code):
-- COPY EMPLOYEE FROM 'path/to/hr/employee_index.csv' WITH CSV HEADER;

-- Since this example does not include direct CSV loading,
-- we will insert the data manually for demonstration purposes.

-- ============================================================
-- MANUAL DATA INSERTION
-- ============================================================
-- Inserting the data manually for demonstration purposes.
-- Below are the initial records for the EMPLOYEE table.

-- ============================================================
-- EMPLOYEE TABLE CREATION
-- ============================================================
-- The EMPLOYEE table stores information about company employees.
-- It includes unique identifiers for each employee, their names,
-- job titles, and the ID of their manager (if applicable).
-- Create the EMPLOYEE table without primary or foreign key constraints (Trino doesn't support primary keys, foreign keys, or constraints directly)

CREATE TABLE EMPLOYEE (
employee_id TINYINT NOT NULL, -- Unique identifier for each employee
first_name VARCHAR(50) NOT NULL, -- Employee's first name
last_name VARCHAR(50) NOT NULL, -- Employee's last name
job_title VARCHAR(50) NOT NULL, -- Employee's job title
manager_id TINYINT -- ID of the employee's manager
);

-- ============================================================
-- INSERTING DATA INTO EMPLOYEE TABLE
-- ============================================================
-- Below are the initial data records for the EMPLOYEE table.
-- These records represent current employees and their hierarchical relationships.

INSERT INTO EMPLOYEE (employee_id, first_name, last_name, job_title, manager_id)
VALUES
-- CEO of the company, no manager assigned.
-- IMPORTANT: CHECK THIS
-- If he is the CEO, why does he report to the CFO?? Maybe is NULL, I need to ask (It is solved in part 4)
-- Are the Chief's hierarchies well established?
(1, 'Ian', 'James', 'CEO', 4),

-- CSO (Chief Strategy Officer), reports directly to the CEO.
(2, 'Umberto', 'Torrielli', 'CSO', 1),

-- MD EMEA (Managing Director for Europe, Middle East, Africa), reports to CSO.
(3, 'Alex', 'Jacobson', 'MD EMEA', 2),

-- CFO (Chief Financial Officer), reports to CSO.
(4, 'Darren', 'Poynton', 'CFO', 2),

-- MD APAC (Managing Director for Asia-Pacific), reports to CSO.
(5, 'Tim', 'Beard', 'MD APAC', 2),

-- COS (Chief of Staff), reports directly to the CEO.
(6, 'Gemma', 'Dodd', 'COS', 1),

-- CHR (Chief Human Resources), reports to COS.
(7, 'Lisa', 'Platten', 'CHR', 6),

-- GM Activation (General Manager for Activation), reports to CSO.
(8, 'Stefano', 'Camisaca', 'GM Activation', 2),

-- MD NAM (Managing Director for North America), reports to CSO.
(9, 'Andrea', 'Ghibaudi', 'MD NAM', 2);

-- ============================================================
-- IMPLEMENTATION NOTES
-- ============================================================
-- 1. NOT NULL constraints are applied to critical columns (employee_id, first_name, last_name, job_title)
-- to prevent incomplete records.
-- 2. If you anticipate more than 255 employees, consider changing TINYINT to SMALLINT or INTEGER.

-- ============================================================
-- VALIDATIONS
-- ============================================================

-- Verify that the table exists
SHOW TABLES FROM memory.default;

-- Describe the table to check its columns and constraints
DESCRIBE memory.default.EMPLOYEE;

-- Query to find invalid manager IDs that are not present in employee_id
SELECT e.manager_id
FROM memory.default.EMPLOYEE e
LEFT JOIN memory.default.EMPLOYEE m ON e.manager_id = m.employee_id
WHERE e.manager_id IS NOT NULL AND m.employee_id IS NULL;

-- Retrieve all data from the EMPLOYEE table
SELECT * FROM memory.default.EMPLOYEE
ORDER BY employee_id;

-- Query employees with their manager details
SELECT
e.employee_id AS employee_id,
CONCAT(e.first_name, ' ', e.last_name) AS employee_name,
e.job_title AS job_title,
e.manager_id AS manager_id,
CONCAT(m.first_name, ' ', m.last_name) AS manager_name
FROM memory.default.EMPLOYEE e
LEFT JOIN memory.default.EMPLOYEE m ON e.manager_id = m.employee_id
ORDER BY e.employee_id;

-- ============================================================
-- END OF CODE
-- ============================================================
49 changes: 49 additions & 0 deletions create_expenses.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
-- ============================================================
-- EXPENSE TABLE CREATION
-- ============================================================
-- The EXPENSE table tracks expenses incurred by employees,
-- including item prices and quantities.

CREATE TABLE EXPENSE (
employee_id TINYINT NOT NULL, -- Unique identifier for the employee incurring the expense
unit_price DECIMAL(8, 2), -- Cost per unit
quantity TINYINT -- Number of units purchased
);
-- ============================================================
-- INSERTING DATA INTO THE EXPENSE TABLE
-- ============================================================
-- Insert expense records into the EXPENSE table.

INSERT INTO EXPENSE (employee_id, unit_price, quantity)
VALUES
-- Alex Jacobson's expenses
(3, 6.50, 14), -- Drinks, lots of drinks
(3, 11.00, 20), -- More Drinks
(3, 22.00, 18), -- So Many Drinks!
(3, 13.00, 75), -- I bought everyone in the bar a drink!

-- Andrea Ghibaudi's expense
(9, 300.00, 1), -- Flights from Mexico back to New York

-- Darren Poynton's expenses
(4, 40.00, 9), -- Ubers to get us all home

-- Umberto Torrielli's expense
(2, 17.50, 4); -- I had too much fun and needed something to eat

-- ============================================================
-- VALIDATIONS
-- ============================================================

-- Retrieve all data from the EXPENSE table
SELECT * FROM EXPENSE;

-- Query to find employee IDs in EXPENSE that are not present in EMPLOYEE
SELECT e.employee_id
FROM memory.default.EXPENSE e
LEFT JOIN memory.default.EMPLOYEE emp ON e.employee_id = emp.employee_id
WHERE emp.employee_id IS NULL;

-- ============================================================
-- END OF CODE
-- ============================================================
67 changes: 67 additions & 0 deletions create_invoices.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
-- ============================================================
-- SUPPLIER TABLE CREATION
-- ============================================================
-- Stores information about suppliers.

CREATE TABLE SUPPLIER (
supplier_id TINYINT NOT NULL, -- Unique identifier for each supplier
name VARCHAR NOT NULL -- Name of the supplier
);

-- Insert suppliers into SUPPLIER table
INSERT INTO SUPPLIER (supplier_id, name)
VALUES
(1, 'Catering Plus'),
(2, 'Dave\'s Discos'),
(3, 'Entertainment Tonight'),
(4, 'Ice Ice Baby'),
(5, 'Party Animals');


-- ============================================================
-- INVOICE TABLE CREATION
-- ============================================================
-- Stores information about supplier invoices.

CREATE TABLE INVOICE (
supplier_id TINYINT NOT NULL, -- Supplier ID (references SUPPLIER)
invoice_amount DECIMAL(8, 2), -- Invoice amount
due_date DATE -- Due date (last day of the specified month)
);

-- Insert invoices into INVOICE table
-- Since the invoices are from a Christmas party, I supposed that it was in December 2024, the "months from now" calculations start there.
INSERT INTO INVOICE (supplier_id, invoice_amount, due_date)
VALUES
-- Party Animals invoices
(5, 6000.00, DATE('2025-03-31')), -- 3 months from December (March 2025)

-- Catering Plus invoices
(1, 2000.00, DATE('2025-02-28')), -- 2 months from December (February 2025)
(1, 1500.00, DATE('2025-03-31')), -- 3 months from December (March 2025)

-- Dave's Discos invoice
(2, 500.00, DATE('2025-01-31')), -- 1 month from December (January 2025)

-- Entertainment Tonight invoice
(3, 6000.00, DATE('2025-03-31')), -- 3 months from December (March 2025)

-- Ice Ice Baby invoice
(4, 4000.00, DATE('2025-06-30')); -- 6 months from December (June 2025)

-- ============================================================
-- VALIDATIONS
-- ============================================================
-- Verify that the tables exists
SELECT * FROM SUPPLIER ORDER BY supplier_id;
SELECT * FROM INVOICE ORDER BY supplier_id, due_date;

-- Ensure all supplier IDs in INVOICE exist in SUPPLIER
SELECT i.supplier_id
FROM INVOICE i
LEFT JOIN SUPPLIER s ON i.supplier_id = s.supplier_id
WHERE s.supplier_id IS NULL;

-- ============================================================
-- END OF CODE
-- ============================================================
35 changes: 35 additions & 0 deletions find_manager_cycles.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
-- ============================================================
-- DETECTING CYCLES IN EMPLOYEE-MANAGER RELATIONSHIPS
-- ============================================================
-- Identifies cycles where employees approve each other's expenses.

WITH RECURSIVE ManagerCycle (employee_id, manager_id, cycle) AS (
-- Base case: Start with each employee and their direct manager.
SELECT
e.employee_id,
e.manager_id,
CAST(e.employee_id AS VARCHAR) AS cycle -- Initialize the cycle with the employee's ID
FROM EMPLOYEE e
WHERE e.manager_id IS NOT NULL -- Exclude employees without a manager

UNION ALL

-- Recursive case: Traverse the manager hierarchy to detect cycles.
SELECT
e.employee_id,
e.manager_id,
CONCAT(mc.cycle, ',', CAST(e.employee_id AS VARCHAR)) AS cycle -- Append employee ID to the cycle
FROM EMPLOYEE e
JOIN ManagerCycle mc ON e.manager_id = mc.employee_id -- Follow the management chain
WHERE mc.cycle NOT LIKE CONCAT('%', CAST(e.employee_id AS VARCHAR), '%') -- Prevent infinite loops by checking if the employee is already in the cycle
)

SELECT
mc.employee_id,
mc.cycle
FROM ManagerCycle mc
WHERE mc.employee_id = SPLIT_PART(mc.cycle, ',', 1); -- Ensure the first employee in the cycle appears in the result

-- ============================================================
-- END OF CODE
-- ============================================================
68 changes: 68 additions & 0 deletions generate_supplier_payment_plans.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
-- ============================================================
-- GENERATE MONTHLY PAYMENT PLAN FOR SUPPLIERS
-- ============================================================
-- This query calculates a monthly payment plan for suppliers,
-- ensuring payment dates are of type DATE and correspond to the last day of the month.

WITH InvoiceData AS (
-- Step 1: Aggregate invoices per supplier
-- For each supplier, calculate the total outstanding balance (sum of invoice amounts).
-- The result will have one row per supplier with their total balance.
SELECT
s.supplier_id, -- Unique identifier for the supplier
s.name AS supplier_name, -- Name of the supplier
SUM(i.invoice_amount) AS total_balance -- Total outstanding balance for the supplier
FROM
SUPPLIER s
JOIN
INVOICE i ON s.supplier_id = i.supplier_id -- Join to match invoices with their suppliers
GROUP BY
s.supplier_id, -- Group by supplier ID
s.name -- Group by supplier name
),
PaymentSchedule AS (
-- Step 2: Generate a payment schedule for each supplier
-- The schedule splits the total balance into monthly installments,
-- ensuring payments are made on the last day of each month.
SELECT
id.supplier_id, -- Supplier ID
id.supplier_name, -- Supplier name
monthly_index.index AS monthly_index, -- Payment month index, generated from SEQUENCE
CASE
WHEN monthly_index.index * 1500 >= id.total_balance THEN
-- If the cumulative payments reach or exceed the total balance:
id.total_balance - (monthly_index.index - 1) * 1500 -- Remaining balance as final payment
ELSE
1500.00 -- Standard monthly payment
END AS payment_amount, -- Amount to be paid for this month
GREATEST(id.total_balance - monthly_index.index * 1500, 0) AS balance_outstanding,
-- Calculate the remaining balance after this payment
-- Ensure the balance does not go negative using GREATEST.

DATE_ADD('day', -1,
DATE_ADD('month', monthly_index.index, DATE_TRUNC('month', DATE('2024-12-31')))
) AS payment_date -- Calculate the last day of the payment month
-- First, move forward by the number of months specified by monthly_index.
-- Then, subtract one day from the first day of the next month to find the last day of the month.
FROM
InvoiceData id
CROSS JOIN UNNEST(
SEQUENCE(1, CAST(CEIL(id.total_balance / 1500) AS BIGINT)) -- Generate a sequence of months needed to pay off the total balance
) AS monthly_index(index) -- Assign the sequence an alias (monthly_index) with a column (index)
)
-- Step 3: Output the final payment schedule
SELECT
supplier_id, -- Supplier ID
supplier_name, -- Supplier name
payment_amount, -- Payment amount for the month
balance_outstanding, -- Remaining balance after this payment
payment_date -- Date of the payment (last day of the month)
FROM
PaymentSchedule
ORDER BY
supplier_id, -- Sort by supplier ID
payment_date; -- Then sort by payment date

-- ============================================================
-- END OF CODE
-- ============================================================