2-Hour Power Revision β Master all key patterns, solutions & theory
- Window Functions
- PARTITION BY
- Common Table Expressions (CTE)
- Joins
- Subqueries
- Aggregations & Grouping
- Theoretical Questions
- Quick Reference Patterns
Window functions perform calculations across rows without collapsing them (unlike GROUP BY).
FUNCTION() OVER (
PARTITION BY col -- Optional: divide into groups
ORDER BY col -- Optional: order within partition
)| Function | Ties | Gaps | Example Result |
|---|---|---|---|
ROW_NUMBER() |
No | N/A | 1, 2, 3, 4, 5 |
RANK() |
Yes | Yes | 1, 1, 3, 4, 4, 6 |
DENSE_RANK() |
Yes | No | 1, 1, 2, 3, 3, 4 |
NTILE(n) |
Divides into n buckets | N/A | 1, 1, 2, 2, 3, 3 |
-- LOGIC: Assign row numbers within each dept, then filter to top 3
SELECT DepartmentID, EmployeeName, Salary, EmployeeRank
FROM (
SELECT
DepartmentID,
EmployeeName,
Salary,
ROW_NUMBER() OVER (
PARTITION BY DepartmentID
ORDER BY Salary DESC
) AS EmployeeRank
FROM Employees
) ranked
WHERE EmployeeRank <= 3;π‘ Pattern: Use subquery because you can't filter window functions in WHERE directly.
-- LOGIC: Number orders by date (newest first), pick the #1
SELECT CustomerID, OrderID, OrderDate, Amount
FROM (
SELECT
CustomerID,
OrderID,
OrderDate,
Amount,
ROW_NUMBER() OVER (
PARTITION BY CustomerID
ORDER BY OrderDate DESC
) AS rn
FROM Orders
) ranked
WHERE rn = 1;-- LOGIC: RANK() gives same rank for ties, skips next
SELECT
StudentID,
StudentName,
ExamScore,
RANK() OVER (ORDER BY ExamScore DESC) AS Rank
FROM Students;Result: If two students have 95, both get rank 1, next gets rank 3.
SELECT
Category,
ProductName,
TotalSales,
RANK() OVER (
PARTITION BY Category
ORDER BY TotalSales DESC
) AS SalesRank
FROM Products;-- LOGIC: DENSE_RANK() = same rank for ties, NO gaps
SELECT
MovieID,
MovieName,
Rating,
DENSE_RANK() OVER (ORDER BY Rating DESC) AS Rank
FROM Movies;Result: Ratings 9,9,8,7 β Ranks 1,1,2,3 (not 1,1,3,4)
-- LOGIC: First aggregate, then rank
SELECT
CustomerID,
SUM(SpendAmount) AS TotalSpend,
DENSE_RANK() OVER (ORDER BY SUM(SpendAmount) DESC) AS Rank
FROM Transactions
GROUP BY CustomerID;-- LOGIC: LAG(col, n) gets value from n rows back
SELECT
Month,
Revenue,
LAG(Revenue, 1) OVER (ORDER BY Month) AS PreviousMonthRevenue
FROM MonthlyRevenue;π‘ First row will have NULL for PreviousMonthRevenue.
SELECT
EmployeeID,
EffectiveDate,
Salary,
LAG(Salary, 1) OVER (
PARTITION BY EmployeeID
ORDER BY EffectiveDate
) AS PreviousSalary
FROM EmployeeSalaries;-- LOGIC: LEAD(col, n) gets value from n rows ahead
SELECT
FlightID,
Airline,
DepartureTime,
LEAD(DepartureTime, 1) OVER (
PARTITION BY Airline
ORDER BY DepartureTime
) AS NextFlightDepartureTime
FROM Flights;SELECT
StockID,
StockDate,
ClosingPrice,
LEAD(ClosingPrice, 1) OVER (
PARTITION BY StockID
ORDER BY StockDate
) AS NextDayClosingPrice
FROM Stocks;PARTITION BY divides data into groups for window function calculations. Each partition is processed independently.
-- Without PARTITION BY: Calculation over ALL rows
AVG(salary) OVER () -- Average of entire table
-- With PARTITION BY: Calculation per group
AVG(salary) OVER (PARTITION BY department_id) -- Average per deptSELECT
DepartmentID,
EmployeeName,
Salary,
RANK() OVER (
PARTITION BY DepartmentID
ORDER BY Salary DESC
) AS RankInDepartment
FROM Employees;-- LOGIC: Running average = include all rows up to current
SELECT
CustomerID,
OrderDate,
Amount,
AVG(Amount) OVER (
PARTITION BY CustomerID
ORDER BY OrderDate
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS AverageToDate
FROM Orders;π‘ Key Frame Clauses:
ROWS UNBOUNDED PRECEDING= from start to current (running total)ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING= 3-row moving window
-- LOGIC: Compare current amount with LAG, filter increases
SELECT
CustomerID,
TransactionDate,
Amount,
PreviousAmount,
CASE WHEN Amount > PreviousAmount THEN 'Yes' ELSE 'No' END AS Increased
FROM (
SELECT
CustomerID,
TransactionDate,
Amount,
LAG(Amount, 1) OVER (
PARTITION BY CustomerID
ORDER BY TransactionDate
) AS PreviousAmount
FROM Transactions
) t
WHERE Amount > PreviousAmount;-- LOGIC: ROW_NUMBER = 1 means first purchase
SELECT
CustomerID,
PurchaseDate,
Item,
CASE WHEN rn = 1 THEN 'Yes' ELSE 'No' END AS IsFirstPurchase
FROM (
SELECT
CustomerID,
PurchaseDate,
Item,
ROW_NUMBER() OVER (
PARTITION BY CustomerID
ORDER BY PurchaseDate
) AS rn
FROM Purchases
) t;CTEs are temporary named result sets that make complex queries readable.
WITH cte_name AS (
SELECT ...
)
SELECT * FROM cte_name;Advantages over Subqueries:
- More readable
- Can reference multiple times
- Supports recursion
-- LOGIC: CTE calculates monthly totals, main query ranks & filters
WITH MonthlySpend AS (
SELECT
CustomerID,
DATE_TRUNC('month', OrderDate) AS Month,
SUM(Amount) AS TotalSpend,
ROW_NUMBER() OVER (
PARTITION BY DATE_TRUNC('month', OrderDate)
ORDER BY SUM(Amount) DESC
) AS SpendRank
FROM Orders
GROUP BY CustomerID, DATE_TRUNC('month', OrderDate)
)
SELECT CustomerID, Month, TotalSpend, SpendRank
FROM MonthlySpend
WHERE SpendRank <= 3;WITH DailyAvg AS (
SELECT
Region,
AVG(Amount) AS AvgDailySales
FROM Sales
GROUP BY Region
)
SELECT
s.SaleID,
s.SaleDate,
s.Region,
s.Amount,
d.AvgDailySales
FROM Sales s
JOIN DailyAvg d ON s.Region = d.Region
WHERE s.Amount > d.AvgDailySales * 1.5;WITH RankedTransactions AS (
SELECT
CustomerID,
Amount,
ROW_NUMBER() OVER (
PARTITION BY CustomerID
ORDER BY Amount DESC
) AS rn
FROM Transactions
)
SELECT CustomerID, Amount AS SecondHighestAmount
FROM RankedTransactions
WHERE rn = 2;WITH DeptAvg AS (
SELECT
DepartmentID,
AVG(Salary) AS AvgSalary
FROM Employees
GROUP BY DepartmentID
)
SELECT e.EmployeeID, e.DepartmentID, e.Salary, d.AvgSalary
FROM Employees e
JOIN DeptAvg d ON e.DepartmentID = d.DepartmentID
WHERE e.Salary > d.AvgSalary * 2;LEFT JOIN: [ββββββββ] RIGHT JOIN: [ββββββββ]
[ ββββ] [ββββ ]
INNER JOIN: [ ββββ] FULL OUTER: [ββββββββββββ]
[ββββ ββββ]
-- LOGIC: LEFT JOIN + check for NULL on right side
SELECT c.CustomerID, c.Name
FROM Customers c
LEFT JOIN Orders o ON c.CustomerID = o.CustomerID
WHERE o.OrderID IS NULL;π‘ Pattern: Find "orphans" = LEFT JOIN + IS NULL
-- LOGIC: Join table to itself using manager relationship
SELECT
e.EmployeeID,
e.Name AS EmployeeName,
m.Name AS ManagerName
FROM Employees e
LEFT JOIN Employees m ON e.ManagerID = m.EmployeeID;π‘ Use LEFT JOIN to include employees without managers (CEO).
SELECT p.ProductID, p.Name
FROM Products p
LEFT JOIN OrderDetails od ON p.ProductID = od.ProductID
WHERE od.ProductID IS NULL;SELECT
r.RegionName,
MAX(s.Amount) AS HighestSale
FROM Sales s
JOIN Regions r ON s.Region = r.RegionID
GROUP BY r.RegionName;SELECT
c.Name AS CategoryName,
SUM(s.Quantity * s.Price) AS TotalRevenue
FROM Sales s
JOIN Products p ON s.ProductID = p.ProductID
JOIN Categories c ON p.CategoryID = c.CategoryID
GROUP BY c.Name;SELECT
s.Name AS StudentName,
c.Title AS CourseName
FROM Students s
JOIN Enrollments e ON s.StudentID = e.StudentID
JOIN Courses c ON e.CourseID = c.CourseID
ORDER BY s.Name, c.Title;SELECT
o.OrderID,
o.CustomerID,
o.OrderDate,
p.Name AS ProductName,
oi.Quantity,
p.Price,
(oi.Quantity * p.Price) AS TotalCost
FROM Orders o
JOIN OrderItems oi ON o.OrderID = oi.OrderID
JOIN Products p ON oi.ProductID = p.ProductID;| Placement | Returns | Example |
|---|---|---|
| SELECT | Scalar (1 value) | (SELECT MAX(sal) FROM emp) |
| FROM | Table (derived table) | FROM (SELECT ...) AS t |
| WHERE | Value or list | WHERE id IN (SELECT ...) |
-- LOGIC: Compare total to overall average
SELECT CustomerID, SUM(Amount) AS TotalSpend
FROM Orders
GROUP BY CustomerID
HAVING SUM(Amount) > (
SELECT AVG(TotalSpend)
FROM (
SELECT SUM(Amount) AS TotalSpend
FROM Orders
GROUP BY CustomerID
) t
);-- Method 1: Subquery with MAX
SELECT MAX(Salary) AS SecondHighest
FROM Employees
WHERE Salary < (SELECT MAX(Salary) FROM Employees);
-- Method 2: LIMIT OFFSET
SELECT DISTINCT Salary
FROM Employees
ORDER BY Salary DESC
LIMIT 1 OFFSET 1;
-- Method 3: DENSE_RANK
SELECT Salary
FROM (
SELECT Salary, DENSE_RANK() OVER (ORDER BY Salary DESC) AS rn
FROM Employees
) t
WHERE rn = 2;-- Method 1: NOT IN
SELECT ProductID
FROM Products
WHERE ProductID NOT IN (SELECT ProductID FROM OrderItems);
-- Method 2: NOT EXISTS (preferred for NULLs)
SELECT ProductID
FROM Products p
WHERE NOT EXISTS (
SELECT 1 FROM OrderItems oi WHERE oi.ProductID = p.ProductID
);-- LOGIC: For each row, check if salary = max in that dept
SELECT EmployeeID, DepartmentID, Salary
FROM Employees e
WHERE Salary = (
SELECT MAX(Salary)
FROM Employees
WHERE DepartmentID = e.DepartmentID
);SELECT CustomerID
FROM Orders
GROUP BY CustomerID
HAVING COUNT(*) > 1;SELECT ProductID, Price
FROM Products
WHERE Price > (SELECT AVG(Price) FROM Products);-- LOGIC: Find 90th percentile cutoff, filter above it
SELECT EmployeeID, Salary
FROM Employees
WHERE Salary >= (
SELECT PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY Salary)
FROM Employees
);
-- Alternative with subquery:
SELECT EmployeeID, Salary
FROM Employees e
WHERE (
SELECT COUNT(*) FROM Employees WHERE Salary > e.Salary
) < (SELECT COUNT(*) * 0.1 FROM Employees);-- LOGIC: Use double NOT EXISTS (relational division)
SELECT c.CustomerID
FROM Customers c
WHERE NOT EXISTS (
SELECT p.ProductID
FROM Products p
WHERE NOT EXISTS (
SELECT 1
FROM Orders o
WHERE o.CustomerID = c.CustomerID
AND o.ProductID = p.ProductID
)
);π‘ Translation: "No product exists that this customer hasn't ordered"
SELECT DISTINCT c.City
FROM Customers c
WHERE NOT EXISTS (
SELECT 1
FROM Orders o
WHERE o.CustomerID = c.CustomerID
);SELECT DepartmentID
FROM Employees
GROUP BY DepartmentID
HAVING COUNT(*) = 1;| Function | Purpose | NULL handling |
|---|---|---|
COUNT(*) |
Count all rows | Includes NULLs |
COUNT(col) |
Count non-NULL | Excludes NULLs |
SUM(col) |
Total | Ignores NULLs |
AVG(col) |
Average | Ignores NULLs |
MIN/MAX(col) |
Min/Max | Ignores NULLs |
SELECT
ProductID,
SUM(Quantity * Price) AS TotalRevenue
FROM OrderItems
GROUP BY ProductID;SELECT
CustomerID,
AVG(Amount) AS AvgOrderValue
FROM Orders
GROUP BY CustomerID;SELECT
DATE_TRUNC('month', OrderDate) AS Month,
COUNT(*) AS OrderCount
FROM Orders
GROUP BY DATE_TRUNC('month', OrderDate)
ORDER BY Month;SELECT CustomerID, SUM(Amount) AS TotalSpend
FROM Orders
GROUP BY CustomerID
ORDER BY TotalSpend DESC
LIMIT 1;SELECT CategoryID, COUNT(*) AS ProductCount
FROM Products
GROUP BY CategoryID;SELECT
CustomerID,
EXTRACT(YEAR FROM OrderDate) AS OrderYear,
COUNT(*) AS OrderCount
FROM Orders
GROUP BY CustomerID, EXTRACT(YEAR FROM OrderDate);SELECT
StoreID,
SaleDate,
AVG(Amount) AS AvgDailySales
FROM Sales
GROUP BY StoreID, SaleDate;WITH MonthlyProductCounts AS (
SELECT
DATE_TRUNC('month', o.OrderDate) AS Month,
oi.ProductID,
COUNT(*) AS OrderCount,
ROW_NUMBER() OVER (
PARTITION BY DATE_TRUNC('month', o.OrderDate)
ORDER BY COUNT(*) DESC
) AS rn
FROM Orders o
JOIN OrderItems oi ON o.OrderID = oi.OrderID
GROUP BY DATE_TRUNC('month', o.OrderDate), oi.ProductID
)
SELECT Month, ProductID, OrderCount
FROM MonthlyProductCounts
WHERE rn = 1;SELECT CustomerID, COUNT(*) AS OrderCount
FROM Orders
GROUP BY CustomerID
ORDER BY OrderCount DESC
LIMIT 5;SELECT c.CategoryID, c.Name
FROM Categories c
LEFT JOIN Products p ON c.CategoryID = p.CategoryID
LEFT JOIN OrderItems oi ON p.ProductID = oi.ProductID
GROUP BY c.CategoryID, c.Name
HAVING COUNT(oi.ProductID) = 0;| Join Type | Returns |
|---|---|
| INNER JOIN | Only matching rows from both tables |
| LEFT (OUTER) JOIN | All from left + matching from right |
| RIGHT (OUTER) JOIN | All from right + matching from left |
| FULL (OUTER) JOIN | All from both tables |
| CROSS JOIN | Cartesian product (all combinations) |
| SELF JOIN | Table joined to itself |
- Column(s) that uniquely identify each row
- NOT NULL + UNIQUE combined
- Each table should have exactly ONE primary key
- Example:
EmployeeID,OrderID
- Column that references a Primary Key in another table
- Creates relationships between tables
- Enforces referential integrity
- Can be NULL (unless specified otherwise)
None! They are exactly the same.
LEFT JOINis shorthand forLEFT OUTER JOIN- SQL allows
OUTERkeyword to be optional
Process of organizing data to reduce redundancy and improve integrity.
| Form | Rule |
|---|---|
| 1NF | Atomic values, no repeating groups |
| 2NF | 1NF + no partial dependencies |
| 3NF | 2NF + no transitive dependencies |
Benefits: Less redundancy, better data integrity, more flexibility
| WHERE | HAVING |
|---|---|
| Filters rows before grouping | Filters groups after grouping |
| Cannot use aggregate functions | CAN use aggregate functions |
| Runs before GROUP BY | Runs after GROUP BY |
SELECT dept, AVG(salary)
FROM employees
WHERE status = 'active' -- Row filter (before GROUP BY)
GROUP BY dept
HAVING AVG(salary) > 50000 -- Group filter (after GROUP BY)| UNION | UNION ALL |
|---|---|
| Removes duplicates | Keeps all rows |
| Slower (needs sorting) | Faster |
| Use when duplicates unwanted | Use for performance |
| Command | What it does | Rollback? | Triggers? |
|---|---|---|---|
| DELETE | Removes rows (can filter) | Yes | Yes |
| TRUNCATE | Removes ALL rows | No* | No |
| DROP | Removes entire table | No | No |
*Rollback support varies by database
- Data structure to speed up data retrieval
- Like a book index - quick lookup
- Trade-off: Faster reads, slower writes
Types:
- B-tree (default, general purpose)
- Hash (equality comparisons)
- GIN/GiST (full-text, arrays, JSON)
| Non-Correlated | Correlated |
|---|---|
| Runs once independently | Runs once per outer row |
| No reference to outer query | References outer query columns |
| Generally faster | Can be slow for large datasets |
-- Non-correlated (runs once)
WHERE salary > (SELECT AVG(salary) FROM emp)
-- Correlated (runs per row)
WHERE salary > (SELECT AVG(salary) FROM emp e2 WHERE e2.dept = e1.dept)SELECT * FROM (
SELECT *, ROW_NUMBER() OVER (PARTITION BY group_col ORDER BY val DESC) AS rn
FROM table
) t WHERE rn <= N;SELECT col, COUNT(*)
FROM table
GROUP BY col
HAVING COUNT(*) > 1;SUM(amount) OVER (ORDER BY date ROWS UNBOUNDED PRECEDING)current_value - LAG(value) OVER (ORDER BY date) AS changeSELECT id + 1 AS gap_start
FROM table t
WHERE NOT EXISTS (SELECT 1 FROM table WHERE id = t.id + 1);-- Use date - ROW_NUMBER to create groups
SELECT *, date - ROW_NUMBER() OVER (ORDER BY date) * INTERVAL '1 day' AS grp
FROM tableSELECT
category,
SUM(CASE WHEN year = 2023 THEN amount END) AS "2023",
SUM(CASE WHEN year = 2024 THEN amount END) AS "2024"
FROM sales
GROUP BY category;SELECT id, 'col1' AS column_name, col1 AS value FROM table
UNION ALL
SELECT id, 'col2', col2 FROM table;- NOT IN with NULLs β Use NOT EXISTS instead
- Using SELECT * with GROUP BY β Only select grouped columns + aggregates
- Filtering window functions in WHERE β Use subquery
- Forgetting ORDER BY with LIMIT β Results are non-deterministic
- JOIN without proper conditions β Creates Cartesian product
- Comparing NULLs with = β Use IS NULL / IS NOT NULL
1. FROM & JOINs (Get data from tables)
2. WHERE (Filter rows)
3. GROUP BY (Create groups)
4. HAVING (Filter groups)
5. SELECT (Choose columns)
6. DISTINCT (Remove duplicates)
7. ORDER BY (Sort results)
8. LIMIT/OFFSET (Limit rows returned)
π‘ This is why you can't use column aliases in WHERE!
- Read the question carefully β Note "per department", "each customer", etc.
- Think in sets β SQL operates on sets, not loops
- Start simple β Build query step by step, test each part
- Use CTEs for clarity β Easier to debug and explain
- Handle edge cases β NULLs, ties, empty results
- Explain your logic β Interviewers care about thought process
Good luck! You've got this! π