-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAAP.sql
More file actions
162 lines (140 loc) · 5.2 KB
/
AAP.sql
File metadata and controls
162 lines (140 loc) · 5.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
-- Change over time analysis
-- analyzing the sales performance over the time (yearly basis)
select
year(order_Date) as order_date_year,
sum(sales_amount) as total_sales_amount,
count(distinct customer_key) as total_Customers
from gold.fact_sales
where order_Date is not null
group by year(order_Date)
order by year(order_Date);
-- result: the sales amount and the customers were gradually increasing over the years, but the year 2014 faced a little decline than the previous year
-- cumulative analysis
-- calculating the total sales per month and the running total of sales over the time
select
*,
sum(total_Sales) over(partition by month_of_orderdate order by month_of_orderdate) as running_total_sales,
avg(average_price) over(partition by month_of_orderdate order by month_of_orderdate) as running_Average
from
(
select
datetrunc(month, order_Date) as month_of_orderdate,
sum(sales_amount) as total_Sales,
avg(price) as average_price
from gold.fact_sales
where order_date is not null
group by datetrunc(month, order_Date)
) as sub_query;
-- performance analysis
-- analyzing the yearly performance of products by comparing each products sales to both its average sale performance and the previous years sales
with yearly_product_Sales as
(
select
year(f.order_date) as year_of_orderdate,
p.product_name,
sum(f.sales_amount) as current_Sales_Amount
from gold.fact_sales f
left join gold.dim_products p
on p.product_key = f.product_key
where f.order_date is not null
group by year(f.order_date), p.product_name
)
select
year_of_orderdate,
product_name,
current_Sales_Amount,
avg(current_Sales_Amount) over(partition by product_name) as average_Sales,
current_Sales_amount - avg(current_Sales_Amount) over(partition by product_name) as difference_in_average,
case
when current_Sales_amount - avg(current_Sales_Amount) over(partition by product_name) > 0 then 'Higher than average'
when current_Sales_amount - avg(current_Sales_Amount) over(partition by product_name) < 0 then 'Below than average'
else 'Average'
end as average_status,
lag(current_Sales_Amount) over (partition by product_name order by year_of_orderdate) as previous_year_salesAmount,
current_Sales_Amount - lag(current_Sales_Amount) over (partition by product_name order by year_of_orderdate) as difference_in_salesamt_py,
case
when current_Sales_Amount - lag(current_Sales_Amount) over (partition by product_name order by year_of_orderdate) > 0 then 'Increasing'
when current_Sales_Amount - lag(current_Sales_Amount) over (partition by product_name order by year_of_orderdate) < 0 then 'Decreasing'
else 'No Change'
end as sales_py_status
from yearly_product_Sales
order by product_name, year_of_orderdate;
-- part to whole analysis
-- analyzing which category contributes the most to overall sales
with category_Sales as
(
select
p.category,
sum(f.sales_amount) as total_Sales_amount
from gold.fact_sales f
left join gold.dim_products p
on f.product_key = p.product_key
group by p.category
)
select
*,
sum(total_Sales_Amount) over() as overall_Sales,
concat(round((cast(total_Sales_Amount as float) / sum(total_Sales_Amount) over()) * 100,2), '%') as percentage_of_total
from category_Sales
order by total_Sales_amount desc;
-- result: The bike category contributes the most to the overall sales whereas the clothing category contributes the least.
-- Data Segmentation
-- segmenting products into cost ranges and count how many products fall into each segment
with product_segment as
(
select
product_key,
product_name,
cost,
case
when cost < 100 then 'Below 100'
when cost between 100 and 500 then '100-500'
when cost between 500 and 1000 then '500-1000'
else 'Above 1000'
end as Cost_range
from gold.dim_products
)
select
cost_Range,
count(*) as count_of_products
from product_segment
group by Cost_range
order by cost_Range desc;
-- result: The majority of the products falls under the below 100 costs and only 39 products costs above 1000.
-- grouping the customers into three segments based on their spending behavior.
/*
VIP : at least 12 months of history and spending more than 5000.
Regular : at least 12 months of history but spending 5000 or less.
New : lifespan less than 12 months
And find the total number of customers by each group
*/
with customer_segmentation as
(
select
c.customer_key,
sum(f.sales_amount) as total_spending,
min(order_Date) as last_orderdate,
max(order_Date) as first_orderdate,
DATEDIFF(month, min(order_Date), max(order_Date)) as cus_lifespan
from gold.fact_sales f
left join gold.dim_customers c
on f.customer_key = c.customer_key
group by c.customer_key
)
select
cus_segment,
count(customer_key) as Count_of_customer
from (
select
*,
case
when cus_lifespan >=12 and total_spending > 5000 then 'VIP'
when cus_lifespan >= 12 and total_spending <= 5000 then 'REGULAR'
when cus_lifespan < 12 then 'NEW'
end as cus_segment
from customer_segmentation
) as sub_query
where cus_segment is not null
group by cus_segment
order by Count_of_customer desc;
-- result: Majority of the customers are NEW and the REGULAR customer are least.