-- RFM_sales.sql
-- RFM (Recency, Frequency, Monetary)
-- Recency - Last order date
-- Frequency - count of total orders
-- Monetary - total spend
-- show complete dataset
-- Work inside the database that holds the sales table.
USE bharath;

-- Raw look at every row and column of the dataset.
SELECT *
FROM dbo.sales_data;

-- Distinct values of the commonly used dimensions.
-- The trailing note on each query is the distinct-value count the author recorded.
SELECT DISTINCT status
FROM sales_data; -- 6

SELECT DISTINCT year_id
FROM sales_data; -- 3

SELECT DISTINCT PRODUCTLINE
FROM sales_data; --7

SELECT DISTINCT MSRP
FROM sales_data; -- 80 huge plotting

SELECT DISTINCT COUNTRY
FROM sales_data; -- 19

SELECT DISTINCT TERRITORY
FROM sales_data; -- 4

SELECT DISTINCT DEALSIZE
FROM sales_data; -- 3
-- sales of all the products
-- group the sales by products and order the data by revenue as desc
-- Total revenue per product line, highest revenue first.
SELECT
    PRODUCTLINE,
    SUM(sales) AS REVENUE
FROM sales_data
GROUP BY PRODUCTLINE
ORDER BY REVENUE DESC;

-- Total revenue per order status, highest revenue first.
SELECT
    STATUS,
    SUM(sales) AS REVENUE
FROM sales_data
GROUP BY STATUS
ORDER BY REVENUE DESC;

-- Total revenue per year, highest revenue first (shows the best sales year).
SELECT
    YEAR_ID,
    SUM(sales) AS REVENUE
FROM sales_data
GROUP BY YEAR_ID
ORDER BY REVENUE DESC;
-- 2005 - least
-- 2003 - modest
-- 2004 - highest
-- Months that have at least one sale in 2005.
SELECT DISTINCT MONTH_ID
FROM sales_data
WHERE YEAR_ID = 2005; -- sales for only 5 months

-- Months that have at least one sale in 2004, in calendar order.
SELECT DISTINCT MONTH_ID
FROM sales_data
WHERE YEAR_ID = 2004
ORDER BY MONTH_ID; -- sales for all months made it the highest sales
-- revenue based on the dealsize
-- Total revenue per deal size, highest revenue first.
SELECT
    DEALSIZE,
    SUM(sales) AS REVENUE
FROM sales_data
GROUP BY DEALSIZE
ORDER BY REVENUE DESC; -- with medium dealsize as highest
-- maximum revenue earned in each month for the 3 years
-- Revenue and row count per calendar month across all years, highest revenue first.
-- ORDER_COUNTS counts rows with a non-NULL ORDERNUMBER in each month.
SELECT
    MONTH_ID,
    SUM(sales)         AS REVENUE,
    COUNT(ORDERNUMBER) AS ORDER_COUNTS
FROM sales_data
GROUP BY MONTH_ID
ORDER BY REVENUE DESC; -- November best month of sales

-- Same breakdown restricted to 2004.
SELECT
    MONTH_ID,
    SUM(sales)         AS REVENUE,
    COUNT(ORDERNUMBER) AS ORDER_COUNTS
FROM sales_data
WHERE YEAR_ID = 2004
GROUP BY MONTH_ID
ORDER BY REVENUE DESC; -- November best month of sales

-- Same breakdown restricted to 2005.
SELECT
    MONTH_ID,
    SUM(sales)         AS REVENUE,
    COUNT(ORDERNUMBER) AS ORDER_COUNTS
FROM sales_data
WHERE YEAR_ID = 2005
GROUP BY MONTH_ID
ORDER BY REVENUE DESC; -- May is the best month of sales

-- Same breakdown restricted to 2003.
SELECT
    MONTH_ID,
    SUM(sales)         AS REVENUE,
    COUNT(ORDERNUMBER) AS ORDER_COUNTS
FROM sales_data
WHERE YEAR_ID = 2003
GROUP BY MONTH_ID
ORDER BY REVENUE DESC; -- November is the best month of sales
-- November is the best month of sales overall, with the highest number of orders
-- checking the products that are sold during this highest sales month
-- Revenue and row count per product line for November (MONTH_ID = 11),
-- highest revenue first.
SELECT
    MONTH_ID,
    PRODUCTLINE,
    SUM(sales)         AS REVENUE,
    COUNT(ORDERNUMBER) AS ORDER_COUNTS
FROM sales_data
WHERE MONTH_ID = 11 -- add YEAR_ID in where clause to see products for each year
GROUP BY
    MONTH_ID,
    PRODUCTLINE
ORDER BY REVENUE DESC;
-- Best Customer by using RFM Analysis
-- RFM (Recency-Frequency-Monetary) is an indexing technique used to segment customers based on their past purchase history
-- Recency - how long ago their last purchase was
-- Frequency - how often they purchase
-- Monetary - how much they spent
-- Best Customer from sales data
-- Latest order date in the table; the RFM views measure recency against it.
SELECT MAX(ORDERDATE)
FROM sales_data; -- max order date
--DROP TABLE IF EXISTS #RFM
--;WITH RFM AS
--(
-- SELECT CUSTOMERNAME,
-- sum(sales) AS MonetaryValue,
-- avg(sales) AS AverageMonetaryValue,
-- count(ORDERNUMBER) AS Frequencey,
-- max(ORDERDATE) AS LastOrderDate,
-- (SELECT max(ORDERDATE) FROM sales_data) AS MaxOrderDate,
-- DATEDIFF(DD, max(ORDERDATE), (SELECT max(ORDERDATE) FROM sales_data)) AS Recency -- (last-max) how long the purchase is been i.e difference of the last and max date
-- -- less recency indicated recent orders
-- -- NTILE() function helps to divide the records into equal or buckets assigning a number to each bucket from 1
-- FROM sales_data
-- GROUP BY CUSTOMERNAME
--),
--final_RFM AS
--(
-- SELECT *,
-- NTILE(4) OVER(ORDER BY Recency DESC) AS RFM_Recency,
-- NTILE(4) OVER(ORDER BY Frequencey) AS RFM_Frequencey,
-- NTILE(4) OVER(ORDER BY AverageMonetaryValue) AS RFM_Monetary
-- FROM RFM
--)
--SELECT *,
-- RFM_Recency + RFM_Frequencey + RFM_Monetary AS RFM_total,
-- CAST(RFM_Recency AS varchar) + CAST(RFM_Frequencey AS varchar) + CAST(RFM_Monetary AS varchar) RFM_Total_String
-- INTO #RFM
--FROM final_RFM
--SELECT * FROM #RFM
GO
-- RFM_C: one row per customer with the raw RFM measures.
--   MonetaryValue        - total of sales for the customer
--   AverageMonetaryValue - average of sales per row (line item) for the customer
--   Frequency            - number of distinct orders placed by the customer
--   LastOrderDate        - the customer's most recent ORDERDATE
--   MaxOrderDate         - the most recent ORDERDATE in the whole table
--   Recency              - days between LastOrderDate and MaxOrderDate;
--                          a smaller value means a more recent purchase
-- CREATE VIEW must be the first statement in its batch, so the definition is
-- fenced with GO batch separators (recognized by SSMS / sqlcmd).
CREATE VIEW RFM_C
AS
SELECT
    CUSTOMERNAME,
    SUM(sales) AS MonetaryValue,
    AVG(sales) AS AverageMonetaryValue,
    -- An order spans several rows (one per product), so count each
    -- ORDERNUMBER once instead of counting line items.
    COUNT(DISTINCT ORDERNUMBER) AS Frequency,
    MAX(ORDERDATE) AS LastOrderDate,
    (SELECT MAX(ORDERDATE) FROM sales_data) AS MaxOrderDate,
    DATEDIFF(DD, MAX(ORDERDATE), (SELECT MAX(ORDERDATE) FROM sales_data)) AS Recency
FROM sales_data
GROUP BY CUSTOMERNAME;
GO
-- DROP VIEW RFM_C
-- SELECT * FROM RFM_C -- RFM-C
GO
-- total_RFM: RFM_C plus a 1-4 quartile score for each RFM measure.
-- NTILE(4) splits the customers into four buckets numbered from 1 in the
-- given sort order:
--   RFM_Recency   - ordered by Recency DESC, so the most recent buyers land in bucket 4
--   RFM_Frequency - ordered by Frequency, so the most frequent buyers land in bucket 4
--   RFM_Monetary  - ordered by MonetaryValue, so the biggest spenders land in bucket 4
-- NOTE(review): customers tied on the ordering column may fall on either side
-- of a bucket boundary; add a tiebreaker if stable scores are required.
-- Columns are listed explicitly because a view defined with SELECT * does not
-- pick up later column changes in RFM_C until it is refreshed or recreated.
-- CREATE VIEW must be the first statement in its batch, hence the GO separators.
CREATE VIEW total_RFM
AS
SELECT
    CUSTOMERNAME,
    MonetaryValue,
    AverageMonetaryValue,
    Frequency,
    LastOrderDate,
    MaxOrderDate,
    Recency,
    NTILE(4) OVER (ORDER BY Recency DESC) AS RFM_Recency,
    NTILE(4) OVER (ORDER BY Frequency) AS RFM_Frequency,
    NTILE(4) OVER (ORDER BY MonetaryValue) AS RFM_Monetary
FROM RFM_C;
GO
-- DROP VIEW total_RFM
-- SELECT * FROM total_RFM -- total_RFM
GO
-- RFM_S: total_RFM plus two combined scores per customer.
--   RFM_total        - sum of the three quartile scores (3 to 12)
--   RFM_Total_String - the three scores concatenated as text, e.g. '444'
-- Columns are listed explicitly because a view defined with SELECT * does not
-- pick up later column changes in total_RFM until it is refreshed or recreated.
-- CREATE VIEW must be the first statement in its batch, hence the GO separators.
CREATE VIEW RFM_S
AS
SELECT
    CUSTOMERNAME,
    MonetaryValue,
    AverageMonetaryValue,
    Frequency,
    LastOrderDate,
    MaxOrderDate,
    Recency,
    RFM_Recency,
    RFM_Frequency,
    RFM_Monetary,
    RFM_Recency + RFM_Frequency + RFM_Monetary AS RFM_total,
    -- Each score comes from NTILE(4), so it is a single digit; the length is
    -- stated explicitly rather than relying on the CAST default.
    CAST(RFM_Recency AS varchar(1))
        + CAST(RFM_Frequency AS varchar(1))
        + CAST(RFM_Monetary AS varchar(1)) AS RFM_Total_String
FROM total_RFM;
GO
-- DROP VIEW RFM_S
-- Every column of the scored view, for inspection.
SELECT *
FROM RFM_S; -- RFM_S

-- Label each customer with a segment derived from the combined RFM score.
-- The first matching branch wins; a NULL RFM_total yields a NULL segment.
SELECT
    CUSTOMERNAME,
    RFM_Recency,
    RFM_Frequency,
    RFM_Monetary,
    CASE
        WHEN RFM_total >= 3 AND RFM_total <= 5 THEN 'Lost Customers'     -- didn't buy anything
        WHEN RFM_total >= 6 AND RFM_total <= 7 THEN 'Slipping customers' -- big buyers havent purchased lately
        WHEN RFM_total = 8 THEN 'New Customers'
        WHEN RFM_total >= 9 THEN 'Active'                                -- often buyers
    END AS RFM_Segment
FROM RFM_S;
-- Most often sold products
-- One row per order number with a comma-separated list of its product codes.
-- The list is only built for orders that have exactly 9 rows with
-- STATUS = 'shipped'; every other order gets NULL in ProductCodes.
-- FOR XML PATH('') concatenates the ',' + code values into one string and
-- STUFF(..., 1, 1, '') strips the leading comma.
SELECT DISTINCT
    s.ORDERNUMBER,
    STUFF(
        (
            SELECT ',' + p.PRODUCTCODE
            FROM sales_data AS p
            WHERE p.ORDERNUMBER IN (
                      -- orders made up of exactly 9 shipped rows
                      SELECT o.ORDERNUMBER
                      FROM sales_data AS o
                      WHERE o.STATUS = 'shipped'
                      GROUP BY o.ORDERNUMBER
                      HAVING COUNT(*) = 9
                  )
              AND p.ORDERNUMBER = s.ORDERNUMBER
            FOR XML PATH('')
        ),
        1, 1, ''
    ) AS ProductCodes
FROM sales_data AS s
ORDER BY ProductCodes DESC;