Skip to content

Commit e49160a

Browse files
UP V1
1 parent f75a800 commit e49160a

File tree

2 files changed

+320
-0
lines changed

2 files changed

+320
-0
lines changed

rule_based_classification.py

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
#############################################
2+
# RULE BASED CLASSIFICATION - تصنيف العملاء
3+
#############################################
4+
5+
# PRICE: صرف العميل
6+
# SOURCE: جهاز العميل
7+
# SEX: جنس العميل
8+
# COUNTRY: بلد العميل
9+
# AGE: عمر العميل
10+
11+
12+
################# BEFORE - قبل #####################
13+
14+
# PRICE SOURCE SEX COUNTRY AGE
15+
# 0 39 android male bra 17
16+
# 1 39 android male bra 17
17+
# 2 49 android male bra 17
18+
# 3 29 android male tur 17
19+
# 4 49 android male tur 17
20+
21+
22+
################# AFTER - بعد #####################
23+
24+
# customer_level_based price segment
25+
# 0 bra_android_female_0_18 35.645303 B
26+
# 1 bra_android_female_19_23 34.077340 C
27+
# 2 bra_android_female_24_30 33.863946 C
28+
# 3 bra_android_female_31_40 34.898326 B
29+
# 4 bra_android_female_41_70 36.737179 A
30+
31+
import numpy as np
32+
import pandas as pd
33+
import seaborn as sns
34+
35+
DATA_PATH = "../Data/persona.csv"
36+
df = pd.read_csv(DATA_PATH)
37+
38+
39+
################# DATA UNDERSTANDING #####################
40+
41+
# Int64Index: 5000 entries, 0 to 4999
42+
# Data columns (total 5 columns):
43+
# # Column Non-Null Count Dtype
44+
# --- ------ -------------- -----
45+
# 0 PRICE 5000 non-null int64
46+
# 1 SOURCE 5000 non-null object
47+
# 2 SEX 5000 non-null object
48+
# 3 COUNTRY 5000 non-null object
49+
# 4 AGE 5000 non-null int64
50+
51+
df.columns = ["price", "source", "sex", "country", "age"]
52+
53+
df.price.value_counts()
54+
55+
# 29 1305
56+
# 39 1260
57+
# 49 1031
58+
# 19 992
59+
# 59 212
60+
# 9 200
61+
62+
df.describe()
63+
64+
# price age
65+
# count 5000.000000 5000.000000
66+
# mean 34.132000 23.581400
67+
# std 12.464897 8.995908
68+
# min 9.000000 15.000000
69+
# 25% 29.000000 17.000000
70+
# 50% 39.000000 21.000000
71+
# 75% 39.000000 27.000000
72+
# max 59.000000 66.000000
73+
74+
df.source.unique()
75+
76+
# ['android' 'ios']
77+
78+
df.sex.value_counts()
79+
80+
# female 2621
81+
# male 2379
82+
83+
df.country.value_counts()
84+
85+
# usa 2065
86+
# bra 1496
87+
# deu 455
88+
# tur 451
89+
# fra 303
90+
# can 230
91+
92+
df.groupby("country").agg({"price":"sum"}).sort_values(by="price", ascending=False)
93+
94+
# country price
95+
# usa 70225
96+
# bra 51354
97+
# tur 15689
98+
# deu 15485
99+
# fra 10177
100+
# can 7730
101+
102+
df.groupby("country").agg({"price":"mean"}).sort_values(by="price", ascending=False)
103+
104+
# country price
105+
# tur 34.787140
106+
# bra 34.327540
107+
# deu 34.032967
108+
# usa 34.007264
109+
# can 33.608696
110+
# fra 33.587459
111+
112+
df.groupby("source").agg({"price":"mean"}).sort_values(by="price", ascending=False)
113+
114+
# source price
115+
# android 34.174849
116+
# ios 34.069102
117+
118+
df.groupby(["country", "source"]).agg({"price":"mean"}).sort_values(by="price", ascending=False)
119+
120+
# country source price
121+
# tur android 36.229437
122+
# bra android 34.387029
123+
# usa ios 34.371703
124+
# fra android 34.312500
125+
# deu ios 34.268817
126+
# bra ios 34.222222
127+
# can ios 33.951456
128+
# deu android 33.869888
129+
# usa android 33.760357
130+
# can android 33.330709
131+
# tur ios 33.272727
132+
# fra ios 32.776224
133+
134+
135+
################# DATA PROCESSING #####################
136+
137+
# Mean price for every (country, source, sex, age) combination,
# ordered from the highest to the lowest mean price.
agg_df = (
    df.groupby(["country", "source", "sex", "age"])
    .agg({"price": "mean"})
    .sort_values(by="price", ascending=False)
    .reset_index()
)
142+
143+
agg_df["new_age"] = pd.cut(agg_df["age"], bins=[0, 18, 23, 30, 40, 70], labels=['0_18', '19_23', '24_30', '31_40', '41_70'])
144+
145+
# country source sex age price new_age
146+
# 0 bra android male 46 59.0 41_70
147+
# 1 usa android male 36 59.0 31_40
148+
# 2 fra android female 24 59.0 24_30
149+
# 3 usa ios male 32 54.0 31_40
150+
# 4 deu android female 36 49.0 31_40
151+
152+
agg_df["customer_level_based"] = ["_".join(col) for col in agg_df.drop(["age", "price"], axis=1).values]
153+
154+
# country source sex age price new_age customer_level_based
155+
# 0 bra android male 46 59.0 41_70 bra_android_male_41_70
156+
# 1 usa android male 36 59.0 31_40 usa_android_male_31_40
157+
# 2 fra android female 24 59.0 24_30 fra_android_female_24_30
158+
# 3 usa ios male 32 54.0 31_40 usa_ios_male_31_40
159+
# 4 deu android female 36 49.0 31_40 deu_android_female_31_40
160+
161+
agg_df = agg_df.groupby("customer_level_based").agg({"price":"mean"}).reset_index()
162+
163+
# customer_level_based price
164+
# 0 bra_android_female_0_18 35.645303
165+
# 1 bra_android_female_19_23 34.077340
166+
# 2 bra_android_female_24_30 33.863946
167+
# 3 bra_android_female_31_40 34.898326
168+
# 4 bra_android_female_41_70 36.737179
169+
170+
agg_df["segment"] = pd.qcut(agg_df["price"], 4, labels=["D", "C", "B", "A"])
171+
172+
# customer_level_based price segment
173+
# 0 bra_android_female_0_18 35.645303 B
174+
# 1 bra_android_female_19_23 34.077340 C
175+
# 2 bra_android_female_24_30 33.863946 C
176+
# 3 bra_android_female_31_40 34.898326 B
177+
# 4 bra_android_female_41_70 36.737179 A
178+
179+
180+
new_user = 'bra_android_female_31_40'
181+
agg_df[agg_df['customer_level_based'] == new_user]
182+
183+
# customer_level_based price segment
184+
# 3 bra_android_female_31_40 34.898326 B
185+
186+
187+
############ Functionalization ############
188+
189+
def rule_based_classifier(
    dataframe,
    bins=[0, 18, 23, 30, 40, 70],
    labels=['0_18', '19_23', '24_30', '31_40', '41_70'],
    export_to_csv=False,
):
    """Build persona labels from customer rows and grade them A-D by mean price.

    Steps:
      1. Average ``price`` per (country, source, sex, age).
      2. Bucket ``age`` into ``labels`` using ``bins`` (via ``pd.cut``).
      3. Join country, source, sex and the age bucket with ``_`` into
         ``customer_level_based``.
      4. Average the per-age means within each persona label.
      5. Split the personas into price quartiles with ``pd.qcut``; the
         lowest quartile is labelled "D" and the highest "A".

    Args:
        dataframe: DataFrame with the columns ``price``, ``source``, ``sex``,
            ``country`` and ``age`` (lowercase names).
        bins: Age bin edges passed to ``pd.cut``. The default list is only
            read, never mutated.
        labels: One label per age bin, passed to ``pd.cut``.
        export_to_csv: When true, also write the result to
            ``rule_based_result.csv`` in the current working directory.

    Returns:
        DataFrame with the columns ``customer_level_based``, ``price`` and
        ``segment``, one row per persona label.

    NOTE(review): ages that fall outside ``bins`` receive no label from
    ``pd.cut``, so the string join would likely fail for them - confirm that
    callers keep ages inside the bin range. ``pd.qcut`` may also reject
    inputs with too few distinct prices to form four quantile bins.
    """
    # Aggregate the frame that was passed in. Reading the module-level `df`
    # here would silently ignore the caller's argument.
    per_age = (
        dataframe.groupby(["country", "source", "sex", "age"])
        .agg({"price": "mean"})
        .sort_values(by="price", ascending=False)
        .reset_index()
    )

    per_age["new_age"] = pd.cut(per_age["age"], bins=bins, labels=labels)

    # Remaining columns after the drop are country, source, sex, new_age,
    # in that order, which fixes the layout of the persona label.
    persona_parts = per_age.drop(["age", "price"], axis=1)
    per_age["customer_level_based"] = [
        "_".join(parts) for parts in persona_parts.values
    ]

    # Several ages collapse into one bucket, so average them per persona.
    result = (
        per_age.groupby("customer_level_based")
        .agg({"price": "mean"})
        .reset_index()
    )

    result["segment"] = pd.qcut(result["price"], 4, labels=["D", "C", "B", "A"])

    if export_to_csv:
        result.to_csv("rule_based_result.csv")

    return result
209+
210+
rule_based = rule_based_classifier(df, export_to_csv=True)

rule_based_result.csv

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
,customer_level_based,price,segment
2+
0,bra_android_female_0_18,35.645302694721295,B
3+
1,bra_android_female_19_23,34.077340379017514,C
4+
2,bra_android_female_24_30,33.86394557823129,C
5+
3,bra_android_female_31_40,34.89832643940475,B
6+
4,bra_android_female_41_70,36.73717948717949,A
7+
5,bra_android_male_0_18,34.80586076094243,B
8+
6,bra_android_male_19_23,31.673243243243245,D
9+
7,bra_android_male_24_30,33.413919413919416,C
10+
8,bra_android_male_31_40,34.327380952380956,B
11+
9,bra_android_male_41_70,40.041666666666664,A
12+
10,bra_ios_female_0_18,34.90041035353535,B
13+
11,bra_ios_female_19_23,36.40384615384615,A
14+
12,bra_ios_female_24_30,34.01587301587302,C
15+
13,bra_ios_female_31_40,29.875901875901878,D
16+
14,bra_ios_female_41_70,35.34090909090909,B
17+
15,bra_ios_male_0_18,34.66455912508544,B
18+
16,bra_ios_male_19_23,34.26604278074866,B
19+
17,bra_ios_male_24_30,33.86486486486486,C
20+
18,bra_ios_male_31_40,31.566137566137566,D
21+
19,bra_ios_male_41_70,31.08369408369408,D
22+
20,can_android_female_0_18,30.896551724137932,D
23+
21,can_android_female_19_23,32.01470588235294,D
24+
22,can_android_female_24_30,19.0,D
25+
23,can_android_female_41_70,34.0,C
26+
24,can_android_male_0_18,36.0,B
27+
25,can_android_male_19_23,40.111111111111114,A
28+
26,can_android_male_24_30,35.00961538461539,B
29+
27,can_android_male_41_70,37.57142857142857,A
30+
28,can_ios_female_0_18,31.352941176470587,D
31+
29,can_ios_female_24_30,31.727272727272727,D
32+
30,can_ios_male_0_18,36.74603174603175,A
33+
31,can_ios_male_24_30,31.17391304347826,D
34+
32,can_ios_male_31_40,29.0,D
35+
33,can_ios_male_41_70,31.0,D
36+
34,deu_android_female_0_18,30.912280701754383,D
37+
35,deu_android_female_19_23,33.333333333333336,C
38+
36,deu_android_female_24_30,33.56912878787879,C
39+
37,deu_android_female_31_40,37.333333333333336,A
40+
38,deu_android_female_41_70,36.083333333333336,A
41+
39,deu_android_male_0_18,37.2487922705314,A
42+
40,deu_android_male_19_23,36.07070707070707,A
43+
41,deu_android_male_24_30,19.0,D
44+
42,deu_android_male_31_40,35.972727272727276,B
45+
43,deu_ios_female_0_18,35.34522661523626,B
46+
44,deu_ios_female_19_23,34.0,C
47+
45,deu_ios_female_24_30,32.5,C
48+
46,deu_ios_female_31_40,33.0,C
49+
47,deu_ios_male_0_18,37.0,A
50+
48,deu_ios_male_19_23,39.0,A
51+
49,deu_ios_male_24_30,33.0,C
52+
50,deu_ios_male_31_40,31.352941176470587,D
53+
51,deu_ios_male_41_70,37.0,A
54+
52,fra_android_female_0_18,36.316958403914924,A
55+
53,fra_android_female_19_23,32.81944444444444,C
56+
54,fra_android_female_24_30,45.42857142857143,A
57+
55,fra_android_female_31_40,34.0,C
58+
56,fra_android_male_0_18,24.625,D
59+
57,fra_android_male_19_23,35.837606837606835,B
60+
58,fra_android_male_24_30,33.0,C
61+
59,fra_android_male_31_40,35.66666666666667,B
62+
60,fra_android_male_41_70,34.0,C
63+
61,fra_ios_female_0_18,31.312271062271062,D
64+
62,fra_ios_female_24_30,27.714285714285715,D
65+
63,fra_ios_female_31_40,32.81818181818181,C
66+
64,fra_ios_male_0_18,33.44444444444444,C
67+
65,fra_ios_male_19_23,39.888888888888886,A
68+
66,fra_ios_male_24_30,31.5,D
69+
67,fra_ios_male_31_40,33.6875,C
70+
68,fra_ios_male_41_70,39.0,A
71+
69,tur_android_female_0_18,37.685763888888886,A
72+
70,tur_android_female_19_23,33.44444444444445,C
73+
71,tur_android_female_24_30,30.785714285714285,D
74+
72,tur_android_female_31_40,41.83333333333333,A
75+
73,tur_android_female_41_70,34.11904761904762,B
76+
74,tur_android_male_0_18,39.87301587301587,A
77+
75,tur_android_male_19_23,27.333333333333332,D
78+
76,tur_android_male_24_30,39.43910256410256,A
79+
77,tur_android_male_31_40,29.0,D
80+
78,tur_android_male_41_70,39.0,A
81+
79,tur_ios_female_0_18,34.3921568627451,B
82+
80,tur_ios_female_19_23,36.06060606060606,A
83+
81,tur_ios_female_24_30,34.0,C
84+
82,tur_ios_female_31_40,32.333333333333336,D
85+
83,tur_ios_female_41_70,34.0,C
86+
84,tur_ios_male_0_18,35.55086848635236,B
87+
85,tur_ios_male_19_23,24.833333333333332,D
88+
86,tur_ios_male_24_30,45.0,A
89+
87,tur_ios_male_31_40,42.333333333333336,A
90+
88,tur_ios_male_41_70,25.05263157894737,D
91+
89,usa_android_female_0_18,33.649390332519474,C
92+
90,usa_android_female_19_23,33.627633891163306,C
93+
91,usa_android_female_24_30,31.269980506822616,D
94+
92,usa_android_female_31_40,32.8030303030303,C
95+
93,usa_android_female_41_70,39.25,A
96+
94,usa_android_male_0_18,34.577184614795236,B
97+
95,usa_android_male_19_23,34.27065165688917,B
98+
96,usa_android_male_24_30,31.886477667990274,D
99+
97,usa_android_male_31_40,35.904761904761905,B
100+
98,usa_android_male_41_70,34.33990414347557,B
101+
99,usa_ios_female_0_18,34.1824114487158,B
102+
100,usa_ios_female_19_23,35.37904761904762,B
103+
101,usa_ios_female_24_30,32.845238095238095,C
104+
102,usa_ios_female_31_40,28.51388888888889,D
105+
103,usa_ios_female_41_70,34.10372670807453,B
106+
104,usa_ios_male_0_18,33.983495499799844,C
107+
105,usa_ios_male_19_23,34.90187180941577,B
108+
106,usa_ios_male_24_30,34.83814333814334,B
109+
107,usa_ios_male_31_40,36.206324110671936,A
110+
108,usa_ios_male_41_70,35.75,B

0 commit comments

Comments
 (0)