# aggregates_in_pandas_module10.py
# Exercise snippets on aggregates in pandas: column statistics, groupby, and pivot.
import codecademylib
import pandas as pd
# Load the order data and preview its first ten rows.
orders = pd.read_csv('orders.csv')
print(orders.head(n=10))

# Largest value found in the price column.
most_expensive = orders['price'].max()

# Number of distinct values found in the shoe_color column.
num_colors = orders['shoe_color'].nunique()
import codecademylib
import pandas as pd
# Reload the order data for this exercise.
orders = pd.read_csv('orders.csv')

# Highest price within each shoe_type group. No reset_index is applied here,
# so the result stays indexed by shoe_type.
pricey_shoes = orders.groupby(by='shoe_type')['price'].max()

# Show the aggregated values, then the kind of object the aggregation returned.
print(pricey_shoes)
print(type(pricey_shoes))
import codecademylib
import pandas as pd
# Reload the order data for this exercise.
orders = pd.read_csv('orders.csv')

# Highest price within each shoe_type group; reset_index moves shoe_type out
# of the index and back into an ordinary column.
pricey_shoes = (
    orders
    .groupby(by='shoe_type')['price']
    .max()
    .reset_index()
)

# Show the aggregated table, then the kind of object that was produced.
print(pricey_shoes)
print(type(pricey_shoes))
import codecademylib
import numpy as np
import pandas as pd
# Reload the order data for this exercise.
orders = pd.read_csv('orders.csv')

# 25th percentile of price within each shoe_color group, computed with
# np.percentile through apply because it is not a built-in groupby aggregate.
cheap_shoes = (
    orders
    .groupby(by='shoe_color')['price']
    .apply(lambda prices: np.percentile(prices, 25))
    .reset_index()
)

print(cheap_shoes)
import codecademylib
import numpy as np
import pandas as pd
# Reload the order data for this exercise.
orders = pd.read_csv('orders.csv')

# Count of non-null id values for every (shoe_type, shoe_color) combination,
# flattened back into regular columns by reset_index.
shoe_counts = (
    orders
    .groupby(by=['shoe_type', 'shoe_color'])['id']
    .count()
    .reset_index()
)

print(shoe_counts)
import codecademylib
import numpy as np
import pandas as pd
# Reload the order data for this exercise.
orders = pd.read_csv('orders.csv')

# Count of non-null id values for every (shoe_type, shoe_color) combination.
shoe_counts = (
    orders
    .groupby(by=['shoe_type', 'shoe_color'])['id']
    .count()
    .reset_index()
)

# Reshape the counts: one row per shoe_type, one column per shoe_color, with
# the id counts as cell values. reset_index turns shoe_type back into a column.
shoe_counts_pivot = shoe_counts.pivot(
    index='shoe_type',
    columns='shoe_color',
    values='id',
).reset_index()

print(shoe_counts_pivot)
import codecademylib
import pandas as pd
# Load the page-visit data and preview its first rows.
user_visits = pd.read_csv('page_visits.csv')
print(user_visits.head())

# Count of non-null id values for each utm_source.
click_source = (
    user_visits
    .groupby(by='utm_source')['id']
    .count()
    .reset_index()
)
print(click_source)

# The same count, broken down by utm_source and month.
click_source_by_month = (
    user_visits
    .groupby(by=['utm_source', 'month'])['id']
    .count()
    .reset_index()
)

# Reshape the breakdown: one row per utm_source, one column per month, with
# the id counts as cell values. reset_index turns utm_source back into a column.
click_source_by_month_pivot = click_source_by_month.pivot(
    index='utm_source',
    columns='month',
    values='id',
).reset_index()

print(click_source_by_month_pivot)