-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelper.py
96 lines (65 loc) · 3.19 KB
/
helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import numpy as np
def fetch_medal_tally(df,year,country):
medal_df = df.drop_duplicates(subset=['Team', 'NOC', 'Games', 'Year', 'City', 'Sport', 'Event', 'Medal'])
flag = 0
if year == 'Overall' and country == 'Overall':
temp_df = medal_df
if year == 'Overall' and country != 'Overall':
flag = 1
temp_df = medal_df[medal_df['region'] == country]
if year != 'Overall' and country == 'Overall':
temp_df = medal_df[medal_df['Year'] == int(year)]
if year != 'Overall' and country != 'Overall':
temp_df = medal_df[(medal_df['Year'] == int(year)) & (medal_df['region'] == country)]
if flag == 1:
x = temp_df.groupby('Year').sum()[['Gold', 'Silver', 'Bronze']].sort_values('Year').reset_index()
else:
x = temp_df.groupby('region').sum()[['Gold', 'Silver', 'Bronze']].sort_values('Gold',ascending=False).reset_index()
x['total'] = x['Gold'] + x['Silver'] + x['Bronze']
x['Gold'] = x['Gold'].astype('int')
x['Silver'] =x['Silver'].astype('int')
x['Bronze'] = x['Bronze'].astype('int')
x['total'] = x['total'].astype('int')
return(x)
def medal_tally(df):
medal_tally = df.drop_duplicates(subset=['Team', 'NOC', 'Games', 'Year', 'City', 'Sport', 'Event', 'Medal'])
medal_tally =medal_tally.groupby('NOC').sum()[['Gold', 'Silver', 'Bronze']].sort_values('Gold', ascending=False).reset_index()
medal_tally['total'] = medal_tally['Gold'] + medal_tally['Silver'] + medal_tally['Bronze']
medal_tally['Gold']=medal_tally['Gold'].astype('int')
medal_tally['Silver'] = medal_tally['Silver'].astype('int')
medal_tally['Bronze'] = medal_tally['Bronze'].astype('int')
medal_tally['total'] = medal_tally['total'].astype('int')
return medal_tally
def country_year_list(df):
years=df['Year'].unique().tolist()
years.sort()
years.insert(0,'Overall')
country = np.unique(df['region'].dropna().values).tolist()
country.sort()
country.insert(0, 'Overall')
return years,country
def data_over_time(df,col):
nations_over_time = df.drop_duplicates(['Year', col])['Year'].value_counts().reset_index().sort_values('Year')
return nations_over_time
def most_successful(df,sport):
temp_df=df.dropna(subset=['Medal'])
if sport !='Overall':
temp_df=temp_df[temp_df['Sport']==sport]
x= temp_df['Name'].value_counts().reset_index().head(15).merge(df,how='left')[['Name','Sport','region']].drop_duplicates()
return x
def weight_v_height(df,sport):
athlete_df = df.drop_duplicates(subset=['Name', 'region'])
athlete_df['Medal'].fillna('No Medal', inplace=True)
if sport != 'Overall':
temp_df = athlete_df[athlete_df['Sport'] == sport]
return temp_df
else:
return athlete_df
def men_vs_women(df):
athlete_df = df.drop_duplicates(subset=['Name', 'region'])
men = athlete_df[athlete_df['Sex'] == 'M'].groupby('Year').count()['Name'].reset_index()
women = athlete_df[athlete_df['Sex'] == 'F'].groupby('Year').count()['Name'].reset_index()
final = men.merge(women, on='Year', how='left')
final.rename(columns={'Name_x': 'Male', 'Name_y': 'Female'}, inplace=True)
final.fillna(0, inplace=True)
return final