-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstats_and_visuals.py
41 lines (34 loc) · 1.31 KB
/
stats_and_visuals.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
def clean_df():
    """Load ``country_db.csv`` and return a fully populated frame for analysis.

    The statistics and visualisation helpers always call this function;
    geomapping uses a rawer form of the data, for better or more complete
    visuals.

    Cleaning steps:
      1. drop every column whose values are all null,
      2. drop every row that still contains a null,
      3. convert ``gdp_per_capita`` to integers.

    Returns:
        pandas.DataFrame: the cleaned data with an integer
        ``gdp_per_capita`` column.

    Raises:
        ValueError: if a ``gdp_per_capita`` value is not an integer once its
            thousands separators are removed.
    """
    df = pd.read_csv('country_db.csv')
    df.dropna(axis=1, how='all', inplace=True)  # drop column if all vals are null
    df.dropna(inplace=True)

    gdp = df['gdp_per_capita']
    if not pd.api.types.is_numeric_dtype(gdp):
        # Text values such as "12,345": strip the thousands separators first.
        gdp = gdp.str.replace(',', '', regex=False)
    # When the CSV holds plain numbers pandas already parsed them, and calling
    # str.replace on each value would raise AttributeError; a cast is enough.
    df['gdp_per_capita'] = gdp.astype('int64')
    return df
def gen_scatter_latitude_gdp():
    """Plot GDP per capita against latitude and save the chart as a JPEG.

    A scatter plot, with marker size scaled by ``gdp_per_capita``, is overlaid
    with a seaborn regression plot on the same axes. The result is written to
    ``static/latitude_gdp_relationship.jpg``.

    The current pyplot figure is closed even when plotting or saving fails, so
    a failed call cannot leave a half-drawn figure for the next plot to draw on.
    """
    df = clean_df()
    try:
        ax = sns.scatterplot(x='latitude', y='gdp_per_capita', size="gdp_per_capita",
                             sizes=(30, 800), data=df, legend=None)
        # Draw the regression on the same axes as the scatter points.
        sns.regplot(x='latitude', y='gdp_per_capita', data=df, ax=ax)
        ax.set_xlabel('latitude of capital city')
        plt.savefig('static/latitude_gdp_relationship.jpg')
    finally:
        # No-op when no figure exists yet.
        plt.close()
def pearsonr_latitude_gdp():
    """Correlate latitude with GDP per capita on the cleaned data.

    Returns:
        tuple: ``(correlation, p_value)``, where ``correlation`` is the
        coefficient from ``scipy.stats.pearsonr`` and ``p_value`` is that
        test's p-value after passing through ``format_p_value``.
    """
    data = clean_df()
    coefficient, raw_p = stats.pearsonr(data['latitude'], data['gdp_per_capita'])
    return coefficient, format_p_value(raw_p)
def format_p_value(p_value):
    """Format a p-value for display.

    Values below a conventional significance threshold are reported as the
    tightest matching label (``'< .001'``, ``'< .01'`` or ``'< .05'``).
    Anything at or above 0.05 is returned as a number rounded to two decimals.

    Args:
        p_value: the raw p-value.

    Returns:
        str | float: a threshold label, or the rounded p-value.
    """
    # Ordered from strictest to loosest so the first match is the tightest one.
    thresholds = (
        (0.001, '< .001'),
        (0.01, '< .01'),
        (0.05, '< .05'),
    )
    for cutoff, label in thresholds:
        if p_value < cutoff:
            return label
    return round(p_value, 2)