-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Combined 4 first releases into a single executable
- Loading branch information
Showing
15 changed files
with
1,202 additions
and
460 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
""" | ||
Contains code for revisions 1-4 | ||
All functions return RMSE | ||
""" | ||
import pandas as pd | ||
import numpy as np | ||
from scipy import stats | ||
from sklearn import linear_model | ||
import random | ||
import math | ||
|
||
def analyze_r01(state, human, time): | ||
""" | ||
Revision 1 | ||
""" | ||
if human: | ||
DATA_CSV = 'data/'+state+'_2a.csv' | ||
else: | ||
DATA_CSV = 'data/'+state+'_2b.csv' | ||
data_points = pd.read_csv(DATA_CSV) | ||
nr1 = data_points['carb'] | ||
nr1 = nr1.iloc[1:len(nr1)] | ||
nr2 = data_points['py'] | ||
nr2 = nr2.iloc[1:len(nr2)] | ||
data_points = data_points.iloc[0:len(data_points.index)-1] | ||
nr1.index = np.arange(len(nr1.index)) | ||
nr2.index = np.arange(len(nr2.index)) | ||
if time: | ||
data_points.loc[:, 'growth'] = (nr1 - data_points['carb']) / (nr2 - data_points['py']) | ||
else: | ||
data_points.loc[:, 'growth'] = nr1 | ||
data_points.loc[:, 'post_py'] = nr2 | ||
data_points = data_points[data_points.post_py // 10000 == data_points.py // 10000] | ||
data_points.drop(['py', 'post_py'], axis=1, inplace=True) | ||
data_points.index = np.arange(len(data_points.index)) | ||
data_points = data_points.loc[:, ['carb', 'growth']] | ||
data_points = data_points.as_matrix().tolist() | ||
random.shuffle(data_points) | ||
training = data_points[0:9 * len(data_points) / 10] | ||
test = data_points[9 * len(data_points) / 10:len(data_points)] | ||
training = np.array(training) | ||
m = stats.linregress(training).slope | ||
n = stats.linregress(training).intercept | ||
sq_error = 0.0 | ||
for elem in test: | ||
predicted = m * elem[0] + n | ||
actual = elem[1] | ||
sq_error += (actual - predicted) ** 2 | ||
mse = math.sqrt(sq_error/len(test)) | ||
return mse | ||
|
||
def analyze_r02(state, human, time): | ||
""" | ||
Revision 2 | ||
""" | ||
if human: | ||
DATA_CSV = 'data/'+state+'_2a.csv' | ||
else: | ||
DATA_CSV = 'data/'+state+'_2b.csv' | ||
data_points = pd.read_csv(DATA_CSV) | ||
nr1 = data_points['carb'] | ||
nr1 = nr1.iloc[1:len(nr1)] | ||
nr2 = data_points['py'] | ||
nr2 = nr2.iloc[1:len(nr2)] | ||
data_points = data_points.iloc[0:len(data_points.index)-1] | ||
nr1.index = np.arange(len(nr1.index)) | ||
nr2.index = np.arange(len(nr2.index)) | ||
if time: | ||
data_points.loc[:, 'growth'] = (nr1 - data_points['carb']) / (nr2 - data_points['py']) | ||
else: | ||
data_points.loc[:, 'growth'] = nr1 | ||
data_points.loc[:, 'post_py'] = nr2 | ||
data_points = data_points[data_points.post_py // 10000 == data_points.py // 10000] | ||
data_points.drop(['py', 'post_py'], axis=1, inplace=True) | ||
data_points.index = np.arange(len(data_points.index)) | ||
data_points = data_points.loc[:, ['carb', 'growth']] | ||
data_points = data_points.as_matrix().tolist() | ||
N_GROUPS = 10 | ||
random.shuffle(data_points) | ||
groups = [] | ||
prev_cutoff = 0 | ||
for i in np.arange(N_GROUPS): | ||
next_cutoff = (i + 1) * len(data_points) / N_GROUPS | ||
groups.append(data_points[prev_cutoff:next_cutoff]) | ||
prev_cutoff = next_cutoff | ||
min_mse = float("inf") | ||
for i in np.arange(N_GROUPS): | ||
training = [] | ||
test = [] | ||
for j, group in enumerate(groups): | ||
if j == i: | ||
test = group | ||
else: | ||
training.extend(group) | ||
training = np.array(training) | ||
m = stats.linregress(training).slope | ||
n = stats.linregress(training).intercept | ||
sq_error = 0.0 | ||
for elem in test: | ||
predicted = m * elem[0] + n | ||
actual = elem[1] | ||
sq_error += (actual - predicted) ** 2 | ||
mse = math.sqrt(sq_error/len(test)) | ||
if mse < min_mse: | ||
min_mse = mse | ||
return min_mse | ||
|
||
def analyze_r03(state, human, time, called_as_r04=False): | ||
""" | ||
Revision 3 | ||
""" | ||
if human: | ||
DATA_CSV = 'data/'+state+'_2a.csv' | ||
else: | ||
DATA_CSV = 'data/'+state+'_2b.csv' | ||
data_points = pd.read_csv(DATA_CSV) | ||
nr1 = data_points['carb'] | ||
nr1 = nr1.iloc[1:len(nr1)] | ||
nr2 = data_points['py'] | ||
nr2 = nr2.iloc[1:len(nr2)] | ||
data_points = data_points.iloc[0:len(data_points.index)-1] | ||
nr1.index = np.arange(len(nr1.index)) | ||
nr2.index = np.arange(len(nr2.index)) | ||
if time: | ||
data_points.loc[:, 'growth'] = (nr1 - data_points['carb']) / (nr2 - data_points['py']) | ||
else: | ||
data_points.loc[:, 'growth'] = nr1 | ||
data_points.loc[:, 'post_py'] = nr2 | ||
mode = stats.mode((data_points['post_py'] - data_points['py']).tolist()).mode[0] | ||
if called_as_r04: | ||
data_points = data_points[data_points.post_py - data_points.py == mode] | ||
else: | ||
data_points = data_points[data_points.post_py // 10000 == data_points.py // 10000] | ||
data_points.drop(['py', 'post_py'], axis=1, inplace=True) | ||
data_points.index = np.arange(len(data_points.index)) | ||
data_points = data_points.loc[:, ['carb', 'lat', 'lon', 'growth']] | ||
data_points = data_points.as_matrix().tolist() | ||
for i, elem in enumerate(data_points): | ||
elem[0] = [x for x in elem] | ||
elem[0].pop() | ||
elem[1] = elem[-1] | ||
data_points[i] = elem[0:2] | ||
N_GROUPS = 10 | ||
random.shuffle(data_points) | ||
groups = [] | ||
prev_cutoff = 0 | ||
for i in np.arange(N_GROUPS): | ||
next_cutoff = (i + 1) * len(data_points) / N_GROUPS | ||
groups.append(data_points[prev_cutoff:next_cutoff]) | ||
prev_cutoff = next_cutoff | ||
min_mse = float("inf") | ||
for i in np.arange(N_GROUPS): | ||
training = [] | ||
test = [] | ||
for j, group in enumerate(groups): | ||
if j == i: | ||
test = group | ||
else: | ||
training.extend(group) | ||
training = np.array(training).T.tolist() | ||
clf = linear_model.LinearRegression() | ||
clf.fit(training[0], training[1]) | ||
coef = clf.coef_ | ||
cons = clf.intercept_ | ||
sq_error = 0.0 | ||
for elem in test: | ||
predicted = sum(coef * elem[0]) + cons | ||
actual = elem[1] | ||
sq_error += (actual - predicted) ** 2 | ||
mse = math.sqrt(sq_error/len(test)) | ||
if mse < min_mse: | ||
min_mse = mse | ||
return min_mse | ||
|
||
def analyze_r04(state, human, time): | ||
""" | ||
Revision 4 | ||
""" | ||
return analyze_r03(state, human, time, called_as_r04=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"from scipy import stats\n", | ||
"import seaborn as sns\n", | ||
"import random\n", | ||
"import math" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"def analyze_r01(state, human, time):\n", | ||
" if human:\n", | ||
" DATA_CSV = 'data/'+state+'_2a.csv'\n", | ||
" else:\n", | ||
" DATA_CSV = 'data/'+state+'_2b.csv'\n", | ||
" data_points = pd.read_csv(DATA_CSV)\n", | ||
" nr1 = data_points['carb']\n", | ||
" nr1 = nr1.iloc[1:len(nr1)]\n", | ||
" nr2 = data_points['py']\n", | ||
" nr2 = nr2.iloc[1:len(nr2)]\n", | ||
" data_points = data_points.iloc[0:len(data_points.index)-1]\n", | ||
" nr1.index = np.arange(len(nr1.index))\n", | ||
" nr2.index = np.arange(len(nr2.index))\n", | ||
" if time:\n", | ||
" data_points.loc[:,'growth'] = (nr1 - data_points['carb']) / (nr2 - data_points['py'])\n", | ||
" else:\n", | ||
" data_points.loc[:,'growth'] = nr1\n", | ||
" data_points.loc[:,'post_py'] = nr2\n", | ||
" data_points = data_points[data_points.post_py // 10000 == data_points.py // 10000]\n", | ||
" data_points.drop(['py','post_py'], axis=1, inplace=True)\n", | ||
" data_points.index = np.arange(len(data_points.index))\n", | ||
" data_points = data_points.loc[:,['carb','growth']]\n", | ||
" training = []\n", | ||
" test = []\n", | ||
" prob = 10\n", | ||
" for i, point in data_points.iterrows():\n", | ||
" if random.randint(1, prob) == 1:\n", | ||
" prob = 10\n", | ||
" test.append(point)\n", | ||
" else:\n", | ||
" prob -= 1\n", | ||
" training.append(point)\n", | ||
" training = np.array(training)\n", | ||
" m = stats.linregress(training).slope\n", | ||
" n = stats.linregress(training).intercept\n", | ||
" sq_error = 0.0\n", | ||
" for elem in test:\n", | ||
" predicted = m * elem[0] + n\n", | ||
" actual = elem[1]\n", | ||
" sq_error += (actual - predicted) ** 2\n", | ||
" mse = math.sqrt(sq_error/len(test))\n", | ||
" return mse" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"STATE = 'ME'\n", | ||
"print analyze_r01(STATE, True, True)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 2", | ||
"language": "python", | ||
"name": "python2" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 2 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython2", | ||
"version": "2.7.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
Oops, something went wrong.