-
Notifications
You must be signed in to change notification settings - Fork 0
/
randomForest.py
66 lines (44 loc) · 2.09 KB
/
randomForest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import random
from tqdm import tqdm
from sklearn.model_selection import GridSearchCV
# Load the model-ready feature table; the first CSV column is used as the row
# index. (An earlier revision read 'feedToModelData.csv' instead.)
regressionData = pd.read_csv('feedToModelData2.csv', index_col=0)
# Rows with any missing value are dropped, then the row order is randomised.
regressionData = regressionData.dropna()
regressionData = shuffle(regressionData)

# Features: every column from the third one onward, minus the target column.
x = regressionData[regressionData.columns[2:]]
x = x.loc[:, x.columns != 'Team1Win']
# Target: the 'Team1Win' column.
y = regressionData['Team1Win']

# Split BEFORE scaling. Fitting the scaler on the full data set would let the
# held-out rows influence the mean/variance used to transform the training
# rows (data leakage). The seed is drawn at random, so every run produces a
# different 80/20 split.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=random.randint(0, 100)
)

# Learn the scaling parameters from the training rows only and apply the same
# transformation to the test rows. Both results are NumPy arrays; `x` itself
# is left as the unscaled DataFrame.
standardScalerX = StandardScaler()
X_train = standardScalerX.fit_transform(X_train)
X_test = standardScalerX.transform(X_test)
# Exhaustive hyper-parameter search for a random forest classifier.
# The grid below has 5 * 5 * 5 * 4 = 500 combinations, each scored with
# 3-fold cross-validation on the training split.
forest = RandomForestClassifier()
n_estimators = [100, 300, 500, 800, 1200]
max_depth = [5, 8, 15, 25, 30]
min_samples_split = [2, 5, 10, 15, 100]
min_samples_leaf = [1, 2, 5, 10]
hyperF = {
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
}
# n_jobs=-1 asks for all available cores; verbose=1 prints search progress.
gridF = GridSearchCV(forest, hyperF, cv=3, verbose=1, n_jobs=-1)

# Run the search exactly once. GridSearchCV.fit returns the search object
# itself, so `clf` is the fitted `gridF`; calling fit on it again would repeat
# the entire grid search from scratch.
clf = gridF.fit(X_train, y_train)

# Predict on the held-out rows. With the default refit=True the search object
# predicts with the estimator refit on the best parameter combination.
y_pred = clf.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
# Build a per-row results table for the held-out set. X_test is a NumPy array
# at this point, so the feature columns are labelled by position (0, 1, ...).
randomForestPredictions = pd.DataFrame(X_test)
# Reuse the predictions already computed for the accuracy report instead of
# running the model over X_test a second time.
randomForestPredictions['Prediction'] = y_pred
# .values strips the Series index so the labels line up with X_test by
# position rather than being re-aligned on the original row index.
randomForestPredictions['Actual'] = y_test.values
randomForestPredictions['Correct'] = randomForestPredictions['Prediction'].eq(
    randomForestPredictions['Actual']
)
randomForestPredictions['Model'] = 'Random Forest'

# Write this model's results to its own file (overwritten on each run) ...
randomForestPredictions.to_csv('randomForestPredictions.csv')
# ... and append the same rows, without a header line, to the shared file.
randomForestPredictions.to_csv('modelPredictions.csv', mode='a', header=False)