-
Notifications
You must be signed in to change notification settings - Fork 0
/
random_forest_grid_search.py
61 lines (52 loc) · 2.06 KB
/
random_forest_grid_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import sys

import numpy as np
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
# ---------------------------------------------------------------------------
# User settings: fill in both values before running the script.
# ---------------------------------------------------------------------------
TRAJECTORY_PATH = ''  # File holding the reduced trajectory, read with np.load
ncluster = None  # Insert number of macrostates


def attach_labels(features, labels):
    """Return a new array with *labels* appended to *features* as last column.

    Args:
        features: 2-D array with one row per sample.
        labels: 1-D array with one entry per row of *features*.

    Returns:
        A 2-D array of shape ``(n_samples, n_features + 1)``. NumPy promotes
        both inputs to a common dtype, so integer labels are stored as floats
        when the features are floats.

    Raises:
        ValueError: if *labels* does not hold exactly ``len(features)`` items.
    """
    label_column = np.reshape(labels, (len(features), 1))
    return np.concatenate((features, label_column), axis=1)


def split_features_labels(labeled_dataset):
    """Split an array built by ``attach_labels`` into ``(features, labels)``.

    The label is always taken from the last column and every other column is
    a feature, so the split is correct for any number of projected
    dimensions rather than only for two.

    Args:
        labeled_dataset: 2-D array whose last column holds the labels.

    Returns:
        Tuple ``(features, labels)``: a 2-D array with all but the last
        column, and a 1-D array with the last column.
    """
    return labeled_dataset[:, :-1], labeled_dataset[:, -1]


if __name__ == "__main__":
    # The template ships with placeholders; stop early with a clear message
    # instead of failing somewhere inside NumPy or scikit-learn.
    if not TRAJECTORY_PATH:
        sys.exit("Set TRAJECTORY_PATH to the reduced trajectory file.")
    if ncluster is None:
        sys.exit("Set ncluster to the number of macrostates.")

    # Load Data
    traj_total = np.load(TRAJECTORY_PATH)  # Load reduced trajectory

    # Low dimensional clustering: the cluster index of each sample is used
    # as its class label (macrostate) for the supervised step below.
    kmeans = KMeans(n_clusters=ncluster, random_state=0).fit(traj_total)
    label = kmeans.labels_

    # Data processing
    labeled_dataset = attach_labels(traj_total, label)
    # Shuffle the rows (samples) in place; features and label of a sample
    # stay together because they live in the same row.
    # NOTE: this shuffle is not seeded, so the row order differs between runs.
    np.random.shuffle(labeled_dataset)
    data, label = split_features_labels(labeled_dataset)  # Features, labels

    # Data split
    X_train, X_test, y_train, y_test = train_test_split(
        data, label, test_size=0.33, random_state=42
    )

    # Set the parameters by cross-validation
    parameters = {
        'n_estimators': [5, 10, 15, 20, 25, 30],
        'max_depth': [2, 4, 6, 8, 10, 12],
    }
    print("# Tuning hyper-parameters for accuracy")
    print()

    # Fit for all combination of parameters
    clf = GridSearchCV(
        RandomForestClassifier(),
        parameters,
        cv=5,
        scoring='accuracy',
        n_jobs=-1,
    )
    clf.fit(X_train, y_train)

    # Print results
    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print("Best accuracy score found on development set:")
    print()
    print(clf.best_score_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    # One line per parameter combination: mean CV score and twice its
    # standard deviation across the folds.
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()
    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    # Evaluate on the held-out third of the data that the search never saw.
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))