-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathlearners.py
152 lines (116 loc) · 3.85 KB
/
learners.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
from abc import ABCMeta, abstractmethod
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
import numpy as np
class AbstractLearner(metaclass=ABCMeta):
    """
    Interface for all learning models. Extend this class to create new learners.

    Bug fix: the original set ``__metaclass__ = ABCMeta`` inside the class
    body, which is Python-2 syntax and is silently ignored on Python 3 —
    meaning @abstractmethod was never enforced and the "abstract" base could
    be instantiated. Declaring the metaclass in the class statement restores
    enforcement: subclasses must implement both train() and predict() before
    they can be instantiated.
    """

    @abstractmethod
    def train(self, X, Y):
        """
        Trains our model.

        Args
        ----
        X : feature matrix. Each row is a feature vector of a datum
            (e.g. feature vector for a comment)
        Y : value vector. Each entry corresponds to the value associated
            with the corresponding row in X.
            num rows of X == length of Y

        Returns
        -------
        Does NOT return anything. train() will have side effects on the object
        and maintain state so that predict() can be called.
        """
        pass

    @abstractmethod
    def predict(self, X):
        """
        Predicts the values for the feature matrix X. This MUST be called
        after train().

        Args
        ----
        X : feature matrix. Each row is a feature vector of a datum

        Returns
        -------
        Y : predicted values for each row of X
        """
        pass
class GaussianNBLearner(AbstractLearner):
    """
    Gaussian Naive Bayes Learner
    http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html

    GaussianNB expects dense arrays, so sparse inputs (anything exposing a
    ``toarray`` method) are densified before being handed to scikit-learn.
    """

    def __init__(self):
        self.nb = GaussianNB()

    @staticmethod
    def _dense(X):
        """Return a dense version of X; scipy sparse matrices expose toarray()."""
        return X.toarray() if hasattr(X, 'toarray') else X

    def train(self, X, Y):
        """Fit the model on feature matrix X and value vector Y."""
        self.nb.fit(self._dense(X), Y)

    def predict(self, X):
        """Return predicted values for each row of X."""
        return self.nb.predict(self._dense(X))

    def score(self, X, Y):
        """Return the mean absolute error of predictions on X against Y.

        Bug fix: the original called ``self.nb.predict(X)`` directly,
        skipping the densification that train()/predict() perform, so
        score() crashed on sparse X. Routing through predict() keeps the
        input handling consistent.
        """
        return np.mean(np.abs(self.predict(X) - np.array(Y)))
class SVMLearner(AbstractLearner):
    """
    Support-vector regression learner (continuous labels, as opposed to
    the discrete labels of a classifier).

    A thin wrapper around scikit-learn's SVR; all keyword arguments are
    forwarded untouched to the underlying estimator.
    http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html
    """

    def __init__(self, **kwargs):
        self.svr = SVR(**kwargs)

    def train(self, X, Y):
        """Fit the SVR model on feature matrix X and value vector Y."""
        self.svr.fit(X, Y)

    def predict(self, X):
        """Return the SVR's predicted value for each row of X."""
        return self.svr.predict(X)
class KNeighborsLearner(AbstractLearner):
    """
    k-nearest-neighbors regression learner.

    Delegates entirely to scikit-learn's KNeighborsRegressor; keyword
    arguments (e.g. ``n_neighbors``) are forwarded to the estimator.
    """

    def __init__(self, **kwargs):
        self.knn = KNeighborsRegressor(**kwargs)

    def train(self, X, Y):
        """Fit the k-NN model on feature matrix X and value vector Y."""
        self.knn.fit(X, Y)

    def predict(self, X):
        """Return the k-NN prediction for each row of X."""
        return self.knn.predict(X)
class MultiNBLearner(AbstractLearner):
    """
    Multinomial Naive Bayes Learner
    http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.MultinomialNB.html

    MultinomialNB is a classifier, so continuous target values (assumed to
    lie in [0, 1] — TODO confirm with callers) are discretized into
    ``nbuckets`` buckets before fitting.
    """

    def __init__(self, nbuckets, **kwargs):
        # Number of discrete buckets the continuous targets are mapped into.
        self.nb = MultinomialNB(**kwargs)
        self.nbuckets = nbuckets

    def train(self, X, Y):
        """Fit the classifier after mapping each y to floor(y * nbuckets).

        The original built the bucket labels with an index-by-index Python
        loop; a single vectorized np.floor produces identical values (and
        the same float dtype) in one pass.

        NOTE(review): y == 1.0 lands in bucket ``nbuckets``, one past the
        0..nbuckets-1 range the other values occupy — preserved as-is;
        confirm whether it should be clamped.
        """
        buckets = np.floor(np.asarray(Y) * self.nbuckets)
        self.nb.fit(X, buckets)

    def predict(self, X):
        """Return the predicted bucket label for each row of X."""
        return self.nb.predict(X)
class DecisionTreeLearner(AbstractLearner):
    """
    Decision Tree Regressor
    http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html

    Inputs that expose a ``toarray`` method (scipy sparse matrices) are
    converted to dense arrays before being passed to scikit-learn.
    """

    def __init__(self, **kwargs):
        self.tree = DecisionTreeRegressor(**kwargs)

    def train(self, X, Y):
        """Fit the regressor on (X, Y), densifying X if it is sparse."""
        features = X.toarray() if hasattr(X, 'toarray') else X
        self.tree.fit(features, Y)

    def predict(self, X):
        """Return a prediction for each row of X, densifying X if sparse."""
        features = X.toarray() if hasattr(X, 'toarray') else X
        return self.tree.predict(features)