-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmuffinvscupcake.py
65 lines (50 loc) · 2.2 KB
/
muffinvscupcake.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import pandas as pd
import numpy as np
from sklearn import svm
import matplotlib.pyplot as plt
import seaborn as sns; sns.set(font_scale=1.2)
# Load the recipe data set from CSV.
recipes = pd.read_csv('recipes_muffins_cupcakes.csv')

# Plot the data with two ingredients as the two axes, coloured by 'Type'.
# x and y are passed by keyword because recent seaborn releases no longer
# accept them positionally; the keyword form also works on older releases.
sns.lmplot(x='Flour', y='Sugar', data=recipes, hue='Type',
           palette='Set1', fit_reg=False, scatter_kws={"s": 70})

# Model inputs.
# DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same
# NumPy array and is available on both old and new pandas versions.
ingredients = recipes[['Flour', 'Sugar']].values
# Encode the label: 0 where Type is 'Muffin', 1 for anything else.
type_label = np.where(recipes['Type'] == 'Muffin', 0, 1)
# Names of every column after the first one.
recipe_features = recipes.columns.values[1:].tolist()
# Fit a linear-kernel SVC (sklearn) on the two ingredient columns.
# fit() returns the estimator itself, so construction and fitting are chained.
model = svm.SVC(kernel='linear').fit(ingredients, type_label)

# Visualise the hyperplanes.
# The separating hyperplane is expressed as the line yy = a * xx + offset,
# evaluated over a fixed range of x values.
w = model.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(30, 60)
yy = a * xx - model.intercept_[0] / w[1]


def _parallel_through(point):
    """Return the line of slope ``a`` through ``point``, evaluated on ``xx``."""
    return a * xx + (point[1] - a * point[0])


# Parallels to the separating hyperplane that pass through the first and the
# last support vector reported by the model.
b = model.support_vectors_[0]
yy_down = _parallel_through(b)
b = model.support_vectors_[-1]
yy_up = _parallel_through(b)
# Plot the data again with the separating hyperplane drawn on top.
# x and y are passed by keyword because recent seaborn releases no longer
# accept them positionally.
sns.lmplot(x='Flour', y='Sugar', data=recipes, hue='Type', palette='Set1',
           fit_reg=False, scatter_kws={"s": 70})
plt.plot(xx, yy, linewidth=2, color='black')

# The best hyperplane is the one with the maximum margin: draw it together
# with the two dashed parallels through the support vectors.
sns.lmplot(x='Flour', y='Sugar', data=recipes, hue='Type', palette='Set1',
           fit_reg=False, scatter_kws={"s": 70})
plt.plot(xx, yy, linewidth=2, color='black')
plt.plot(xx, yy_down, 'k--')
plt.plot(xx, yy_up, 'k--')
# Highlight the support vectors with hollow markers. An explicit edge colour
# is given because with facecolors='none' and the default edge setting the
# markers would otherwise not be drawn at all.
plt.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1],
            s=80, facecolors='none', edgecolors='k')
#predict function
def muffin_or_cupcake(flour, sugar):
    """Print 'muffin' or 'cupcake' for the given flour and sugar amounts.

    Asks the module-level ``model`` for a prediction on the single sample
    ``[flour, sugar]``. A prediction equal to 0 prints 'muffin'; any other
    prediction prints 'cupcake'. Nothing is returned.
    """
    predicted = model.predict([[flour, sugar]])
    label = 'muffin' if predicted == 0 else 'cupcake'
    print(label)
# Example: classify a recipe with 50 parts flour and 20 parts sugar.
muffin_or_cupcake(50, 20)
# According to the original author's note, this prints 'muffin'.
"""
Pickling is serialization: converting data into a form that can be stored in a file and retrieved later. In Python, pickling converts an object (list, dict, etc.) into a byte stream that contains all the information needed to reconstruct the object in another Python script.
"""