-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_predicted_probs.py
94 lines (72 loc) · 2.67 KB
/
plot_predicted_probs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import os
import pickle
from scipy.special import softmax
path = os.path.dirname(os.path.abspath(__file__))+"/svmdata"
alphas_file = path+"/alphas_raw.pkl"
betas_file = path+"/betas_raw.pkl"
labels_file = path+"/labels.pkl"
with open(labels_file, "rb") as f:
labels = pickle.load(f)
with open(alphas_file,"rb") as f :
alphas = pickle.load(f)
with open(betas_file,"rb") as f :
betas = pickle.load(f)
# https://jakevdp.github.io/PythonDataScienceHandbook/05.07-support-vector-machines.html
def plot_svc_decision_function(model, ax=None, plot_support=True):
"""Plot the decision function for a 2D SVC"""
if ax is None:
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# create grid to evaluate model
x = np.linspace(xlim[0], xlim[1], 30)
y = np.linspace(ylim[0], ylim[1], 30)
Y, X = np.meshgrid(y, x)
xy = np.vstack([X.ravel(), Y.ravel()]).T
P = model.decision_function(xy).reshape(X.shape)
# plot decision boundary and margins
ax.contour(X, Y, P, colors='black',
levels=[-1, 0, 1], alpha=0.5,
linestyles=['--', '-', '--'])
# plot support vectors
if plot_support:
ax.scatter(model.support_vectors_[:, 0],
model.support_vectors_[:, 1],
s=300, linewidth=1, facecolors='none');
ax.set_xlim(xlim)
ax.set_ylim(ylim)
# alphas and betas are after softmax, so their range is [0; 1]
# get the max probability
max_alphas = [max(alpha) for alpha in alphas]
max_betas = [max(beta) for beta in betas]
X = np.array(list(zip(max_alphas,max_betas)))
xfit = np.linspace(0.0,1.0)
labels = np.array(labels)
# Plot unanswerable questions
labels_no_answer = np.argwhere(labels == -1)
X_no_answer = X[labels_no_answer]
X_no_answer = X_no_answer.reshape(X_no_answer.shape[0],2)
print(X_no_answer.shape)
plt.figure()
plt.scatter(X_no_answer[:,0],X_no_answer[:,1],s=5, color = "red", label = "unanswerable questions")
# Plot answerable questions
labels_answer = np.argwhere(labels == 1)
X_answer = X[labels_answer]
X_answer = X_answer.reshape(X_answer.shape[0],2)
print(X_answer.shape)
# plt.figure()
plt.scatter(X_answer[:,0],X_answer[:,1],s=5, c="blue", label = "answerable questions")
plt.title("Scatterplot of probabilities for predicted start and end index", fontdict = {'fontsize' : 14})
plt.legend(loc='upper left')
plt.xlabel(r"$p(predicted\_start\_index)$")
plt.ylabel(r"$p(predicted\_end\_index)$")
# Fit SVM
# model = SVC(kernel='linear',C=1.0)
# model.fit(X, labels)
# plot_svc_decision_function(model)
# score = model.score(X, labels)
# print("Score: ",score)
plt.show()