-
Notifications
You must be signed in to change notification settings - Fork 0
/
A_Classification_SVM.py
112 lines (76 loc) · 2.53 KB
/
A_Classification_SVM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import plotly.plotly as py
import plotly.tools as tls
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn import (metrics, neighbors, model_selection,svm)
### dataset with audio features ####
# Estimate the classification error of an SVM (svm.SVC with gamma='scale')
# on the song-feature table with 10-fold cross validation, then print the
# mean error over the folds.

# read data (this file is space-separated)
df1 = pd.read_csv("Data/dataframe_A_classification.csv", sep=" ")
# drop the id (first column)
df1 = df1.drop(df1.columns[0], axis=1)
# features are every remaining column except the target 'chart_random'
X = np.array(df1.drop(['chart_random'], axis=1))
y = np.array(df1['chart_random'])

# 10 folds cross validation
# NOTE(review): KFold is used without shuffling, so folds follow file order;
# if the rows happen to be ordered by label, consider shuffle=True or
# StratifiedKFold -- confirm against the data before changing.
n_splits = 10
kf = model_selection.KFold(n_splits=n_splits)
err = np.zeros(n_splits)
for i, (train_index, test_index) in enumerate(kf.split(X)):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # a fresh classifier per fold so no state leaks between folds
    clf = svm.SVC(gamma='scale')
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # fraction of misclassified test samples; comparing labels directly
    # (rather than abs(y_test - y_pred)) is valid for any label encoding
    # and equals the previous value when labels are 0/1
    err[i] = np.mean(y_pred != y_test)

print("the error for the song feature is:")
print(err.mean())
#### dataset with audio analysis data ####
# Estimate the classification error of an SVM (svm.SVC with gamma='scale')
# using only the audio-analysis columns, with 10-fold cross validation, then
# print the mean error over the folds.

# read dataframe with audio analysis (comma-separated)
df1 = pd.read_csv("Data/dataframe_A_with_sa_scaled.csv", sep=",")
# drop the id (first column)
df1 = df1.drop(df1.columns[0], axis=1)
# only look at audio analysis: discard the first 14 remaining columns
# NOTE(review): presumably those 14 columns are the song features -- the
# count is hard-coded, so verify it against the CSV layout.
df1 = df1.drop(df1.columns[:14], axis=1)
# features are every remaining column except the target 'chart_random'
X = np.array(df1.drop(['chart_random'], axis=1))
y = np.array(df1['chart_random'])

# 10 folds cross validation
# NOTE(review): KFold is used without shuffling, so folds follow file order;
# if the rows happen to be ordered by label, consider shuffle=True or
# StratifiedKFold -- confirm against the data before changing.
n_splits = 10
kf = model_selection.KFold(n_splits=n_splits)
err = np.zeros(n_splits)
for i, (train_index, test_index) in enumerate(kf.split(X)):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # a fresh classifier per fold so no state leaks between folds
    clf = svm.SVC(gamma='scale')
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # fraction of misclassified test samples; comparing labels directly
    # (rather than abs(y_test - y_pred)) is valid for any label encoding
    # and equals the previous value when labels are 0/1
    err[i] = np.mean(y_pred != y_test)

print("the error for the audio analysis is:")
print(err.mean())
#### full dataset ####
# Estimate the classification error of an SVM (svm.SVC with gamma='scale')
# using every column of the scaled table except the id and the target, with
# 10-fold cross validation, then print the mean error over the folds.

# read full dataframe (comma-separated)
df1 = pd.read_csv("Data/dataframe_A_with_sa_scaled.csv", sep=",")
# drop the id (first column)
df1 = df1.drop(df1.columns[0], axis=1)
# features are every remaining column except the target 'chart_random'
X = np.array(df1.drop(['chart_random'], axis=1))
y = np.array(df1['chart_random'])

# 10 folds cross validation
# NOTE(review): KFold is used without shuffling, so folds follow file order;
# if the rows happen to be ordered by label, consider shuffle=True or
# StratifiedKFold -- confirm against the data before changing.
n_splits = 10
kf = model_selection.KFold(n_splits=n_splits)
err = np.zeros(n_splits)
for i, (train_index, test_index) in enumerate(kf.split(X)):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # a fresh classifier per fold so no state leaks between folds
    clf = svm.SVC(gamma='scale')
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # fraction of misclassified test samples; comparing labels directly
    # (rather than abs(y_test - y_pred)) is valid for any label encoding
    # and equals the previous value when labels are 0/1
    err[i] = np.mean(y_pred != y_test)

print("the error for the full dataset is:")
print(err.mean())