-
Notifications
You must be signed in to change notification settings - Fork 77
/
Copy pathfit_tsne.py
70 lines (50 loc) · 2.04 KB
/
fit_tsne.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import time
import random
random.seed(67)
import numpy as np
np.random.seed(67)
import pandas as pd
from tsne import bh_sne
from sklearn.preprocessing import PolynomialFeatures
def save_tsne(perplexity, dimensions=2, polynomial=False):
    """Embed the train/valid/test features with ``bh_sne`` and save the result.

    The three CSV files under ``data/`` are loaded and their feature columns
    (every column of the training file except the last one, which is the
    target) are stacked row-wise in train, valid, test order. The stacked
    matrix is embedded with a single ``bh_sne`` call, then cut back into one
    array per split and written to a ``.npz`` archive under ``data/`` with
    the keys ``train``, ``valid`` and ``test``.

    Args:
        perplexity: Perplexity handed to ``bh_sne`` (converted to ``float``);
            also formatted as-is into the output file name.
        dimensions: Passed to ``bh_sne`` as ``d``; also part of the output
            file name.
        polynomial: When true, the stacked features are expanded with
            ``PolynomialFeatures(degree=2)`` before embedding and the output
            file name gets a ``_poly`` suffix.

    Raises:
        AssertionError: If a slice of the embedding does not have the same
            number of rows as the split it belongs to.
    """
    df_train = pd.read_csv('data/train_data.csv')
    df_valid = pd.read_csv('data/valid_data.csv')
    df_test = pd.read_csv('data/test_data.csv')

    # Only the features are embedded; the trailing target column is never
    # read, so the validation/test files are not required to contain it.
    feature_cols = list(df_train.columns[:-1])
    X_train = df_train[feature_cols].values
    X_valid = df_valid[feature_cols].values
    X_test = df_test[feature_cols].values

    X_all = np.concatenate([X_train, X_valid, X_test], axis=0)

    if polynomial:
        poly = PolynomialFeatures(degree=2)
        X_all = poly.fit_transform(X_all)

    print('Running TSNE (perplexity: {}, dimensions: {}, polynomial: {})...'.format(perplexity, dimensions, polynomial))
    start_time = time.time()
    tsne_all = bh_sne(X_all, d=dimensions, perplexity=float(perplexity))
    print('TSNE: {}s'.format(time.time() - start_time))

    # Row offsets of each split inside the stacked matrix, computed once.
    n_train = X_train.shape[0]
    n_valid = X_valid.shape[0]
    n_test = X_test.shape[0]
    valid_end = n_train + n_valid
    test_end = valid_end + n_test

    tsne_train = tsne_all[:n_train]
    tsne_valid = tsne_all[n_train:valid_end]
    tsne_test = tsne_all[valid_end:test_end]

    # Sanity checks: the embedding must contain one row per input row.
    assert len(tsne_train) == n_train
    assert len(tsne_valid) == n_valid
    assert len(tsne_test) == n_test

    suffix = '_poly' if polynomial else ''
    save_path = 'data/tsne_{}d_{}p{}.npz'.format(dimensions, perplexity, suffix)

    np.savez(save_path, train=tsne_train, valid=tsne_valid, test=tsne_test)
    print('Saved: {}'.format(save_path))
def main():
    """Save a polynomial-feature t-SNE embedding for each perplexity setting."""
    perplexities = (10, 20, 40, 50)
    for value in perplexities:
        save_tsne(value, polynomial=True)
# Run the embedding jobs only when executed as a script, not on import.
if __name__ == '__main__':
    main()