-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark_heuristic.py
71 lines (57 loc) · 2.53 KB
/
benchmark_heuristic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import numpy as np
from io import StringIO
import sklearn.neighbors
from sklearn.neighbors import NearestNeighbors
import os
import time
import subprocess
from dataset import get_dataset
from nearestneighbors import c_nearest_neighbors
from benchmark import benchmark, benchmark_dim, git_clone
import argparse
# Marker lines that the patched nn_descent binary prints around the permutation.
PERM_BEGIN_MARKER = "fwd_permutation\n"
PERM_END_MARKER = "fwd_permutation_end"

# Half-width of the sliding window; each window spans 2 * WINDOW consecutive points.
WINDOW = 1000


def parse_fwd_permutation(stdout):
    """Extract the forward permutation printed between the two marker lines.

    Args:
        stdout: Captured standard output (text) of the nn_descent run.

    Returns:
        1-D integer array holding the values found between the markers.

    Raises:
        ValueError: If either marker is missing. ``str.find`` would return -1
            in that case and the slice would silently cover unrelated output.
    """
    begin = stdout.find(PERM_BEGIN_MARKER)
    if begin < 0:
        raise ValueError("start marker 'fwd_permutation' not found in output")
    begin += len(PERM_BEGIN_MARKER)
    end = stdout.find(PERM_END_MARKER, begin)
    if end < 0:
        raise ValueError("end marker 'fwd_permutation_end' not found in output")
    # ndmin=1 keeps a single-value permutation as a 1-D array instead of 0-d.
    return np.loadtxt(StringIO(stdout[begin:end]), dtype=int, ndmin=1)


def apply_forward_permutation(X, fwd_perm):
    """Return a float array in which row ``fwd_perm[i]`` holds ``X[i]``.

    Args:
        X: Array-like of points, one per row.
        fwd_perm: Integer array-like; entry ``i`` is the destination row of
            ``X[i]``.

    Returns:
        New float64 array with the same shape as ``X``.

    Raises:
        ValueError: If ``fwd_perm`` and ``X`` have different lengths; a shorter
            permutation would otherwise leave all-zero rows in the result.
    """
    X = np.asarray(X)
    fwd_perm = np.asarray(fwd_perm)
    if len(fwd_perm) != len(X):
        raise ValueError(
            f"permutation has {len(fwd_perm)} entries but data has {len(X)} rows"
        )
    reordered = np.zeros(X.shape)
    reordered[fwd_perm] = X  # vectorized scatter: row i goes to row fwd_perm[i]
    return reordered


def sliding_cluster_frequencies(labels, clusters, window):
    """Compute the relative frequency of each cluster inside a sliding window.

    Window ``s`` covers ``labels[s : s + 2 * window]`` for
    ``s = 0 .. len(labels) - 2 * window - 1`` (the same positions the loop
    ``range(window, n - window)`` visits when centred on ``s + window``).

    Args:
        labels: Array-like of cluster ids, one per point (flattened if needed).
        clusters: Sequence of cluster ids; one output column per id, same order.
        window: Positive half-width of the window.

    Returns:
        Float array of shape ``(num_windows, len(clusters))``. ``num_windows``
        is 0 when there are not more than ``2 * window`` labels.

    Raises:
        ValueError: If ``window`` is not positive.
    """
    if window <= 0:
        raise ValueError("window must be positive")
    labels = np.asarray(labels).ravel()
    clusters = np.asarray(clusters)
    span = 2 * window
    num_windows = max(len(labels) - span, 0)

    # prefix[j, c] = number of points among labels[:j] that belong to cluster c,
    # so each window count is a difference of two prefix rows.
    hits = labels[:, None] == clusters[None, :]
    prefix = np.zeros((len(labels) + 1, len(clusters)), dtype=np.int64)
    prefix[1:] = np.cumsum(hits, axis=0)
    counts = prefix[span:span + num_windows] - prefix[:num_windows]
    return counts / span


def main():
    """Benchmark the permutation produced by the greedy reordering heuristic.

    Clones the modified nn_descent (branch 'reorder-print-perm') into tmp; that
    build prints the permutation and exits. The script then reorders a
    clustered dataset by that permutation, labels every point with its nearest
    entry of ``dataset.means`` (presumably the cluster centres), and writes
    the sliding-window cluster frequencies to 'clust_freq.txt'.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r','--repetitions', help='repetitions', default=1, type=int)
    parser.add_argument('-k', help='k', default=20, type=int)
    parser.add_argument('-dim', help='dimension of space', default=128, type=int)
    parser.add_argument('-m', '--metric', help='l2', default='l2')
    parser.add_argument('-ns', '--nstart', help='logn start', default=8, type=int)
    parser.add_argument('-ne', '--nend', help='logn end', default=18, type=int)
    parser.add_argument('-nr', '--nres', help='logn resolution', default=1, type=int)
    parser.add_argument('-ds', '--dimstart', help='dim start', default=8, type=int)
    parser.add_argument('-de', '--dimend', help='dim end', default=None, type=int)
    parser.add_argument('-dst', '--dimstep', help='dim step', default=8, type=int)
    parser.add_argument('-t', '--tag', help='single tag', default=None, type=str)
    parser.add_argument('-d', '--dataset', help='audio or gaussian', default='gaussian')
    args = parser.parse_args()
    print(args)

    # The dataset is fixed for this experiment; it is not driven by the CLI.
    num_points = 2 ** 14
    dim = 8
    num_clusters = 8
    dataset = get_dataset(data_name='clustered', n=num_points, dim=dim,
                          clusters=num_clusters, noshuffle=False)

    git_clone('reorder-print-perm')
    nn_list, _ = c_nearest_neighbors('tmp/nn_descent', dataset, args.k,
                                     args.metric, args.repetitions)
    fwd_perm = parse_fwd_permutation(nn_list[0].stdout)
    reordered = apply_forward_permutation(dataset.X, fwd_perm)

    # Label every reordered point with the index of its nearest mean.
    nbrs = NearestNeighbors(n_neighbors=1).fit(dataset.means)
    _, indices = nbrs.kneighbors(reordered)
    labels = indices.ravel()

    # Plain ints so the printed header reads the same on every NumPy version.
    cluster_ids = [int(c) for c in np.unique(labels)]
    freqs = sliding_cluster_frequencies(labels, cluster_ids, WINDOW)

    print("header: ", cluster_ids)
    np.savetxt('clust_freq.txt', freqs)


if __name__ == "__main__":
    main()