-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmeasure_efficiency.py
148 lines (107 loc) · 4.48 KB
/
measure_efficiency.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
''' compute and measure various metrics to access the quality of a sequence of points '''
from scipy.stats.stats import pearsonr
from scipy.stats.mstats import gmean
import numpy as np
# compute the correlation of sequences
def sequence_correlation(seq):
# compute correlation coefficient between every pair of sequences
cor = 0
for n in range(seq.shape[0]):
for k in range(n+1, seq.shape[0]):
cor = max(cor, np.abs(pearsonr(seq[n],seq[k])[0]))
print('maximum correlation: {:.6f}'.format(cor))
return
# compute D*-discrepancy of a 1d sequence
def compute_discrepancy(pts):
num_pts = pts.shape[0]
d_minus = np.abs(np.sort(pts) - np.arange(num_pts) / num_pts)
d_plus = np.abs(np.sort(pts) - (np.arange(num_pts) + 1) / num_pts)
d = np.amax((d_minus, d_plus))
return d
# compute D*-discrepancy // should be rewritten //
def sequence_discrepancy(seq, num_tests=10000):
d, num_points = seq.shape
a = np.linspace(0, 1, num_tests+1)[1:]
D = np.zeros(num_tests)
for n in range(num_tests):
D[n] = np.abs(np.sum(np.prod((seq < a[n]), axis=0)) / num_points - a[n]**d)
print('D*-discrepancy: {:.6f}'.format(np.max(D)))
return
# compute the matrix of distances between the points
def dist_matrix(s):
n = s.shape[0]
dst = np.zeros((n,n)) - 1
for n in range(n):
dst[n] = np.linalg.norm(s - s[n], axis=1)
dst.sort(axis=1)
return dst
# measure points density
def density(dim, obj, s, dst, num_measures=10):
# extract the number of points
num_points = s.shape[0]
# take num_measures radius values
r = np.linspace(0, 1, num_measures+1)[1:] * np.pi/2
# estimate density
dns = np.zeros((num_points,num_measures), dtype=np.int32)
for k in range(num_measures):
# find the number of points in a ball of radius r[k]
dns[:,k] = np.count_nonzero(np.where(dst < r[k], dst, 0), axis=1)
# adjust for balls
if obj=='ball':
scale = 1 - np.clip((1 - np.linalg.norm(s, axis=1))/r[k], 0, 1)
scale = 1 - scale**dim / 2
dns[:,k] = dns[:,k] / scale
return dns
def measure_coverage(dim, obj, s, dst, num_rnd=1000):
# estimate average distance to the closest neighbor
dst_cl_avg = np.mean(dst[:,1])
# generate num_rnd random points
d = (dim if obj=='ball' else dim+1)
s_rnd = np.random.randn(num_rnd, d)
s_rnd /= np.linalg.norm(s_rnd, axis=1).reshape((-1,1))
if obj in ['ball','b']:
s_rnd *= (np.random.uniform(0,1,(num_rnd,1)))**(1/d)
# find distances to the dim-th closest point
dst_rnd = np.zeros(num_rnd)
for k in range(num_rnd):
dst_rnd[k] = np.partition(np.linalg.norm(s-s_rnd[k], axis=1), dim)[dim]
# find maximal distance to the dim-th closest point
dst_rnd_max = np.max(dst_rnd)
# compute ratio
cvr_ratio = dst_cl_avg / dst_rnd_max
return cvr_ratio
# compute and display various stats
def display_stats(dim, obj, s):
num_points = s.shape[0]
# compute the distance matrix
dst = dist_matrix(s)
# average distances (this only makes sense for a sphere)
dst_avg = dst.mean(axis=0, keepdims=True)
dst_std = np.std(np.linalg.norm(dst - dst_avg, axis=1))
# average distance to the closest neighbor ratio
dst_cl = np.sort(dst[:,1])
num_avg = np.floor(dst_cl.size / 10).astype(int)
dst_cl_ratio = np.mean(dst_cl[:num_avg]) / np.mean(dst_cl[-num_avg:])
# measure and normalize density
dns = density(dim, obj, s, dst)
# #dns = dns / (np.max(dns, axis=0) + 1e-16)
# compute standard deviation of dns
dns_std = np.std(dns, axis=0)
# adjust dns_std to account for the number of points, radius, and dimensionality
dns_std = dns_std / num_points# / (np.linspace(0, 1, 11)[1:])**dim
# arithmetic and geometric means of dns_std
dns_std_am = np.mean(dns_std, axis=0)
# #dns_std_gm = gmean(dns_std, axis=0)
# measure coverage
cvr_ratio = measure_coverage(dim, obj, s, dst, num_rnd=10000)
print()
print('efficiency stats, smaller is better:')
# #print(' distances deviation: {:.6f}'.format(dst_std))
print(' density deviation : {0}'.format(dns_std))
# #print(' density gmean : {:.4f}'.format(dns_std_gm))
print(' density mean : {:.4f}'.format(dns_std_am))
print(' 1 - min_dist_ratio : {:.4f}'.format(1 - dst_cl_ratio))
print(' 1 - coverage_ratio : {:.4f}'.format(1 - cvr_ratio))
print()
stats = [dns_std_am, 1 - dst_cl_ratio, 1 - cvr_ratio]
return stats