-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpyrandvec.py
204 lines (158 loc) · 5.79 KB
/
pyrandvec.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
"""Module with methods for the generation of random vectors with a fixed sum of one."""
import random
import math
def sample(n: int, d: int, method: str = 'normalisation', shuffle: bool = False) -> list[list[float]]:
"""
Generate a list of n d-dimensional random vectors whose elements sum up to one.
Args:
n (int): desired number of vectors.
d (int): dimension.
method (str): desired method (see sample_* functions in this module).
shuffle (bool): shall each vector be randomly shuffled? Default is False.
Returns:
List of length n of d-dimensional lists.
"""
assert n >= 1
assert d >= 2
funs = {
'normalisation': sample_normalisation,
'exponential': sample_exponential,
'iterative': sample_iterative,
'trigonometric': sample_trigonometric,
'simplex': sample_simplex
}
assert method in funs.keys()
fun = funs[method]
vecs = fun(n, d)
if not shuffle:
return vecs
return list(map(lambda e: random.sample(e, k = len(e)), vecs))
def normalise(x: list[float]) -> list[float]:
"""
Normalise a list of floating point numbers.
I.e., the function divides each component of the list by its sum such that
the resulting list has sum one.
Args:
x (list[float]): Input list.
Returns:
Normalised list.
"""
return list(map(lambda e: e / sum(x), x))
def sample_normalisation(n: int, d: int) -> list[list[float]]:
"""
Generate a list of random vectors via normalisation.
The process works as follows: (1) each component is sampled from a U(0,1) distribution
and subsequently (2) each component is divided by the components' sum.
Args:
n (int): desired number of vectors.
d (int): dimension.
Returns:
List of length n of d-dimensional lists.
"""
assert n >= 1
assert d >= 2
# TODO: ugly as hell and not 'Pythonesque' at all
vecs = [None] * n
for i in range(n):
vec = [random.random() for _ in range(d)]
vecs[i] = normalise(vec)
return vecs
def sample_iterative(n: int, d: int) -> list[list[float]]:
"""
Generate a list of random vectors via an iterative approach.
More precisely, the i-th component of the rpv is sampled uniformly at random from [0, s] where s is
the sum of the 0, ..., (i-1)st components. The last component is finally (1-s). This
way it is unsured that the vectors are normalised.
Args:
n (int): desired number of vectors.
d (int): dimension.
Returns:
List of length n of d-dimensional lists.
"""
assert n >= 1
assert d >= 2
def sample_sample_iterative(d):
s = 0.0
vec = [None] * d
for j in range(d - 1):
vec[j] = random.uniform(0, 1 - s)
s += vec[j]
vec[d - 1] = 1 - s
return vec
vecs = [sample_sample_iterative(d) for _ in range(n)]
return list(vecs)
def sample_trigonometric(n: int, d: int) -> list[list[float]]:
"""
Generate a list of random vectors via a trigonometric method.
Section 5 in the following paper contains the details: Maziero, J. Generating Pseudo-Random
Discrete Probability Distributions. Brazilian Journal of Physics 45, 377–382 (2015).
https://doi.org/10.1007/s13538-015-0337-8)
Args:
n (int): desired number of vectors.
d (int): dimension.
Returns:
List of length n of d-dimensional lists.
"""
assert n >= 1
assert d >= 2
def sample_sample_trigonometric(d):
ts = [random.random() for _ in range(d - 1)]
# build vector of weights
thetas = [None] * d
thetas[0] = 3.1415 / 2 # pi/2
for j in range(d - 1, 0, -1):
thetas[j] = math.acos(math.sqrt(ts[j - 1]))
# build the pRPV
vec = [None] * d
for j in range(d - 1, -1, -1):
r = math.sin(thetas[j]) * math.sin(thetas[j])
for k in range(j + 1, d):
r = r * math.cos(thetas[k]) * math.cos(thetas[k])
vec[j] = r
return vec
return [sample_sample_trigonometric(d) for _ in range(n)]
def sample_exponential(n: int, d: int) -> list[list[float]]:
"""
Generate a list of random vectors by means of the inverse exponential distribution function.
More precisely, the i-th component of the rpv is sampled
uniformly at random from [0, s] where s is the sum of the 0, ..., (i-1)st
components. The last component is finally (1-s). This way it is unsured that
the vectors are normalised.
Args:
n (int): desired number of vectors.
d (int): dimension.
Returns:
List of length n of d-dimensional lists.
"""
assert n >= 1
assert d >= 2
vecs = []
for i in range(n):
vec = [(-1) * math.log(1 - random.random()) for _ in range(d)]
vecs.append(normalise(vec))
return vecs
def sample_simplex(n: int, d: int) -> list[list[float]]:
"""
Generate a list of random vectors via simplex sampling.
See the following paper for details on this method:
Grimme, C. Picking a Uniformly Random Point from an Arbitrary Simplex.
Technical Report. https://doi.org/10.13140/RG.2.1.3807.6968
Args:
n (int): desired number of vectors.
d (int): dimension.
Returns:
List of length n of d-dimensional lists.
"""
assert n >= 1
assert d >= 2
def sample_vector_from_unit_simplex(d):
unifs = [random.random() for _ in range(d - 1)]
unifs = sorted(unifs)
unifs2 = [0] * (d + 1)
for i in range(d - 1):
unifs2[i + 1] = unifs[i]
unifs2[d] = 1
vec = (unifs2[i] - unifs2[i - 1] for i in range(1, d + 1))
return vec
vecs = [list(sample_vector_from_unit_simplex(d)) for _ in range(n)]
return vecs