1
- from typing import Optional
1
+ import hashlib
2
+ from typing import Optional , Tuple
2
3
3
4
import numpy as np
4
5
from scipy import integrate , optimize , stats
5
6
from scipy .special import erfinv
6
7
8
+
9
class AnalysisException(Exception):
    """Error raised by the analysis code in this module.

    Used when the supplied inputs or the numerical results make it
    impossible to continue (for example, when the root solver does not
    converge or when sample/success counts are inconsistent).
    """
11
+
12
+
7
13
class BaseMathsTest :
8
14
9
15
# sub method to calculate the sample size per variation and the intercept
10
16
@staticmethod
11
17
def _calculate_sample_size (
12
- var_H0 : float ,
13
- mean_H1 : float ,
14
- var_H1 : float ,
15
- alpha : float ,
16
- beta : float
17
- ):
18
-
18
+ var_H0 : float , mean_H1 : float , var_H1 : float , alpha : float , beta : float
19
+ ) -> Tuple [int , float ]:
19
20
def D (T ):
20
21
if T < 0 :
21
22
return 0
@@ -30,40 +31,52 @@ def term_2(x, T):
30
31
def integrand (x , T ):
31
32
return term_1 (x , T ) * term_2 (x , T )
32
33
33
- def integral (T ):
34
+ def integral (T ) -> float :
34
35
if T <= 0 :
35
36
return 0
36
37
return integrate .quad (integrand , 0 , T , args = (T ))[0 ]
37
38
38
- def fun (T ):
39
+ def fun (T ) -> float :
39
40
return - integral (T ) - 1 + alpha
40
41
41
42
sample_size = optimize .root (fun , x0 = 100 , jac = False ).x [0 ]
42
43
sample_size_int = int (np .ceil (sample_size ))
43
44
44
45
intercept = D (sample_size )
45
46
47
+ if np .abs (fun (sample_size )) > 0.000001 :
48
+ raise AnalysisException (
49
+ "The numerical solver was not able to find a root for the provided values."
50
+ "This is an internal error that can happen with extreme values that result "
51
+ "in a very low required number of samples."
52
+ )
53
+
46
54
return (sample_size_int , intercept )
47
55
48
56
# calculates the probability that the experiment has hit the bound between the two check-ins.
49
57
@staticmethod
def _probability_of_crossing(
    intercept: float,
    mean_H1: float,
    var_H1: float,
    samples_0: int,
    successes_0: float,
    samples_increment: int,
    successes_change: float,
) -> float:
    """
    Probability that the experiment hit the bound between two check-ins.

    The bound is the straight line ``intercept + n * mean_H1`` in the number
    of samples ``n``.

    :param intercept: Intercept of the bound.
    :param mean_H1: Slope of the bound per sample.
    :param var_H1: Per-sample variance used to scale the exponent.
    :param samples_0: Number of samples at the previous check-in.
    :param successes_0: Success value at the previous check-in.
    :param samples_increment: Number of samples added since the previous check-in.
    :param successes_change: Change of the success value since the previous check-in.
    :return: ``1.0`` if the current point lies on or below the bound; otherwise
        ``exp(-2 * term_2 * (term_2 - term_1) / (samples_increment * var_H1))``.
        When ``samples_increment * var_H1`` is zero the limiting value of that
        expression is returned instead of dividing by zero.
    """
    # The current point is already on or below the bound: it has certainly been hit.
    bound_now = intercept + (samples_0 + samples_increment) * mean_H1
    if bound_now >= successes_0 + successes_change:
        return 1.0

    term_1 = -successes_change + mean_H1 * samples_increment
    # Distance of the previous point above the bound.
    term_2 = -intercept - mean_H1 * samples_0 + successes_0

    scale = samples_increment * var_H1
    if scale == 0:
        # Degenerate step (no new samples or no variance). At this point
        # (term_2 - term_1) > 0, so as scale -> 0+ the exponent tends to -inf
        # when the previous point was above the bound (probability 0);
        # otherwise the bound had already been reached (probability 1).
        return 0.0 if term_2 > 0 else 1.0

    return np.exp(-2 * term_2 * (term_2 - term_1) / scale)
69
82
@@ -72,7 +85,7 @@ def evaluate_experiment(
72
85
previous_success_delta : float ,
73
86
success_change : float ,
74
87
previous_samples_number : int ,
75
- samples_increment : int
88
+ samples_increment : int ,
76
89
):
77
90
"""
78
91
:param previous_success_delta: Difference between sum of successes of treatment and baseline at the last
@@ -81,10 +94,20 @@ def evaluate_experiment(
81
94
:param previous_samples_number: Number of samples per variation at the last check-in.
82
95
:param samples_increment: Number of samples per variation in the current batch.
83
96
"""
84
-
85
- ## TODO
86
- ## if previous_samples_number >= self.required_samples:
87
- ## throw exception
97
+ if samples_increment < 0 or previous_samples_number < 0 :
98
+ raise AnalysisException ("Number of samples cannot be less than 0" )
99
+ if (
100
+ abs (success_change ) > samples_increment
101
+ or abs (previous_success_delta ) > previous_samples_number
102
+ ):
103
+ raise AnalysisException (
104
+ "Number of successes cannot be greater than number of samples"
105
+ )
106
+ if previous_samples_number > self .required_samples :
107
+ raise AnalysisException (
108
+ "Number of samples from previous check-in is greater than required samples. "
109
+ "A conclusion (1 or -1) should already have been reached!"
110
+ )
88
111
89
112
scaled_samples_increment = samples_increment
90
113
scaled_success_change = success_change
@@ -98,14 +121,21 @@ def evaluate_experiment(
98
121
is_last_evaluation = True
99
122
100
123
crossing_probability = self ._probability_of_crossing (
101
- self .intercept , self .mean_H1 , self .var_H1 , previous_samples_number , previous_success_delta ,
102
- scaled_samples_increment , scaled_success_change
124
+ self .intercept ,
125
+ self .mean_H1 ,
126
+ self .var_H1 ,
127
+ previous_samples_number ,
128
+ previous_success_delta ,
129
+ scaled_samples_increment ,
130
+ scaled_success_change ,
103
131
)
104
132
105
133
state = 0
106
134
if stats .uniform .rvs (random_state = self .seed ) < crossing_probability :
107
135
state = - 1
108
- self .seed = int (1_000_000_000 * stats .uniform .rvs (random_state = self .seed ))
136
+ self .seed : Optional [int ] = int (
137
+ 1_000_000_000 * stats .uniform .rvs (random_state = self .seed )
138
+ )
109
139
110
140
if is_last_evaluation & (state == 0 ):
111
141
state = 1
@@ -119,12 +149,11 @@ def __init__(
119
149
alpha : float ,
120
150
beta : float ,
121
151
var_A : Optional [float ] = None ,
122
- seed : Optional [object ] = None ,
152
+ seed : Optional [str ] = None ,
123
153
):
124
154
"""
125
155
:param mean_A: The (estimated) mean value of the success metric in the control variation.
126
156
:param mde: The minimum detectable (relative) effect (MDE) we expect to see on the B side.
127
- For example: An expected 1% uplift should be passed as 0.01.
128
157
:param alpha: The alpha value, or type 1 error, to use for the test.
129
158
:param beta: The beta value, or type 2 error, to use for the test.
130
159
:param var_A: The (estimated) variance of the success metric in the control variation
@@ -137,8 +166,30 @@ def __init__(
137
166
a name or key, so that the experiment results stay consistent if the test
138
167
is performed multiple times.
139
168
"""
169
+ for value in (alpha , beta ):
170
+ if value <= 0 or value >= 1 :
171
+ raise ValueError (
172
+ f"Received invalid value of { value } . Passed values for alpha and beta should"
173
+ f"be within (0, 1)"
174
+ )
175
+ if mde <= 0 :
176
+ raise ValueError ("The minimum detectable effect must be positive!" )
177
+ if mean_A <= 0 :
178
+ raise ValueError ("mean_A must be positive!" )
179
+ if var_A is None and mean_A >= 1 :
180
+ raise ValueError (
181
+ "When variance is not passed, we assume a binary metric -- in this case, "
182
+ "the provided mean must be between 0 and 1 OR the variance must be provided."
183
+ )
184
+ if var_A is not None and var_A <= 0 :
185
+ raise ValueError ("Variance must be positive if provided!" )
140
186
self .mean_A = mean_A
141
187
self .mean_B = mean_A * (1.0 + mde )
188
+ # This check only applies for the binary case, i.e. where we don't receive the variance
189
+ if self .mean_B > 1 and var_A is None :
190
+ raise AnalysisException (
191
+ "Cannot possibly detect an effect that brings binary target metric over 100%"
192
+ )
142
193
self .mean_H1 = self .mean_B - self .mean_A
143
194
144
195
if var_A is not None :
@@ -156,8 +207,14 @@ def __init__(
156
207
157
208
self .seed = None
158
209
if seed is not None :
159
- self .seed = int (str ( abs ( hash ( seed )) )[:8 ])
210
+ self .seed = int (hashlib . sha256 ( seed . encode ()). hexdigest ( )[:8 ], 16 )
160
211
161
212
(self .required_samples , self .intercept ) = self ._calculate_sample_size (
162
213
self .var_H0 , self .mean_H1 , self .var_H1 , self .alpha , self .beta
163
214
)
215
+
216
+ if self .required_samples < 0 :
217
+ raise AnalysisException (
218
+ "The provided alpha and beta values result in a negative number of required"
219
+ "samples -- please reconsider your values."
220
+ )
0 commit comments