7
7
import pandas as pd
8
8
from numpy import log , exp , logaddexp , asarray , c_ as vconcat
9
9
from pandas import DataFrame
10
- from scipy .special import gammaln , betaln , binom
10
+ from scipy .special import gammaln , betaln , binom , beta as betaf
11
11
12
12
from ..utils import _fit , _check_inputs
13
13
from . import BaseFitter
def _loglikelihood(params, x, tx, T):
    """Loglikelihood for optimizer.

    Per-pattern BG/BB loglikelihood; the caller applies customer weights.

    Parameters
    ----------
    params: tuple
        (alpha, beta, gamma, delta) of the BG/BB model.
    x: array_like
        frequency (number of periods with transactions).
    tx: array_like
        recency (period of the most recent transaction).
    T: array_like
        number of transaction opportunities.

    Returns
    -------
    array_like
        loglikelihood value for each (x, tx, T) pattern.
    """
    alpha, beta, gamma, delta = params

    betaln_ab = betaln(alpha, beta)
    betaln_gd = betaln(gamma, delta)

    recency_T = T - tx - 1

    # Log-space component for a customer still "alive" through T.
    A = (betaln(alpha + x, beta + T - x) - betaln_ab +
         betaln(gamma, delta + T) - betaln_gd)

    # Shared index vector; np.max (unlike recency_T.max()) also accepts a
    # plain scalar recency_T.
    J = np.arange(np.max(recency_T) + 1)

    def _sum_(x, tx, recency_T):
        # Linear-space component for a customer who may have "died" at some
        # opportunity j in (tx, T); logged once below.
        if recency_T <= -1:
            # tx >= T: no opportunity after the last purchase, so this
            # component is exactly zero (log -> -inf and
            # logaddexp(A, -inf) == A). Returning a tiny epsilon here would
            # inject spurious probability mass.
            return 0.0
        # recency_T == 0 is just the single j == 0 term of the same sum,
        # so one branch covers both cases.
        j = J[:int(recency_T) + 1]
        return (betaf(alpha + x, beta + tx - x + j) *
                betaf(gamma + 1, delta + tx + j)).sum()

    # NOTE(review): betaf (linear-space Beta) underflows for large
    # alpha/beta -- assumes parameter magnitudes stay moderate during the
    # optimizer's search; confirm against the fitting ranges used.
    sum_ = np.vectorize(_sum_, [float])  # np.float was removed in numpy >= 1.24

    with np.errstate(divide='ignore'):  # log(0) -> -inf is intended here
        B = log(sum_(x, tx, recency_T)) - betaln_gd - betaln_ab

    return logaddexp(A, B)
83
81
84
82
@staticmethod
def _negative_log_likelihood(params, frequency, recency, n_periods, weights,
                             penalizer_coef=0):
    """Objective for the optimizer.

    Weighted mean of the per-pattern loglikelihoods, negated, plus an L2
    penalty on the parameters scaled by ``penalizer_coef``.
    """
    penalty = penalizer_coef * sum(np.asarray(params) ** 2)
    log_likes = BetaGeoBetaBinomFitter._loglikelihood(
        params, frequency, recency, n_periods)
    return penalty - np.mean(log_likes * weights)
90
88
91
- def fit (self , frequency , recency , n , n_custs , verbose = False ,
89
+ def fit (self , frequency , recency , n_periods , weights = None , verbose = False ,
92
90
tol = 1e-4 , iterative_fitting = 1 , index = None ,
93
91
fit_method = 'Nelder-Mead' , maxiter = 2000 , initial_params = None ,
94
92
** kwargs ):
@@ -101,17 +99,18 @@ def fit(self, frequency, recency, n, n_custs, verbose=False,
101
99
Total periods with observed transactions
102
100
recency: array_like
103
101
Period of most recent transaction
104
- n: array_like
105
- Number of transaction opportunities.
106
- n_custs: array_like
107
- Number of customers with given frequency/recency/T. Fader
108
- and Hardie condense the individual RFM matrix into all
102
+ n_periods: array_like
103
+ Number of transaction opportunities. Previously called `n`.
104
+ weights: None or array_like
105
+ Number of customers with given frequency/recency/T,
106
+ defaults to 1 if not specified. Fader and
107
+ Hardie condense the individual RFM matrix into all
109
108
observed combinations of frequency/recency/T. This
110
109
parameter represents the count of customers with a given
111
110
purchase pattern. Instead of calculating individual
112
111
loglikelihood, the loglikelihood is calculated for each
113
112
pattern and multiplied by the number of customers with
114
- that pattern.
113
+ that pattern. Previously called `n_custs`.
115
114
verbose: boolean, optional
116
115
Set to true to print out convergence diagnostics.
117
116
tol: float, optional
@@ -137,15 +136,20 @@ def fit(self, frequency, recency, n, n_custs, verbose=False,
137
136
fitted and with parameters estimated
138
137
139
138
"""
140
- frequency = asarray (frequency )
141
- recency = asarray (recency )
142
- n = asarray (n )
143
- n_custs = asarray (n_custs )
144
- _check_inputs (frequency , recency , n )
139
+ frequency = asarray (frequency ).astype (int )
140
+ recency = asarray (recency ).astype (int )
141
+ n_periods = asarray (n_periods ).astype (int )
142
+
143
+ if weights is None :
144
+ weights = np .ones_like (recency , dtype = np .int64 )
145
+ else :
146
+ weights = asarray (weights )
147
+
148
+ _check_inputs (frequency , recency , n_periods )
145
149
146
150
params , self ._negative_log_likelihood_ = _fit (
147
151
self ._negative_log_likelihood ,
148
- [frequency , recency , n , n_custs , self .penalizer_coef ],
152
+ [frequency , recency , n_periods , weights , self .penalizer_coef ],
149
153
iterative_fitting ,
150
154
initial_params ,
151
155
4 ,
@@ -156,44 +160,43 @@ def fit(self, frequency, recency, n, n_custs, verbose=False,
156
160
** kwargs )
157
161
self .params_ = OrderedDict (zip (['alpha' , 'beta' , 'gamma' , 'delta' ],
158
162
params ))
159
- self .data = DataFrame (vconcat [frequency , recency , n , n_custs ],
160
- columns = ['frequency' , 'recency' , 'n ' , 'n_custs ' ])
163
+ self .data = DataFrame (vconcat [frequency , recency , n_periods , weights ],
164
+ columns = ['frequency' , 'recency' , 'n_periods ' , 'weights ' ])
161
165
if index is not None :
162
166
self .data .index = index
163
- # Making a large array replicating n by n_custs having n.
164
- n_exploded = []
165
- for n_ , n_cust in zip (n , n_custs ):
166
- n_exploded += [n_ ] * n_cust
167
+
167
168
self .generate_new_data = lambda size = 1 : beta_geometric_beta_binom_model (
168
- np .array (n_exploded ), * self ._unload_params ('alpha' , 'beta' , 'gamma' , 'delta' ), size = size )
169
+ # Making a large array replicating n by n_custs having n.
170
+ np .array (sum ([n_ ] * n_cust for (n_ , n_cust ) in zip (n_periods , weights ))),
171
+ * self ._unload_params ('alpha' , 'beta' , 'gamma' , 'delta' ), size = size )
169
172
return self
170
173
171
- def conditional_expected_number_of_purchases_up_to_time (self , t ):
174
+ def conditional_expected_number_of_purchases_up_to_time (self , m_periods_in_future , frequency , recency , n_periods ):
172
175
"""
173
176
Conditional expected purchases in future time period.
174
177
175
- The expected number of future transactions across the next t
178
+ The expected number of future transactions across the next m_periods_in_future
176
179
transaction opportunities by a customer with purchase history
177
180
(x, tx, n).
178
181
179
- .. math:: E(X(n, n+n* )|alpha, beta, gamma, delta, frequency, recency, n )
182
+ .. math:: E(X(n_periods, n_periods+m_periods_in_future )|alpha, beta, gamma, delta, frequency, recency, n_periods )
180
183
181
184
See (13) in Fader & Hardie 2010.
182
185
183
186
Parameters
184
187
----------
185
188
t: array_like
186
- time periods (n+t)
189
+ time periods (n_periods + m_periods_in_future)
187
190
188
191
Returns
189
192
-------
190
193
array_like
191
194
predicted transactions
192
195
193
196
"""
194
- x = self . data [ ' frequency' ]
195
- tx = self . data [ ' recency' ]
196
- n = self . data [ 'n' ]
197
+ x = frequency
198
+ tx = recency
199
+ n = n_periods
197
200
198
201
params = self ._unload_params ('alpha' , 'beta' , 'gamma' , 'delta' )
199
202
alpha , beta , gamma , delta = params
@@ -203,18 +206,18 @@ def conditional_expected_number_of_purchases_up_to_time(self, t):
203
206
p3 = delta / (gamma - 1 ) * exp (gammaln (gamma + delta ) -
204
207
gammaln (1 + delta ))
205
208
p4 = exp (gammaln (1 + delta + n ) - gammaln (gamma + delta + n ))
206
- p5 = exp (gammaln (1 + delta + n + t ) - gammaln (gamma + delta + n + t ))
209
+ p5 = exp (gammaln (1 + delta + n + m_periods_in_future ) - gammaln (gamma + delta + n + m_periods_in_future ))
207
210
208
211
return p1 * p2 * p3 * (p4 - p5 )
209
212
210
def conditional_probability_alive(self, m_periods_in_future, frequency, recency, n_periods):
    """
    Conditional probability alive.

    Conditional probability customer is alive at transaction opportunity
    n_periods + m_periods_in_future.

    .. math:: P(alive at n_periods + m_periods_in_future|alpha, beta, gamma, delta, frequency, recency, n_periods)

    See (A10) in Fader and Hardie 2010.

    Parameters
    ----------
    m_periods_in_future: array_like
        transaction opportunities into the future
    frequency: array_like
        total periods with observed transactions
    recency: array_like
        period of most recent transaction
    n_periods: array_like
        number of transaction opportunities

    Returns
    -------
    array_like
        alive probabilities
    """
    params = self._unload_params('alpha', 'beta', 'gamma', 'delta')
    alpha, beta, gamma, delta = params

    p1 = betaln(alpha + frequency, beta + n_periods - frequency) - betaln(alpha, beta)
    p2 = betaln(gamma, delta + n_periods + m_periods_in_future) - betaln(gamma, delta)
    p3 = self._loglikelihood(params, frequency, recency, n_periods)

    # Stay in log space until a single final exponentiation: forming
    # exp(p1 + p2) and exp(p3) separately can overflow/underflow even when
    # their ratio is a well-behaved probability.
    return exp(p1 + p2 - p3)
244
244
245
245
def expected_number_of_transactions_in_first_n_periods (self , n ):
246
246
"""
247
- Return expected number of transactions in first n periods .
247
+ Return expected number of transactions in first n periods.
248
248
249
249
Expected number of transactions occurring across first n transaction
250
250
opportunities.
@@ -268,7 +268,7 @@ def expected_number_of_transactions_in_first_n_periods(self, n):
268
268
params = self ._unload_params ('alpha' , 'beta' , 'gamma' , 'delta' )
269
269
alpha , beta , gamma , delta = params
270
270
271
- x_counts = self .data .groupby ('frequency' )['n_custs ' ].sum ()
271
+ x_counts = self .data .groupby ('frequency' )['weights ' ].sum ()
272
272
x = asarray (x_counts .index )
273
273
274
274
p1 = binom (n , x ) * exp (betaln (alpha + x , beta + n - x ) -
0 commit comments