Skip to content
This repository has been archived by the owner on Jun 28, 2024. It is now read-only.

Commit

Permalink
Merge pull request #241 from CamDavidsonPilon/v0.10.1
Browse files Browse the repository at this point in the history
v0.10.1
  • Loading branch information
CamDavidsonPilon authored Jan 7, 2019
2 parents 00ca929 + f74e195 commit ed37455
Show file tree
Hide file tree
Showing 10 changed files with 56 additions and 47 deletions.
7 changes: 6 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@ language: python
dist: trusty
python:
- "2.7"
- "3.4"
- "3.5"
- "3.6"
# Enable newer 3.7 without globally enabling sudo and dist: xenial for other build jobs
matrix:
include:
- python: 3.7
dist: xenial
sudo: true
before_install:
- sudo apt-get update
install:
Expand Down
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

### 0.10.1
- performance improvements to `generate_data.py` for large datasets #195
- performance improvements to `summary_data_from_transaction_data`, thanks @MichaelSchreier
- Previously, `GammaGammaFitter` would have an infinite mean when its `q` parameter was less than 1. This was possible for some datasets. In 0.10.1, a new argument is added to `GammaGammaFitter` to constrain that `q` is greater than 1. This can be done with `q_constraint=True` in the call to `GammaGammaFitter.fit`. See issue #146. Thanks @vruvora
- Stop support of scipy < 1.0.
- Stop support of Python < 3.5.

### 0.10.0
- `BetaGeoBetaBinomFitter.fit` has replaced `n_custs` with the more appropriately named `weights` (to align with other statistical libraries). By default and if unspecified, `weights` is equal to an array of 1s.
- The `conditional_` methods on `BetaGeoBetaBinomFitter` have been updated to handle exogenously provided recency, frequency and periods.
Expand Down
2 changes: 1 addition & 1 deletion dev_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ flake8
autopep8
pytest
matplotlib
pytest-cov
pytest-cov==2.5.1
pytest-mpl
coveralls
pydocstyle
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@
# built documents.
#
# The short X.Y version.
version = '0.10.0.0'
version = '0.10.1'
# The full version, including alpha/beta/rc tags.
release = '0.10.0.0'
release = version

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
5 changes: 2 additions & 3 deletions lifetimes/fitters/beta_geo_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
where, exp
from numpy import ones_like
from pandas import DataFrame
from scipy.special import gammaln, hyp2f1, beta, gamma
from scipy import misc
from scipy.special import gammaln, hyp2f1, beta, gamma, logsumexp

from . import BaseFitter
from ..utils import _fit, _scale_time, _check_inputs
Expand Down Expand Up @@ -164,7 +163,7 @@ def _negative_log_likelihood(params, freq, rec, T, weights, penalizer_coef):
(r + freq) * log(rec + alpha)
A_4[isnan(A_4) | isinf(A_4)] = 0
penalizer_term = penalizer_coef * sum(np.asarray(params) ** 2)
return - (weights * (A_1 + A_2 + misc.logsumexp(vconcat[A_3, A_4], axis=1, b=d))).mean() \
return - (weights * (A_1 + A_2 + logsumexp(vconcat[A_3, A_4], axis=1, b=d))).mean() \
+ penalizer_term

def conditional_expected_number_of_purchases_up_to_time(self, t, frequency,
Expand Down
8 changes: 4 additions & 4 deletions lifetimes/fitters/pareto_nbd_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from numpy import log, exp, logaddexp, asarray, any as npany, c_ as vconcat
from pandas import DataFrame
from scipy.special import gammaln, hyp2f1, betaln
from scipy import misc
from scipy.special import logsumexp

from . import BaseFitter
from ..utils import _fit, _check_inputs, _scale_time
Expand Down Expand Up @@ -162,7 +162,7 @@ def _log_A_0(params, freq, recency, age):
except TypeError:
sign = 1

return (misc.logsumexp([log(p_1) + rsf * log(q_2), log(p_2) +
return (logsumexp([log(p_1) + rsf * log(q_2), log(p_2) +
rsf * log(q_1)], axis=0, b=[sign, -sign]) -
rsf * log(q_1 * q_2))

Expand Down Expand Up @@ -376,7 +376,7 @@ def _log_B_three(i):
zeroth_term = (n == 0) * (1 - exp(log_p_zero))
first_term = n * log(t) - gammaln(n + 1) + log_B_one - log_l
second_term = log_B_two - log_l
third_term = misc.logsumexp(
third_term = logsumexp(
[i * log(t) - gammaln(i + 1) + _log_B_three(i) - log_l for i in range(n + 1)],
axis=0
)
Expand All @@ -389,7 +389,7 @@ def _log_B_three(i):

# In some scenarios (e.g. large n) tiny numerical errors in the calculation of second_term and third_term
# cause sumexp to be ever so slightly negative and logsumexp throws an error. Hence we ignore the sign here.
return zeroth_term + exp(misc.logsumexp(
return zeroth_term + exp(logsumexp(
[first_term, second_term, third_term], b=[sign, sign, -sign],
axis=0,
return_sign=True
Expand Down
54 changes: 27 additions & 27 deletions lifetimes/generate_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

import numpy as np
from scipy import stats
from numpy import random
import pandas as pd


Expand Down Expand Up @@ -36,8 +36,8 @@ def beta_geometric_nbd_model(T, r, alpha, a, b, size=1):
else:
T = np.asarray(T)

probability_of_post_purchase_death = stats.beta.rvs(a, b, size=size)
lambda_ = stats.gamma.rvs(r, scale=1. / alpha, size=size)
probability_of_post_purchase_death = random.beta(a, b, size=size)
lambda_ = random.gamma(r, scale=1. / alpha, size=size)

columns = ['frequency', 'recency', 'T', 'lambda', 'p', 'alive', 'customer_id']
df = pd.DataFrame(np.zeros((size, len(columns))), columns=columns)
Expand All @@ -48,12 +48,12 @@ def beta_geometric_nbd_model(T, r, alpha, a, b, size=1):

# hacky until I can find something better
times = []
next_purchase_in = stats.expon.rvs(scale=1. / l)
next_purchase_in = random.exponential(scale=1. / l)
alive = True
while (np.sum(times) + next_purchase_in < T[i]) and alive:
times.append(next_purchase_in)
next_purchase_in = stats.expon.rvs(scale=1. / l)
alive = np.random.random() > p
next_purchase_in = random.exponential(scale=1. / l)
alive = random.random() > p

times = np.array(times).cumsum()
df.iloc[i] = np.unique(np.array(times).astype(int)).shape[0], np.max(times if times.shape[0] > 0 else 0), T[i], l, p, alive, i
Expand Down Expand Up @@ -102,8 +102,8 @@ def beta_geometric_nbd_model_transactional_data(T, r, alpha, a, b, observation_p
start_date = [observation_period_end - pd.Timedelta(T[i] - 1, unit=freq) for i in range(size)]
T = np.asarray(T)

probability_of_post_purchase_death = stats.beta.rvs(a, b, size=size)
lambda_ = stats.gamma.rvs(r, scale=1. / alpha, size=size)
probability_of_post_purchase_death = random.beta(a, b, size=size)
lambda_ = random.gamma(r, scale=1. / alpha, size=size)

columns = ['customer_id', 'date']
df = pd.DataFrame(columns=columns)
Expand All @@ -115,13 +115,13 @@ def beta_geometric_nbd_model_transactional_data(T, r, alpha, a, b, observation_p
age = T[i]

purchases = [[i, s - pd.Timedelta(1, unit=freq)]]
next_purchase_in = stats.expon.rvs(scale=1. / l)
next_purchase_in = random.exponential(scale=1. / l)
alive = True

while next_purchase_in < age and alive:
purchases.append([i, s + pd.Timedelta(next_purchase_in, unit=freq)])
next_purchase_in += stats.expon.rvs(scale=1. / l)
alive = np.random.random() > p
next_purchase_in += random.exponential(scale=1. / l)
alive = random.random() > p

df = df.append(pd.DataFrame(purchases, columns=columns))

Expand Down Expand Up @@ -160,23 +160,23 @@ def pareto_nbd_model(T, r, alpha, s, beta, size=1):
else:
T = np.asarray(T)

lambda_ = stats.gamma.rvs(r, scale=1. / alpha, size=size)
mus = stats.gamma.rvs(s, scale=1. / beta, size=size)
lambda_ = random.gamma(r, scale=1. / alpha, size=size)
mus = random.gamma(s, scale=1. / beta, size=size)

columns = ['frequency', 'recency', 'T', 'lambda', 'mu', 'alive', 'customer_id']
df = pd.DataFrame(np.zeros((size, len(columns))), columns=columns)

for i in range(size):
l = lambda_[i]
mu = mus[i]
time_of_death = stats.expon.rvs(scale=1. / mu)
time_of_death = random.exponential(scale=1. / mu)

# hacky until I can find something better
times = []
next_purchase_in = stats.expon.rvs(scale=1. / l)
next_purchase_in = random.exponential(scale=1. / l)
while np.sum(times) + next_purchase_in < min(time_of_death, T[i]):
times.append(next_purchase_in)
next_purchase_in = stats.expon.rvs(scale=1. / l)
next_purchase_in = random.exponential(scale=1. / l)

times = np.array(times).cumsum()
df.iloc[i] = np.unique(np.array(times).astype(int)).shape[0], np.max(times if times.shape[0] > 0 else 0), T[i], l, mu, time_of_death > T[i], i
Expand Down Expand Up @@ -219,8 +219,8 @@ def modified_beta_geometric_nbd_model(T, r, alpha, a, b, size=1):
else:
T = np.asarray(T)

probability_of_post_purchase_death = stats.beta.rvs(a, b, size=size)
lambda_ = stats.gamma.rvs(r, scale=1. / alpha, size=size)
probability_of_post_purchase_death = random.beta(a, b, size=size)
lambda_ = random.gamma(r, scale=1. / alpha, size=size)

columns = ['frequency', 'recency', 'T', 'lambda', 'p', 'alive', 'customer_id']
df = pd.DataFrame(np.zeros((size, len(columns))), columns=columns)
Expand All @@ -231,12 +231,12 @@ def modified_beta_geometric_nbd_model(T, r, alpha, a, b, size=1):

# hacky until I can find something better
times = []
next_purchase_in = stats.expon.rvs(scale=1. / l)
alive = np.random.random() > p # essentially the difference between this model and BG/NBD
next_purchase_in = random.exponential(scale=1. / l)
alive = random.random() > p # essentially the difference between this model and BG/NBD
while (np.sum(times) + next_purchase_in < T[i]) and alive:
times.append(next_purchase_in)
next_purchase_in = stats.expon.rvs(scale=1. / l)
alive = np.random.random() > p
next_purchase_in = random.exponential(scale=1. / l)
alive = random.random() > p

times = np.array(times).cumsum()
df.iloc[i] = np.unique(np.array(times).astype(int)).shape[0], np.max(times if times.shape[0] > 0 else 0), T[i], l, p, alive, i
Expand Down Expand Up @@ -282,8 +282,8 @@ def beta_geometric_beta_binom_model(N, alpha, beta, gamma, delta, size=1):
else:
N = np.asarray(N)

probability_of_post_purchase_death = np.random.beta(a=alpha, b=beta, size=size)
thetas = np.random.beta(a=gamma, b=delta, size=size)
probability_of_post_purchase_death = random.beta(a=alpha, b=beta, size=size)
thetas = random.beta(a=gamma, b=delta, size=size)

columns = ['frequency', 'recency', 'n_periods', 'p', 'theta', 'alive', 'customer_id']
df = pd.DataFrame(np.zeros((size, len(columns))), columns=columns)
Expand All @@ -296,12 +296,12 @@ def beta_geometric_beta_binom_model(N, alpha, beta, gamma, delta, size=1):
alive = True
times = []
while current_t < N[i] and alive:
alive = np.random.binomial(1, theta) == 0
if alive and np.random.binomial(1, p) == 1:
alive = random.binomial(1, theta) == 0
if alive and random.binomial(1, p) == 1:
times.append(current_t)
current_t += 1
# adding in final death opportunity to agree with [1]
if alive:
alive = np.random.binomial(1, theta) == 0
alive = random.binomial(1, theta) == 0
df.iloc[i] = len(times), times[-1] + 1 if len(times) != 0 else 0, N[i], p, theta, alive, i
return df
2 changes: 1 addition & 1 deletion lifetimes/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = '0.10.0.0'
__version__ = '0.10.1'
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
numpy
scipy
numpy>1.10.0
scipy>=1.0.0
pandas>=0.19
dill
dill>=0.2.6
8 changes: 3 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,14 @@
"License :: OSI Approved :: MIT License",
"Programming Language :: Python",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
"Topic :: Scientific/Engineering",
],
install_requires=[
"numpy",
"scipy",
"numpy>=1.10.0",
"scipy>=1.0.0",
"pandas>=0.19",
"dill"
"dill>=0.2.6"
],
package_data={
"lifetimes": [
Expand Down

0 comments on commit ed37455

Please sign in to comment.