implement the normal model
wd60622 committed Jan 17, 2024
1 parent d04d0a1 commit 810b8ba
Showing 6 changed files with 215 additions and 13 deletions.
48 changes: 37 additions & 11 deletions conjugate/distributions.py
@@ -487,28 +487,44 @@ def dist(self):
class NormalInverseGamma:
"""Normal inverse gamma distribution.
Supports both 1 dimensional and multivariate cases.
Args:
mu: mean
delta_inverse: covariance matrix
alpha: shape
beta: scale
delta_inverse: covariance matrix, 2d array for multivariate case
nu: alternative precision parameter for 1 dimensional case
"""

mu: NUMERIC
delta_inverse: NUMERIC
alpha: NUMERIC
beta: NUMERIC
delta_inverse: NUMERIC = None
nu: NUMERIC = None

def __post_init__(self) -> None:
if self.delta_inverse is None and self.nu is None:
raise ValueError("Either delta_inverse or nu must be provided.")

if self.delta_inverse is not None and self.nu is not None:
raise ValueError("Only one of delta_inverse or nu must be provided.")

@classmethod
def from_inverse_gamma(
cls, mu: NUMERIC, delta_inverse: NUMERIC, inverse_gamma: InverseGamma
cls,
mu: NUMERIC,
inverse_gamma: InverseGamma,
delta_inverse: NUMERIC = None,
nu: NUMERIC = None,
) -> "NormalInverseGamma":
return cls(
mu=mu,
delta_inverse=delta_inverse,
alpha=inverse_gamma.alpha,
beta=inverse_gamma.beta,
delta_inverse=delta_inverse,
nu=nu,
)

@property
@@ -528,6 +544,20 @@ def sample_variance(self, size: int, random_state=None) -> NUMERIC:
"""
return self.inverse_gamma.dist.rvs(size=size, random_state=random_state)

def _sample_beta_1d(self, variance, size: int, random_state=None) -> NUMERIC:
sigma = (variance / self.nu) ** 0.5
return stats.norm(self.mu, sigma).rvs(size=size, random_state=random_state)

def _sample_beta_nd(self, variance, size: int, random_state=None) -> NUMERIC:
return np.stack(
[
stats.multivariate_normal(self.mu, v * self.delta_inverse).rvs(
size=1, random_state=random_state
)
for v in variance
]
)

def sample_beta(
self, size: int, return_variance: bool = False, random_state=None
) -> Union[NUMERIC, Tuple[NUMERIC, NUMERIC]]:
@@ -544,14 +574,10 @@ def sample_beta(
"""
variance = self.sample_variance(size=size, random_state=random_state)

beta = np.stack(
[
stats.multivariate_normal(self.mu, v * self.delta_inverse).rvs(
size=1, random_state=random_state
)
for v in variance
]
sample_beta = (
self._sample_beta_1d if self.delta_inverse is None else self._sample_beta_nd
)
beta = sample_beta(variance=variance, size=size, random_state=random_state)

if return_variance:
return beta, variance
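A quick sketch (not part of the diff) of how the two parameterizations added above are used: `nu` for the 1-dimensional case and `delta_inverse` for the multivariate case; passing both, or neither, raises the `ValueError` from `__post_init__`.

```python
# Illustrative only: exercising both sample_beta branches introduced above.
import numpy as np

from conjugate.distributions import NormalInverseGamma

rng = np.random.default_rng(0)

# 1-dimensional case: parameterized by nu
nig_1d = NormalInverseGamma(mu=0.0, alpha=2.0, beta=1.0, nu=1.0)
beta_1d = nig_1d.sample_beta(size=100, random_state=rng)

# Multivariate case: parameterized by a 2d delta_inverse matrix
nig_2d = NormalInverseGamma(
    mu=np.zeros(2), alpha=2.0, beta=1.0, delta_inverse=np.eye(2)
)
beta_2d = nig_2d.sample_beta(size=100, random_state=rng)
```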
57 changes: 57 additions & 0 deletions conjugate/models.py
@@ -844,6 +844,63 @@ def normal_known_mean_posterior_predictive(
)


def normal_normal_inverse_gamma(
x_total: NUMERIC,
x2_total: NUMERIC,
n: NUMERIC,
normal_inverse_gamma_prior: NormalInverseGamma,
) -> NormalInverseGamma:
"""Posterior distribution for a normal likelihood with a normal inverse gamma prior.
Derivation from paper [here](https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf).
Args:
x_total: sum of all outcomes
x2_total: sum of all outcomes squared
n: total number of samples in x_total and x2_total
normal_inverse_gamma_prior: NormalInverseGamma prior
Returns:
NormalInverseGamma posterior distribution
"""
x_mean = x_total / n

nu_0_inv = normal_inverse_gamma_prior.nu

nu_post_inv = nu_0_inv + n
mu_post = ((nu_0_inv * normal_inverse_gamma_prior.mu) + (n * x_mean)) / nu_post_inv

alpha_post = normal_inverse_gamma_prior.alpha + (n / 2)
beta_post = normal_inverse_gamma_prior.beta + 0.5 * (
(normal_inverse_gamma_prior.mu**2) * nu_0_inv
+ x2_total
- (mu_post**2) * nu_post_inv
)

return NormalInverseGamma(
mu=mu_post,
nu=nu_post_inv,
alpha=alpha_post,
beta=beta_post,
)


def normal_normal_inverse_gamma_posterior_predictive(
normal_inverse_gamma: NormalInverseGamma,
) -> StudentT:
var = (
normal_inverse_gamma.beta
* (normal_inverse_gamma.nu + 1)
/ (normal_inverse_gamma.nu * normal_inverse_gamma.alpha)
)
return StudentT(
mu=normal_inverse_gamma.mu,
sigma=var**0.5,
nu=2 * normal_inverse_gamma.alpha,
)


def linear_regression(
X: NUMERIC,
y: NUMERIC,
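A minimal numeric sketch of the two new model functions (mirroring the test added below; not part of the diff):

```python
# Illustrative only: fit the new normal model from sufficient statistics.
import numpy as np

from conjugate.distributions import NormalInverseGamma
from conjugate.models import (
    normal_normal_inverse_gamma,
    normal_normal_inverse_gamma_posterior_predictive,
)

data = np.random.default_rng(0).normal(loc=25, scale=7.5, size=100)

prior = NormalInverseGamma(mu=0.0, alpha=1 / 5, beta=10, nu=1 / 25)
posterior = normal_normal_inverse_gamma(
    x_total=data.sum(),
    x2_total=(data**2).sum(),
    n=len(data),
    normal_inverse_gamma_prior=prior,
)

# The posterior predictive for a new observation is a Student-t
posterior_predictive = normal_normal_inverse_gamma_posterior_predictive(posterior)
```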
82 changes: 82 additions & 0 deletions docs/examples/sql.md
@@ -0,0 +1,82 @@
# Bayesian Models with SQL


Because `conjugate-models` works with [general numerical inputs](../generalized-inputs), we can use Bayesian models in SQL
with the SQL builder, `PyPika`.

For this example, we will use a normal model to estimate the
total sales amount by group.

The example table is called `events` and we will assume a normal model for the
column `sales` for each value of the column `group`.

We can create the sufficient statistics needed for `normal_normal_inverse_gamma`
directly with the SQL builder.


```python
from pypika import Query, Table, functions as fn

event_table = Table("events")

sales = event_table.sales
sales_squared = sales**2

# Sufficient statistics
x_total = fn.Sum(sales)
x2_total = fn.Sum(sales_squared)
n = fn.Count("*")

# Start a query for a groupby
query = (
Query.from_(event_table)
.groupby(event_table.group)
.select(
event_table.group,
)
)
```

Perform the Bayesian inference as usual, but with the variables that represent
the columns.

```python
from conjugate.distributions import NormalInverseGamma
from conjugate.models import (
normal_normal_inverse_gamma,
normal_normal_inverse_gamma_posterior_predictive,
)

# Bayesian Inference
prior = NormalInverseGamma(mu=0, nu=1 / 10, alpha=1 / 10, beta=1)
posterior = normal_normal_inverse_gamma(
x_total=x_total,
x2_total=x2_total,
n=n,
normal_inverse_gamma_prior=prior,
)
posterior_predictive = normal_normal_inverse_gamma_posterior_predictive(posterior)
```
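At this point the posterior's parameters are PyPika expressions built from the column terms rather than plain numbers (a quick check; the rendering shown in the comments is approximate and may vary with the PyPika version):

```python
# The parameters are SQL expressions, not numbers (rendering is approximate):
print(posterior.nu)     # roughly: 0.1 + COUNT(*)
print(posterior.alpha)  # roughly: 0.1 + COUNT(*) / 2
```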

Then add the columns we want from the inference:

```python
# Add the posterior predictive estimate
query = query.select(
posterior_predictive.mu.as_("mu"),
posterior_predictive.sigma.as_("sigma"),
posterior_predictive.nu.as_("nu"),
)
```

Which results in this query:

```sql
SELECT "group",
(0.0+COUNT(*)*SUM("sales")/COUNT(*))/(0.1+COUNT(*)) "mu",
POW((1+0.5*(0.0+SUM(POW("sales", 2))-POW((0.0+COUNT(*)*SUM("sales")/COUNT(*))/(0.1+COUNT(*)), 2)*(0.1+COUNT(*))))*(0.1+COUNT(*)+1)/((0.1+COUNT(*))*(0.1+COUNT(*)/2)), 0.5) "sigma",
2*(0.1+COUNT(*)/2) "nu"
FROM "events"
GROUP BY "group"
```
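The SQL string itself comes from rendering the builder and can be handed to any database client (a sketch; the `engine` object is hypothetical and not part of this example):

```python
print(query.get_sql())  # the query shown above (quoting depends on the dialect)

# e.g. run it with pandas against an existing connection/engine:
# import pandas as pd
# df = pd.read_sql(query.get_sql(), con=engine)
```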

1 change: 1 addition & 0 deletions mkdocs.yml
@@ -54,6 +54,7 @@ nav:
- Unsupported Distributions: examples/pymc-sampling.md
- Bayesian Update: examples/bayesian-update.md
- Linear Regression: examples/linear-regression.md
- Inference in SQL: examples/sql.md

plugins:
- search
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "conjugate-models"
version = "0.5.0"
version = "0.6.0"
description = "Bayesian Conjugate Models in Python"
authors = ["Will Dean <wd60622@gmail.com>"]
license = "MIT"
38 changes: 37 additions & 1 deletion tests/test_models.py
@@ -38,8 +38,12 @@
normal_known_variance_posterior_predictive,
normal_known_mean,
normal_known_mean_posterior_predictive,
normal_normal_inverse_gamma,
normal_normal_inverse_gamma_posterior_predictive,
)

rng = np.random.default_rng(42)


class MockOperation:
def __init__(self, left, right, operation: str):
@@ -210,7 +214,6 @@ def test_multinomial_dirichlet_analysis(alpha) -> None:
)
def test_linear_regression(intercept, slope, sigma) -> None:
n_points = 100
rng = np.random.default_rng(42)

x = np.linspace(-5, 5, n_points)

@@ -352,3 +355,36 @@ def test_normal_known_mean() -> None:
mu=known_mu, inverse_gamma=posterior
)
assert isinstance(posterior_predictive, StudentT)


def test_normal_normal_inverse_gamma() -> None:
true_mu, true_sigma = 25, 7.5

true = Normal(true_mu, true_sigma)

n = 25
data = true.dist.rvs(size=n, random_state=rng)

prior = NormalInverseGamma(0.0, alpha=1 / 5, beta=10, nu=1 / 25)
posterior = normal_normal_inverse_gamma(
x_total=data.sum(),
x2_total=(data**2).sum(),
n=n,
normal_inverse_gamma_prior=prior,
)

assert isinstance(posterior, NormalInverseGamma)

posterior_predictive = normal_normal_inverse_gamma_posterior_predictive(
normal_inverse_gamma=posterior,
)
assert isinstance(posterior_predictive, StudentT)

prior_predictive = normal_normal_inverse_gamma_posterior_predictive(
normal_inverse_gamma=prior,
)

assert (
prior_predictive.dist.logpdf(data).sum()
< posterior_predictive.dist.logpdf(data).sum()
)
