From e1e0f8a0da5ebc84bf30c596c8acd39ee5a925f5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 27 Oct 2024 22:46:18 +0000 Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .github/release.yml | 2 +- .github/workflows/coverage.yml | 6 +-- .github/workflows/docs.yml | 4 +- .github/workflows/sync-closing-labels.yml | 2 +- .github/workflows/tests.yml | 4 +- CONTRIBUTING.md | 19 ++++---- Makefile | 4 +- README.md | 14 +++--- docs/distributions.md | 4 +- docs/examples/bayesian-update.md | 55 ++++++++++------------ docs/examples/binomial.md | 22 ++++----- docs/examples/bootstrap.md | 30 ++++++------ docs/examples/generalized-inputs.md | 14 +++--- docs/examples/indexing.md | 6 +-- docs/examples/linear-regression.md | 6 +-- docs/examples/plotting.md | 6 +-- docs/examples/sampling-distributions.md | 19 ++++---- docs/examples/scaling-distributions.md | 10 ++-- docs/examples/scipy-connection.md | 10 ++-- docs/examples/sql.md | 13 +++-- docs/examples/thompson.md | 30 ++++++------ docs/examples/unsupported-distributions.md | 14 +++--- docs/examples/vectorized-inputs.md | 8 ++-- docs/index.md | 18 +++---- docs/mixins.md | 2 +- docs/models.md | 6 +-- docs/overrides/partials/comments.html | 2 +- mkdocs.yml | 16 +++---- pyproject.toml | 4 +- 29 files changed, 172 insertions(+), 178 deletions(-) diff --git a/.github/release.yml b/.github/release.yml index 79ec5c2..a4d8681 100644 --- a/.github/release.yml +++ b/.github/release.yml @@ -15,4 +15,4 @@ changelog: - docs - title: Maintenance 🔧 labels: - - "*" \ No newline at end of file + - "*" diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index a956e61..bc6026a 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -2,13 +2,13 @@ name: Workflow for Codecov conjugate on: push: - paths: + paths: - "conjugate/**" - "pyproject.toml" - "tests/**" pull_request: - paths: + paths: - "conjugate/**" - "pyproject.toml" - "tests/**" @@ -24,7 +24,7 @@ jobs: with: python-version: '3.10' - name: Install dependencies - run: pip install . pytest pytest-cov pytest-mpl pypika + run: pip install . pytest pytest-cov pytest-mpl pypika - name: Run tests and collect coverage run: pytest - name: Upload coverage to Codecov diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 41df4ce..ef2d426 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -2,7 +2,7 @@ name: Docs on: push: branches: - - master + - master - main permissions: contents: write @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - with: + with: fetch-depth: 0 - uses: actions/setup-python@v4 with: diff --git a/.github/workflows/sync-closing-labels.yml b/.github/workflows/sync-closing-labels.yml index d9e3b63..faed499 100644 --- a/.github/workflows/sync-closing-labels.yml +++ b/.github/workflows/sync-closing-labels.yml @@ -13,7 +13,7 @@ jobs: uses: actions/checkout@v2 - name: Sync labels with closing issues uses: wd60622/closing-labels@v0.0.3 - with: + with: exclude: "duplicate,help wanted,question" env: GH_TOKEN: ${{ github.token }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b2e3181..92358ef 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,12 +2,12 @@ name: Tests on: push: - paths: + paths: - "conjugate/**" - "pyproject.toml" - "tests/**" pull_request: - paths: + paths: - "conjugate/**" - "pyproject.toml" - "tests/**" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f058596..76c3808 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ # Guidelines for Contributing -Contributions are welcomed in all forms. These may be bugs, feature requests, documentation, or examples. Please feel free to: +Contributions are welcomed in all forms. These may be bugs, feature requests, documentation, or examples. Please feel free to: - Submit an issue - Open a pull request @@ -8,19 +8,19 @@ Contributions are welcomed in all forms. These may be bugs, feature requests, do ## Open an Issue -If you find a bug or have a feature request, please [open an issue](https://github.com/wd60622/conjugate/issues/new) on GitHub. +If you find a bug or have a feature request, please [open an issue](https://github.com/wd60622/conjugate/issues/new) on GitHub. Please check that it is not one of the [open issues](https://github.com/wd60622/conjugate/issues). ## Local Development Steps ### Create a forked branch of the repo -Do this once but keep it up to date +Do this once but keep it up to date 1. [Fork wd60622/conjugate GitHub repo](https://github.com/wd60622/conjugate/fork) 1. Clone forked repo and set upstream - ```bash + ```bash git clone git@github.com:/conjugate.git cd conjugate git remote add upstream git@github.com:wd60622/conjugate.git @@ -36,15 +36,15 @@ In the root of the repo, run: poetry install ``` -And also install the [pre-commit](https://pre-commit.com/) hooks with: +And also install the [pre-commit](https://pre-commit.com/) hooks with: -```bash -pre-commit install +```bash +pre-commit install ``` ## Pull Request Checklist -Please check that your pull request meets the following criteria: +Please check that your pull request meets the following criteria: - Unit tests pass - pre-commit hooks pass @@ -64,5 +64,4 @@ Tests will run on each pull request. Documentation will be updated with each merge to `main` branch. -Package release to PyPI on every GitHub Release. - +Package release to PyPI on every GitHub Release. diff --git a/Makefile b/Makefile index fb45cb6..e245867 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ .DEFAULT_GOAL := help -help: +help: @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z0-9_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) | sort test-generate-baseline: ## Generate baseline images for tests @@ -12,7 +12,7 @@ test: ## Run tests poetry run pytest tests cov: ## Run tests and generate coverage report - poetry run pytest tests + poetry run pytest tests coverage html open htmlcov/index.html diff --git a/README.md b/README.md index 1bdea64..4837c0f 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Conjugate Models [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) -[![Tests](https://github.com/wd60622/conjugate/actions/workflows/tests.yml/badge.svg)](https://github.com/wd60622/conjugate/actions/workflows/tests.yml) -[![PyPI version](https://badge.fury.io/py/conjugate-models.svg)](https://badge.fury.io/py/conjugate-models) +[![Tests](https://github.com/wd60622/conjugate/actions/workflows/tests.yml/badge.svg)](https://github.com/wd60622/conjugate/actions/workflows/tests.yml) +[![PyPI version](https://badge.fury.io/py/conjugate-models.svg)](https://badge.fury.io/py/conjugate-models) [![docs](https://github.com/wd60622/conjugate/actions/workflows/docs.yml/badge.svg)](https://wd60622.github.io/conjugate/) [![codecov](https://codecov.io/github/wd60622/conjugate/branch/main/graph/badge.svg)](https://app.codecov.io/github/wd60622/conjugate) @@ -11,11 +11,11 @@ Bayesian conjugate models in Python ## Installation -```bash +```bash pip install conjugate-models ``` -## Features +## Features - [Connection to Scipy Distributions](https://wd60622.github.io/conjugate/examples/scipy-connection) with `dist` attribute - [Built in Plotting](https://wd60622.github.io/conjugate/examples/plotting) with `plot_pdf`, `plot_pmf`, and `plot_cdf` methods @@ -25,7 +25,7 @@ pip install conjugate-models - Out of box compatibility with `polars`, `pandas`, `numpy`, and more. - [Unsupported Distributions](https://wd60622.github.io/conjugate/examples/pymc-sampling) for sampling from unsupported distributions -## Supported Models +## Supported Models Many likelihoods are supported including @@ -41,7 +41,7 @@ Many likelihoods are supported including 1. Pass data and prior into model from `models` modules 1. Analytics with posterior and posterior predictive distributions -```python +```python from conjugate.distributions import Beta, BetaBinomial from conjugate.models import binomial_beta, binomial_beta_predictive @@ -54,7 +54,7 @@ prior = Beta(1, 1) prior_predictive: BetaBinomial = binomial_beta_predictive(n=N, distribution=prior) posterior: Beta = binomial_beta(n=N, x=x, prior=prior) -posterior_predictive: BetaBinomial = binomial_beta_predictive(n=N, distribution=posterior) +posterior_predictive: BetaBinomial = binomial_beta_predictive(n=N, distribution=posterior) ``` From here, do any analysis you'd like! diff --git a/docs/distributions.md b/docs/distributions.md index 6f2e5f6..ca34d47 100644 --- a/docs/distributions.md +++ b/docs/distributions.md @@ -1,6 +1,6 @@ --- -comments: true +comments: true --- -# Distributions +# Distributions ::: conjugate.distributions diff --git a/docs/examples/bayesian-update.md b/docs/examples/bayesian-update.md index 7d8c840..adf9b36 100644 --- a/docs/examples/bayesian-update.md +++ b/docs/examples/bayesian-update.md @@ -1,10 +1,10 @@ --- -comments: true +comments: true --- -Easy to use Bayesian inference incrementally by setting the posterior to the prior for the next set of data points. +Easy to use Bayesian inference incrementally by setting the posterior to the prior for the next set of data points. -Abstractly, something like this: +Abstractly, something like this: ```python prior = ... @@ -30,11 +30,11 @@ import matplotlib.pyplot as plt from conjugate.distributions import NormalInverseGamma from conjugate.models import normal -def create_sampler(mu, sigma, rng): +def create_sampler(mu, sigma, rng): """Generate a sampler from a normal distribution with mean `mu` and standard deviation `sigma`.""" - def sample(n: int): + def sample(n: int): return rng.normal(loc=mu, scale=sigma, size=n) - + return sample @@ -45,9 +45,9 @@ sample = create_sampler(mu=mu, sigma=sigma, rng=rng) prior = NormalInverseGamma( - mu=0, - alpha=1, beta=1, - nu=1, + mu=0, + alpha=1, beta=1, + nu=1, ) cumsum = 0 @@ -57,9 +57,9 @@ for batch_size in batch_sizes: data = sample(n=batch_size) posterior = normal( - x_total=data.sum(), - x2_total=(data ** 2).sum(), - n=batch_size, + x_total=data.sum(), + x2_total=(data ** 2).sum(), + n=batch_size, prior=prior ) @@ -69,14 +69,14 @@ for batch_size in batch_sizes: label = f"n={cumsum}" ax.scatter(variance_samples ** 0.5, beta_samples, alpha=0.25, label=label) - prior = posterior + prior = posterior ax.scatter(sigma, mu, color="black", label="true") ax.set( - xlabel="$\sigma$", - ylabel="$\mu$", - xlim=(0, None), - ylim=(0, None), + xlabel="$\sigma$", + ylabel="$\mu$", + xlim=(0, None), + ylim=(0, None), title="Updated posterior samples of $\mu$ and $\sigma$" ) ax.legend() @@ -91,7 +91,7 @@ plt.show() With a Binomial model, we can assume as Beta prior -```python +```python import numpy as np import matplotlib.pyplot as plt @@ -99,8 +99,8 @@ import matplotlib.pyplot as plt from conjugate.distributions import Beta from conjugate.models import binomial_beta -def create_sampler(p, rng): - def sampler(n: int): +def create_sampler(p, rng): + def sampler(n: int): return rng.binomial(n=n, p=p) return sampler @@ -114,13 +114,13 @@ sample = create_sampler(p=p, rng=rng) ax = plt.gca() cumsum = 0 batch_sizes = [5, 25, 50] -for batch_size in batch_sizes: +for batch_size in batch_sizes: x = sample(n=batch_size) posterior = binomial_beta( - x=x, - n=batch_size, - prior=prior, + x=x, + n=batch_size, + prior=prior, ) cumsum += batch_size @@ -132,14 +132,11 @@ for batch_size in batch_sizes: ax.axvline(p, label="true p", color="black", ymax=0.05) ax.set( - xlabel="p", - ylim=(None, 10), + xlabel="p", + ylim=(None, 10), ) ax.legend() plt.show() ``` ![Binomial Model](../images/bayesian-update-binomial.png) - - - diff --git a/docs/examples/binomial.md b/docs/examples/binomial.md index 612ae38..4b456cf 100644 --- a/docs/examples/binomial.md +++ b/docs/examples/binomial.md @@ -1,7 +1,7 @@ --- -comments: true +comments: true --- -# Binomial Model +# Binomial Model The [Binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution) models the @@ -11,22 +11,22 @@ data. This example demonstrates how to fit a Binomial model with a Beta prior and compares the prior and posterior distributions as well as the predictive -distributions with the true distribution of +distributions with the true distribution of ## Import modules -Import the required distributions: +Import the required distributions: - `Binomial`: The assumed model likelihood in order to generate synthetic data - `Beta`: Prior for `Binomial` distribution - `BetaBinomial`: The predictive distribution for the `Binomial` model -and the functions: +and the functions: - `binomial_beta`: get the posterior distribution from prior and data - `binomial_beta_predictive`: get the predictive distribution -```python +```python from conjugate.distributions import Beta, Binomial, BetaBinomial from conjugate.models import binomial_beta, binomial_beta_predictive @@ -64,12 +64,12 @@ Get the predictive distribution for `n` new trials with ```python prior_predictive: BetaBinomial = binomial_beta_predictive( - n=N, - distribution=prior, + n=N, + distribution=prior, ) posterior_predictive: BetaBinomial = binomial_beta_predictive( - n=N, - distribution=posterior, + n=N, + distribution=posterior, ) ``` @@ -80,7 +80,7 @@ well as the predictive distributions with the true distribution. ```python -# Figure +# Figure fig, axes = plt.subplots(ncols=2, nrows=1, figsize=(8, 4)) # Prior and Posterior diff --git a/docs/examples/bootstrap.md b/docs/examples/bootstrap.md index 1b9c562..6af38c6 100644 --- a/docs/examples/bootstrap.md +++ b/docs/examples/bootstrap.md @@ -1,13 +1,13 @@ --- -comments: true +comments: true --- # Bootstrap Comparison In this example, we will compare the bootstrap method with the use of a Bayesian model. -Bootstrap is statistical method which relies on resampling of the -data in order to estimate the uncertainty of a given statistic. +Bootstrap is statistical method which relies on resampling of the +data in order to estimate the uncertainty of a given statistic. In order to do this comparison, the [`pandas-bootstrap` package](https://wd60622.github.io/pandas-bootstrap/) will be used. @@ -30,12 +30,12 @@ import matplotlib.pyplot as plt import bootstrap from conjugate.distributions import ( - Gamma, + Gamma, NegativeBinomial, - Poisson, + Poisson, ) from conjugate.models import ( - poisson_gamma, + poisson_gamma, poisson_gamma_predictive, ) @@ -45,7 +45,7 @@ rng = np.random.default_rng(seed) true_lambda = 1.5 true_distribution = Poisson(true_lambda) -def create_data_generator(true_distribution, rng): +def create_data_generator(true_distribution, rng): def generate_data(size) -> pd.Series: return pd.Series(true_distribution.dist.rvs(size=size, random_state=rng)) return generate_data @@ -57,10 +57,10 @@ generate_data = create_data_generator(true_distribution, rng) ## Bootstrap method In order to generate the statistic for the bootstrap method, we just need to -create function that gets the maximum value of the desired sample size. +create function that gets the maximum value of the desired sample size. The `boot` attribute of the `pandas.Series` is an object from -`pandas-bootstrap` to facilitate the bootstrap process. Read more about it in the +`pandas-bootstrap` to facilitate the bootstrap process. Read more about it in the documentation [here](https://wd60622.github.io/pandas-bootstrap/extensions/). ```python @@ -71,7 +71,7 @@ def stat(data: pd.Series, n: int) -> int: return data.sample(frac=1).iloc[:n].max() def create_bootstrap_stat(n_new: int, samples: int): - def bootstrap_stat(data: pd.Series) -> pd.Series: + def bootstrap_stat(data: pd.Series) -> pd.Series: return data.boot.get_samples(stat, n=n_new, B=samples) return bootstrap_stat @@ -90,8 +90,8 @@ def get_posterior_predictive(data: pd.Series, prior: Gamma) -> NegativeBinomial: posterior = poisson_gamma(x_total=x_total, n=n, prior=prior) return poisson_gamma_predictive(distribution=posterior) -def create_conjugate_stat(n_new: int, samples: int, prior: Gamma): - def conjugate_stat(data: pd.Series) -> pd.Series: +def create_conjugate_stat(n_new: int, samples: int, prior: Gamma): + def conjugate_stat(data: pd.Series) -> pd.Series: posterior_predictive = get_posterior_predictive(data, prior) return pd.Series( posterior_predictive @@ -116,10 +116,10 @@ ns = [5, 25, 50, 100] nrows = 2 ncols = 2 fig, axes = plt.subplots( - nrows=nrows, - ncols=ncols, + nrows=nrows, + ncols=ncols, figsize=(ncols * 5, nrows * 5), - sharex=True, + sharex=True, sharey=True, ) diff --git a/docs/examples/generalized-inputs.md b/docs/examples/generalized-inputs.md index 07ae289..04b1494 100644 --- a/docs/examples/generalized-inputs.md +++ b/docs/examples/generalized-inputs.md @@ -1,9 +1,9 @@ --- -comments: true +comments: true --- # Generalized Numerical Inputs -Conjugate models work with anything that works like numbers. +Conjugate models work with anything that works like numbers. Here are examples of the Binomial and Beta distributions with different packages data as input. For more details on this model, see the [Binomial Model @@ -20,7 +20,7 @@ from conjugate.models import binomial_beta ## Polars -Bayesian models with the [Polars](https://docs.pola.rs/) package: +Bayesian models with the [Polars](https://docs.pola.rs/) package: ```python import polars as pl @@ -47,7 +47,7 @@ Bayesian models in SQL using the SQL Builder, [PyPika](https://github.com/kayak/pypika): ```python -from pypika import Field +from pypika import Field # Columns from table in database N = Field("total") @@ -83,8 +83,8 @@ print("Posterior beta:", posterior.beta) Use [PyMC](https://www.pymc.io/) distributions for sampling with additional uncertainty: -```python -import pymc as pm +```python +import pymc as pm alpha = pm.Gamma.dist(alpha=1, beta=20) beta = pm.Gamma.dist(alpha=1, beta=20) @@ -93,7 +93,7 @@ beta = pm.Gamma.dist(alpha=1, beta=20) N = 10 X = 4 -# Conjugate prior +# Conjugate prior prior = Beta(alpha=alpha, beta=beta) posterior = binomial_beta(n=N, x=X, prior=prior) diff --git a/docs/examples/indexing.md b/docs/examples/indexing.md index aaba7ef..7f08c94 100644 --- a/docs/examples/indexing.md +++ b/docs/examples/indexing.md @@ -1,9 +1,9 @@ --- -comments: true +comments: true --- # Indexing Parameters -The distributions can be indexed for subsets. +The distributions can be indexed for subsets. ```python beta = np.arange(1, 10) @@ -16,4 +16,4 @@ plt.legend() plt.show() ``` -![Sliced Distribution](./../images/sliced-distribution.png) \ No newline at end of file +![Sliced Distribution](./../images/sliced-distribution.png) diff --git a/docs/examples/linear-regression.md b/docs/examples/linear-regression.md index 600bf70..5f822cb 100644 --- a/docs/examples/linear-regression.md +++ b/docs/examples/linear-regression.md @@ -1,5 +1,5 @@ --- -comments: true +comments: true --- We can fit linear regression that includes a predictive distribution for new data using a conjugate prior. This example only has one covariate, but the same approach can be used for multiple covariates. @@ -32,7 +32,7 @@ y = intercept + slope * x + rng.normal(scale=sigma, size=n_points) ## Define Prior and Find Posterior -There needs to be a prior for the intercept, slope, and the variance. +There needs to be a prior for the intercept, slope, and the variance. ```python prior = NormalInverseGamma( @@ -72,7 +72,7 @@ df_samples = pd.DataFrame(samples, index=x_new) ## Plot Results -We can see that the posterior predictive distribution begins to widen as we move away from the data. +We can see that the posterior predictive distribution begins to widen as we move away from the data. Overall, the posterior predictive distribution is a good fit for the data. The true line is within the 95% posterior predictive interval. diff --git a/docs/examples/plotting.md b/docs/examples/plotting.md index 8cea67e..c6f8bd3 100644 --- a/docs/examples/plotting.md +++ b/docs/examples/plotting.md @@ -1,5 +1,5 @@ --- -comments: true +comments: true --- # Plotting Distributions @@ -8,13 +8,13 @@ methods. The `plot_pdf` method is used for continuous distributions and the `plot_pmf` method is used for discrete distributions. Similarly, all distributions have a `plot_cdf` method for plotting the -cumulative distribution function. +cumulative distribution function. There is limited support for some distributions like the `Dirichlet` or those without a `dist` scipy. -```python +```python from conjugate.distributions import Beta, Gamma, Normal import matplotlib.pyplot as plt diff --git a/docs/examples/sampling-distributions.md b/docs/examples/sampling-distributions.md index 9ba3633..3fa33a3 100644 --- a/docs/examples/sampling-distributions.md +++ b/docs/examples/sampling-distributions.md @@ -1,5 +1,5 @@ --- -comments: true +comments: true --- # Sampling from Distributions @@ -26,27 +26,27 @@ samples = true_distribution.dist.rvs(n_samples) ## Vector parameter -If the parameter is a vector, then there will be a broadcast issue from the scipy +If the parameter is a vector, then there will be a broadcast issue from the scipy distribution. ```python import numpy as np lam = np.array([ - [1, 2], - [0.5, 5], + [1, 2], + [0.5, 5], ]) true_distribution = Exponential(lam=lam) n_samples = 100 -try: +try: true_distribution.dist.rvs(n_samples) -except ValueError: +except ValueError: print("The number of samples doesn't broadcast with the shape of parameters!") ``` -However, this is easy to fix by prepending the number of samples to the shape of +However, this is easy to fix by prepending the number of samples to the shape of the model parameter shape ```python @@ -56,8 +56,8 @@ samples = true_distribution.dist.rvs(size=size, random_state=rng) ## Vector parameters -If there are many parameters in your model, then use the `np.broadcast_shapes` -function in order to get the correct shape before sampling +If there are many parameters in your model, then use the `np.broadcast_shapes` +function in order to get the correct shape before sampling ```python from conjugate.distributions import Normal @@ -71,4 +71,3 @@ shape = np.broadcast_shapes(mu.shape, sigma.shape) size = (n_samples, *shape) samples = true_distribution.dist.rvs(size=size, random_state=rng) ``` - diff --git a/docs/examples/scaling-distributions.md b/docs/examples/scaling-distributions.md index 22b98b4..2536adc 100644 --- a/docs/examples/scaling-distributions.md +++ b/docs/examples/scaling-distributions.md @@ -1,9 +1,9 @@ --- -comments: true +comments: true --- -# Scaling Distributions +# Scaling Distributions -Some of the distributions can be scaled by a constant factor or added together. For instance, operations with Poisson distribution represent the number of events in a given time interval. +Some of the distributions can be scaled by a constant factor or added together. For instance, operations with Poisson distribution represent the number of events in a given time interval. ```python from conjugate.distributions import Poisson @@ -30,7 +30,7 @@ plt.show() ![Scaled Poisson](../images/poisson-scaling-example.png) -The normal distribution also supports scaling making use of the fact that the variance of a scaled normal distribution is the square of the scaling factor. +The normal distribution also supports scaling making use of the fact that the variance of a scaled normal distribution is the square of the scaling factor. ```python from conjugate.distributions import Normal @@ -47,4 +47,4 @@ ax.legend() plt.show() ``` -![Scaled Normal](../images/normal-scaling-example.png) \ No newline at end of file +![Scaled Normal](../images/normal-scaling-example.png) diff --git a/docs/examples/scipy-connection.md b/docs/examples/scipy-connection.md index 5ef1fab..39f8527 100644 --- a/docs/examples/scipy-connection.md +++ b/docs/examples/scipy-connection.md @@ -1,15 +1,15 @@ --- -comments: true +comments: true --- # Connection to SciPy Distributions Many distributions have the `dist` attribute which is a scipy.stats distribution object. From there, the methods from scipy.stats to get the pdf, cdf, etc can be leveraged. -```python -from conjugate.distribution import Beta +```python +from conjugate.distribution import Beta beta = Beta(1, 1) -scipy_dist = beta.dist +scipy_dist = beta.dist print(scipy_dist.mean()) # 0.5 @@ -17,4 +17,4 @@ print(scipy_dist.ppf([0.025, 0.975])) # [0.025 0.975] samples = scipy_dist.rvs(100) -``` \ No newline at end of file +``` diff --git a/docs/examples/sql.md b/docs/examples/sql.md index c3287d9..0f20ea6 100644 --- a/docs/examples/sql.md +++ b/docs/examples/sql.md @@ -1,13 +1,13 @@ -# Bayesian Models with SQL +# Bayesian Models with SQL Because `conjugate-models` works with [general numerical inputs](generalized-inputs.md), we can use Bayesian models in SQL with the SQL builder, `PyPika`. -For the example, we will estimate use normal model to estimate the -total sales amount by group. +For the example, we will estimate use normal model to estimate the +total sales amount by group. -The example table is called `events` and we will assume a normal model for the +The example table is called `events` and we will assume a normal model for the column `sales` for each value of the column `group`. We can create the sufficient statistics needed for `normal_normal_inverse_gamma` @@ -38,7 +38,7 @@ query = ( ``` Perform the Bayesian inference as usual, but using the variables reflecting -the columns. +the columns. ```python from conjugate.distributions import NormalInverseGamma @@ -69,7 +69,7 @@ query = query.select( ) ``` -Which results in this query: +Which results in this query: ```sql SELECT "group", @@ -79,4 +79,3 @@ SELECT "group", FROM "events" GROUP BY "group" ``` - diff --git a/docs/examples/thompson.md b/docs/examples/thompson.md index 21654eb..114121c 100644 --- a/docs/examples/thompson.md +++ b/docs/examples/thompson.md @@ -1,5 +1,5 @@ --- -comments: true +comments: true --- # Thompson Sampling @@ -10,7 +10,7 @@ which makes use of posterior distributions for the variable of interest. ## Minimize Waiting Time We will assume an exponential distribution wait time for each group with an unknown -average wait time for each group. +average wait time for each group. The conjugate prior of the exponential distribution is a gamma distribution. @@ -31,7 +31,7 @@ n_groups = len(lam) true_dist = Exponential(lam=lam) ``` -We will create some helper functions to abstract: +We will create some helper functions to abstract: - sampling from the true distribution of a group - create the statistics required for Bayesian update of exponential gamma model @@ -39,18 +39,18 @@ We will create some helper functions to abstract: ```python def sample_true_distribution( - group_to_sample: int, - rng, + group_to_sample: int, + rng, true_dist: Exponential = true_dist, ) -> float: return true_dist[group_to_sample].dist.rvs(random_state=rng) def bayesian_update_stats( - group_sampled: int, - group_sample: float, + group_sampled: int, + group_sample: float, n_groups: int = n_groups, -) -> tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: x = np.zeros(n_groups) n = np.zeros(n_groups) @@ -58,9 +58,9 @@ def bayesian_update_stats( n[group_sampled] = 1 return x, n - -def thompson_step(estimate: Gamma, rng) -> Gamma: + +def thompson_step(estimate: Gamma, rng) -> Gamma: sample = estimate.dist.rvs(random_state=rng) group_to_sample = np.argmin(sample) @@ -81,7 +81,7 @@ estimate = Gamma(alpha, beta) rng = np.random.default_rng(42) total_samples = 250 -for _ in range(total_samples): +for _ in range(total_samples): estimate = thompson_step(estimate=estimate, rng=rng) ``` @@ -96,7 +96,7 @@ ax = axes[0] estimate.set_max_value(2).plot_pdf(label=lam, ax=ax) ax.legend(title="True Mean") ax.set( - xlabel="Mean Wait Time", + xlabel="Mean Wait Time", title="Posterior Distribution by Group", ) @@ -104,10 +104,10 @@ ax = axes[1] n_times_sampled = estimate.beta - 1 ax.scatter(lam, n_times_sampled / total_samples) ax.set( - xlabel="True Mean Wait Time", - ylabel="% of times sampled", + xlabel="True Mean Wait Time", + ylabel="% of times sampled", ylim=(0, None), - title="Exploitation of Best Group", + title="Exploitation of Best Group", ) # Format yaxis as percentage ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f"{x:.0%}")) diff --git a/docs/examples/unsupported-distributions.md b/docs/examples/unsupported-distributions.md index be6bfd5..0f9fac2 100644 --- a/docs/examples/unsupported-distributions.md +++ b/docs/examples/unsupported-distributions.md @@ -1,12 +1,12 @@ --- -comments: true +comments: true --- # Unsupported Posterior Predictive Distributions Suppose we want to use the Pareto model with a Gamma prior which doesn't have a -supported distribution for the posterior predictive. +supported distribution for the posterior predictive. -We can get posterior predictive samples by: +We can get posterior predictive samples by: 1. Sample from the posterior distribution 2. Sample from the model distribution using posterior samples @@ -42,9 +42,9 @@ posterior: Gamma = pareto_gamma( ## 1. Using `conjugate-models` -Since the distributions are vectorized, just: +Since the distributions are vectorized, just: -1. Get the number of samples from the posterior +1. Get the number of samples from the posterior 2. Take a single sample from the model distribution ```python @@ -56,9 +56,9 @@ posterior_predictive_samples = Pareto(x_m=x_m, alpha=alpha_samples).dist.rvs(ran ## 2. Using PyMC -Another route would be using PyMC then use the `draw` function. +Another route would be using PyMC then use the `draw` function. -```python +```python import pymc as pm posterior_alpha = pm.Gamma.dist(alpha=posterior.alpha, beta=posterior.beta) diff --git a/docs/examples/vectorized-inputs.md b/docs/examples/vectorized-inputs.md index a4c5631..0011a4e 100644 --- a/docs/examples/vectorized-inputs.md +++ b/docs/examples/vectorized-inputs.md @@ -1,13 +1,13 @@ --- -comments: true +comments: true --- # Vectorized Inputs -All data and priors will allow for vectorized assuming the shapes work for broadcasting. +All data and priors will allow for vectorized assuming the shapes work for broadcasting. The plotting also supports arrays of results -```python +```python import numpy as np from conjugate.distributions import Beta @@ -19,7 +19,7 @@ import matplotlib.pyplot as plt N = 10 x = 4 -# Analytics +# Analytics prior = Beta(alpha=1, beta=np.array([1, 10])) posterior = binomial_beta(n=N, x=x, prior=prior) diff --git a/docs/index.md b/docs/index.md index c77eb9f..357e5cc 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,12 +1,12 @@ --- hide: - - navigation + - navigation --- # Conjugate Models [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) -[![Tests](https://github.com/wd60622/conjugate/actions/workflows/tests.yml/badge.svg)](https://github.com/wd60622/conjugate/actions/workflows/tests.yml) -[![PyPI version](https://badge.fury.io/py/conjugate-models.svg)](https://badge.fury.io/py/conjugate-models) +[![Tests](https://github.com/wd60622/conjugate/actions/workflows/tests.yml/badge.svg)](https://github.com/wd60622/conjugate/actions/workflows/tests.yml) +[![PyPI version](https://badge.fury.io/py/conjugate-models.svg)](https://badge.fury.io/py/conjugate-models) [![docs](https://github.com/wd60622/conjugate/actions/workflows/docs.yml/badge.svg)](https://wd60622.github.io/conjugate/) [![codecov](https://codecov.io/github/wd60622/conjugate/branch/main/graph/badge.svg)](https://app.codecov.io/github/wd60622/conjugate) @@ -15,7 +15,7 @@ Bayesian conjugate models in Python ## Installation -```bash +```bash pip install conjugate-models ``` @@ -29,7 +29,7 @@ pip install conjugate-models - Out of box compatibility with `polars`, `pandas`, `numpy`, and more. - [Unsupported Distributions](examples/pymc-sampling.md) for sampling from unsupported distributions -## Supported Models +## Supported Models Many likelihoods are supported including @@ -45,7 +45,7 @@ Many likelihoods are supported including 1. Pass data and prior into model from `models` modules 1. Analytics with posterior and posterior predictive distributions -```python +```python from conjugate.distributions import Beta, BetaBinomial from conjugate.models import binomial_beta, binomial_beta_predictive @@ -58,7 +58,7 @@ prior = Beta(1, 1) prior_predictive: BetaBinomial = binomial_beta_predictive(n=N, distribution=prior) posterior: Beta = binomial_beta(n=N, x=x, prior=prior) -posterior_predictive: BetaBinomial = binomial_beta_predictive(n=N, distribution=posterior) +posterior_predictive: BetaBinomial = binomial_beta_predictive(n=N, distribution=posterior) ``` From here, do any analysis you'd like! @@ -113,7 +113,7 @@ plt.scatter(n, samples, color="black", label="observed samples") plt.plot(n, posterior.dist.mean(), color="red", label="posterior mean") # fill between the 95% credible interval plt.fill_between( - n, + n, posterior.dist.ppf(0.025), posterior.dist.ppf(0.975), color="red", @@ -133,7 +133,7 @@ plt.show() Even with a moving probability, this simple to implement model can be useful. -```python +```python ... def sigmoid(x): diff --git a/docs/mixins.md b/docs/mixins.md index 662557f..9dd2487 100644 --- a/docs/mixins.md +++ b/docs/mixins.md @@ -1,5 +1,5 @@ --- -comments: true +comments: true --- # Mixins diff --git a/docs/models.md b/docs/models.md index ca99c14..67f8521 100644 --- a/docs/models.md +++ b/docs/models.md @@ -1,6 +1,6 @@ --- -comments: true +comments: true --- -# Models +# Models -::: conjugate.models \ No newline at end of file +::: conjugate.models diff --git a/docs/overrides/partials/comments.html b/docs/overrides/partials/comments.html index 91f8f2c..9bd3322 100644 --- a/docs/overrides/partials/comments.html +++ b/docs/overrides/partials/comments.html @@ -30,7 +30,7 @@

{{ lang.t("meta.comments") }}

: "light" // Instruct Giscus to set theme - giscus.setAttribute("data-theme", theme) + giscus.setAttribute("data-theme", theme) } // Register event handlers after documented loaded diff --git a/mkdocs.yml b/mkdocs.yml index b2ee2ef..aa53968 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -31,7 +31,7 @@ repo_url: https://github.com/wd60622/conjugate edit_uri: blob/main/docs/ site_url: https://wd60622.github.io/conjugate -extra: +extra: homepage: https://wd60622.github.io/ social: - icon: fontawesome/brands/github @@ -39,12 +39,12 @@ extra: nav: - Overview: index.md - - Modules: + - Modules: - Models: models.md - Distributions: distributions.md - Mixins: mixins.md - - Examples: - - Case Studies: + - Examples: + - Case Studies: - Binomial Model: examples/binomial.md - Bayesian Update: examples/bayesian-update.md - Thompson Sampling: examples/thompson.md @@ -52,7 +52,7 @@ nav: - Unsupported Distributions: examples/unsupported-distributions.md - Inference in SQL: examples/sql.md - Bootstrap Comparison: examples/bootstrap.md - - Features: + - Features: - Plotting: examples/plotting.md - SciPy Distributions: examples/scipy-connection.md - Sampling Distributions: examples/sampling-distributions.md @@ -78,10 +78,10 @@ markdown_extensions: - pymdownx.inlinehilite - pymdownx.snippets - pymdownx.superfences - - pymdownx.arithmatex: + - pymdownx.arithmatex: generic: true -extra_javascript: +extra_javascript: - javascripts/mathjax.js - https://polyfill.io/v3/polyfill.min.js?features=es6 - - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js + - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js diff --git a/pyproject.toml b/pyproject.toml index 6fa36e6..55ac121 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,8 +68,8 @@ legacy_tox_ini = """ py310 [testenv] - deps = - pytest + deps = + pytest pytest-cov pytest-mpl pypika