Skip to content

Commit 75d108d

Browse files
committed
added pdoc
1 parent 1a0a54c commit 75d108d

File tree

7 files changed

+143
-109
lines changed

7 files changed

+143
-109
lines changed

.github/workflows/python-docs.yml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
name: website
2+
3+
# build the documentation whenever there are new commits on main
4+
on:
5+
push:
6+
branches:
7+
- main
8+
# Alternative: only build for tags.
9+
# tags:
10+
# - '*'
11+
12+
# security: restrict permissions for CI jobs.
13+
permissions:
14+
contents: read
15+
16+
jobs:
17+
# Build the documentation and upload the static HTML files as an artifact.
18+
build:
19+
runs-on: ubuntu-latest
20+
steps:
21+
- uses: actions/checkout@v4
22+
- uses: actions/setup-python@v5
23+
with:
24+
python-version: '3.x'
25+
26+
# ADJUST THIS: install all dependencies (including pdoc)
27+
- run: |
28+
pip install pdoc
29+
pip install -e .
30+
pdoc incrementalstats -o docs
31+
32+
- uses: actions/upload-pages-artifact@v3
33+
with:
34+
path: docs/
35+
36+
# Deploy the artifact to GitHub pages.
37+
# This is a separate job so that only actions/deploy-pages has the necessary permissions.
38+
deploy:
39+
needs: build
40+
runs-on: ubuntu-latest
41+
permissions:
42+
pages: write
43+
id-token: write
44+
environment:
45+
name: github-pages
46+
url: ${{ steps.deployment.outputs.page_url }}
47+
steps:
48+
- id: deployment
49+
uses: actions/deploy-pages@v4

README.md

Lines changed: 5 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,14 @@
1-
# Incremental stats
1+
# Install
22

33
`pip install incrementalstats`
44

5-
A few incremental 1st order statistics in numpy. Currently:
5+
# Stats
66

7+
This package implements a few incremental 1st order statistics in numpy.
8+
9+
Currently:
710
- Correlation (Pearson)
811
- Covariance
912
- Variance
1013
- Mean
1114
- Welch-t
12-
13-
## Setup a venv
14-
15-
mkdir venv
16-
virtualenv -p `which python3` venv
17-
source venv/bin/activate
18-
19-
## Option 1: Checkout hackable project
20-
21-
python -m pip install --upgrade pip
22-
git clone https://github.com/ceesb/python-incrementalstats
23-
pip install -e python-incrementalstats
24-
cd python-incrementalstats
25-
26-
Now all the changes you make to this project source code are "live".
27-
28-
## Option 2: Build a wheel
29-
30-
git clone https://github.com/ceesb/python-incrementalstats
31-
cd python-incrementalstats
32-
python -m build
33-
34-
Now you can distribute or install the wheel created in the `dist` folder.
35-
36-
$ ls dist/
37-
incrementalstats-0.0.1-py3-none-any.whl incrementalstats-0.0.1.tar.gz
38-
pip install dist/incrementalstats-0.0.1-py3-none-any.whl
39-
40-
## Run the tests
41-
42-
python -m unittest

src/incrementalstats/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
"""
2+
.. include:: ../../README.md
3+
"""
4+
15
from .mean_var import IncrementalMeanVariance
26
from .covariance_correlation import IncrementalCovarianceCorrelation
37
from .welcht import IncrementalWelcht

src/incrementalstats/covariance_correlation.py

Lines changed: 32 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class IncrementalCovarianceCorrelation:
2626
for row in range(nX):
2727
ic.update(m1[row,:], m2[row,:])
2828
29-
reference_covariance = (1 / (nX - 1)) * np.matmul((m1 - np.mean(m1, axis=0)).T, (m2 - np.mean(m2, axis=0)))
29+
reference_covariance = (1 / (nX - 1)) * np.matmul((m1 - np.mean(m1, axis=0)).T, (m2 - np.mean(m2, axis=0)))
3030
reference_m1_stddev = np.std(m1, axis=0, ddof=1)
3131
reference_m2_stddev = np.std(m2, axis=0, ddof=1)
3232
reference_1_over_m1_stddev = (1 / reference_m1_stddev).reshape(reference_m1_stddev.size, 1)
@@ -41,57 +41,61 @@ class IncrementalCovarianceCorrelation:
4141

4242
def __init__(self, nX, nY):
4343
"""Initialize with the #columns for matrices A and B"""
44-
self.nX = nX
45-
self.nY = nY
46-
self.imX = IncrementalMeanVariance(nX)
47-
self.imY = IncrementalMeanVariance(nY)
48-
self.cov = np.zeros((nX, nY), dtype=np.float64)
49-
self.n = 0
44+
self._nX = nX
45+
self._nXY = nY
46+
self._imX = IncrementalMeanVariance(nX)
47+
self._imY = IncrementalMeanVariance(nY)
48+
self._cov = np.zeros((nX, nY), dtype=np.float64)
49+
self._n = 0
5050

5151
def update(self, x, y):
5252
"""Updates the covariance matrix with a single row of matrix A and a single row of matrix B"""
53-
if len(x) != self.nX:
53+
if len(x) != self._nX:
5454
raise Exception("wrong x length")
55-
if len(y) != self.nY:
55+
if len(y) != self._nXY:
5656
raise Exception("wrong y length")
5757

58-
self.n += 1
59-
f = (self.n - 1) / self.n
58+
self._n += 1
59+
f = (self._n - 1) / self._n
6060

61-
mfX = (x - self.imX.mean) * f
62-
mfY = y - self.imY.mean
61+
mfX = (x - self._imX.getMean()) * f
62+
mfY = y - self._imY.getMean()
6363

64-
self.cov += np.tensordot(mfX, mfY, axes=0)
64+
self._cov += np.tensordot(mfX, mfY, axes=0)
6565

66-
self.imX.update(x)
67-
self.imY.update(y)
66+
self._imX.update(x)
67+
self._imY.update(y)
6868

6969
def add(self, x: IncrementalCovarianceCorrelation):
7070
"""Merges another object of IncrementalCovarianceCorrelation into this co-variance matrix. This is useful in
7171
parallelized computations, where different nodes compute co-variances over different
7272
ranges of rows"""
73-
n = self.n + x.n
74-
f = (self.n * x.n ** 2 + x.n * self.n ** 2) / (n ** 2)
73+
n = self._n + x._n
74+
f = (self._n * x._n ** 2 + x._n * self._n ** 2) / (n ** 2)
7575

76-
deltaX = self.imX.mean - x.imX.mean
76+
deltaX = self._imX.getMean() - x._imX.getMean()
7777
deltaX = deltaX.reshape(deltaX.size, 1) * f
7878

79-
deltaY = self.imY.mean - x.imY.mean
79+
deltaY = self._imY.getMean() - x._imY.getMean()
8080
deltaY = deltaY.reshape(1, deltaY.size)
8181

82-
self.cov += x.cov + deltaX * deltaY
83-
self.n = n
84-
self.imX.add(x.imX)
85-
self.imY.add(x.imY)
82+
self._cov += x._cov + deltaX * deltaY
83+
self._n = n
84+
self._imX.add(x._imX)
85+
self._imY.add(x._imY)
8686

8787
def getCovariance(self):
8888
"""Returns the scaled co-variance matrix with 1 degree of freedom"""
89-
return 1 / (self.n - 1) * self.cov
89+
return 1 / (self._n - 1) * self._cov
9090

9191
def getCorrelation(self):
9292
"""Returns Pearson's correlation matrix"""
93-
sX = 1 / np.sqrt(self.imX.getVariance())
93+
sX = 1 / np.sqrt(self._imX.getVariance())
9494
sX = sX.reshape(sX.size, 1)
95-
sY = 1 / np.sqrt(self.imY.getVariance())
95+
sY = 1 / np.sqrt(self._imY.getVariance())
9696
sY = sY.reshape(1, sY.size)
97-
return 1 / (self.n - 1) * self.cov * sX * sY
97+
return 1 / (self._n - 1) * self._cov * sX * sY
98+
99+
def getN(self):
100+
"""Number of observations"""
101+
return self._n

src/incrementalstats/mean_var.py

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ class IncrementalMeanVariance:
2323
for row in range(nX):
2424
im.update(m[row,:])
2525
26-
reference_mean = m.mean(axis=0)
27-
reference_variance = m.var(axis=0, ddof=1)
26+
reference_mean = m.mean(axis=0)
27+
reference_variance = m.var(axis=0, ddof=1)
2828
assert np.allclose(im.getMean(), reference_mean)
2929
assert np.allclose(im.getVariance(), reference_variance)
3030
```
@@ -35,40 +35,44 @@ def __init__(self, ncolumns):
3535
"""Initialize with the #columns of the hypothetical matrix M over
3636
which we will compute the mean / variance"""
3737
nX = ncolumns
38-
self.nX = nX
39-
self.mean = np.zeros(nX, dtype=np.float64)
40-
self.var = np.zeros(nX, dtype=np.float64)
41-
self.n = 0
38+
self._nX = nX
39+
self._mean = np.zeros(nX, dtype=np.float64)
40+
self._var = np.zeros(nX, dtype=np.float64)
41+
self._n = 0
4242

4343
def add(self, other: IncrementalMeanVariance):
4444
"""Merges another object of IncrementalMeanVariance into this mean/variance. This is useful in
4545
parallelized computations, where different nodes compute mean/variance over different
4646
ranges of rows"""
4747
x = other
48-
n = self.n + x.n
49-
delta = x.mean - self.mean
50-
self.mean += x.n * (delta / n)
51-
self.var += x.var + self.n * x.n * delta ** 2 / n
52-
self.n = n
48+
n = self._n + x._n
49+
delta = x._mean - self._mean
50+
self._mean += x._n * (delta / n)
51+
self._var += x._var + self._n * x._n * delta ** 2 / n
52+
self._n = n
5353

5454
def update(self, row):
5555
x = row
5656
"""Updates the mean/variance with a single row. """
57-
if len(x) != self.nX:
57+
if len(x) != self._nX:
5858
raise Exception("wrong length")
5959

60-
self.n += 1
61-
y1 = x - self.mean
62-
self.mean += y1 / self.n
63-
y2 = x - self.mean
64-
self.var += y1 * y2
60+
self._n += 1
61+
y1 = x - self._mean
62+
self._mean += y1 / self._n
63+
y2 = x - self._mean
64+
self._var += y1 * y2
6565

6666
def getMean(self):
6767
"""Returns the current mean"""
68-
return self.mean.copy()
68+
return self._mean.copy()
6969

7070
def getVariance(self):
7171
"""Returns the current variance"""
72-
if self.n < 2:
72+
if self._n < 2:
7373
raise Exception("not enough data")
74-
return 1/(self.n - 1) * self.var
74+
return 1/(self._n - 1) * self._var
75+
76+
def getN(self):
77+
"""Number of observations"""
78+
return self._n

src/incrementalstats/welcht.py

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,51 @@
1-
# function tval(m1, m2, v1, v2, n1, n2)
2-
# x = (m1 - m2) / sqrt((v1 / n1) + (v2 / n2) + eps(0.0))
3-
# if isnan(x)
4-
# return 0.0
5-
# else
6-
# return x
7-
# end
8-
# end
9-
10-
111
from __future__ import annotations
122
import numpy as np
133

144
from .mean_var import IncrementalMeanVariance
155

166
class IncrementalWelcht:
177
"""
18-
Incremental Welch-t between 2 groups
8+
Incremental Welch-t between 2 groups (0 and not 0)
199
"""
2010

2111
def __init__(self, nsamples):
2212
"""Initialize with the #samples we're computing Welch-t over"""
2313
nX = nsamples
24-
self.mv0 = IncrementalMeanVariance(nX)
25-
self.mv1 = IncrementalMeanVariance(nX)
26-
self.n = 0
14+
self._mv0 = IncrementalMeanVariance(nX)
15+
self._mv1 = IncrementalMeanVariance(nX)
16+
self._n = 0
2717

2818
def add(self, other: IncrementalWelcht):
29-
self.mv0.add(other.mv0)
30-
self.mv1.add(other.mv1)
19+
"""Merges another object of IncrementalWelcht into this object. This is useful in
20+
parallelized computations, where different nodes compute Welch-t over different
21+
ranges of rows"""
22+
self._mv0.add(other._mv0)
23+
self._mv1.add(other._mv1)
3124

3225
def update(self, group, row):
26+
"""Updates the Welch-t state with a group id (0 or not 0) and a single row of samples"""
3327
if group == 0:
34-
self.mv0.update(row)
28+
self._mv0.update(row)
3529
else:
36-
self.mv1.update(row)
30+
self._mv1.update(row)
3731

3832
def getWelcht(self):
39-
m0 = self.mv0.getMean()
40-
v0 = self.mv0.getVariance()
41-
n0 = self.mv0.n
42-
m1 = self.mv1.getMean()
43-
v1 = self.mv1.getVariance()
44-
n1 = self.mv1.n
33+
"""Returns the Welch-t statistic. NaNs are zero'd and a small factor is added in the
34+
denominator to prevent infinities"""
35+
m0 = self._mv0.getMean()
36+
v0 = self._mv0.getVariance()
37+
n0 = self._mv0.getN()
38+
m1 = self._mv1.getMean()
39+
v1 = self._mv1.getVariance()
40+
n1 = self._mv1.getN()
4541

4642
x = (m0 - m1) / np.sqrt((v0 / n0) + (v1 / n1) + 1e-12)
4743

4844
x = np.nan_to_num(x, copy = False)
4945

5046
return x
47+
48+
def getN(self):
49+
"""Number of observations"""
50+
return self._mv0.getN() + self._mv1.getN()
51+

tests/test_incremental_stats.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ def testCorrectness(self):
6464
self.assertTrue(np.allclose(ic1.getCovariance(), numpy_covariance))
6565
self.assertTrue(np.allclose(ic1.getCorrelation(), numpy_correlation))
6666

67-
self.assertTrue(np.allclose(ic1.imX.getMean(), ic.imX.getMean()))
68-
self.assertTrue(np.allclose(ic1.imY.getMean(), ic.imY.getMean()))
69-
self.assertTrue(np.allclose(ic1.imX.getVariance(), ic.imX.getVariance()))
70-
self.assertTrue(np.allclose(ic1.imY.getVariance(), ic.imY.getVariance()))
67+
self.assertTrue(np.allclose(ic1._imX.getMean(), ic._imX.getMean()))
68+
self.assertTrue(np.allclose(ic1._imY.getMean(), ic._imY.getMean()))
69+
self.assertTrue(np.allclose(ic1._imX.getVariance(), ic._imX.getVariance()))
70+
self.assertTrue(np.allclose(ic1._imY.getVariance(), ic._imY.getVariance()))
7171

7272
if __name__ == "__main__":
7373
unittest.main(verbosity=2)

0 commit comments

Comments
 (0)