Commit
pre-commit, remove poetry, bug fix
felixp8 committed Jan 21, 2024
1 parent bec1851 commit d75fc9a
Showing 7 changed files with 737 additions and 348 deletions.
12 changes: 12 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,12 @@
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.2.0
    hooks:
      - id: check-yaml
      - id: end-of-file-fixer
      - id: trailing-whitespace
  - repo: https://github.com/psf/black
    rev: 22.8.0
    hooks:
      - id: black
        exclude: ^docs/
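Note: once this file is checked in, running pre-commit install sets up the git hook, and pre-commit run --all-files applies check-yaml, end-of-file-fixer, trailing-whitespace, and black (excluding docs/) across the whole repo.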
239 changes: 156 additions & 83 deletions nlb_tools/evaluation.py

Large diffs are not rendered by default.

569 changes: 353 additions & 216 deletions nlb_tools/nwb_interface.py

Large diffs are not rendered by default.

40 changes: 2 additions & 38 deletions pyproject.toml
@@ -1,39 +1,3 @@
-[tool.poetry]
-name = "nlb_tools"
-version = "0.0.2"
-license = "MIT"
-description = "Python tools for participating in Neural Latents Benchmark '21"
-authors = [
-    "Felix Pei <felp8484@gmail.com>"
-]
-packages = [
-    {include = "nlb_tools"}
-]
-
-readme = "README.md"
-homepage = "https://github.com/neurallatents/nlb_tools"
-classifiers = [
-    "Intended Audience :: Science/Research",
-    "Operating System :: Microsoft :: Windows",
-    "Operating System :: MacOS",
-    "Operating System :: Unix",
-    "License :: OSI Approved :: MIT License",
-    "Programming Language :: Python :: 3",
-]
-
-[tool.poetry.dependencies]
-python = "^3.7"
-pandas = ">=1.0.0,<= 1.3.4"
-scipy = "*"
-numpy = "*"
-scikit-learn = "*"
-h5py = ">=2.9,<4"
-pynwb = "*"
-
-[tool.poetry.dev-dependencies]
-pytest = "*"
-dandi = "*"
-
 [build-system]
-requires = ["poetry-core>=1.0.0"]
-build-backend = "poetry.core.masonry.api"
+requires = ["setuptools>=61.0.0", "wheel"]
+build-backend = "setuptools.build_meta"
6 changes: 6 additions & 0 deletions requirements.txt
@@ -0,0 +1,6 @@
pandas>=1.0.0,<=1.3.4
scipy
numpy
scikit-learn
h5py<4,>=2.9
pynwb
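Note: these pins mirror the former [tool.poetry.dependencies] table, and setup.py (below) reads this file for install_requires, so pip install -r requirements.txt and a regular package install resolve the same versions.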
39 changes: 29 additions & 10 deletions setup.py
@@ -1,16 +1,35 @@
 from setuptools import setup, find_packages

+with open("README.md") as f:
+    long_description = f.read()
+
+with open("requirements.txt") as f:
+    requirements = f.readlines()
+
 setup(
-    name='nlb_tools',
-    version='0.0.2',
+    name="nlb_tools",
+    version="0.0.3",
     description="Python tools for participating in Neural Latents Benchmark '21",
     packages=find_packages(),
-    install_requires=[
-        'pandas>=1.0.0,<=1.3.4',
-        'scipy',
-        'numpy',
-        'scikit-learn',
-        'h5py<4,>=2.9',
-        'pynwb',
-    ],
-    author="Felix Pei",
+    install_requires=requirements,
+    classifiers=[
+        "Intended Audience :: Science/Research",
+        "Operating System :: Microsoft :: Windows",
+        "Operating System :: MacOS",
+        "Operating System :: Unix",
+        "License :: OSI Approved :: MIT License",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+    ],
+    author="Neural Latents",
+    extras_require={
+        "dev": ["pytest", "dandi"],
+    },
     license="MIT",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    python_requires=">=3.7",
+    setup_requires=["setuptools>=61.0.0", "wheel"],
+    url="https://github.com/neurallatents/nlb_tools",
 )
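Note: with this layout, pip install -e . gives an editable install, and pip install -e ".[dev]" additionally pulls in pytest and dandi via the new dev extra (replacing poetry's dev-dependencies group).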
180 changes: 179 additions & 1 deletion tests/test_evaluate.py
@@ -1 +1,179 @@
-# TODO: make tests of main evaluation functions
import pytest
import numpy as np
from scipy.special import gammaln

from nlb_tools.evaluation import (
    evaluate,
    neg_log_likelihood,
    bits_per_spike,
    fit_and_eval_decoder,
    eval_psth,
    speed_tp_correlation,
    velocity_decoding,
)


# -- NLL and bits/spike ----------
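# Note: the expected values below assume the Poisson negative log-likelihood,
# summed over all entries:
#     NLL = sum(rates - spikes * log(rates) + log(spikes!))
# with the log-factorial term computed as gammaln(spikes + 1).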


def test_neg_log_likelihood():
    """Test that NLL computation is correct"""
    # randomized test
    for _ in range(20):
        spikes = np.random.randint(low=0, high=5, size=(10, 100, 10)).astype(float)
        rates = np.random.exponential(scale=1.0, size=(10, 100, 10))

        expected_nll = np.sum(rates - spikes * np.log(rates) + gammaln(spikes + 1.0))
        actual_nll = neg_log_likelihood(rates, spikes)
        assert np.isclose(expected_nll, actual_nll)


def test_neg_log_likelihood_mismatched_shapes():
    """Test that NLL computation fails when shapes don't match"""
    # randomized test
    spikes = np.random.randint(low=0, high=5, size=(10, 100, 8)).astype(float)
    rates = np.random.exponential(scale=1.0, size=(10, 100, 10))

    with pytest.raises(AssertionError):
        neg_log_likelihood(rates, spikes)


def test_neg_log_likelihood_negative_rates():
    """Test that NLL computation fails when rates are negative"""
    # randomized test
    spikes = np.random.randint(low=0, high=5, size=(10, 100, 10)).astype(float)
    rates = np.random.exponential(scale=1.0, size=(10, 100, 10))
    rates -= np.min(rates) + 5  # guarantee negative rates

    with pytest.raises(AssertionError):
        neg_log_likelihood(rates, spikes)


def test_neg_log_likelihood_drop_nans():
    """Test that NLL computation is correct when there are nans in either rates or spikes"""
    # randomized test
    for _ in range(20):
        spikes = np.random.randint(low=0, high=5, size=(10, 100, 10)).astype(float)
        rates = np.random.exponential(scale=1.0, size=(10, 100, 10))
        mask = np.random.rand(10, 100, 10) > 0.9
        spikes[mask] = np.nan
        if np.random.rand() > 0.5:  # rates need not contain nans
            rates[mask] = np.nan

        expected_nll = np.sum(
            rates[~mask]
            - spikes[~mask] * np.log(rates[~mask])
            + gammaln(spikes[~mask] + 1.0)
        )
        actual_nll = neg_log_likelihood(rates, spikes)
        assert np.isclose(expected_nll, actual_nll)


def test_neg_log_likelihood_mismatched_nans():
    """Test that NLL computation fails when rates and spikes have mismatched nans"""
    # randomized test
    spikes = np.random.randint(low=0, high=5, size=(10, 100, 10)).astype(float)
    rates = np.random.exponential(scale=1.0, size=(10, 100, 10))
    mask = np.random.rand(10, 100, 10)
    # make sure spikes and rates have different nans
    spikes[mask < 0.1] = np.nan
    rates[mask > 0.9] = np.nan

    with pytest.raises(AssertionError):
        neg_log_likelihood(rates, spikes)


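# Bits per spike compares the model NLL to a null model that predicts each
# neuron's mean rate across all trials and timesteps:
#     bps = (null NLL - model NLL) / (total spike count * log 2)
# so positive values mean the rates explain the spikes better than the mean.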
def test_bits_per_spike():
    for _ in range(20):
        spikes = np.random.randint(low=0, high=5, size=(10, 100, 10)).astype(float)
        rates = np.random.exponential(scale=1.0, size=(10, 100, 10))
        null_rates = np.tile(
            spikes.mean(axis=(0, 1), keepdims=True),
            (spikes.shape[0], spikes.shape[1], 1),
        ).squeeze()

        expected_rate_nll = np.sum(
            rates - spikes * np.log(rates) + gammaln(spikes + 1.0)
        )
        expected_null_nll = np.sum(
            null_rates - spikes * np.log(null_rates) + gammaln(spikes + 1.0)
        )
        expected_bps = (
            (expected_null_nll - expected_rate_nll) / np.sum(spikes) / np.log(2)
        )
        actual_bps = bits_per_spike(rates, spikes)
        assert np.isclose(expected_bps, actual_bps)


def test_bits_per_spike_drop_nans():
    for _ in range(20):
        spikes = np.random.randint(low=0, high=5, size=(10, 100, 10)).astype(float)
        rates = np.random.exponential(scale=1.0, size=(10, 100, 10))
        mask = np.random.rand(10, 100, 10) > 0.9
        spikes[mask] = np.nan
        if np.random.rand() > 0.5:  # rates need not contain nans
            rates[mask] = np.nan
        null_rates = np.tile(
            np.nanmean(spikes, axis=(0, 1), keepdims=True),
            (spikes.shape[0], spikes.shape[1], 1),
        ).squeeze()

        expected_rate_nll = np.sum(
            rates[~mask]
            - spikes[~mask] * np.log(rates[~mask])
            + gammaln(spikes[~mask] + 1.0)
        )
        expected_null_nll = np.sum(
            null_rates[~mask]
            - spikes[~mask] * np.log(null_rates[~mask])
            + gammaln(spikes[~mask] + 1.0)
        )
        expected_bps = (
            (expected_null_nll - expected_rate_nll) / np.nansum(spikes) / np.log(2)
        )
        actual_bps = bits_per_spike(rates, spikes)
        assert np.isclose(expected_bps, actual_bps)


# -- Ridge regression ---------------
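# These tests treat fit_and_eval_decoder as fitting a regression from
# train_rates to train_behavior and returning an R^2-style score on the
# held-out split (1.0 = perfect prediction), hence the thresholds below.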


def test_fit_and_eval_decoder():
    rng = np.random.default_rng(0)
    x = rng.standard_normal(size=(1000, 10))
    y = x @ rng.standard_normal(size=(10, 2))

    # noiseless should have high R^2
    score = fit_and_eval_decoder(
        train_rates=x[:800],
        train_behavior=y[:800],
        eval_rates=x[800:],
        eval_behavior=y[800:],
    )
    assert score > 0.95

    # with noise should still have decent R^2
    y += rng.standard_normal(size=(1000, 2)) * 0.1
    score = fit_and_eval_decoder(
        train_rates=x[:800],
        train_behavior=y[:800],
        eval_rates=x[800:],
        eval_behavior=y[800:],
    )
    assert score > 0.25  # arbitrary heuristic

    # regressing on noise should have poor R^2
    y = rng.standard_normal(size=(1000, 2))
    score = fit_and_eval_decoder(
        train_rates=x[:800],
        train_behavior=y[:800],
        eval_rates=x[800:],
        eval_behavior=y[800:],
    )
    assert score < 0.95  # arbitrary heuristic


# -- PSTH evaluation

# def test_eval_psth():
#     return
