import pickle

import numpy as np

from gcl_classifier.labels import baden_cluster_id_to_group_id, baden_group_id_to_supergroup, BADEN_CLUSTER_INFO


def classify_cells(preproc_chirps, preproc_bars, bar_ds_pvalues, roi_size_um2s,
                   chirp_features, bar_features, classifier):
    """
    Extracts features from the preprocessed responses and returns the class probabilities
    predicted by the classifier, one row per cell.
    """
    features, feature_names = extract_features(
        preproc_chirps, preproc_bars, bar_ds_pvalues, roi_size_um2s, chirp_features, bar_features)
    probs = classifier.predict_proba(features)
    return probs
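

# A minimal usage sketch: classify cells with a classifier dict loaded from disk, as
# produced by save_classifier_and_data below. The function name and the file handling
# here are illustrative assumptions, not part of the package API.
def example_classify_from_file(classifier_file, preproc_chirps, preproc_bars,
                               bar_ds_pvalues, roi_size_um2s):
    with open(classifier_file, 'rb') as f:
        clf_dict = check_classifier_dict(pickle.load(f))
    # One row of class probabilities per cell.
    return classify_cells(preproc_chirps, preproc_bars, bar_ds_pvalues, roi_size_um2s,
                          clf_dict['chirp_feats'], clf_dict['bar_feats'],
                          clf_dict['classifier'])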


def baden16_cluster_probs_to_info(probs):
    """
    Maps a vector of 75 Baden et al. (2016) cluster probabilities to the most likely
    cluster, its group and supergroup, and the aggregated probabilities at each level.
    """
    if len(probs) != 75:
        raise ValueError(f"Expected 75 probabilities corresponding to 75 Baden clusters, got {len(probs)}.")

    cluster_id = np.argmax(probs) + 1  # Cluster IDs are 1-indexed
    group_id = baden_cluster_id_to_group_id(cluster_id)
    supergroup = baden_group_id_to_supergroup(group_id)
    prob_cluster = probs[cluster_id - 1]

    group_ids = BADEN_CLUSTER_INFO[:, 2].astype(int)
    supergroups = BADEN_CLUSTER_INFO[:, 3].astype(str)

    prob_group = np.sum(probs[group_ids == group_id])
    prob_supergroup = np.sum(probs[supergroups == supergroup])
    prob_rgc = np.sum(probs[supergroups != 'dAC'])
    prob_class = (1. - prob_rgc) if supergroup == 'dAC' else prob_rgc

    return cluster_id, group_id, supergroup, prob_cluster, prob_group, prob_supergroup, prob_class
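

# A minimal usage sketch: summarize every cell of a probability matrix returned by
# classify_cells. The function name and the dictionary keys are illustrative
# assumptions, not part of the package API.
def example_summarize_cells(probs_per_cell):
    summaries = []
    for cell_probs in probs_per_cell:
        (cluster_id, group_id, supergroup, prob_cluster,
         prob_group, prob_supergroup, prob_class) = baden16_cluster_probs_to_info(cell_probs)
        summaries.append({
            'cluster_id': int(cluster_id), 'group_id': group_id, 'supergroup': supergroup,
            'prob_cluster': float(prob_cluster), 'prob_group': float(prob_group),
            'prob_supergroup': float(prob_supergroup), 'prob_class': float(prob_class),
        })
    return summaries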


def extract_features(
        preproc_chirps,
        preproc_bars,
        bar_ds_pvalues,
        roi_size_um2s,
        chirp_features,
        bar_features,
) -> tuple[np.ndarray, list[str]]:
    """
    Transforms the preprocessed chirps and bars using the provided chirp/bar features,
    concatenates the results with the bar DS p-values and ROI sizes (µm²), and returns
    the feature matrix together with the feature names.
    The result can be used as input to the classifier.
    """
    features = np.concatenate([
        np.dot(preproc_chirps, chirp_features),
        np.dot(preproc_bars, bar_features),
        bar_ds_pvalues[:, np.newaxis],
        roi_size_um2s[:, np.newaxis]
    ], axis=-1)

    feature_names = [f'chirp_{i}' for i in range(chirp_features.shape[1])] + \
                    [f'bar_{i}' for i in range(bar_features.shape[1])] + \
                    ['bar_ds_pvalue', 'roi_size_um2']

    return features, feature_names
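

# A minimal shape check, assuming N cells, preprocessed chirps of shape (N, T_chirp),
# chirp features of shape (T_chirp, K_chirp), and analogous shapes for the bars; the
# resulting feature matrix then has K_chirp + K_bar + 2 columns. All sizes and the
# function name are arbitrary illustrative assumptions.
def example_feature_shapes(n_cells=4, t_chirp=100, k_chirp=20, t_bar=32, k_bar=8):
    rng = np.random.default_rng(0)
    features, names = extract_features(
        rng.normal(size=(n_cells, t_chirp)), rng.normal(size=(n_cells, t_bar)),
        rng.uniform(size=n_cells), rng.uniform(size=n_cells),
        rng.normal(size=(t_chirp, k_chirp)), rng.normal(size=(t_bar, k_bar)))
    assert features.shape == (n_cells, k_chirp + k_bar + 2)
    assert len(names) == features.shape[1]
    return features, names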


def check_classifier_dict(clf_dict: dict) -> dict:
    """
    Validates the structure of a classifier dictionary and returns it unchanged.
    """
    assert isinstance(clf_dict, dict), "Classifier file must contain a dictionary with classifier data."

    # Check keys
    for key in ('classifier', 'chirp_feats', 'bar_feats', 'feature_names', 'train_x', 'train_y', 'y_names'):
        assert key in clf_dict, f"Classifier dictionary must contain a '{key}' key."

    # Check values
    assert isinstance(clf_dict['train_x'], np.ndarray), "The 'train_x' key must contain a numpy array."
    assert isinstance(clf_dict['train_y'], np.ndarray), "The 'train_y' key must contain a numpy array."
    assert clf_dict['train_x'].shape[0] == clf_dict['train_y'].size, \
        "The number of samples in 'train_x' and 'train_y' must match."

    for val in np.unique(clf_dict['train_y']):
        assert val in clf_dict['y_names'], f"Value {val} in 'train_y' not found in 'y_names'."

    # Check if classifier is a valid scikit-learn classifier
    from sklearn.base import is_classifier
    assert is_classifier(clf_dict['classifier']), "The 'classifier' key must contain a valid scikit-learn classifier."

    return clf_dict


def save_classifier_and_data(classifier, chirp_feats, bar_feats, feature_names, train_x, train_y, y_names,
                             classifier_file, **kwargs) -> None:
    """
    Saves the classifier and its metadata to a file.
    """
    clf_dict = {
        'classifier': classifier,
        'chirp_feats': chirp_feats,
        'bar_feats': bar_feats,
        'feature_names': feature_names,
        'train_x': train_x,
        'train_y': train_y,
        'y_names': y_names,
        **kwargs
    }

    check_classifier_dict(clf_dict)

    with open(classifier_file, 'wb') as f:
        pickle.dump(clf_dict, f)
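

# A minimal round-trip sketch: fit a scikit-learn classifier on pre-extracted features,
# persist it with save_classifier_and_data, and reload it via check_classifier_dict.
# The RandomForest choice, the default file name, and the y_names mapping are
# illustrative assumptions, not the package's actual training setup.
def example_save_and_reload(train_x, train_y, chirp_feats, bar_feats, feature_names,
                            classifier_file='example_classifier.pkl'):
    from sklearn.ensemble import RandomForestClassifier
    clf = RandomForestClassifier(n_estimators=100, random_state=0).fit(train_x, train_y)
    y_names = {int(y): f'class_{int(y)}' for y in np.unique(train_y)}
    save_classifier_and_data(clf, chirp_feats, bar_feats, feature_names,
                             train_x, train_y, y_names, classifier_file)
    with open(classifier_file, 'rb') as f:
        return check_classifier_dict(pickle.load(f))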