Merge branch 'main' into add_mcc

Signed-off-by: Amrit Krishnan <amrit110@gmail.com>
VectorInstitute · Feb 20, 2024 · 924514b · 924514b
2 parents 1d664fa + 087e6f8
commit 924514b
Show file tree

Hide file tree

Showing 74 changed files with 2,348 additions and 488 deletions.
diff --git a/.github/workflows/code_checks.yml b/.github/workflows/code_checks.yml
@@ -26,7 +26,7 @@ jobs:
   run-code-check:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v4.1.1
       - name: Install poetry
         run: python3 -m pip install --upgrade pip && python3 -m pip install poetry
       - uses: actions/setup-python@v5.0.0

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -22,7 +22,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4.1.1
 
       - name: Log in to Docker Hub
         uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a

diff --git a/.github/workflows/docs_build.yml b/.github/workflows/docs_build.yml
@@ -25,7 +25,7 @@ jobs:
           submodules: 'true'
       - name: Install dependencies, build docs and coverage report
         run: python3 -m pip install --upgrade pip && python3 -m pip install poetry
-      - uses: actions/setup-python@v4.7.1
+      - uses: actions/setup-python@v5.0.0
         with:
           python-version: '3.10'
           cache: 'poetry'
@@ -37,21 +37,20 @@ jobs:
           poetry env use '3.10'
           source $(poetry env info --path)/bin/activate
           env MPICC=/opt/openmpi-4.1.5/bin/mpicc poetry install --with docs,dev,test --all-extras
-          # pandoc README.md -f markdown -t rst -s -o docs/source/intro.rst
           cd docs && rm -rf source/reference/api/_autosummary && make html
           cd .. && coverage run -m pytest -m "not integration_test" && coverage xml && coverage report -m
       - name: Upload coverage to Codecov
-        uses: Wandalen/wretry.action@v1.0.36
+        uses: Wandalen/wretry.action@v1.4.4
         with:
-          action: codecov/codecov-action@v3.1.3
+          action: codecov/codecov-action@v4.0.1
           with: |
             token: ${{ secrets.CODECOV_TOKEN }}
             file: ./coverage.xml
             name: codecov-umbrella
             fail_ci_if_error: true
           attempt_limit: 5
           attempt_delay: 30000
-      - uses: actions/setup-node@v3
+      - uses: actions/setup-node@v4.0.2
         with:
           node-version: 18
           cache: yarn

diff --git a/.github/workflows/docs_deploy.yml b/.github/workflows/docs_deploy.yml
@@ -23,12 +23,12 @@ jobs:
   deploy:
     runs-on: [self-hosted, db, gpu]
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4.1.1
         with:
           submodules: 'true'
       - name: Install dependencies, build docs and coverage report
         run: python3 -m pip install --upgrade pip && python3 -m pip install poetry
-      - uses: actions/setup-python@v4.7.1
+      - uses: actions/setup-python@v5.0.0
         with:
           python-version: '3.10'
           cache: 'poetry'
@@ -38,22 +38,21 @@ jobs:
           ompi_info
           poetry env use '3.10'
           source $(poetry env info --path)/bin/activate
-          env MPICC=/opt/openmpi-4.1.5/bin/mpicc poetry install --with test,dev --all-extras
-          # pandoc README.md -f markdown -t rst -s -o docs/source/intro.rst
+          env MPICC=/opt/openmpi-4.1.5/bin/mpicc poetry install --with docs,test,dev --all-extras
           cd docs && rm -rf source/reference/api/_autosummary && make html
           cd .. && coverage run -m pytest -m "not integration_test" && coverage xml && coverage report -m
       - name: Upload coverage to Codecov
-        uses: Wandalen/wretry.action@v1.0.36
+        uses: Wandalen/wretry.action@v1.4.4
         with:
-          action: codecov/codecov-action@v3.1.3
+          action: codecov/codecov-action@v4.0.1
           with: |
             token: ${{ secrets.CODECOV_TOKEN }}
             file: ./coverage.xml
             name: codecov-umbrella
             fail_ci_if_error: true
           attempt_limit: 5
           attempt_delay: 30000
-      - uses: actions/setup-node@v3
+      - uses: actions/setup-node@v4.0.2
         with:
           node-version: 18
           cache: yarn
@@ -65,7 +64,7 @@ jobs:
           yarn build
           cp -r ../build/html build/api
       - name: Deploy to GitHub Pages
-        uses: peaceiris/actions-gh-pages@v3
+        uses: peaceiris/actions-gh-pages@v3.9.3
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           publish_branch: github_pages

diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
@@ -38,10 +38,10 @@ jobs:
   integration-tests:
     runs-on: [self-hosted, gpu, db]
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4.1.1
       - name: Install poetry
         run: pip install poetry
-      - uses: actions/setup-python@v4.7.1
+      - uses: actions/setup-python@v5.0.0
         with:
           python-version: '3.10'
       - name: Install dependencies and check code
@@ -54,9 +54,9 @@ jobs:
           env MPICC=/opt/openmpi-4.1.5/bin/mpicc poetry install --with docs,dev,test --all-extras
           coverage run -m pytest -m integration_test && coverage xml && coverage report -m
       - name: Upload coverage to Codecov
-        uses: Wandalen/wretry.action@v1.0.36
+        uses: Wandalen/wretry.action@v1.4.4
         with:
-          action: codecov/codecov-action@v3.1.3
+          action: codecov/codecov-action@v4.0.1
           with: |
             token: ${{ secrets.CODECOV_TOKEN }}
             file: ./coverage.xml

diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml
@@ -26,10 +26,10 @@ jobs:
   base-package-install-check:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4.1.1
       - name: Install pip
         run: python3 -m pip install --upgrade pip
-      - uses: actions/setup-python@v4.7.1
+      - uses: actions/setup-python@v5.0.0
         with:
           python-version: '3.10'
       - name: Install package and test import

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -12,10 +12,10 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install libcurl4-openssl-dev libssl-dev
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4.1.1
       - name: Install poetry
         run: python3 -m pip install --upgrade pip && python3 -m pip install poetry
-      - uses: actions/setup-python@v4.7.1
+      - uses: actions/setup-python@v5.0.0
         with:
           python-version: '3.10'
       - name: Build package

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -15,34 +15,30 @@ repos:
     - id: check-yaml
     - id: check-toml
 
-  - repo: https://github.com/psf/black
-    rev: 23.7.0
-    hooks:
-    - id: black
-
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: 'v0.2.1'
+    rev: 'v0.2.2'
     hooks:
     - id: ruff
       args: [--fix, --exit-non-zero-on-fix]
       types_or: [python, jupyter]
+    - id: ruff-format
+      types_or: [python, jupyter]
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.7.1
+    rev: v1.8.0
     hooks:
     - id: mypy
       entry: python3 -m mypy --config-file pyproject.toml
       language: system
       types: [python]
-      exclude: "use_cases|tests|cyclops/(models|monitor|report/plot)"
+      exclude: "tests|cyclops/(models|monitor|report/plot)"
 
   - repo: local
     hooks:
     - id: nbstripout
       name: nbstripout
       language: system
       entry: python3 -m nbstripout
-      exclude: ^docs/source/tutorials/gemini/.*\.ipynb$
 
   - repo: https://github.com/nbQA-dev/nbQA
     rev: 1.7.1

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -15,17 +15,12 @@ pre-commit run --all-files
 
 ## Coding guidelines
 
-For code style, we recommend the [google style guide](https://google.github.io/styleguide/pyguide.html).
-
-Pre-commit hooks apply the [black](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html)
-code formatting.
+For code style, we recommend the [PEP 8 style guide](https://peps.python.org/pep-0008/).
 
 For docstrings we use [numpy format](https://numpydoc.readthedocs.io/en/latest/format.html).
 
-We also use [flake8](https://flake8.pycqa.org/en/latest/) and [pylint](https://pylint.pycqa.org/en/stable/)
-for further static code analysis. The pre-commit hooks show errors which you need
-to fix before submitting a PR.
+We use [ruff](https://docs.astral.sh/ruff/) for code formatting and static code
+analysis. Ruff checks a wide range of rules, including those implemented by [flake8](https://docs.astral.sh/ruff/faq/#how-does-ruff-compare-to-flake8). The pre-commit hooks show errors which you need to fix before submitting a PR.
 
 Last but not least, we use type hints in our code, which are then checked using
-[mypy](https://mypy.readthedocs.io/en/stable/). Currently, mypy checks are not
-strict, but will be enforced more as the API code becomes more stable.
+[mypy](https://mypy.readthedocs.io/en/stable/).
diff --git a/cyclops/data/df/handle_types.py b/cyclops/data/df/handle_types.py
@@ -1013,7 +1013,8 @@ def collect_indicators(
         # Get categories
         data[cat] = np.argmax(data[indicators].values, axis=1)
         indicator_names = [
-            indicator[len(cat) + 1 :] for indicator in indicators  # noqa: E203
+            indicator[len(cat) + 1 :]
+            for indicator in indicators  # noqa: E203
         ]
         map_dict = {
             i: (name if name != MISSING_CATEGORY else np.nan)

diff --git a/cyclops/data/df/vectorized.py b/cyclops/data/df/vectorized.py
@@ -143,9 +143,10 @@ def split_vectorized(
         seed=seed,
     )
 
-    splits = [
-        vec.split_by_indices(axes_list[i], index_splits) for i, vec in enumerate(vecs)  # type: ignore
-    ]
+    splits = (
+        vec.split_by_indices(axes_list[i], index_splits)  # type: ignore
+        for i, vec in enumerate(vecs)
+    )
 
     return tuple(splits)
 

diff --git a/cyclops/data/slicer.py b/cyclops/data/slicer.py
@@ -96,11 +96,13 @@ class SliceSpec:
     ...         {"feature_1": {"value": ["value_1", "value_2"]}},
     ...         {"feature_1": {"value": "value_1", "negate": True, "keep_nulls": True}},
     ...         {"feature_1": {"min_value": "2020-01-01", "max_value": "2020-12-31"}},
-    ...         {"feature_1": {
-    ...             "min_value": 5,
-    ...             "max_value": 60,
-    ...             "min_inclusive": False,
-    ...             "max_inclusive": False}
+    ...         {
+    ...             "feature_1": {
+    ...                 "min_value": 5,
+    ...                 "max_value": 60,
+    ...                 "min_inclusive": False,
+    ...                 "max_inclusive": False,
+    ...             }
     ...         },
     ...         {"feature_1": {"year": [2020, 2021, 2022]}},
     ...         {"feature_1": {"month": [6, 7, 8]}},
@@ -110,7 +112,8 @@ class SliceSpec:
     ...         {
     ...             "feature_1": {"value": "value_1"},
     ...             "feature_2": {
-    ...                 "min_value": "2020-01-01", "keep_nulls": False,
+    ...                 "min_value": "2020-01-01",
+    ...                 "keep_nulls": False,
     ...             },
     ...             "feature_3": {"year": ["2000", "2010", "2020"]},
     ...         },

diff --git a/cyclops/evaluate/evaluator.py b/cyclops/evaluate/evaluator.py
@@ -268,7 +268,8 @@ def _compute_metrics(
                         stacklevel=1,
                     )
                     metric_output: Dict[str, Array] = {
-                        metric_name: float("NaN") for metric_name in metrics  # type: ignore[attr-defined,misc]
+                        metric_name: float("NaN")  # type: ignore
+                        for metric_name in metrics  # type: ignore
                     }
                 elif (
                     batch_size is None or batch_size < 0

diff --git a/cyclops/evaluate/fairness/evaluator.py b/cyclops/evaluate/fairness/evaluator.py
@@ -755,7 +755,8 @@ def _compute_metrics(  # noqa: C901, PLR0912
         if len(dataset) == 0:
             warnings.warn(empty_dataset_msg, RuntimeWarning, stacklevel=1)
             results: Dict[str, Any] = {
-                metric_name: float("NaN") for metric_name in metrics  # type: ignore[attr-defined]
+                metric_name: float("NaN")
+                for metric_name in metrics  # type: ignore[attr-defined]
             }
         elif (
             batch_size is None or batch_size <= 0

diff --git a/cyclops/evaluate/metrics/accuracy.py b/cyclops/evaluate/metrics/accuracy.py
@@ -108,8 +108,10 @@ class MulticlassAccuracy(MulticlassStatScores, registry_key="multiclass_accuracy
     array([1.        , 0.        , 0.66666667])
     >>> metric.reset_state()
     >>> target = [[0, 1, 2], [2, 1, 0]]
-    >>> preds = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1], [0.2, 0.6, 0.2]],
-    ...          [[0.1, 0.8, 0.1], [0.05, 0.95, 0], [0.2, 0.6, 0.2]]]
+    >>> preds = [
+    ...     [[0.05, 0.95, 0], [0.1, 0.8, 0.1], [0.2, 0.6, 0.2]],
+    ...     [[0.1, 0.8, 0.1], [0.05, 0.95, 0], [0.2, 0.6, 0.2]],
+    ... ]
     >>> for t, p in zip(target, preds):
     ...     metric.update_state(t, p)
     >>> metric.compute()
@@ -184,8 +186,7 @@ class MultilabelAccuracy(MultilabelStatScores, registry_key="multilabel_accuracy
     array([1., 1., 0.])
     >>> metric.reset_state()
     >>> target = [[[0, 1, 1], [1, 0, 0]], [[1, 0, 0], [0, 1, 1]]]
-    >>> preds = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1]],
-    ...          [[0.1, 0.8, 0.1], [0.05, 0.95, 0]]]
+    >>> preds = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1]], [[0.1, 0.8, 0.1], [0.05, 0.95, 0]]]
     >>> for t, p in zip(target, preds):
     ...     metric.update_state(t, p)
     >>> metric.compute()
@@ -291,8 +292,10 @@ class Accuracy(Metric, registry_key="accuracy", force_register=True):
     array([1.        , 0.        , 0.66666667])
     >>> metric.reset_state()
     >>> target = [[0, 1, 2], [2, 1, 0]]
-    >>> preds = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1], [0.2, 0.6, 0.2]],
-    ...          [[0.1, 0.8, 0.1], [0.05, 0.95, 0], [0.2, 0.6, 0.2]]]
+    >>> preds = [
+    ...     [[0.05, 0.95, 0], [0.1, 0.8, 0.1], [0.2, 0.6, 0.2]],
+    ...     [[0.1, 0.8, 0.1], [0.05, 0.95, 0], [0.2, 0.6, 0.2]],
+    ... ]
     >>> for t, p in zip(target, preds):
     ...     metric.update_state(t, p)
     >>> metric.compute()
@@ -307,8 +310,7 @@ class Accuracy(Metric, registry_key="accuracy", force_register=True):
     array([1., 1., 0.])
     >>> metric.reset_state()
     >>> target = [[[0, 1, 1], [1, 0, 0]], [[1, 0, 0], [0, 1, 1]]]
-    >>> preds = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1]],
-    ...          [[0.1, 0.8, 0.1], [0.05, 0.95, 0]]]
+    >>> preds = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1]], [[0.1, 0.8, 0.1], [0.05, 0.95, 0]]]
     >>> for t, p in zip(target, preds):
     ...     metric.update_state(t, p)
     >>> metric.compute()

diff --git a/cyclops/evaluate/metrics/auroc.py b/cyclops/evaluate/metrics/auroc.py
@@ -108,15 +108,21 @@ class MulticlassAUROC(MulticlassPrecisionRecallCurve, registry_key="multiclass_a
     --------
     >>> from cyclops.evaluate.metrics import MulticlassAUROC
     >>> target = [0, 1, 2, 0]
-    >>> preds = [[0.9, 0.05, 0.05], [0.05, 0.89, 0.06],
-    ...         [0.05, 0.01, 0.94], [0.9, 0.05, 0.05]]
+    >>> preds = [
+    ...     [0.9, 0.05, 0.05],
+    ...     [0.05, 0.89, 0.06],
+    ...     [0.05, 0.01, 0.94],
+    ...     [0.9, 0.05, 0.05],
+    ... ]
     >>> metric = MulticlassAUROC(num_classes=3)
     >>> metric(target, preds)
     array([1., 1., 1.])
     >>> metric.reset_state()
     >>> target = [[0, 1, 0], [1, 0, 1]]
-    >>> preds = [[[0.1, 0.9, 0.0], [0.7, 0.2, 0.1], [0.2, 0.3, 0.5]],
-    ...         [[0.1, 0.1, 0.8], [0.7, 0.2, 0.1], [0.2, 0.3, 0.5]]]
+    >>> preds = [
+    ...     [[0.1, 0.9, 0.0], [0.7, 0.2, 0.1], [0.2, 0.3, 0.5]],
+    ...     [[0.1, 0.1, 0.8], [0.7, 0.2, 0.1], [0.2, 0.3, 0.5]],
+    ... ]
     >>> for t, p in zip(target, preds):
     ...     metric.update_state(t, p)
     >>> metric.compute()
@@ -278,15 +284,21 @@ class AUROC(Metric, registry_key="auroc", force_register=True):
     >>> # (multiclass)
     >>> from cyclops.evaluate.metrics import MulticlassAUROC
     >>> target = [0, 1, 2, 0]
-    >>> preds = [[0.9, 0.05, 0.05], [0.05, 0.89, 0.06],
-    ...         [0.05, 0.01, 0.94], [0.9, 0.05, 0.05]]
+    >>> preds = [
+    ...     [0.9, 0.05, 0.05],
+    ...     [0.05, 0.89, 0.06],
+    ...     [0.05, 0.01, 0.94],
+    ...     [0.9, 0.05, 0.05],
+    ... ]
     >>> metric = MulticlassAUROC(num_classes=3)
     >>> metric(target, preds)
     array([1., 1., 1.])
     >>> metric.reset_state()
     >>> target = [[0, 1, 0], [1, 0, 1]]
-    >>> preds = [[[0.1, 0.9, 0.0], [0.7, 0.2, 0.1], [0.2, 0.3, 0.5]],
-    ...         [[0.1, 0.1, 0.8], [0.7, 0.2, 0.1], [0.2, 0.3, 0.5]]]
+    >>> preds = [
+    ...     [[0.1, 0.9, 0.0], [0.7, 0.2, 0.1], [0.2, 0.3, 0.5]],
+    ...     [[0.1, 0.1, 0.8], [0.7, 0.2, 0.1], [0.2, 0.3, 0.5]],
+    ... ]
     >>> for t, p in zip(target, preds):
     ...     metric.update_state(t, p)
     >>> metric.compute()