Switch from black to ruff

smart-on-fhir · Feb 4, 2025 · aef22c0 · aef22c0
1 parent a35da46
commit aef22c0
Show file tree

Hide file tree

Showing 20 changed files with 120 additions and 113 deletions.
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -16,7 +16,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
 
     steps:
       - uses: actions/checkout@v4
@@ -36,6 +36,10 @@ jobs:
         run: |
           python -m pytest --cov=chart_review --cov-report=xml
 
+      - name: Log missing coverage
+        run: |
+          coverage report -m --skip-covered
+
       - name: Check coverage report
         if: github.ref != 'refs/heads/main'
         uses: orgoro/coverage@v3.2
@@ -44,7 +48,7 @@ jobs:
           token: ${{ secrets.GITHUB_TOKEN }}
           thresholdAll: .99
           thresholdNew: 1
-          thresholdModified: 1
+          thresholdModified: .97
 
   lint:
     runs-on: ubuntu-latest
@@ -57,19 +61,5 @@ jobs:
           python -m pip install --upgrade pip
           pip install .[dev]
 
-      - name: Run pycodestyle
-        # E203: pycodestyle is a little too rigid about slices & whitespace
-        #  See https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#slices
-        # W503: a default ignore that we are restoring
-        run: |
-          pycodestyle --max-line-length=100 --ignore=E203,W503 .
-
-      - name: Run bandit
-        if: success() || failure() # still run bandit if above checks fail
-        run: |
-          bandit -c pyproject.toml -r .
-
-      - name: Run black
-        if: success() || failure() # still run black if above checks fails
-        run: |
-          black --check --verbose .
+      - name: Run ruff
+        run: ruff check --output-format=github .
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,12 +1,10 @@
 repos:
-  - repo: https://github.com/psf/black
-    # this version is synced with the black mentioned in .github/workflows/ci.yml
-    rev: 24.4.0
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.4  # keep in rough sync with pyproject.toml
     hooks:
-      - id: black
-        entry: bash -c 'black "$@"; git add -u' --
-        # It is recommended to specify the latest version of Python
-        # supported by your project here, or alternatively use
-        # pre-commit's default_language_version, see
-        # https://pre-commit.com/#top_level-default_language_version
-        language_version: python3.12
+      - name: Ruff formatting
+        id: ruff-format
+        entry: bash -c 'ruff format --force-exclude "$@"; git add -u' --
+      - name: Ruff linting
+        id: ruff
+        stages: [pre-push]
diff --git a/chart_review/agree.py b/chart_review/agree.py
@@ -1,15 +1,15 @@
 from collections.abc import Collection, Iterable
-from typing import Union
+from typing import Optional, Union
 
-from chart_review import types
+from chart_review import defines
 
 
 def confusion_matrix(
-    annotations: types.ProjectAnnotations,
+    annotations: defines.ProjectAnnotations,
     truth: str,
     annotator: str,
     note_range: Collection[int],
-    labels: Iterable[str] = None,
+    labels: Optional[Iterable[str]] = None,
 ) -> dict[str, list]:
     """
     Confusion Matrix (TP, FP, TN, FN)
@@ -27,8 +27,8 @@ def confusion_matrix(
         "FN": False Negative (truth said positive+, annotator said No)
         "TN": True Negative (truth and annotator both said No)
     """
-    truth_mentions = annotations.mentions.get(truth, types.Mentions())
-    annotator_mentions = annotations.mentions.get(annotator, types.Mentions())
+    truth_mentions = annotations.mentions.get(truth, defines.Mentions())
+    annotator_mentions = annotations.mentions.get(annotator, defines.Mentions())
 
     # Only examine labels that were used by any compared annotators at least once
     label_set = set()
@@ -132,7 +132,7 @@ def score_matrix(matrix: dict, sig_digits=3) -> dict:
     }
 
 
-def csv_table(score: dict, class_labels: types.LabelSet):
+def csv_table(score: dict, class_labels: defines.LabelSet):
     table = list()
     table.append(csv_header(False, True))
     table.append(csv_row_score(score, as_string=True))
@@ -160,7 +160,7 @@ def csv_header(pick_label=False, as_string=False):
 
 
 def csv_row_score(
-    score: dict, pick_label: str = None, as_string: bool = False
+    score: dict, pick_label: Optional[str] = None, as_string: bool = False
 ) -> Union[str, list[str]]:
     """
     Table Row entry

diff --git a/chart_review/cli.py b/chart_review/cli.py
@@ -2,6 +2,7 @@
 
 import argparse
 import sys
+from typing import Optional
 
 from chart_review.commands import accuracy, default, frequency, ids, labels, mentions
 
@@ -23,7 +24,7 @@ def define_parser() -> argparse.ArgumentParser:
     return parser
 
 
-def main_cli(argv: list[str] = None) -> None:
+def main_cli(argv: Optional[list[str]] = None) -> None:
     """Main entrypoint that wraps all the core program logic"""
     try:
         parser = define_parser()

diff --git a/chart_review/cli_utils.py b/chart_review/cli_utils.py
@@ -18,8 +18,7 @@ def add_project_args(parser: argparse.ArgumentParser, is_global: bool = False) -
         default=None if is_global else argparse.SUPPRESS,
         metavar="DIR",
         help=(
-            "directory holding project files, "
-            "like labelstudio-export.json (default: current dir)"
+            "directory holding project files, like labelstudio-export.json (default: current dir)"
         ),
     )
     group.add_argument(

diff --git a/chart_review/cohort.py b/chart_review/cohort.py
@@ -1,6 +1,7 @@
-from typing import Iterable
+from collections.abc import Iterable
+from typing import Optional
 
-from chart_review import agree, common, config, errors, external, simplify, types
+from chart_review import agree, common, config, defines, errors, external, simplify
 
 
 class CohortReader:
@@ -27,7 +28,7 @@ def __init__(self, proj_config: config.ProjectConfig):
 
         # Add a placeholder for any annotators that don't have mentions for some reason
         for annotator in self.config.annotators.values():
-            self.annotations.mentions.setdefault(annotator, types.Mentions())
+            self.annotations.mentions.setdefault(annotator, defines.Mentions())
 
         # Load external annotations (i.e. from NLP tags or ICD10 codes)
         for name, value in self.config.external_annotations.items():
@@ -49,7 +50,7 @@ def __init__(self, proj_config: config.ProjectConfig):
 
     def _collect_note_ranges(
         self, exported_json: list[dict]
-    ) -> tuple[dict[str, types.NoteSet], types.NoteSet]:
+    ) -> tuple[dict[str, defines.NoteSet], defines.NoteSet]:
         # Detect note ranges if they were not defined in the project config
         # (i.e. default to the full set of annotated notes)
         note_ranges = {k: set(v) for k, v in self.config.note_ranges.items()}
@@ -60,7 +61,7 @@ def _collect_note_ranges(
         all_ls_notes = {int(entry["id"]) for entry in exported_json if "id" in entry}
 
         # Parse ignored IDs (might be note IDs, might be external IDs)
-        ignored_notes = types.NoteSet()
+        ignored_notes = defines.NoteSet()
         for ignore_id in self.config.ignore:
             ls_id = external.external_id_to_label_studio_id(exported_json, str(ignore_id))
             if ls_id is None:
@@ -83,14 +84,18 @@ def _collect_note_ranges(
     def class_labels(self):
         return self.annotations.labels
 
-    def _select_labels(self, label_pick: str = None) -> Iterable[str]:
+    def _select_labels(self, label_pick: Optional[str] = None) -> Iterable[str]:
         if label_pick:
             return [label_pick]
         else:
             return self.class_labels
 
     def confusion_matrix(
-        self, truth: str, annotator: str, note_range: types.NoteSet, label_pick: str = None
+        self,
+        truth: str,
+        annotator: str,
+        note_range: defines.NoteSet,
+        label_pick: Optional[str] = None,
     ) -> dict:
         """
         This is the rollup of counting each symptom only once, not multiple times.

diff --git a/chart_review/commands/frequency.py b/chart_review/commands/frequency.py
@@ -5,7 +5,7 @@
 import rich.table
 import rich.text
 
-from chart_review import cli_utils, console_utils, types
+from chart_review import cli_utils, console_utils, defines
 
 
 def make_subparser(parser: argparse.ArgumentParser) -> None:
@@ -40,7 +40,7 @@ def print_frequency(args: argparse.Namespace) -> None:
                         all_text_to_count[text] = all_text_to_count.get(text, 0) + 1
 
                         # And finally, add it to our running term-confusion tracker
-                        text_labels.setdefault(text, types.LabelSet()).add(label)
+                        text_labels.setdefault(text, defines.LabelSet()).add(label)
 
     # Now group up the data into a formatted table
     table = cli_utils.create_table("Annotator", "Label", "Mention", "Count")
@@ -73,5 +73,5 @@ def add_annotator_to_table(name, label_to_text: dict) -> None:
         console_utils.print_ignored_charts(reader)
         if has_term_confusion:
             rich.get_console().print(
-                f"  * This text has multiple associated labels.", style="italic"
+                "  * This text has multiple associated labels.", style="italic"
             )
diff --git a/chart_review/commands/labels.py b/chart_review/commands/labels.py
@@ -5,7 +5,7 @@
 import rich.table
 import rich.text
 
-from chart_review import cli_utils, console_utils, types
+from chart_review import cli_utils, console_utils, defines
 
 
 def make_subparser(parser: argparse.ArgumentParser) -> None:
@@ -20,14 +20,14 @@ def print_labels(args: argparse.Namespace) -> None:
 
     # Calculate all label counts for each annotator
     label_names = sorted(reader.class_labels, key=str.casefold)
-    label_notes: dict[str, dict[str, types.NoteSet]] = {}  # annotator -> label -> note IDs
-    any_annotator_note_sets: dict[str, types.NoteSet] = {}
+    label_notes: dict[str, dict[str, defines.NoteSet]] = {}  # annotator -> label -> note IDs
+    any_annotator_note_sets: dict[str, defines.NoteSet] = {}
     for annotator, mentions in reader.annotations.mentions.items():
         label_notes[annotator] = {}
         for name in label_names:
             note_ids = {note_id for note_id, labels in mentions.items() if name in labels}
             label_notes[annotator][name] = note_ids
-            any_annotator_note_sets.setdefault(name, types.NoteSet()).update(note_ids)
+            any_annotator_note_sets.setdefault(name, defines.NoteSet()).update(note_ids)
 
     label_table = cli_utils.create_table("Annotator", "Label", "Chart Count")
 

diff --git a/chart_review/common.py b/chart_review/common.py
@@ -1,10 +1,8 @@
 """Utility methods"""
 
-from enum import Enum, EnumMeta
-from typing import Optional, Union
-from collections.abc import Iterable
-import logging
 import json
+import logging
+from typing import Optional, Union
 
 ###############################################################################
 # Helper Functions: read/write JSON and text
@@ -19,7 +17,7 @@ def read_json(path: str) -> Union[dict, list[dict]]:
     """
     logging.debug("read_json() %s", path)
 
-    with open(path, "r") as f:
+    with open(path) as f:
         return json.load(f, strict=False)
 
 
@@ -44,7 +42,7 @@ def read_text(path: str) -> str:
     """
     logging.debug("read_text() %s", path)
 
-    with open(path, "r") as f:
+    with open(path) as f:
         return f.read()
 
 

diff --git a/chart_review/config.py b/chart_review/config.py
@@ -2,12 +2,12 @@
 import os
 import re
 import sys
-from typing import Iterable, Optional, Union
+from collections.abc import Iterable
+from typing import Optional, Union
 
-import rich.console
 import yaml
 
-from chart_review import errors, types
+from chart_review import defines, errors
 
 
 class ProjectConfig:
@@ -29,7 +29,7 @@ def __init__(self, project_dir: Optional[str] = None, config_path: Optional[str]
         # since that's what is stored in Label Studio. So that's what we return from this method.
         # But as humans writing config files, it's more natural to think of "name -> id".
         # So that's what we keep in the config, and we just reverse it here for convenience.
-        self.annotators = types.AnnotatorMap()
+        self.annotators = defines.AnnotatorMap()
         self.external_annotations = {}
         for name, value in self._data.get("annotators", {}).items():
             if isinstance(value, int):  # real annotation layer in Label Studio
@@ -44,15 +44,15 @@ def __init__(self, project_dir: Optional[str] = None, config_path: Optional[str]
             self.note_ranges[key] = list(self._parse_note_range(values))
 
         # ** Implied labels **
-        self.implied_labels = types.ImpliedLabels()
+        self.implied_labels = defines.ImpliedLabels()
         for key, value in self._data.get("implied-labels", {}).items():
             # Coerce single labels into a set
             if not isinstance(value, list):
                 value = {value}
             self.implied_labels[key] = set(value)
 
         # ** Grouped labels **
-        self.grouped_labels = types.GroupedLabels()
+        self.grouped_labels = defines.GroupedLabels()
         for key, value in self._data.get("grouped-labels", {}).items():
             # Coerce single labels into a set
             if not isinstance(value, list):
@@ -104,7 +104,7 @@ def _parse_note_range(self, value: Union[str, int, list[Union[str, int]]]) -> It
             return []
 
     @property
-    def class_labels(self) -> types.LabelSet:
+    def class_labels(self) -> defines.LabelSet:
         return set(self._data.setdefault("labels", []))
 
     @property

diff --git a/chart_review/console_utils.py b/chart_review/console_utils.py
@@ -2,10 +2,10 @@
 
 import rich
 
-from chart_review import cohort, types
+from chart_review import cohort, defines
 
 
-def pretty_note_range(notes: types.NoteSet) -> str:
+def pretty_note_range(notes: defines.NoteSet) -> str:
     """
     Returns a pretty, human-readable string for a set of notes.
 

diff --git a/chart_review/types.py b/chart_review/defines.py