Create CLI

In this commit, I created the `macta-tools` CLI. Additionally, I added a new field to the `CTAToolInterface` class called `_required_kwargs` - a list of strings naming the kwargs that must be passed to the tool, which simplifies declaring such requirements. Consequently, `NotNoneRequirement` and its related files are no longer needed, so I deleted them. I also improved the logging suppression in the SCANVI interface file. Finally, I made some small tweaks to the project configuration to re-enable line length checks and to configure where pyright searches for issues.
AleksBekker · Aug 9, 2023 · f51340b · f51340b
1 parent 9fe04e5
commit f51340b
Show file tree

Hide file tree

Showing 15 changed files with 151 additions and 63 deletions.
diff --git a/.flake8 b/.flake8
@@ -1,5 +1,5 @@
 [flake8]
-ignore = E501,W504
+ignore = W504
 max-line-length = 120
 per-file-ignores = __init__.py:F401
 inline-quotes = single

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,6 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "macta_tools"
-version = "0.0.4"
 authors = [{ name = "Aleksandr Bekker", email = "alekso.bekker@gmail.com" }]
 description = "Annotation tools for the MACTA suite"
 keywords = ["cell type annotation", "single cell"]
@@ -23,6 +22,13 @@ dependencies = [
     "pydantic>=2.0.0",
     "scanpy>=1.9.3",
 ]
+dynamic = ["version"]
+
+[tool.setuptools.dynamic]
+version = { attr = "macta_tools.__version__" }
+
+[project.scripts]
+"macta-tools" = "macta_tools:cli_main"
 
 [project.urls]
 "Homepage" = "https://github.com/AleksBekker/MACTA_py"

diff --git a/pyrightconfig.json b/pyrightconfig.json
@@ -0,0 +1,3 @@
+{
+  exclude: ["./build/**", "**/__pycache__", "**/.*"]
+}
diff --git a/src/macta_tools/__init__.py b/src/macta_tools/__init__.py
@@ -1,5 +1,6 @@
 from macta_tools import tools, utils
 from macta_tools._annotate import annotate
-from macta_tools._cli import run_from_cli
+from macta_tools._cli import main as cli_main
 
-__all__ = ['annotate', 'tools', 'utils', 'run_from_cli']
+__all__ = ['annotate', 'tools', 'utils', 'cli_main']
+__version__ = '0.0.4'
diff --git a/src/macta_tools/_annotate.py b/src/macta_tools/_annotate.py
@@ -10,7 +10,7 @@
 
 
 def annotate(expr_data: AnnData, ref_data: Union[AnnData, pd.DataFrame], annot_type: str, result_type: str = 'labels',
-             annot_tools: Union[str, Container[str]] = '*',
+             annot_tools: Optional[Container[str]] = None,
              tool_interfaces: Optional[Dict[str, CTAToolInterface]] = None, **kwargs: Any
              ) -> Dict[str, Union[pd.Series, pd.DataFrame]]:
     """Runs MACTA annotation analysis.
@@ -30,7 +30,7 @@ def annotate(expr_data: AnnData, ref_data: Union[AnnData, pd.DataFrame], annot_t
     if tool_interfaces is None:
         tool_interfaces = AVAILABLE
 
-    if annot_tools == '*':
+    if not annot_tools:
         annot_tools = list(tool_interfaces.keys())
 
     results = {}
@@ -46,8 +46,8 @@ def annotate(expr_data: AnnData, ref_data: Union[AnnData, pd.DataFrame], annot_t
     return results
 
 
-def run_tool(tool_name: str, interface: CTAToolInterface, expr_data: AnnData, ref_data: AnnData, annot_type: str,
-             result_type: str, **kwargs: Any) -> Optional[pd.Series]:
+def run_tool(tool_name: str, interface: CTAToolInterface, expr_data: AnnData, ref_data: Union[AnnData, pd.DataFrame],
+             annot_type: str, result_type: str, **kwargs: Any) -> Union[pd.DataFrame, pd.Series, None]:
     """Fully runs the annotation for one tool and handles typical issues and exceptions.
 
     Arguments:

diff --git a/src/macta_tools/_cli.py b/src/macta_tools/_cli.py
@@ -0,0 +1,100 @@
+from argparse import ArgumentParser, FileType, Namespace
+from pathlib import Path
+
+import scanpy as sc
+from pandas.compat.pickle_compat import pkl
+
+from macta_tools import annotate
+
+
+def parse_args() -> Namespace:
+
+    parser = ArgumentParser('MACTA-tools', description='Tools for cell-type annotation in Python')
+
+    parser.add_argument(
+        'expr',
+        # dest='expr_data',
+        type=FileType(),
+    )
+
+    parser.add_argument(
+        'ref',
+        # dest='ref_data',
+        type=FileType(),
+    )
+
+    parser.add_argument(
+        'annot_type',
+        choices=['marker', 'ref']
+    )
+
+    parser.add_argument(
+        'convert_to',
+        choices=['labels', 'scores'],
+    )
+
+    parser.add_argument(
+        'output',
+        # dest='output_path',
+        type=FileType(),
+    )
+
+    parser.add_argument(
+        '-t', '--tools',
+        nargs='+',
+    )
+
+    # Tool kwargs
+
+    parser.add_argument(
+        '--force_update',
+        type=bool,
+    )
+
+    parser.add_argument(
+        '--update_models',
+        type=bool,
+    )
+
+    parser.add_argument('--batch_col')
+    parser.add_argument('--cell_type_col')
+
+    return parser.parse_args()
+
+
+# def read_file(path_name: bytes) -> Union[AnnData, pd.DataFrame, None]:
+#
+#     path = Path(str(path_name))
+#     ext = path.suffix
+#
+#     if ext == 'csv':
+#         return pd.read_csv(path)
+#
+#     if ext == 'tsv':
+#         return pd.read_csv(path, delimiter='\t')
+#
+#     if ext == 'pkl':
+#         with path.open('rb') as file:
+#             return pkl.load(file)
+#
+#     if ext == 'h5ad':
+#         return sc.read_h5ad(path)
+#
+#     return None
+
+
+def main():
+    args = parse_args()
+
+    # print('\nARGUMENTS')
+    # from pprint import pprint
+    # pprint(vars(args))
+    # print()
+
+    expr_data = sc.read_h5ad(args.expr)
+    ref_data = sc.read_h5ad(args.ref)
+
+    results = annotate(expr_data, ref_data, **vars(args))
+
+    with Path(args.output).open('wb') as file:
+        pkl.dump(results, file)
diff --git a/src/macta_tools/tools/__init__.py b/src/macta_tools/tools/__init__.py
@@ -1,11 +1,18 @@
+import sys
+import warnings
 from contextlib import suppress
 from typing import Dict
 
+from numba.core.errors import NumbaDeprecationWarning
+
 from macta_tools.tools._cta_tool_interface import CTAToolInterface
 
 AVAILABLE: Dict[str, CTAToolInterface] = {}
 __all__ = ['AVAILABLE', 'CTAToolInterface']
 
+# TODO delete this when possible
+if not sys.warnoptions:
+    warnings.simplefilter('ignore', NumbaDeprecationWarning)
 
 with suppress(ImportError):
     from macta_tools.tools._celltypist_interface import CelltypistInterface

diff --git a/src/macta_tools/tools/_celltypist_interface.py b/src/macta_tools/tools/_celltypist_interface.py
@@ -25,7 +25,7 @@ class CelltypistInterface(CTAToolInterface):
         annot_type=EqualityRequirement('ref'),
     )
 
-    def annotate(self, expr_data: AnnData, ref_data: models.Model, **kwargs: Any) -> AnnotationResult:
+    def annotate(self, expr_data: AnnData, ref_data: models.Model, **_: Any) -> AnnotationResult:
         """Runs annotation using `celltypist`.
 
         Arguments:
@@ -37,7 +37,7 @@ def annotate(self, expr_data: AnnData, ref_data: models.Model, **kwargs: Any) ->
         """
         return celltypist.annotate(expr_data, model=ref_data, majority_voting=True)
 
-    def convert(self, results: AnnotationResult, convert_to: str, **kwargs: Any) -> Union[pd.DataFrame, pd.Series]:
+    def convert(self, results: AnnotationResult, convert_to: str, **_: Any) -> Union[pd.DataFrame, pd.Series]:
         """Converts `celltypist` results to standardized format.
 
         Arguments:

diff --git a/src/macta_tools/tools/_cta_tool_interface.py b/src/macta_tools/tools/_cta_tool_interface.py
@@ -1,7 +1,7 @@
 """Implementation of abstract class for an interface to a CTA tool."""
 
 from abc import ABC, abstractmethod
-from typing import Any, Dict, Optional, Union
+from typing import Any, Collection, Dict, Optional, Union
 
 import pandas as pd
 
@@ -11,6 +11,7 @@
 class CTAToolInterface(ABC):
     """Abstract class for tool interfaces"""
 
+    _required_kwargs: Collection[str] = []
     _requirements: Optional[RequirementList] = None
 
     # region Abstract methods
@@ -43,7 +44,7 @@ def convert(self, results: Any, convert_to: str, **kwargs: Any) -> Union[pd.Data
 
     # region Pre-processing methods
 
-    def preprocess_expr(self, expr_data: Any, **kwargs: Any) -> Any:
+    def preprocess_expr(self, expr_data: Any, **_: Any) -> Any:
         """Pre-process expr_data for use in a specific algorithm.
 
         Arguments:
@@ -54,7 +55,7 @@ def preprocess_expr(self, expr_data: Any, **kwargs: Any) -> Any:
         """
         return expr_data
 
-    def preprocess_ref(self, ref_data: Any, **kwargs: Any) -> Any:
+    def preprocess_ref(self, ref_data: Any, **_: Any) -> Any:
         """Pre-process expr_data for use in a specific algorithm.
 
         Arguments:
@@ -69,7 +70,7 @@ def preprocess_ref(self, ref_data: Any, **kwargs: Any) -> Any:
 
     # region Other class methods for annotation
 
-    def run_full(self, expr_data: Any, ref_data: Any, convert_to: str, **kwargs: Any) -> pd.Series:
+    def run_full(self, expr_data: Any, ref_data: Any, convert_to: str, **kwargs: Any) -> Union[pd.DataFrame, pd.Series]:
         """Run `self.annotate`, followed by `self.convert` on a data set.
 
         Arguments:
@@ -107,6 +108,10 @@ def check_requirements(self, values: Optional[Dict[str, Any]] = None, **kwargs:
         if values is None:
             values = {}
 
+        for kwarg in self._required_kwargs:
+            if kwarg not in kwargs:
+                return False
+
         return self._requirements.check(**values, **kwargs)
 
     # endregion
diff --git a/src/macta_tools/tools/_scanvi.py b/src/macta_tools/tools/_scanvi.py
@@ -7,22 +7,24 @@
 
 from macta_tools.tools._cta_tool_interface import CTAToolInterface
 from macta_tools.utils.contexts import suppress_logging
-from macta_tools.utils.requirements import EqualityRequirement, NotNoneRequirement, RequirementList
+from macta_tools.utils.requirements import EqualityRequirement, RequirementList
 
 # Suppress the output that comes with importing scArches
 with suppress_logging():
-    from scarches import SCANVI, SCVI
+    # Ignored because an ImportError is expected for this module
+    # TODO create module/file documentation specifying that a ModuleNotFoundError is expected
+    from scarches import SCANVI, SCVI  # type: ignore
 
 
 class ScanviInterface(CTAToolInterface):
     """Interface for running ScanVI analysis."""
 
     _requirements = RequirementList(
         annot_type=EqualityRequirement('ref'),
-        batch_col=NotNoneRequirement(),
-        cell_type_col=NotNoneRequirement(),
     )
 
+    _required_kwargs = ['batch_col', 'cell_type_col']
+
     def annotate(self, expr_data: AnnData, ref_data: SCANVI, **kwargs: Any) -> SCANVI:
         """Runs annotation using `SCANVI`.
 
@@ -47,7 +49,7 @@ def annotate(self, expr_data: AnnData, ref_data: SCANVI, **kwargs: Any) -> SCANV
 
         return model
 
-    def convert(self, results: SCANVI, convert_to: str, **kwargs: Any) -> Union[pd.DataFrame, pd.Series]:
+    def convert(self, results: SCANVI, convert_to: str, **_: Any) -> Union[pd.DataFrame, pd.Series]:
         """Converts a `SCANVI` model to the standard data types.
 
         Arguments:
@@ -67,7 +69,7 @@ def convert(self, results: SCANVI, convert_to: str, **kwargs: Any) -> Union[pd.D
         raise ValueError(f'{convert_to} is an invalid option for `convert_to`')
 
     def preprocess_ref(self, ref_data: AnnData, cell_type_col: str = '', batch_col: str = '', ref_type: str = 'counts',
-                       **kwargs: Any) -> SCANVI:
+                       **_: Any) -> SCANVI:
         """Preprocesses the reference data into a `SCANVI` model.
 
         Arguments:
@@ -101,6 +103,7 @@ def preprocess_ref(self, ref_data: AnnData, cell_type_col: str = '', batch_col:
         reference_latent['scanvi_batch'] = ref_data.obs[batch_col].tolist()
 
         reference_latent['scanvi_predictions'] = scanvae.predict()
-        logging.info(f'ScanVI: accuracy = {np.mean(reference_latent.obs.scanvi_predictions == reference_latent.obs.cell_type):.4%}')
+        accuracy = np.mean(reference_latent.obs.scanvi_predictions == reference_latent.obs.cell_type)
+        logging.info(f'ScanVI: {accuracy = :.4%}')
 
         return scanvae
diff --git a/src/macta_tools/utils/contexts.py b/src/macta_tools/utils/contexts.py
@@ -1,11 +1,10 @@
 import logging
-import typing
 from contextlib import contextmanager
+from typing import Iterator
 
 
-@typing.no_type_check
 @contextmanager
-def suppress_logging() -> None:
+def suppress_logging() -> Iterator[None]:
     state = logging.getLogger().getEffectiveLevel()
     logging.disable(logging.CRITICAL)
     yield

diff --git a/src/macta_tools/utils/requirements/__init__.py b/src/macta_tools/utils/requirements/__init__.py
@@ -1,8 +1,7 @@
 from macta_tools.utils.requirements._contains_requirement import ContainsRequirement
 from macta_tools.utils.requirements._equality_requirement import EqualityRequirement
 from macta_tools.utils.requirements._is_instance_requirement import IsInstanceRequirement
-from macta_tools.utils.requirements._not_none_requirement import NotNoneRequirement
 from macta_tools.utils.requirements._requirement import Requirement
 from macta_tools.utils.requirements._requirement_list import RequirementList
 
-__all__ = ['Requirement', 'ContainsRequirement', 'IsInstanceRequirement', 'EqualityRequirement', 'RequirementList', 'NotNoneRequirement']
+__all__ = ['Requirement', 'ContainsRequirement', 'IsInstanceRequirement', 'EqualityRequirement', 'RequirementList']
diff --git a/src/macta_tools/utils/requirements/_not_none_requirement.py b/src/macta_tools/utils/requirements/_not_none_requirement.py
diff --git a/src/macta_tools/utils/requirements/_requirement_list.py b/src/macta_tools/utils/requirements/_requirement_list.py
@@ -17,7 +17,8 @@ def __init__(self, requirements: Optional[Dict[str, Requirement]] = None, **kwar
         super().__init__(requirements={**requirements, **kwargs})
 
     @field_validator('requirements')
-    def requirements_correct_dict(cls, requirements: Optional[Dict[str, Requirement]] = None, **kwargs: Any) -> Dict[str, Requirement]:
+    def requirements_correct_dict(cls, requirements: Optional[Dict[str, Requirement]] = None, **_: Any
+                                  ) -> Dict[str, Requirement]:
 
         # Input Validation
         if not isinstance(requirements, dict) or not IsInstanceRequirement(str).check(*requirements.keys()) \

diff --git a/tests/utils/requirements/test_some_requirement.py b/tests/utils/requirements/test_some_requirement.py