Skip to content

Commit

Permalink
fix mypy and ruff errors (#257)
Browse files Browse the repository at this point in the history
* use overload for MetcalfScoring

* add networkx stub file

* fix mypy errors

fix the code where possible; otherwise suppress type checking for spurious mypy errors

* fix ruff check errors for refactored code

* run ruff format

* fix imports

* standardize the use of TYPE_CHECKING: use it only to avoid circular imports

This ensures that the type hints are available both during type checking and at runtime, improving code clarity and reducing the chance of runtime errors related to type hints.

* fix non-existing attribute bug

* fix typos

* use broader type hints Sequence and Mapping

use broader type hints Sequence and Mapping to replace list and dict, respectively

* change `datas` to `data`

* use specific return types for abstract methods when possible

use more general type when necessary
  • Loading branch information
CunliangGeng authored Jun 14, 2024
1 parent fc9ddec commit 6a6f170
Show file tree
Hide file tree
Showing 40 changed files with 188 additions and 143 deletions.
2 changes: 1 addition & 1 deletion README.dev.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ python3 -m pip install --upgrade pip setuptools
# install development dependencies
pip install --no-cache-dir --editable ".[dev]"

# install non-pypi dependecies
# install non-pypi dependencies
install-nplinker-deps
```

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ dev = [
"types-Deprecated",
"types-beautifulsoup4",
"types-jsonschema",
"types-networkx",
"pandas-stubs",
# docs
"mkdocs",
Expand Down
4 changes: 2 additions & 2 deletions src/nplinker/arranger.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def _get_gnps_file_mappings_file(self) -> Path:
file_mappings_tsv if file_mappings_tsv.exists() else file_mappings_csv
)

return gnps_file_mappings_file
return gnps_file_mappings_file # type: ignore

def _download_and_extract_gnps(self) -> None:
"""Download and extract the GNPS data.
Expand Down Expand Up @@ -304,7 +304,7 @@ def arrange_strain_mappings(self) -> None:
If `self.config.mode` is "local", validate the strain mappings file.
If `self.config.mode` is "podp", always generate the strain mappings file and validate it.
The valiation checks if the strain mappings file exists and if it is a valid JSON file
The validation checks if the strain mappings file exists and if it is a valid JSON file
according to the schema defined in `schemas/strain_mappings_schema.json`.
"""
if self.config.mode == "podp":
Expand Down
4 changes: 0 additions & 4 deletions src/nplinker/class_info/runcanopus.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,3 @@ def run_canopus(mgf_file, output_path, extra_params="--maxmz 600 formula zodiac
open(os.path.join(output_path, "completed"), "w").close()

return True


if __name__ == "__main__":
run_canopus(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
2 changes: 1 addition & 1 deletion src/nplinker/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def load_config(config_file: str | PathLike) -> Dynaconf:


# Note:
# Validataor parameter `required=False` means the setting (e.g. "loglevel") must not exist rather
# Validator parameter `required=False` means the setting (e.g. "loglevel") must not exist rather
# than being optional. So don't set the parameter `required` if the key is optional.
CONFIG_VALIDATORS = [
# General settings
Expand Down
7 changes: 3 additions & 4 deletions src/nplinker/genomics/abc.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from abc import ABC
from abc import abstractmethod
from collections.abc import Sequence
from .bgc import BGC
from .gcf import GCF


class BGCLoaderBase(ABC):
"""Abstract base class for BGC loader."""

def __init__(self, data_dir: str):
def __init__(self, data_dir: str) -> None:
"""Initialize the BGC loader.
Args:
Expand All @@ -26,7 +25,7 @@ def get_files(self) -> dict[str, str]:
"""

@abstractmethod
def get_bgcs(self) -> Sequence[BGC]:
def get_bgcs(self) -> list[BGC]:
"""Get BGC objects.
Returns:
Expand All @@ -38,7 +37,7 @@ class GCFLoaderBase(ABC):
"""Abstract base class for GCF loader."""

@abstractmethod
def get_gcfs(self, keep_mibig_only: bool, keep_singleton: bool) -> Sequence[GCF]:
def get_gcfs(self, keep_mibig_only: bool, keep_singleton: bool) -> list[GCF]:
"""Get GCF objects.
Args:
Expand Down
3 changes: 2 additions & 1 deletion src/nplinker/genomics/antismash/antismash_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import fnmatch
import logging
import os
from typing import Mapping
from Bio import SeqIO
from Bio import SeqRecord
from nplinker.genomics import BGC
Expand Down Expand Up @@ -97,7 +98,7 @@ def get_bgcs(self) -> list[BGC]:
return self._bgcs

@staticmethod
def _parse_bgcs(bgc_files: dict[str, str]) -> list[BGC]:
def _parse_bgcs(bgc_files: Mapping[str, str]) -> list[BGC]:
"""Load given BGC files as BGC objects.
Args:
Expand Down
10 changes: 6 additions & 4 deletions src/nplinker/genomics/antismash/podp_antismash_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import time
from os import PathLike
from pathlib import Path
from typing import Mapping
from typing import Sequence
import httpx
from bs4 import BeautifulSoup
from bs4 import NavigableString
Expand Down Expand Up @@ -82,7 +84,7 @@ def read_json(file: str | PathLike) -> dict[str, "GenomeStatus"]:

@staticmethod
def to_json(
genome_status_dict: dict[str, "GenomeStatus"], file: str | PathLike | None = None
genome_status_dict: Mapping[str, "GenomeStatus"], file: str | PathLike | None = None
) -> str | None:
"""Convert the genome status dictionary to a JSON string.
Expand Down Expand Up @@ -122,7 +124,7 @@ def _to_dict(self) -> dict:


def podp_download_and_extract_antismash_data(
genome_records: list[dict[str, dict[str, str]]],
genome_records: Sequence[Mapping[str, Mapping[str, str]]],
project_download_root: str | PathLike,
project_extract_root: str | PathLike,
):
Expand Down Expand Up @@ -220,7 +222,7 @@ def podp_download_and_extract_antismash_data(
raise ValueError("No antiSMASH data found for any genome")


def get_best_available_genome_id(genome_id_data: dict[str, str]) -> str | None:
def get_best_available_genome_id(genome_id_data: Mapping[str, str]) -> str | None:
"""Get the best available ID from genome_id_data dict.
Args:
Expand Down Expand Up @@ -359,7 +361,7 @@ def _resolve_jgi_accession(jgi_id: str) -> str:
return _resolve_genbank_accession(link.text)


def _resolve_refseq_id(genome_id_data: dict[str, str]) -> str:
def _resolve_refseq_id(genome_id_data: Mapping[str, str]) -> str:
"""Get the RefSeq ID to which the genome accession is linked.
Check https://pairedomicsdata.bioinformatics.nl/schema.json.
Expand Down
2 changes: 1 addition & 1 deletion src/nplinker/genomics/bgc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
import logging
from typing import TYPE_CHECKING
from deprecated import deprecated
from nplinker.strain import Strain
from .aa_pred import predict_aa


if TYPE_CHECKING:
from ..strain import Strain
from .gcf import GCF

logger = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion src/nplinker/genomics/gcf.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from nplinker.strain import Strain
from nplinker.strain import StrainCollection


if TYPE_CHECKING:
from nplinker.strain import Strain
from .bgc import BGC

logger = logging.getLogger(__name__)
Expand Down
8 changes: 4 additions & 4 deletions src/nplinker/genomics/mibig/mibig_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ class MibigLoader:
"""

def __init__(self, data_dir: str):
"""Initialize the MIBiG metatdata loader.
"""Initialize the MIBiG metadata loader.
Args:
data_dir: Path to the directory of MIBiG metadata json files
"""
self.data_dir = data_dir
self._file_dict = self.parse_data_dir(self.data_dir)
self._metadata_dict = self._parse_metadatas()
self._metadata_dict = self._parse_metadata()
self._bgcs = self._parse_bgcs()

def get_files(self) -> dict[str, str]:
Expand Down Expand Up @@ -58,15 +58,15 @@ def parse_data_dir(data_dir: str) -> dict[str, str]:
file_dict[fname] = file
return file_dict

def get_metadatas(self) -> dict[str, MibigMetadata]:
def get_metadata(self) -> dict[str, MibigMetadata]:
"""Get MibigMetadata objects.
Returns:
The key is BGC accession (file name) and the value is MibigMetadata object
"""
return self._metadata_dict

def _parse_metadatas(self) -> dict[str, MibigMetadata]:
def _parse_metadata(self) -> dict[str, MibigMetadata]:
"""Parse all metadata files and return MibigMetadata objects.
Returns:
Expand Down
16 changes: 10 additions & 6 deletions src/nplinker/genomics/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import logging
from os import PathLike
from pathlib import Path
from typing import Mapping
from typing import Sequence
from jsonschema import validate
from nplinker.defaults import GENOME_BGC_MAPPINGS_FILENAME
from nplinker.schemas import GENOME_BGC_MAPPINGS_SCHEMA
Expand Down Expand Up @@ -65,7 +67,9 @@ def generate_mappings_genome_id_bgc_id(
logger.info("Generated genome-BGC mappings file: %s", output_file)


def add_strain_to_bgc(strains: StrainCollection, bgcs: list[BGC]) -> tuple[list[BGC], list[BGC]]:
def add_strain_to_bgc(
strains: StrainCollection, bgcs: Sequence[BGC]
) -> tuple[list[BGC], list[BGC]]:
"""Assign a Strain object to `BGC.strain` for input BGCs.
BGC id is used to find the corresponding Strain object. It's possible that
Expand Down Expand Up @@ -111,7 +115,7 @@ def add_strain_to_bgc(strains: StrainCollection, bgcs: list[BGC]) -> tuple[list[


def add_bgc_to_gcf(
bgcs: list[BGC], gcfs: list[GCF]
bgcs: Sequence[BGC], gcfs: Sequence[GCF]
) -> tuple[list[GCF], list[GCF], dict[GCF, set[str]]]:
"""Add BGC objects to GCF object based on GCF's BGC ids.
Expand Down Expand Up @@ -165,7 +169,7 @@ def add_bgc_to_gcf(
return gcf_with_bgc, gcf_without_bgc, gcf_missing_bgc


def get_mibig_from_gcf(gcfs: list[GCF]) -> tuple[list[BGC], StrainCollection]:
def get_mibig_from_gcf(gcfs: Sequence[GCF]) -> tuple[list[BGC], StrainCollection]:
"""Get MIBiG BGCs and strains from GCF objects.
Args:
Expand Down Expand Up @@ -277,9 +281,9 @@ def extract_mappings_resolved_genome_id_bgc_id(


def get_mappings_strain_id_bgc_id(
mappings_strain_id_original_genome_id: dict[str, set[str]],
mappings_original_genome_id_resolved_genome_id: dict[str, str],
mappings_resolved_genome_id_bgc_id: dict[str, set[str]],
mappings_strain_id_original_genome_id: Mapping[str, set[str]],
mappings_original_genome_id_resolved_genome_id: Mapping[str, str],
mappings_resolved_genome_id_bgc_id: Mapping[str, set[str]],
) -> dict[str, set[str]]:
"""Get mappings "strain_id <-> bgc_id".
Expand Down
20 changes: 14 additions & 6 deletions src/nplinker/loader.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
from __future__ import annotations
import logging
import os
from deprecated import deprecated
from dynaconf import Dynaconf
from nplinker import NPLINKER_APP_DATA_DIR
from nplinker import defaults
from nplinker.genomics import BGC
from nplinker.genomics import GCF
from nplinker.genomics.antismash import AntismashBGCLoader
from nplinker.genomics.bigscape import BigscapeGCFLoader
from nplinker.genomics.bigscape import BigscapeV2GCFLoader
from nplinker.genomics.mibig import MibigLoader
from nplinker.genomics.utils import add_bgc_to_gcf
from nplinker.genomics.utils import add_strain_to_bgc
from nplinker.genomics.utils import get_mibig_from_gcf
from nplinker.metabolomics import MolecularFamily
from nplinker.metabolomics import Spectrum
from nplinker.metabolomics.gnps import GNPSAnnotationLoader
from nplinker.metabolomics.gnps import GNPSMolecularFamilyLoader
from nplinker.metabolomics.gnps import GNPSSpectrumLoader
Expand Down Expand Up @@ -58,11 +63,14 @@ def __init__(self, config: Dynaconf):
"""
self.config = config

self.bgcs, self.gcfs, self.spectra, self.mfs = [], [], [], []
self.mibig_bgcs = []
self.mibig_strains_in_use = StrainCollection()
self.product_types = []
self.strains = StrainCollection()
self.bgcs: list[BGC] = []
self.gcfs: list[GCF] = []
self.spectra: list[Spectrum] = []
self.mfs: list[MolecularFamily] = []
self.mibig_bgcs: list[BGC] = []
self.mibig_strains_in_use: StrainCollection = StrainCollection()
self.product_types: list = []
self.strains: StrainCollection = StrainCollection()

self.class_matches = None
self.chem_classes = None
Expand Down Expand Up @@ -93,7 +101,7 @@ def _load_strain_mappings(self):
self.strains.add(strain)
logger.info("Loaded {} non-MiBIG Strain objects".format(len(self.strains)))

# 2. filter user specificied strains (remove all that are not specified by user).
# 2. filter user specified strains (remove all that are not specified by user).
# It's not allowed to specify empty list of strains, otherwise validation will fail.
user_strains_file = self.config.root_dir / defaults.STRAINS_SELECTED_FILENAME
if user_strains_file.exists():
Expand Down
42 changes: 30 additions & 12 deletions src/nplinker/metabolomics/abc.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
from abc import ABC
from abc import abstractmethod
from collections.abc import Sequence
from typing import TYPE_CHECKING


if TYPE_CHECKING:
from .molecular_family import MolecularFamily
from .spectrum import Spectrum
from .molecular_family import MolecularFamily
from .spectrum import Spectrum


class SpectrumLoaderBase(ABC):
"""Abstract base class for SpectrumLoader."""

@property
@abstractmethod
def spectra(self) -> Sequence["Spectrum"]: ...
def spectra(self) -> list["Spectrum"]:
"""Get Spectrum objects.
Returns:
A sequence of Spectrum objects.
"""


class MolecularFamilyLoaderBase(ABC):
"""Abstract base class for MolecularFamilyLoader."""

@abstractmethod
def get_mfs(self, keep_singleton: bool) -> Sequence["MolecularFamily"]:
def get_mfs(self, keep_singleton: bool) -> list["MolecularFamily"]:
"""Get MolecularFamily objects.
Args:
Expand All @@ -26,17 +30,31 @@ def get_mfs(self, keep_singleton: bool) -> Sequence["MolecularFamily"]:
only one spectrum.
Returns:
A list of MolecularFamily objects.
A sequence of MolecularFamily objects.
"""


class FileMappingLoaderBase(ABC):
"""Abstract base class for FileMappingLoader."""

@property
@abstractmethod
def mappings(self) -> dict[str, list[str]]: ...
def mappings(self) -> dict[str, list[str]]:
"""Get file mappings.
Returns:
A mapping from spectrum ID to the names of files where the spectrum occurs.
"""


class AnnotationLoaderBase(ABC):
"""Abstract base class for AnnotationLoader."""

@property
@abstractmethod
def annotations(self) -> dict[str, dict]: ...
def annotations(self) -> dict[str, dict]:
"""Get annotations.
Returns:
A mapping from spectrum ID to its annotations.
"""
Loading

0 comments on commit 6a6f170

Please sign in to comment.