NPLinker · CunliangGeng · Apr 8, 2024 · Mar 27, 2024 · Mar 27, 2024
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -131,6 +131,7 @@ plugins:
           members_order: source
           filters: ["!^_"]
           merge_init_into_class: true
+          show_if_no_docstring: true
           show_root_heading: true
           show_root_full_path: false
           show_signature_annotations: true

diff --git a/src/nplinker/genomics/antismash/antismash_downloader.py b/src/nplinker/genomics/antismash/antismash_downloader.py
@@ -36,8 +36,8 @@ def download_and_extract_antismash_data(
             it doesn't exist. The files will be extracted to `<extract_root>/antismash/<antismash_id>` directory.
 
     Raises:
-        ValueError: if download_root and extract_root dirs are the same.
-        ValueError: if <extract_root>/antismash/<refseq_assembly_id> dir is not empty.
+        ValueError: if `download_root` and `extract_root` dirs are the same.
+        ValueError: if `<extract_root>/antismash/<refseq_assembly_id>` dir is not empty.
 
     Examples:
         >>> download_and_extract_antismash_metadata("GCF_004339725.1", "/data/download", "/data/extracted")

diff --git a/src/nplinker/genomics/bigscape/bigscape_loader.py b/src/nplinker/genomics/bigscape/bigscape_loader.py
@@ -21,7 +21,7 @@ def __init__(self, cluster_file: str | PathLike, /) -> None:
         Attributes:
             cluster_file: path to the BiG-SCAPE cluster file.
         """
-        self.cluster_file = str(cluster_file)
+        self.cluster_file: str = str(cluster_file)
         self._gcf_list = self._parse_gcf(self.cluster_file)
 
     def get_gcfs(self, keep_mibig_only: bool = False, keep_singleton: bool = False) -> list[GCF]:
@@ -34,7 +34,7 @@ def get_gcfs(self, keep_mibig_only: bool = False, keep_singleton: bool = False)
                 is a GCF that contains only one BGC.
 
         Returns:
-            list[GCF]: a list of GCF objects.
+            A list of GCF objects.
         """
         gcf_list = self._gcf_list
         if not keep_mibig_only:

diff --git a/src/nplinker/genomics/utils.py b/src/nplinker/genomics/utils.py
@@ -78,10 +78,11 @@ def add_strain_to_bgc(strains: StrainCollection, bgcs: list[BGC]) -> tuple[list[
         bgcs: A list of BGC objects.
 
     Returns:
-        A tuple of two lists of BGC objects. The
-            first list contains BGC objects that are updated with Strain object;
-            the second list contains BGC objects that are not updated with
-            Strain object because no Strain object is found.
+        A tuple of two lists of BGC objects:
+
+            - the first list contains BGC objects that are updated with Strain object;
+            - the second list contains BGC objects that are not updated with
+                Strain object because no Strain object is found.
 
     Raises:
         ValueError: Multiple strain objects found for a BGC id.
@@ -127,11 +128,13 @@ def add_bgc_to_gcf(
         gcfs: A list of GCF objects.
 
     Returns:
-        The first list contains GCF objects that are updated with BGC objects;
-            The second list contains GCF objects that are not updated with BGC objects
-            because no BGC objects are found;
-            The dictionary contains GCF objects as keys and a set of ids of missing
-            BGC objects as values.
+        A tuple of two lists and a dictionary:
+
+            - The first list contains GCF objects that are updated with BGC objects;
+            - The second list contains GCF objects that are not updated with BGC objects
+                because no BGC objects are found;
+            - The dictionary contains GCF objects as keys and a set of ids of missing
+                BGC objects as values.
     """
     bgc_dict = {bgc.bgc_id: bgc for bgc in bgcs}
     gcf_with_bgc = []
@@ -169,9 +172,11 @@ def get_mibig_from_gcf(gcfs: list[GCF]) -> tuple[list[BGC], StrainCollection]:
         gcfs: A list of GCF objects.
 
     Returns:
-        tuple[list[BGC], StrainCollection]: The first is a list of MIBiG BGC
-            objects used in the GCFs; the second is a StrainCollection object
-            that contains all Strain objects used in the GCFs.
+        A tuple of two objects:
+
+            - the first is a list of MIBiG BGC objects used in the GCFs;
+            - the second is a StrainCollection object that contains all Strain objects used in the
+            GCFs.
     """
     mibig_bgcs_in_use = []
     mibig_strains_in_use = StrainCollection()
@@ -189,7 +194,7 @@ def get_mibig_from_gcf(gcfs: list[GCF]) -> tuple[list[BGC], StrainCollection]:
 # strain_id <-> original_geonme_id <-> resolved_genome_id <-> bgc_id
 # ------------------------------------------------------------------------------
 def extract_mappings_strain_id_original_genome_id(
-    podp_project_json_file: str | PathLike
+    podp_project_json_file: str | PathLike,
 ) -> dict[str, set[str]]:
     """Extract mappings "strain id <-> original genome id".
 
@@ -225,7 +230,7 @@ def extract_mappings_strain_id_original_genome_id(
 
 
 def extract_mappings_original_genome_id_resolved_genome_id(
-    genome_status_json_file: str | PathLike
+    genome_status_json_file: str | PathLike,
 ) -> dict[str, str]:
     """Extract mappings "original_genome_id <-> resolved_genome_id".
 
@@ -246,7 +251,7 @@ def extract_mappings_original_genome_id_resolved_genome_id(
 
 
 def extract_mappings_resolved_genome_id_bgc_id(
-    genome_bgc_mappings_file: str | PathLike
+    genome_bgc_mappings_file: str | PathLike,
 ) -> dict[str, set[str]]:
     """Extract mappings "resolved_genome_id <-> bgc_id".
 

diff --git a/src/nplinker/metabolomics/abc.py b/src/nplinker/metabolomics/abc.py
@@ -12,8 +12,7 @@
 class SpectrumLoaderBase(ABC):
     @property
     @abstractmethod
-    def spectra(self) -> Sequence["Spectrum"]:
-        ...
+    def spectra(self) -> Sequence["Spectrum"]: ...
 
 
 class MolecularFamilyLoaderBase(ABC):
@@ -27,19 +26,17 @@ def get_mfs(self, keep_singleton: bool) -> Sequence["MolecularFamily"]:
                 only one spectrum.
 
         Returns:
-            Sequence[MolecularFamily]: a list of MolecularFamily objects.
+            A list of MolecularFamily objects.
         """
 
 
 class FileMappingLoaderBase(ABC):
     @property
     @abstractmethod
-    def mappings(self) -> dict[str, list[str]]:
-        ...
+    def mappings(self) -> dict[str, list[str]]: ...
 
 
 class AnnotationLoaderBase(ABC):
     @property
     @abstractmethod
-    def annotations(self) -> dict[str, dict]:
-        ...
+    def annotations(self) -> dict[str, dict]: ...
diff --git a/src/nplinker/metabolomics/gnps/gnps_annotation_loader.py b/src/nplinker/metabolomics/gnps/gnps_annotation_loader.py
@@ -80,8 +80,8 @@ def annotations(self) -> dict[str, dict]:
         """Get annotations.
 
         Returns:
-            dict[str, dict]: Keys are spectrum ids ("#Scan#" in annotation file)
-                and values are the annotations dict for each spectrum.
+            Keys are spectrum ids ("#Scan#" in annotation file) and values are the annotations dict
+            for each spectrum.
         """
         return self._annotations
 
@@ -129,6 +129,6 @@ def _load(self) -> None:
                 self._annotations[scan_id] = row
                 # insert useful URLs
                 for t in ["png", "json", "svg", "spectrum"]:
-                    self._annotations[scan_id][
-                        f"{t}_url"
-                    ] = GNPS_UNIVERSAL_SPECTRUM_IDENTIFIER_URL.format(t, row["SpectrumID"])
+                    self._annotations[scan_id][f"{t}_url"] = (
+                        GNPS_UNIVERSAL_SPECTRUM_IDENTIFIER_URL.format(t, row["SpectrumID"])
+                    )
diff --git a/src/nplinker/metabolomics/gnps/gnps_file_mapping_loader.py b/src/nplinker/metabolomics/gnps/gnps_file_mapping_loader.py
@@ -53,8 +53,7 @@ def mappings(self) -> dict[str, list[str]]:
         """Return mapping from spectrum id to files in which this spectrum occurs.
 
         Returns:
-            dict[str, list[str]]: Mapping from spectrum id to names of all
-                files in which this spectrum occurs.
+            Mapping from spectrum id to names of all files in which this spectrum occurs.
         """
         return self._mapping
 
@@ -63,8 +62,7 @@ def mapping_reversed(self) -> dict[str, set[str]]:
         """Return mapping from file name to all spectra that occur in this file.
 
         Returns:
-            dict[str, set[str]]: Mapping from file name to all spectra ids that
-                occur in this file.
+            Mapping from file name to all spectra ids that occur in this file.
         """
         mapping_reversed = {}
         for spectrum_id, ms_filenames in self._mapping.items():

diff --git a/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py b/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py
@@ -54,8 +54,7 @@ def get_mfs(self, keep_singleton: bool = False) -> list[MolecularFamily]:
                 only one spectrum.
 
         Returns:
-            list[MolecularFamily]: A list of MolecularFamily objects with their
-                spectra ids.
+            A list of MolecularFamily objects with their spectra ids.
         """
         mfs = self._mfs
         if not keep_singleton:

diff --git a/src/nplinker/metabolomics/utils.py b/src/nplinker/metabolomics/utils.py
@@ -41,10 +41,11 @@ def add_strains_to_spectrum(
         spectra: A list of Spectrum objects.
 
     Returns:
-        A tuple of two lists of Spectrum
-            objects. The first list contains Spectrum objects that are updated
-            with Strain objects; the second list contains Spectrum objects that
-            are not updated with Strain objects becuase no Strain objects are found.
+        A tuple of two lists of Spectrum objects:
+
+            - the first list contains Spectrum objects that are updated with Strain objects;
+            - the second list contains Spectrum objects that are not updated with Strain objects
+            because no Strain objects are found.
     """
     spectra_with_strains = []
     spectra_without_strains = []
@@ -85,12 +86,13 @@ def add_spectrum_to_mf(
         mfs: A list of MolecularFamily objects.
 
     Returns:
-        tuple:
-            The first list contains MolecularFamily objects that are updated with Spectrum objects.
-            The second list contains MolecularFamily objects that are not updated with Spectrum
+        A tuple of three elements:
+
+            - the first list contains MolecularFamily objects that are updated with Spectrum objects
+            - the second list contains MolecularFamily objects that are not updated with Spectrum
             objects (all Spectrum objects are missing).
-            The dictionary contains MolecularFamily objects as keys and a set of ids of missing
-            Spectrum objects as values.
+            - the third is a dictionary containing MolecularFamily objects as keys and a set of ids
+            of missing Spectrum objects as values.
     """
     spec_dict = {spec.spectrum_id: spec for spec in spectra}
     mf_with_spec = []
@@ -126,7 +128,7 @@ def add_spectrum_to_mf(
 # strain_id <-> MS_filename <-> spectrum_id
 # ------------------------------------------------------------------------------
 def extract_mappings_strain_id_ms_filename(
-    podp_project_json_file: str | PathLike
+    podp_project_json_file: str | PathLike,
 ) -> dict[str, set[str]]:
     """Extract mappings "strain_id <-> MS_filename".
 
@@ -161,7 +163,7 @@ def extract_mappings_strain_id_ms_filename(
 
 
 def extract_mappings_ms_filename_spectrum_id(
-    gnps_file_mappings_file: str | PathLike
+    gnps_file_mappings_file: str | PathLike,
 ) -> dict[str, set[str]]:
     """Extract mappings "MS_filename <-> spectrum_id".
 

diff --git a/src/nplinker/scoring/iokr/spectrum_filters.py b/src/nplinker/scoring/iokr/spectrum_filters.py
@@ -16,7 +16,6 @@
 import os
 import pickle
 import numpy
-
 # import sys
 # sys.path.append('/home/grimur/git/lda')
 # from lda.code.formula import Formula

diff --git a/src/nplinker/scoring/linking/link_finder.py b/src/nplinker/scoring/linking/link_finder.py
@@ -127,6 +127,7 @@ def get_links(
             List of data frames containing the ids of the linked objects
                 and the score. The data frame has index names of
                 'source', 'target' and 'score':
+
                 - the 'source' row contains the ids of the input/source objects,
                 - the 'target' row contains the ids of the target objects,
                 - the 'score' row contains the scores.

diff --git a/src/nplinker/strain/strain.py b/src/nplinker/strain/strain.py
@@ -47,7 +47,7 @@ def names(self) -> set[str]:
         """Get the set of strain names including id and aliases.
 
         Returns:
-            set[str]: A set of names associated with the strain.
+            A set of names associated with the strain.
         """
         return self._aliases | {self.id}
 
@@ -56,7 +56,7 @@ def aliases(self) -> set[str]:
         """Get the set of known aliases.
 
         Returns:
-            set[str]: A set of aliases associated with the strain.
+            A set of aliases associated with the strain.
         """
         return self._aliases
 

diff --git a/src/nplinker/strain/utils.py b/src/nplinker/strain/utils.py
@@ -32,7 +32,7 @@ def load_user_strains(json_file: str | PathLike) -> set[Strain]:
         json_file: Path to the JSON file containing user specified strains.
 
     Returns:
-        set[Strain]: A set of user specified strains.
+        A set of user specified strains.
     """
     with open(json_file, "r") as f:
         json_data = json.load(f)

diff --git a/src/nplinker/utils.py b/src/nplinker/utils.py
@@ -56,7 +56,7 @@ def get_headers(file: str | PathLike) -> list[str]:
         file: Path to the file to read the header from.
 
     Returns:
-        list[str]: list of column names from the header.
+        A list of column names from the header.
     """
     with open(file) as f:
         headers = f.readline().strip()