fix docstrings

This PR fixes docstrings so that they are formatted correctly for building the API documentation. Major changes: - Update the docstring template (.vscode/vscode_docstring_google_adapted.mustache) - Remove types from `Args` and `Returns` (as static typing is used, it's not necessary to indicate types in the docstrings; see https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) - Fix some errors in docstrings by following Markdown rules
NPLinker · Mar 5, 2024 · 189a5b3 · 189a5b3
1 parent f6ac5a3
commit 189a5b3
Show file tree

Hide file tree

Showing 35 changed files with 313 additions and 307 deletions.
diff --git a/.vscode/vscode_docstring_google_adapted.mustache b/.vscode/vscode_docstring_google_adapted.mustache
@@ -6,23 +6,23 @@
 {{  # parametersExist}}
 Args:
 {{  # args}}
-    {{var}}({{typePlaceholder}}): {{descriptionPlaceholder}}
+    {{var}}: {{descriptionPlaceholder}}
 {{/args}}
 {{  # kwargs}}
-    {{var}} ({{typePlaceholder}}, optional): {{descriptionPlaceholder}}. Defaults to {{& default}}.
+    {{var}}: {{descriptionPlaceholder}}. Defaults to {{& default}}.
 {{/kwargs}}
 {{/parametersExist}}
 
+{{  # returnsExist}}
+Returns:
+{{  # returns}}
+    {{descriptionPlaceholder}}
+{{/returns}}
+{{/returnsExist}}
+
 {{  # exceptionsExist}}
 Raises:
 {{  # exceptions}}
     {{type}}: {{descriptionPlaceholder}}
 {{/exceptions}}
 {{/exceptionsExist}}
-
-{{  # returnsExist}}
-Returns:
-{{  # returns}}
-    {{typePlaceholder}}: {{descriptionPlaceholder}}
-{{/returns}}
-{{/returnsExist}}
diff --git a/src/nplinker/arranger.py b/src/nplinker/arranger.py
@@ -106,6 +106,7 @@ def arrange_gnps(self) -> None:
         existing GNPS data and re-download it if it is invalid.
 
         The validation process includes:
+
         - Check if the GNPS data directory exists.
         - Check if the required files exist in the GNPS data directory, including:
             - file_mappings.tsv or file_mappings.csv
@@ -139,7 +140,7 @@ def _get_gnps_file_mappings_file(self) -> Path:
         the TSV file or the CSV file exists in the default GNPS directory.
 
         Returns:
-            Path: Path to the GNPS file mappings file.
+            Path to the GNPS file mappings file.
         """
         file_mappings_tsv = globals.GNPS_DEFAULT_PATH / globals.GNPS_FILE_MAPPINGS_TSV
         file_mappings_csv = globals.GNPS_DEFAULT_PATH / globals.GNPS_FILE_MAPPINGS_CSV
@@ -182,6 +183,7 @@ def arrange_antismash(self) -> None:
             is a number).
 
         AntiSMASH BGC directory must follow the structure below:
+        ```
         antismash
             ├── genome_id_1 (one AntiSMASH output, e.g. GCF_000514775.1)
             │  ├── GCF_000514775.1.gbk
@@ -190,6 +192,7 @@ def arrange_antismash(self) -> None:
             ├── genome_id_2
             │  ├── ...
             └── ...
+        ```
         """
         pass_validation = False
         if config.mode == "podp":
@@ -229,9 +232,10 @@ def arrange_bigscape(self) -> None:
         will be copied to the default BiG-SCAPE directory.
 
         The validation process includes:
+
         - Check if the default BiG-SCAPE data directory exists.
         - Check if the clustering file "mix_clustering_c{config.bigscape.cutoff}.tsv" exists in the
-            BiG-SCAPE data directory.
+                BiG-SCAPE data directory.
         """
         pass_validation = False
         if config.mode == "podp":
@@ -290,6 +294,7 @@ def _validate_strain_mappings(self) -> None:
         """Validate the strain mappings file.
 
         The validation process includes:
+
         - Check if the strain mappings file exists.
         - Check if the strain mappings file is a valid JSON file according to the schema defined in
             `schemas/strain_mappings_schema.json`.
@@ -346,13 +351,14 @@ def validate_gnps(gnps_dir: Path) -> None:
     """Validate the GNPS data directory and its contents.
 
     The GNPS data directory must contain the following files:
+
     - file_mappings.tsv or file_mappings.csv
     - spectra.mgf
     - molecular_families.tsv
     - annotations.tsv
 
     Args:
-        gnps_dir (Path): Path to the GNPS data directory.
+        gnps_dir: Path to the GNPS data directory.
 
     Raises:
         FileNotFoundError: If the GNPS data directory is not found or any of the required files
@@ -392,6 +398,7 @@ def validate_antismash(antismash_dir: Path) -> None:
 
     The validation only checks the structure of the antiSMASH data directory and file names.
     It does not check
+
     - the content of the BGC files
     - the consistency between the antiSMASH data and the PODP project JSON file for the PODP
         mode
@@ -401,7 +408,7 @@ def validate_antismash(antismash_dir: Path) -> None:
     file (with the suffix ".region???.gbk" where ??? is the region number).
 
     Args:
-        antismash_dir (Path): Path to the antiSMASH data directory.
+        antismash_dir: Path to the antiSMASH data directory.
 
     Raises:
         FileNotFoundError: If the antiSMASH data directory is not found, or no sub-directories
@@ -439,7 +446,7 @@ def validate_bigscape(bigscape_dir: Path) -> None:
     bigscape cutoff value set in the config file.
 
     Args:
-        bigscape_dir(Path): Path to the BiG-SCAPE data directory.
+        bigscape_dir: Path to the BiG-SCAPE data directory.
 
     Raises:
         FileNotFoundError: If the BiG-SCAPE data directory or the clustering file is not found.

diff --git a/src/nplinker/genomics/aa_pred.py b/src/nplinker/genomics/aa_pred.py
@@ -55,7 +55,7 @@ def __init__(self, filename):
             directly use AntiSmash5Record or AntiSmash4Record.
 
         Args:
-            filename(str): AntiSMASH file path
+            filename: AntiSMASH file path
         """
         self.raw_data = []
         self.filename = filename
@@ -100,7 +100,7 @@ def __init__(self, seq_record):
             AntiSMASH v5 data.
 
         Args:
-            seq_record(Bio.SeqRecord): SeqRecord of AntiSMASH
+            seq_record: SeqRecord of AntiSMASH
         """
         self.raw_data = seq_record
         self.description = self.raw_data.description
@@ -143,10 +143,10 @@ def get_prob(self, aa):
         """Get probability of predicted amino acid.
 
         Args:
-            aa(str): amino acid
+            aa: amino acid
 
         Returns:
-            float: prediction probability
+            prediction probability
         """
         if aa in self.specificities:
             return 1.0

diff --git a/src/nplinker/genomics/abc.py b/src/nplinker/genomics/abc.py
@@ -10,7 +10,7 @@ def __init__(self, data_dir: str):
         """Abstract base class for BGC loader.
 
         Args:
-            data_dir(str): Path to directory that contains BGC metadata files
+            data_dir: Path to directory that contains BGC metadata files
                 (.json) or full data genbank files (.gbk).
         """
         self.data_dir = data_dir
@@ -20,29 +20,29 @@ def get_files(self) -> dict[str, str]:
         """Get path to BGC files.
 
         Returns:
-            dict[str, str]: key is BGC name and value is path to BGC file
+            The key is BGC name and value is path to BGC file
         """
 
     @abstractmethod
     def get_bgcs(self) -> Sequence[BGC]:
         """Get BGC objects.
 
         Returns:
-            Sequence[BGC]: a list of :class:`~nplinker.genomic.BGC` objects
+            A list of BGC objects
         """
 
 
 class GCFLoaderBase(ABC):
     @abstractmethod
-    def get_gcfs(self, keep_mibig_only, keep_singleton) -> Sequence[GCF]:
+    def get_gcfs(self, keep_mibig_only: bool, keep_singleton: bool) -> Sequence[GCF]:
         """Get GCF objects.
 
         Args:
-            keep_mibig_only(bool): True to keep GCFs that contain only MIBiG
+            keep_mibig_only: True to keep GCFs that contain only MIBiG
                 BGCs.
-            keep_singleton(bool): True to keep singleton GCFs. A singleton GCF
+            keep_singleton: True to keep singleton GCFs. A singleton GCF
                 is a GCF that contains only one BGC.
 
         Returns:
-            Sequence[GCF]: a list of :class:`~nplinker.genomic.GCF` objects
+            A list of GCF objects
         """
diff --git a/src/nplinker/genomics/antismash/antismash_downloader.py b/src/nplinker/genomics/antismash/antismash_downloader.py
@@ -26,11 +26,11 @@ def download_and_extract_antismash_data(
     of a genome as the id of the archive.
 
     Args:
-        antismash_id(str): The id used to download BGC archive from antiSMASH database.
+        antismash_id: The id used to download BGC archive from antiSMASH database.
             If the id is versioned (e.g., "GCF_004339725.1") please be sure to
             specify the version as well.
-        download_root(str | PathLike): Path to the directory to place downloaded archive in.
-        extract_root(str | PathLike): Path to the directory data files will be extracted to.
+        download_root: Path to the directory to place downloaded archive in.
+        extract_root: Path to the directory data files will be extracted to.
             Note that an `antismash` directory will be created in the specified `extract_root` if
             it doesn't exist. The files will be extracted to `<extract_root>/antismash/<antismash_id>` directory.
 

diff --git a/src/nplinker/genomics/antismash/antismash_loader.py b/src/nplinker/genomics/antismash/antismash_loader.py
@@ -20,6 +20,7 @@ def __init__(self, data_dir: str) -> None:
 
         Note:
             AntiSMASH BGC directory must follow the structure below:
+            ```
             antismash
                 ├── genome_id_1 (one AntiSMASH output, e.g. GCF_000514775.1)
                 │  ├── GCF_000514775.1.gbk
@@ -28,9 +29,10 @@ def __init__(self, data_dir: str) -> None:
                 ├── genome_id_2
                 │  ├── ...
                 └── ...
+            ```
 
         Args:
-            antismash_dir(str): Path to AntiSMASH directory that contains a
+            data_dir: Path to AntiSMASH directory that contains a
                 collection of AntiSMASH outputs.
         """
         self.data_dir = data_dir
@@ -43,8 +45,8 @@ def get_bgc_genome_mapping(self) -> dict[str, str]:
         Note that the directory name of the gbk file is treated as genome id.
 
         Returns:
-            dict[str, str]: key is BGC name (gbk file name) and value is genome
-                id (the directory name of the gbk file).
+            The key is BGC name (gbk file name) and value is genome id (the directory name of the
+            gbk file).
         """
         return {
             bid: os.path.basename(os.path.dirname(bpath)) for bid, bpath in self._file_dict.items()
@@ -54,8 +56,7 @@ def get_files(self) -> dict[str, str]:
         """Get BGC gbk files.
 
         Returns:
-            dict[str, str]: key is BGC name (gbk file name) and value is path to
-                the gbk file
+            The key is BGC name (gbk file name) and value is path to the gbk file.
         """
         return self._file_dict
 
@@ -64,12 +65,11 @@ def _parse_data_dir(data_dir: str) -> dict[str, str]:
         """Parse AntiSMASH directory to get path of all BGC gbk files.
 
         Args:
-            data_dir(str): Path to AntiSMASH directory that contains
+            data_dir: Path to AntiSMASH directory that contains
                 a collection of AntiSMASH outputs
 
         Returns:
-            dict[str, str]: key is BGC name (gbk file name) and value is path to
-                the gbk file
+            The key is BGC name (gbk file name) and value is path to the gbk file.
         """
         bgc_files = {}
         subdirs = list_dirs(data_dir)
@@ -89,7 +89,7 @@ def get_bgcs(self) -> list[BGC]:
         """Get all BGC objects.
 
         Returns:
-            list[BGC]: a list of :class:`~nplinker.genomic.BGC` objects
+            A list of BGC objects
         """
         return self._bgcs
 
@@ -98,11 +98,11 @@ def _parse_bgcs(bgc_files: dict[str, str]) -> list[BGC]:
         """Load given BGC files as BGC objects.
 
         Args:
-            bgc_files(dict[str, str]): key is BGC name and value is path to the
+            bgc_files: key is BGC name and value is path to the
                 BGC gbk file, see method :meth:`.bgc_files`.
 
         Returns:
-            list[BGC]: a list of :class:`~nplinker.genomic.BGC` objects
+            A list of BGC objects
         """
         return [parse_bgc_genbank(file) for file in bgc_files.values()]
 
@@ -111,10 +111,10 @@ def parse_bgc_genbank(file: str) -> BGC:
     """Parse a single BGC gbk file to BGC object.
 
     Args:
-        file(str): Path to BGC gbk file
+        file: Path to BGC gbk file
 
     Returns:
-        BGC: :class:`~nplinker.genomic.BGC` object
+        BGC object
 
     Examples:
         >>> bgc = AntismashBGCLoader.parse_bgc(

diff --git a/src/nplinker/genomics/bgc.py b/src/nplinker/genomics/bgc.py
@@ -28,40 +28,40 @@ def __init__(self, bgc_id: str, /, *product_prediction: str):
         and used by MIBiG.
 
         Args:
-            bgc_id(str): BGC identifier, e.g. MIBiG accession, GenBank accession.
-            product_prediction(tuple[str]): BGC's (predicted) natural products
+            bgc_id: BGC identifier, e.g. MIBiG accession, GenBank accession.
+            product_prediction: BGC's (predicted) natural products
                 or product classes.
 
         Attributes:
-            bgc_id(str): BGC identifier, e.g. MIBiG accession, GenBank accession.
-            product_prediction(tuple[str]): A tuple of (predicted) natural
+            bgc_id: BGC identifier, e.g. MIBiG accession, GenBank accession.
+            product_prediction: A tuple of (predicted) natural
                 products or product classes of the BGC.
                 For antiSMASH's GenBank data, the feature `region /product`
                 gives product information.
                 For MIBiG metadata, its biosynthetic class provides such info.
-            mibig_bgc_class(tuple[str] | None): A tuple of MIBiG biosynthetic
+            mibig_bgc_class: A tuple of MIBiG biosynthetic
                 classes to which the BGC belongs.
                 Defaults to None.
                 MIBiG defines 6 major biosynthetic classes for natural products,
                 including "NRP", "Polyketide", "RiPP", "Terpene", "Saccharide"
                 and "Alkaloid". Note that natural products created by all other
                 biosynthetic mechanisms fall under the category "Other".
                 More details see the publication: https://doi.org/10.1186/s40793-018-0318-y.
-            description(str | None): Brief description of the BGC.
+            description: Brief description of the BGC.
                 Defaults to None.
-            smiles(tuple[str] | None): A tuple of SMILES formulas of the BGC's
+            smiles: A tuple of SMILES formulas of the BGC's
                 products.
                 Defaults to None.
-            antismash_file(str | None): The path to the antiSMASH GenBank file.
+            antismash_file: The path to the antiSMASH GenBank file.
                 Defaults to None.
-            antismash_id(str | None): Identifier of the antiSMASH BGC, referring
+            antismash_id: Identifier of the antiSMASH BGC, referring
                 to the feature `VERSION` of GenBank file.
                 Defaults to None.
-            antismash_region(int | None): AntiSMASH BGC region number, referring
+            antismash_region: AntiSMASH BGC region number, referring
                 to the feature `region` of GenBank file.
                 Defaults to None.
-            parents(set[GCF]): The set of GCFs that contain the BGC.
-            strain(Strain): The strain of the BGC.
+            parents: The set of GCFs that contain the BGC.
+            strain: The strain of the BGC.
 
         .. GenBank features:
             https://www.insdc.org/submitting-standards/feature-table/
@@ -109,7 +109,7 @@ def add_parent(self, gcf: GCF) -> None:
         """Add a parent GCF to the BGC.
 
         Args:
-            gcf(GCF): gene cluster family
+            gcf: gene cluster family
         """
         gcf.add_bgc(self)
 
@@ -143,20 +143,20 @@ def is_mibig(self) -> bool:
             BGC names start with "BGC". It might give false positive result.
 
         Returns:
-            bool: True if it's MIBiG reference BGC
+            True if it's MIBiG reference BGC
         """
         return self.bgc_id.startswith("BGC")
 
     # CG: why not providing whole product but only amino acid as product monomer?
     # this property is not used in NPLinker core business.
     @property
     @deprecated(version="2.0.0", reason="This method will be removed soon")
-    def aa_predictions(self):
+    def aa_predictions(self) -> list:
         """Amino acids as predicted monomers of product.
 
         Returns:
-            list: list of dicts with key as amino acid and value as prediction
-                probability.
+            list of dicts with key as amino acid and value as prediction
+            probability.
         """
         # Load aa predictions and cache them
         self._aa_predictions = None