NPLinker · CunliangGeng · Aug 24, 2023 · Aug 11, 2023 · Aug 11, 2023 · Aug 11, 2023
diff --git a/src/nplinker/genomics/gcf.py b/src/nplinker/genomics/gcf.py
@@ -3,6 +3,7 @@
 from nplinker.logconfig import LogConfig
 from nplinker.strain_collection import StrainCollection
 
+
 if TYPE_CHECKING:
     from nplinker.strains import Strain
     from .bgc import BGC
@@ -33,8 +34,6 @@ def __init__(self, gcf_id: str, /) -> None:
         self.gcf_id = gcf_id
         self._bgcs: set[BGC] = set()
         self.bigscape_class: str | None = None
-        # CG TODO: remove attribute id, see issue 103
-        #    https://github.com/NPLinker/nplinker/issues/103
         self.bgc_ids: set[str] = set()
         self.strains: StrainCollection = StrainCollection()
 
@@ -83,11 +82,11 @@ def detach_bgc(self, bgc: BGC) -> None:
                     return
             self.strains.remove(bgc.strain)
 
-    def has_strain(self, strain: str | Strain) -> bool:
+    def has_strain(self, strain: Strain) -> bool:
         """Check if the given strain exists.
 
         Args:
-            strain(str | Strain): strain id or `Strain` object.
+            strain(Strain): `Strain` object.
 
         Returns:
             bool: True when the given strain exist.

diff --git a/src/nplinker/genomics/genomics.py b/src/nplinker/genomics/genomics.py
@@ -69,39 +69,31 @@ def generate_mappings_genome_id_bgc_id(
     logger.info("Generated genome-BGC mappings file: %s", output_file)
 
 
-def map_strain_to_bgc(strains: StrainCollection, bgcs: list[BGC],
-                      bgc_genome_mapping: dict[str, str]):
+def map_strain_to_bgc(strains: StrainCollection, bgcs: list[BGC]):
     """To set BGC object's strain with representative strain object.
 
     This method changes the list `bgcs` in place.
 
-    It's assumed that BGC's genome id is used as strain's name or alias, and
-    the genome id is used to lookup the representative strain.
-
     Args:
         strains(StrainCollection): A collection of all strain objects.
         bgcs(list[BGC]): A list of BGC objects.
-        bgc_genome_mapping(dict[str, str]): The mappings from BGC id (key) to
-            genome id (value).
 
     Raises:
-        KeyError: BGC id not found in the `bgc_genome_mapping` dict.
         KeyError: Strain id not found in the strain collection.
+        KeyError: Multiple strain objects found for a single BGC id.
     """
     for bgc in bgcs:
         try:
-            genome_id = bgc_genome_mapping[bgc.bgc_id]
+            strain_list = strains.lookup(bgc.bgc_id)
         except KeyError as e:
             raise KeyError(
-                f"Not found BGC id {bgc.bgc_id} in BGC-genome mappings."
-            ) from e
-        try:
-            strain = strains.lookup(genome_id)
-        except KeyError as e:
-            raise KeyError(
-                f"Strain id {genome_id} from BGC object {bgc.bgc_id} "
-                "not found in the StrainCollection object.") from e
-        bgc.strain = strain
+                f"Strain id '{bgc.bgc_id}' from BGC object '{bgc.bgc_id}' "
+                "not found in the strain collection.") from e
+        if len(strain_list) > 1:
+            raise KeyError(
+                f"Multiple strain objects found for BGC id '{bgc.bgc_id}'. "
+                "BGC object accept only one strain.")
+        bgc.strain = strain_list[0]
 
 
 def map_bgc_to_gcf(bgcs: list[BGC], gcfs: list[GCF]):
@@ -122,8 +114,9 @@ def map_bgc_to_gcf(bgcs: list[BGC], gcfs: list[GCF]):
             try:
                 bgc = bgc_dict[bgc_id]
             except KeyError as e:
-                raise KeyError(f"BGC id {bgc_id} from GCF object {gcf.gcf_id} "
-                               "not found in the list of BGC objects.") from e
+                raise KeyError(
+                    f"BGC id '{bgc_id}' from GCF object '{gcf.gcf_id}' "
+                    "not found in the list of BGC objects.") from e
             gcf.add_bgc(bgc)
 
 
@@ -155,6 +148,7 @@ def get_strains_from_bgcs(bgcs: list[BGC]) -> StrainCollection:
     return sc
 
 
+
 @deprecated(version="1.3.3", reason="It is split to separate functions: " \
             "map_strain_to_bgc, map_bgc_to_gcf, filter_mibig_only_gcf, " \
             "get_bgcs_from_gcfs and get_strains_from_bgcs.")

diff --git a/src/nplinker/genomics/mibig/__init__.py b/src/nplinker/genomics/mibig/__init__.py
@@ -1,6 +1,8 @@
 import logging
 from .mibig_downloader import download_and_extract_mibig_metadata
-from .mibig_loader import MibigBGCLoader, parse_bgc_metadata_json
+from .mibig_loader import MibigLoader
+from .mibig_loader import parse_bgc_metadata_json
 from .mibig_metadata import MibigMetadata
 
+
 logging.getLogger(__name__).addHandler(logging.NullHandler())
diff --git a/src/nplinker/genomics/mibig/mibig_loader.py b/src/nplinker/genomics/mibig/mibig_loader.py
@@ -10,7 +10,7 @@
 logger = LogConfig.getLogger(__name__)
 
 
-class MibigBGCLoader:
+class MibigLoader:
 
     def __init__(self, data_dir: str):
         """Parse MIBiG metadata files and return BGC objects
@@ -26,16 +26,13 @@ def __init__(self, data_dir: str):
         self._metadata_dict = self._parse_metadatas()
         self._bgc_dict = self._parse_bgcs()
 
-    def get_bgc_genome_mapping(self) -> dict[str, str]:
-        """Get the mapping from BGC to genome.
+    def get_strain_bgc_mapping(self) -> dict[str, str]:
+        """Get the mapping from strain name to BGC id.
 
-        Note that for MIBiG BGC, same value is used for BGC id and genome id.
-        Users don't have to provide genome id for MIBiG BGCs in the
-        `strain_mappings.json` file.
+        Note that for MIBiG BGCs, the same value is used as strain name and BGC id.
 
         Returns:
-            dict[str, str]: key is BGC id/accession, value is
-                genome id that uses the value of BGC accession.
+            dict[str, str]: key is strain name, value is BGC id (same string).
         """
         return {bid: bid for bid in self._file_dict}
 
@@ -129,4 +126,4 @@ def parse_bgc_metadata_json(file: str) -> BGC:
 
 
 # register as virtual class to prevent metaclass conflicts
-BGCLoaderBase.register(MibigBGCLoader)
+BGCLoaderBase.register(MibigLoader)