Skip to content

Commit

Permalink
change GCF attribute gcf_id to id
Browse files Browse the repository at this point in the history
  • Loading branch information
CunliangGeng committed Jun 10, 2024
1 parent 8835f1c commit 027357a
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 21 deletions.
14 changes: 7 additions & 7 deletions src/nplinker/genomics/gcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,29 @@ class GCF:
tools such as BiG-SCAPE and BiG-SLICE.
Attributes:
gcf_id: id of the GCF object.
id: id of the GCF object.
bgc_ids: a set of BGC ids that belongs to the GCF.
bigscape_class: BiG-SCAPE's BGC class.
BiG-SCAPE's BGC classes are similar to those defined in MiBIG
but have more categories (7 classes). For more details, see:
https://doi.org/10.1038%2Fs41589-019-0400-9.
"""

def __init__(self, gcf_id: str, /) -> None:
def __init__(self, id: str, /) -> None:
"""Initialize the GCF object.
Args:
gcf_id: id of the GCF object.
id: id of the GCF object.
"""
self.gcf_id = gcf_id
self.id = id
self.bgc_ids: set[str] = set()
self.bigscape_class: str | None = None
self._bgcs: set[BGC] = set()
self._strains: StrainCollection = StrainCollection()

def __str__(self) -> str:
return (
f"GCF(id={self.gcf_id}, #BGC_objects={len(self.bgcs)}, #bgc_ids={len(self.bgc_ids)},"
f"GCF(id={self.id}, #BGC_objects={len(self.bgcs)}, #bgc_ids={len(self.bgc_ids)},"
f"#strains={len(self._strains)})."
)

Expand All @@ -49,7 +49,7 @@ def __repr__(self) -> str:

def __eq__(self, other) -> bool:
if isinstance(other, GCF):
return self.gcf_id == other.gcf_id and self.bgcs == other.bgcs
return self.id == other.id and self.bgcs == other.bgcs
return NotImplemented

def __hash__(self) -> int:
Expand All @@ -58,7 +58,7 @@ def __hash__(self) -> int:
Note that GCF class is a mutable container. We only hash the GCF id to
avoid the hash value changes when `self._bgcs` is updated.
"""
return hash(self.gcf_id)
return hash(self.id)

@property
def bgcs(self) -> set[BGC]:
Expand Down
6 changes: 3 additions & 3 deletions src/nplinker/nplinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,9 +270,9 @@ def lookup_bgc(self, id):
"""If BGC ``id`` exists, return it. Otherwise return None."""
return self._bgc_lookup.get(id, None)

def lookup_gcf(self, gcf_id):
"""If GCF ``gcf_id`` exists, return it. Otherwise return None."""
return self._gcf_lookup.get(gcf_id, None)
def lookup_gcf(self, id):
"""If GCF ``id`` exists, return it. Otherwise return None."""
return self._gcf_lookup.get(id, None)

def lookup_spectrum(self, id):
"""If Spectrum ``id`` exists, return it. Otherwise return None."""
Expand Down
2 changes: 1 addition & 1 deletion src/nplinker/pickler.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def persistent_id(self, obj):
if isinstance(obj, BGC):
return ("BGC", obj.id)
elif isinstance(obj, GCF):
return ("GCF", obj.gcf_id)
return ("GCF", obj.id)
elif isinstance(obj, Spectrum):
return ("Spectrum", obj.id)
elif isinstance(obj, MolecularFamily):
Expand Down
4 changes: 1 addition & 3 deletions src/nplinker/scoring/metcalf_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,9 +387,7 @@ def _get_links(

links = []
if obj_type == "gcf":
# TODO CG: the hint and mypy warnings will be gone after renaming all
# string ids to `.id`
obj_ids = [gcf.gcf_id for gcf in objects]
obj_ids = [gcf.id for gcf in objects]
# spec-gcf
scores = self.raw_score_spec_gcf.loc[:, obj_ids]
df = self._get_scores_source_gcf(scores, score_cutoff)
Expand Down
6 changes: 3 additions & 3 deletions src/nplinker/scoring/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,19 @@ def get_presence_gcf_strain(gcfs: Sequence[GCF], strains: StrainCollection) -> p
"""Get the occurrence of strains in gcfs.
The occurrence is a DataFrame with gcfs as rows and strains as columns,
where index is `gcf.gcf_id` and column name is `strain.id`. The values
where index is `gcf.id` and column name is `strain.id`. The values
are 1 if the gcf contains the strain and 0 otherwise.
"""
df_gcf_strain = pd.DataFrame(
np.zeros((len(gcfs), len(strains))),
index=[gcf.gcf_id for gcf in gcfs],
index=[gcf.id for gcf in gcfs],
columns=[strain.id for strain in strains],
dtype=int,
)
for gcf in gcfs:
for strain in strains:
if gcf.has_strain(strain):
df_gcf_strain.loc[gcf.gcf_id, strain.id] = 1
df_gcf_strain.loc[gcf.id, strain.id] = 1
return df_gcf_strain


Expand Down
4 changes: 2 additions & 2 deletions tests/unit/genomics/test_gcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def bgc_without_strain():
def test_init():
"""Test the initialization of GCF."""
gcf = GCF("1")
assert gcf.gcf_id == "1"
assert gcf.id == "1"
assert gcf.bgcs == set()
assert isinstance(gcf.strains, StrainCollection)
assert len(gcf.strains) == 0
Expand Down Expand Up @@ -77,7 +77,7 @@ def test_add_bgc_wo_strain(bgc_without_strain, caplog):
"""Test add_bgc method with a BGC that does not have a strain."""
gcf = GCF("1")
gcf.add_bgc(bgc_without_strain)
assert gcf.gcf_id == "1"
assert gcf.id == "1"
assert gcf.bgcs == {bgc_without_strain}
assert len(gcf.strains) == 0
assert "No strain specified for the BGC" in caplog.text
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/scoring/test_nplinker_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def test_get_links_gcf_standardised_false(npl, mc, gcfs, spectra, mfs, strains_l
assert isinstance(links, LinkCollection)
links = links.links # dict of link values
assert len(links) == 3
assert {i.gcf_id for i in links.keys()} == {"gcf1", "gcf2", "gcf3"}
assert {i.id for i in links.keys()} == {"gcf1", "gcf2", "gcf3"}
assert isinstance(links[gcfs[0]][spectra[0]], ObjectLink)
assert links[gcfs[0]][spectra[0]].data(mc) == 12
assert links[gcfs[1]][spectra[0]].data(mc) == -9
Expand All @@ -30,7 +30,7 @@ def test_get_links_gcf_standardised_false(npl, mc, gcfs, spectra, mfs, strains_l
links = npl.get_links(list(gcfs), mc, and_mode=True)
assert isinstance(links, LinkCollection)
links = links.links
assert {i.gcf_id for i in links.keys()} == {"gcf1", "gcf2", "gcf3"}
assert {i.id for i in links.keys()} == {"gcf1", "gcf2", "gcf3"}
assert isinstance(links[gcfs[0]][spectra[0]], ObjectLink)
# test scores
assert links[gcfs[0]][spectra[0]].data(mc) == 12
Expand Down

0 comments on commit 027357a

Please sign in to comment.