Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use uniform attribute name `id` #253

Merged
merged 5 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions notebooks/npclassscore_linking/prospecting/class_linking.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python3
"""Initial code for NPClassScore."""

import glob
import os
import sys
Expand Down Expand Up @@ -541,7 +542,7 @@ def class_linking_score(self, obj, target):

if is_spectrum:
# list of list of tuples/None - todo: add to spectrum object
spec_like_classes = self.canopus.spectra_classes.get(str(spec_like.spectrum_id))
spec_like_classes = self.canopus.spectra_classes.get(str(spec_like.id))
spec_like_classes_names_inds = self.canopus.spectra_classes_names_inds
else: # molfam
spec_like_classes = self.canopus.molfam_classes.get(str(spec_like.family_id))
Expand Down Expand Up @@ -654,7 +655,7 @@ def npclass_score(self, obj, target, method="main"):
if is_spectrum:
# list of list of tuples/None - todo: add to spectrum object
# take only 'best' (first) classification per ontology level
all_classes = self.canopus.spectra_classes.get(str(spec_like.spectrum_id))
all_classes = self.canopus.spectra_classes.get(str(spec_like.id))
if all_classes:
spec_like_classes = [
cls_per_lvl
Expand All @@ -675,7 +676,7 @@ def npclass_score(self, obj, target, method="main"):
spec_like_classes_names_inds = self.canopus.molfam_classes_names_inds
if use_mne and not spec_like_classes: # if mne or when main/canopus does not get classes
if is_spectrum:
spec_like_classes = self.molnetenhancer.spectra_classes(spec_like.spectrum_id)
spec_like_classes = self.molnetenhancer.spectra_classes(spec_like.id)
else: # molfam
spec_like_classes = self.molnetenhancer.molfam_classes.get(str(spec_like.family_id))
# classes are same for molfam and spectrum so names are irrespective of is_spectrum
Expand Down Expand Up @@ -777,4 +778,4 @@ def _get_bgc_like_classes(self, bgc_like, is_bgc):
return bgc_like_classes_dict

def _get_bgc_like_gcf(self, bgc_like):
return [gcf for gcf in self.gcfs if bgc_like.bgc_id in [b.bgc_id for b in gcf.bgcs]][0]
return [gcf for gcf in self.gcfs if bgc_like.bgc_id in [b.id for b in gcf.bgcs]][0]
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
# 2. check chemical compound predictions from canopus and molnetenhancer
test_spec = list(npl.spectra)[500]

print(npl.canopus.spectra_classes.get(str(test_spec.spectrum_id)))
print(npl.molnetenhancer.spectra_classes(str(test_spec.spectrum_id)))
print(npl.canopus.spectra_classes.get(str(test_spec.id)))
print(npl.molnetenhancer.spectra_classes(str(test_spec.id)))

# 3. example of a good score, (predicted) NRP linking to a (predicted) peptide like spectrum
print(npl.class_linking_score(list(npl.gcfs)[0], test_spec))
Expand Down
6 changes: 3 additions & 3 deletions src/nplinker/class_info/chem_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,17 +396,17 @@ class prediction for a level. When no class is present, instead of Tuple it will
molfam_classes = {}

for molfam in molfams:
fid = molfam.family_id # the key
fid = molfam.id # the key
spectra = molfam.spectra
# if singleton family, format like 'fid_spectrum-id'
if fid.startswith("singleton-"):
spec_id = spectra[0].spectrum_id
spec_id = spectra[0].id
fid += f"_{spec_id}"
len_molfam = len(spectra)

classes_per_spectra = []
for spec in spectra:
spec_classes = self.spectra_classes.get(spec.spectrum_id)
spec_classes = self.spectra_classes.get(spec.id)
if spec_classes: # account for spectra without prediction
classes_per_spectra.append(spec_classes)

Expand Down
20 changes: 9 additions & 11 deletions src/nplinker/genomics/bgc.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class BGC:
and used by MIBiG.

Attributes:
bgc_id: BGC identifier, e.g. MIBiG accession, GenBank accession.
id: BGC identifier, e.g. MIBiG accession, GenBank accession.
product_prediction: A tuple of (predicted) natural
products or product classes of the BGC.
For antiSMASH's GenBank data, the feature `region /product`
Expand Down Expand Up @@ -59,15 +59,15 @@ class BGC:
strain: The strain of the BGC.
"""

def __init__(self, bgc_id: str, /, *product_prediction: str):
def __init__(self, id: str, /, *product_prediction: str):
"""Initialize the BGC object.

Args:
bgc_id: BGC identifier, e.g. MIBiG accession, GenBank accession.
id: BGC identifier, e.g. MIBiG accession, GenBank accession.
product_prediction: BGC's (predicted) natural products or product classes.
"""
# BGC metadata
self.bgc_id = bgc_id
self.id = id
self.product_prediction = product_prediction

self.mibig_bgc_class: tuple[str] | None = None
Expand All @@ -87,23 +87,21 @@ def __repr__(self):
return str(self)

def __str__(self):
return "{}(bgc_id={}, strain={}, asid={}, region={})".format(
return "{}(id={}, strain={}, asid={}, region={})".format(
self.__class__.__name__,
self.bgc_id,
self.id,
self.strain,
self.antismash_id,
self.antismash_region,
)

def __eq__(self, other) -> bool:
if isinstance(other, BGC):
return (
self.bgc_id == other.bgc_id and self.product_prediction == other.product_prediction
)
return self.id == other.id and self.product_prediction == other.product_prediction
return NotImplemented

def __hash__(self) -> int:
return hash((self.bgc_id, self.product_prediction))
return hash((self.id, self.product_prediction))

def add_parent(self, gcf: GCF) -> None:
"""Add a parent GCF to the BGC.
Expand Down Expand Up @@ -146,7 +144,7 @@ def is_mibig(self) -> bool:
Returns:
True if it's MIBiG reference BGC
"""
return self.bgc_id.startswith("BGC")
return self.id.startswith("BGC")

# CG: why not providing whole product but only amino acid as product monomer?
# this property is not used in NPLinker core business.
Expand Down
20 changes: 10 additions & 10 deletions src/nplinker/genomics/gcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,29 @@ class GCF:
tools such as BiG-SCAPE and BiG-SLICE.

Attributes:
gcf_id: id of the GCF object.
id: id of the GCF object.
bgc_ids: a set of BGC ids that belong to the GCF.
bigscape_class: BiG-SCAPE's BGC class.
BiG-SCAPE's BGC classes are similar to those defined in MIBiG
but have more categories (7 classes). For more details, see:
https://doi.org/10.1038%2Fs41589-019-0400-9.
"""

def __init__(self, gcf_id: str, /) -> None:
def __init__(self, id: str, /) -> None:
"""Initialize the GCF object.

Args:
gcf_id: id of the GCF object.
id: id of the GCF object.
"""
self.gcf_id = gcf_id
self.id = id
self.bgc_ids: set[str] = set()
self.bigscape_class: str | None = None
self._bgcs: set[BGC] = set()
self._strains: StrainCollection = StrainCollection()

def __str__(self) -> str:
return (
f"GCF(id={self.gcf_id}, #BGC_objects={len(self.bgcs)}, #bgc_ids={len(self.bgc_ids)},"
f"GCF(id={self.id}, #BGC_objects={len(self.bgcs)}, #bgc_ids={len(self.bgc_ids)},"
f"#strains={len(self._strains)})."
)

Expand All @@ -49,7 +49,7 @@ def __repr__(self) -> str:

def __eq__(self, other) -> bool:
if isinstance(other, GCF):
return self.gcf_id == other.gcf_id and self.bgcs == other.bgcs
return self.id == other.id and self.bgcs == other.bgcs
return NotImplemented

def __hash__(self) -> int:
Expand All @@ -58,7 +58,7 @@ def __hash__(self) -> int:
Note that the GCF class is a mutable container. We only hash the GCF id to
prevent the hash value from changing when `self._bgcs` is updated.
"""
return hash(self.gcf_id)
return hash(self.id)

@property
def bgcs(self) -> set[BGC]:
Expand All @@ -74,17 +74,17 @@ def add_bgc(self, bgc: BGC) -> None:
"""Add a BGC object to the GCF."""
bgc.parents.add(self)
self._bgcs.add(bgc)
self.bgc_ids.add(bgc.bgc_id)
self.bgc_ids.add(bgc.id)
if bgc.strain is not None:
self._strains.add(bgc.strain)
else:
logger.warning("No strain specified for the BGC %s", bgc.bgc_id)
logger.warning("No strain specified for the BGC %s", bgc.id)

def detach_bgc(self, bgc: BGC) -> None:
"""Remove a child BGC object."""
bgc.parents.remove(self)
self._bgcs.remove(bgc)
self.bgc_ids.remove(bgc.bgc_id)
self.bgc_ids.remove(bgc.id)
if bgc.strain is not None:
for other_bgc in self._bgcs:
if other_bgc.strain == bgc.strain:
Expand Down
6 changes: 3 additions & 3 deletions src/nplinker/genomics/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,13 @@ def add_strain_to_bgc(strains: StrainCollection, bgcs: list[BGC]) -> tuple[list[
bgc_without_strain = []
for bgc in bgcs:
try:
strain_list = strains.lookup(bgc.bgc_id)
strain_list = strains.lookup(bgc.id)
except ValueError:
bgc_without_strain.append(bgc)
continue
if len(strain_list) > 1:
raise ValueError(
f"Multiple strain objects found for BGC id '{bgc.bgc_id}'."
f"Multiple strain objects found for BGC id '{bgc.id}'."
f"BGC object accept only one strain."
)
bgc.strain = strain_list[0]
Expand Down Expand Up @@ -136,7 +136,7 @@ def add_bgc_to_gcf(
- The dictionary contains GCF objects as keys and a set of ids of missing
BGC objects as values.
"""
bgc_dict = {bgc.bgc_id: bgc for bgc in bgcs}
bgc_dict = {bgc.id: bgc for bgc in bgcs}
gcf_with_bgc = []
gcf_without_bgc = []
gcf_missing_bgc: dict[GCF, set[str]] = {}
Expand Down
16 changes: 8 additions & 8 deletions src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,22 +98,22 @@ def _load(self) -> None:
for row in reader:
spec1_id = row["CLUSTERID1"]
spec2_id = row["CLUSTERID2"]
family_id = row["ComponentIndex"]
if family_id not in family_dict:
family_dict[family_id] = set([spec1_id, spec2_id])
mf_id = row["ComponentIndex"]
if mf_id not in family_dict:
family_dict[mf_id] = set([spec1_id, spec2_id])
else:
family_dict[family_id].add(spec1_id)
family_dict[family_id].add(spec2_id)
family_dict[mf_id].add(spec1_id)
family_dict[mf_id].add(spec2_id)
# convert dict to list of MolecularFamily objects
for family_id, spectra_ids in family_dict.items():
if family_id == "-1": # "-1" is from GNPS, it means the singleton molecular family
for mf_id, spectra_ids in family_dict.items():
if mf_id == "-1": # "-1" is from GNPS, it means the singleton molecular family
for spectrum_id in spectra_ids:
# family id must be unique, so using "singleton-" + spectrum id as family id
family = MolecularFamily("singleton-" + str(spectrum_id))
family.spectra_ids = set([spectrum_id])
self._mfs.append(family)
else:
# for regular molecular families, use the value of "ComponentIndex" as family id
family = MolecularFamily(family_id)
family = MolecularFamily(mf_id)
family.spectra_ids = spectra_ids
self._mfs.append(family)
2 changes: 1 addition & 1 deletion src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def _load(self):
rt = spec["params"].get("rtinseconds", 0)

spectrum = Spectrum(
spectrum_id=spectrum_id,
id=spectrum_id,
mz=list(spec["m/z array"]),
intensity=list(spec["intensity array"]),
precursor_mz=precursor_mz,
Expand Down
18 changes: 9 additions & 9 deletions src/nplinker/metabolomics/molecular_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,24 @@ class MolecularFamily:
"""Class to model molecular family.

Attributes:
family_id: Unique id for the molecular family.
id: Unique id for the molecular family.
spectra_ids: Set of spectrum ids in the molecular family.
"""

def __init__(self, family_id: str):
def __init__(self, id: str):
"""Initialize the MolecularFamily.

Args:
family_id: Unique id for the molecular family.
id: Unique id for the molecular family.
"""
self.family_id: str = family_id
self.id: str = id
self.spectra_ids: set[str] = set()
self._spectra: set[Spectrum] = set()
self._strains: StrainCollection = StrainCollection()

def __str__(self) -> str:
return (
f"MolecularFamily(family_id={self.family_id}, #Spectrum_objects={len(self._spectra)}, "
f"MolecularFamily(id={self.id}, #Spectrum_objects={len(self._spectra)}, "
f"#spectrum_ids={len(self.spectra_ids)}, #strains={len(self._strains)})"
)

Expand All @@ -38,11 +38,11 @@ def __repr__(self) -> str:

def __eq__(self, other) -> bool:
if isinstance(other, MolecularFamily):
return self.family_id == other.family_id
return self.id == other.id
return NotImplemented

def __hash__(self) -> int:
return hash(self.family_id)
return hash(self.id)

@property
def spectra(self) -> set[Spectrum]:
Expand All @@ -61,7 +61,7 @@ def add_spectrum(self, spectrum: Spectrum) -> None:
spectrum: `Spectrum` object to add to the molecular family.
"""
self._spectra.add(spectrum)
self.spectra_ids.add(spectrum.spectrum_id)
self.spectra_ids.add(spectrum.id)
self._strains = self._strains + spectrum.strains
# add the molecular family to the spectrum
spectrum.family = self
Expand All @@ -73,7 +73,7 @@ def detach_spectrum(self, spectrum: Spectrum) -> None:
spectrum: `Spectrum` object to remove from the molecular family.
"""
self._spectra.remove(spectrum)
self.spectra_ids.remove(spectrum.spectrum_id)
self.spectra_ids.remove(spectrum.id)
self._strains = self._update_strains()
# remove the molecular family from the spectrum
spectrum.family = None
Expand Down
14 changes: 7 additions & 7 deletions src/nplinker/metabolomics/spectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class Spectrum:
"""Class to model MS/MS Spectrum.

Attributes:
spectrum_id: the spectrum ID.
id: the spectrum ID.
mz: the list of m/z values.
intensity: the list of intensity values.
precursor_mz: the m/z value of the precursor.
Expand All @@ -30,7 +30,7 @@ class Spectrum:

def __init__(
self,
spectrum_id: str,
id: str,
mz: list[float],
intensity: list[float],
precursor_mz: float,
Expand All @@ -40,15 +40,15 @@ def __init__(
"""Initialize the Spectrum.

Args:
spectrum_id: the spectrum ID.
id: the spectrum ID.
mz: the list of m/z values.
intensity: the list of intensity values.
precursor_mz: the precursor m/z.
rt: the retention time in seconds. Defaults to 0.
metadata: the metadata of the spectrum, i.e. the header information
in the MGF file.
"""
self.spectrum_id = spectrum_id
self.id = id
self.mz = mz
self.intensity = intensity
self.precursor_mz = precursor_mz
Expand All @@ -61,18 +61,18 @@ def __init__(
self.family: MolecularFamily | None = None

def __str__(self) -> str:
return f"Spectrum(spectrum_id={self.spectrum_id}, #strains={len(self.strains)})"
return f"Spectrum(id={self.id}, #strains={len(self.strains)})"

def __repr__(self) -> str:
return str(self)

def __eq__(self, other) -> bool:
if isinstance(other, Spectrum):
return self.spectrum_id == other.spectrum_id and self.precursor_mz == other.precursor_mz
return self.id == other.id and self.precursor_mz == other.precursor_mz
return NotImplemented

def __hash__(self) -> int:
return hash((self.spectrum_id, self.precursor_mz))
return hash((self.id, self.precursor_mz))

@cached_property
def peaks(self) -> np.ndarray:
Expand Down
Loading
Loading