
Commit

change log message from debug to info
CunliangGeng committed May 10, 2024
1 parent 2c2876b commit 4e1a027
Showing 10 changed files with 39 additions and 40 deletions.
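A note on what the level change does, assuming nplinker uses Python's standard `logging` module (the `logger.debug`/`logger.info` calls throughout the diff suggest it does): DEBUG messages are suppressed by handlers configured at INFO level, so promoting a message to INFO makes it visible in a typical default setup. A minimal sketch:

```python
import logging

# Minimal sketch with the standard library, not nplinker's actual logger setup.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("nplinker")

logger.debug("suppressed: DEBUG is below the configured INFO threshold")
logger.info("shown: INFO meets the configured threshold")
```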
2 changes: 1 addition & 1 deletion src/nplinker/class_info/chem_classes.py
@@ -263,7 +263,7 @@ class prediction for a level. When no class is present, instead of Tuple it will

# use canopus_treemap to produce NPClassifier classes
# TODO: probably change when sirius v5 comes out
logger.debug("Using canopus_treemap to get NPC classes")
logger.info("Using canopus_treemap to get NPC classes")
canopus_workspace = Canopus(sirius=self._canopus_dir)
npc_file = os.path.join(self._canopus_dir, "npc_summary.tsv")
canopus_workspace.npcSummary().to_csv(npc_file, sep=sep)
4 changes: 2 additions & 2 deletions src/nplinker/genomics/antismash/podp_antismash_downloader.py
@@ -248,7 +248,7 @@ def _ncbi_genbank_search(genbank_id: str, retry_times: int = 3) -> Tag | Navigab
url = NCBI_LOOKUP_URL.format(genbank_id)
retry = 1
while retry <= retry_times:
logger.debug(f"Looking up GenBank data for {genbank_id} at {url}")
logger.info(f"Looking up GenBank data for {genbank_id} at {url}")
resp = httpx.get(url, follow_redirects=True)
if resp.status_code == httpx.codes.OK:
# the page should contain a <dl> element with class "assembly_summary_new". retrieving
@@ -298,7 +298,7 @@ def _resolve_genbank_accession(genbank_id: str) -> str:

# get rid of any extraneous whitespace
genbank_id = genbank_id.strip()
-logger.debug(f'Parsed GenBank ID to "{genbank_id}"')
+logger.info(f'Parsed GenBank ID to "{genbank_id}"')

# run a search using the GenBank accession ID
try:
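The hunk above logs each lookup attempt from inside a retry loop. A self-contained sketch of that pattern (`fetch_with_retry` is an illustrative name, not nplinker's API; the real function also parses the returned page):

```python
import httpx

def fetch_with_retry(url: str, retry_times: int = 3) -> httpx.Response | None:
    """Retry an HTTP GET up to retry_times, returning the first OK response."""
    for _attempt in range(1, retry_times + 1):
        resp = httpx.get(url, follow_redirects=True)
        if resp.status_code == httpx.codes.OK:
            return resp
    return None
```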
16 changes: 8 additions & 8 deletions src/nplinker/loader.py
@@ -102,7 +102,7 @@ def _load_metabolomics(self):
Strain objects added (i.e. `MolecularFamily._strains` updated). This means only Spectra
objects with updated strains (i.e. `self.spectra`) can be added to MolecularFamily objects.
"""
logger.debug("\nLoading metabolomics data starts...")
logger.info(f"{'='*40}\nLoading metabolomics data starts...")

# Step 1: load all Spectrum objects
raw_spectra = GNPSSpectrumLoader(GNPS_DEFAULT_PATH / GNPS_SPECTRA_FILENAME).spectra
@@ -127,7 +127,7 @@ def _load_metabolomics(self):
self.spectra = spectra_with_strains
self.molfams = mf_with_spec

logger.debug("Loading metabolomics data completed\n")
logger.info("Loading metabolomics data completed\n")
return True

def _load_genomics(self):
@@ -141,10 +141,10 @@ def _load_genomics(self):
added (i.e. `GCF._strains` updated). This means only BGC objects with updated Strain objects
(i.e. `self.bgcs`) can be added to GCF objects.
"""
logger.debug("\nLoading genomics data starts...")
logger.info(f"{'='*40}\nLoading genomics data starts...")

# Step 1: load antismash BGC objects & add strain info
logger.debug("Parsing AntiSMASH directory...")
logger.info("Parsing AntiSMASH directory...")
antismash_bgcs = AntismashBGCLoader(str(defaults.ANTISMASH_DEFAULT_PATH)).get_bgcs()
antismash_bgcs_with_strain, _ = add_strain_to_bgc(self.strains, antismash_bgcs)

@@ -164,10 +164,10 @@ def _load_genomics(self):
# switch depending on found file. prefer V1 if both are found
if bigscape_cluster_file.exists():
loader = BigscapeGCFLoader(bigscape_cluster_file)
logger.debug(f"Loading BigSCAPE cluster file {bigscape_cluster_file}")
logger.info(f"Loading BigSCAPE cluster file {bigscape_cluster_file}")
elif bigscape_db_file.exists():
loader = BigscapeV2GCFLoader(bigscape_db_file)
logger.debug(f"Loading BigSCAPE database file {bigscape_db_file}")
logger.info(f"Loading BigSCAPE database file {bigscape_db_file}")
else:
raise FileNotFoundError(
f"Neither BigSCAPE cluster file {bigscape_cluster_file} nor database file {bigscape_db_file} were found."
@@ -190,7 +190,7 @@ def _load_genomics(self):
self.gcfs = all_gcfs_with_bgc
self.mibig_strains_in_use = mibig_strains_in_use

logger.debug("Loading genomics data completed\n")
logger.info("Loading genomics data completed\n")
return True

@deprecated(reason="To be refactored. It was used in the `self.load` method before.")
@@ -244,7 +244,7 @@ def _load_class_info(self):
chem_classes = ChemClassPredictions(self.canopus_dir, self.molnetenhancer_dir, self._root) # noqa
# if no molfam classes transfer them from spectra (due to old style MN)
if not chem_classes.canopus.molfam_classes and chem_classes.canopus.spectra_classes:
logger.debug("Added chemical compound classes for MFs")
logger.info("Added chemical compound classes for MFs")
chem_classes.canopus.transfer_spec_classes_to_molfams(self.molfams)
# include them in loader
self.chem_classes = chem_classes
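The docstrings in this file describe an ordering constraint: strains are attached to Spectrum objects first, and only strain-annotated spectra may then be added to MolecularFamily objects (likewise for BGCs and GCFs). A runnable sketch of that constraint with stand-in types (these are not nplinker's classes):

```python
from dataclasses import dataclass, field

@dataclass
class Spectrum:
    spectrum_id: str
    strains: set[str] = field(default_factory=set)

@dataclass
class MolecularFamily:
    family_id: str
    spectra: list[Spectrum] = field(default_factory=list)
    strains: set[str] = field(default_factory=set)

    def add_spectrum(self, spectrum: Spectrum) -> None:
        # If strains were not attached to the spectrum first, the
        # family's strain set would silently end up incomplete.
        self.spectra.append(spectrum)
        self.strains |= spectrum.strains

spec = Spectrum("spec1")
spec.strains.add("strain_A")   # attach strains first ...
mf = MolecularFamily("mf1")
mf.add_spectrum(spec)          # ... then add the spectrum to its family
```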
6 changes: 3 additions & 3 deletions src/nplinker/nplinker.py
@@ -69,7 +69,7 @@ def __init__(self):
for name, method in NPLinker.SCORING_METHODS.items():
if len(config_methods) == 0 or name in config_methods:
self._scoring_methods[name] = method
logger.debug(f"Enabled scoring method: {name}")
logger.info(f"Enabled scoring method: {name}")

self._scoring_methods_setup_complete = {
name: False for name in self._scoring_methods.keys()
@@ -279,9 +279,9 @@ def get_links(
if (source, target) in shared_strains:
link.shared_strains = shared_strains[(source, target)]

logger.debug("Finished calculating shared strain information")
logger.info("Finished calculating shared strain information")

logger.debug("Final size of link collection is {}".format(len(link_collection)))
logger.info("Final size of link collection is {}".format(len(link_collection)))
return link_collection

def get_common_strains(
8 changes: 4 additions & 4 deletions src/nplinker/parsers/kcb.py
@@ -45,22 +45,22 @@ def __init__(self, bgcs):
if not os.path.exists(bgc.antismash_file):
raise Exception('KCBJSONParser failed to find file "{}"'.format(bgc.antismash_file))

logger.debug(f"KCBJSONParser({len(bgcs)} BGCs)")
logger.info(f"KCBJSONParser({len(bgcs)} BGCs)")

# find the JSON file: TODO is the assumption of there only being a single .json
# file always going to work? otherwise have to try guessing the name based on
# genome IDs
prefix = os.path.dirname(bgcs[0].antismash_file)
json_files = list(filter(lambda f: f.endswith(".json"), os.listdir(prefix)))
logger.debug("Found {} JSON files in {}".format(len(json_files), prefix))
logger.info("Found {} JSON files in {}".format(len(json_files), prefix))

if len(json_files) == 0:
logger.warning("Unable to find an antiSMASH JSON output file in {}".format(prefix))
self.json_filename = None
return

self.json_filename = os.path.join(prefix, json_files[0])
logger.debug(f"Using JSON file {self.json_filename}")
logger.info(f"Using JSON file {self.json_filename}")

def parse_hits(self):
if self.json_filename is None:
@@ -139,7 +139,7 @@ def parse_hits(self):
if hits is not None:
self.collected_hits.update(hits)

-logger.debug(
+logger.info(
"KCBJSONParser: collected {} total hit entries".format(len(self.collected_hits))
)

1 change: 0 additions & 1 deletion src/nplinker/scoring/iokr/spectrum_filters.py
@@ -16,7 +16,6 @@
import os
import pickle
import numpy
-
# import sys
# sys.path.append('/home/grimur/git/lda')
# from lda.code.formula import Formula
8 changes: 4 additions & 4 deletions src/nplinker/scoring/link_collection.py
@@ -36,14 +36,14 @@ def _add_links_from_method(self, method, object_links):
# only results that appear in both sets

if not self._and_mode:
-logger.debug(
+logger.info(
"Merging {} results from method {} in OR mode".format(
len(object_links), method.name
)
)
self._merge_or_mode(object_links)
else:
-logger.debug(
+logger.info(
"Merging {} results from method {} in AND mode".format(
len(object_links), method.name
)
@@ -98,12 +98,12 @@ def _merge_or_mode(self, object_links):
def filter_no_shared_strains(self):
len_before = len(self._link_data)
self.filter_links(lambda x: len(x.shared_strains) > 0)
logger.debug("filter_no_shared_strains: {} => {}".format(len_before, len(self._link_data)))
logger.info("filter_no_shared_strains: {} => {}".format(len_before, len(self._link_data)))

def filter_sources(self, callable_obj):
len_before = len(self._link_data)
self._link_data = {k: v for k, v in self._link_data.items() if callable_obj(k)}
logger.debug("filter_sources: {} => {}".format(len_before, len(self._link_data)))
logger.info("filter_sources: {} => {}".format(len_before, len(self._link_data)))

def filter_targets(self, callable_obj, sources=None):
to_remove = []
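A sketch of the OR/AND merge semantics logged above (an illustrative simplification; the real LinkCollection merges per-object ObjectLink maps):

```python
def merge_links(existing: dict, incoming: dict, and_mode: bool) -> dict:
    """OR mode keeps links from either method; AND mode keeps only shared ones."""
    if and_mode:
        return {k: v for k, v in incoming.items() if k in existing}
    merged = dict(existing)
    merged.update(incoming)
    return merged

print(merge_links({"a": 1, "b": 2}, {"b": 3, "c": 4}, and_mode=True))   # {'b': 3}
print(merge_links({"a": 1, "b": 2}, {"b": 3, "c": 4}, and_mode=False))  # {'a': 1, 'b': 3, 'c': 4}
```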
8 changes: 4 additions & 4 deletions src/nplinker/scoring/linking/data_links.py
@@ -74,7 +74,7 @@ def __init__(
"""
self._strains = strains

-logger.debug(
+logger.info(
"Create occurrence dataframes: spectra<->strains, gcfs<->strains and mfs<->strains."
)
# DataFrame to store occurrence of gcfs/spectra/mfs with respect to strains
@@ -84,14 +84,14 @@
self.occurrence_mf_strain = self._get_occurrence_mf_strain(mfs, strains)

# DataFrame to store co-occurrence of "spectra<->gcf" or "mfs<->gcf"
logger.debug("Create correlation matrices: spectra<->gcfs.")
logger.info("Create correlation matrices: spectra<->gcfs.")
(
self.cooccurrence_spec_gcf,
self.cooccurrence_spec_notgcf,
self.cooccurrence_notspec_gcf,
self.cooccurrence_notspec_notgcf,
) = self._get_cooccurrence(link_type="spec-gcf")
logger.debug("Create correlation matrices: mol-families<->gcfs.")
logger.info("Create correlation matrices: mol-families<->gcfs.")
(
self.cooccurrence_mf_gcf,
self.cooccurrence_mf_notgcf,
@@ -240,7 +240,7 @@ def _get_cooccurrence(
raise ValueError(
f"Link type {link_type} is not supported. Use 'spec-gcf' or 'mf-gcf' instead."
)
logger.debug(f"Calculating correlation matrices of type: {link_type}")
logger.info(f"Calculating correlation matrices of type: {link_type}")
m1, m2, m3, m4 = calc_correlation_matrix(met_strain_occurrence, self.occurrence_gcf_strain)
df_met_gcf = pd.DataFrame(
m1,
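The four co-occurrence matrices named in this file can be read as products of binary occurrence matrices; a sketch of that idea (inferred from the attribute names, not necessarily the exact implementation of calc_correlation_matrix):

```python
import numpy as np

# Binary occurrence matrices: rows are spectra / GCFs, columns are strains.
spec_strain = np.array([[1, 0, 1],
                        [0, 1, 1]])  # 2 spectra x 3 strains
gcf_strain = np.array([[1, 1, 0],
                       [0, 0, 1]])   # 2 GCFs x 3 strains

# Strain counts shared (or not) between each spectrum/GCF pair.
spec_gcf = spec_strain @ gcf_strain.T                    # strain has both
spec_notgcf = spec_strain @ (1 - gcf_strain).T           # spectrum only
notspec_gcf = (1 - spec_strain) @ gcf_strain.T           # GCF only
notspec_notgcf = (1 - spec_strain) @ (1 - gcf_strain).T  # neither
```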
2 changes: 1 addition & 1 deletion src/nplinker/scoring/linking/link_likelihood.py
@@ -59,7 +59,7 @@ def calculate_likelihoods(self, data_links, type="spec-gcf"):
"Wrong correlation 'type' given. Must be one of 'spec-gcf', 'mf-gcf'..."
)

logger.debug(f"Calculating likelihood matrices of type: {type}")
logger.info(f"Calculating likelihood matrices of type: {type}")
# Calculate likelihood matrices using calc_likelihood_matrix()
(
P_type2_given_type1,
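Judging by names like P_type2_given_type1, the likelihoods appear to be conditional frequencies derived from co-occurrence counts; a hedged sketch of that reading (the actual formula lives in calc_likelihood_matrix and may differ):

```python
import numpy as np

def conditional_likelihood(cooccurrence: np.ndarray, strain_counts: np.ndarray) -> np.ndarray:
    """Estimate P(type2 | type1) as shared-strain count over type1's strain count."""
    return cooccurrence / strain_counts[:, None]

# A spectrum seen in 2 strains, 1 of them shared with the GCF -> 0.5
print(conditional_likelihood(np.array([[1]]), np.array([2])))  # [[0.5]]
```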
24 changes: 12 additions & 12 deletions src/nplinker/scoring/metcalf_scoring.py
@@ -86,7 +86,7 @@ def setup(npl: NPLinker):
]
datalinks, linkfinder = None, None
if os.path.exists(cache_file):
logger.debug("MetcalfScoring.setup loading cached data")
logger.info("MetcalfScoring.setup loading cached data")
cache_data = load_pickled_data(npl, cache_file)
cache_ok = True
if cache_data is not None:
@@ -108,7 +108,7 @@ def setup(npl: NPLinker):
MetcalfScoring.LINKFINDER = LinkFinder()
MetcalfScoring.LINKFINDER.calc_score(MetcalfScoring.DATALINKS, link_type=LINK_TYPES[0])
MetcalfScoring.LINKFINDER.calc_score(MetcalfScoring.DATALINKS, link_type=LINK_TYPES[1])
logger.debug("MetcalfScoring.setup caching results")
logger.info("MetcalfScoring.setup caching results")
save_pickled_data(
(dataset_counts, MetcalfScoring.DATALINKS, MetcalfScoring.LINKFINDER), cache_file
)
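load_pickled_data and save_pickled_data are nplinker helpers; the generic cache-or-compute pattern this setup step implements looks roughly like the following (a sketch, not the helpers' actual signatures):

```python
import os
import pickle

def cached_compute(cache_file: str, compute):
    """Return pickled results if the cache exists; otherwise compute and cache them."""
    if os.path.exists(cache_file):
        with open(cache_file, "rb") as fh:
            return pickle.load(fh)
    result = compute()
    with open(cache_file, "wb") as fh:
        pickle.dump(result, fh)
    return result
```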
@@ -163,7 +163,7 @@ def get_links(
("LinkFinder object not found. Have you called `MetcalfScoring.setup(npl)`?")
)

logger.debug(f"MetcalfScoring: standardised = {self.standardised}")
logger.info(f"MetcalfScoring: standardised = {self.standardised}")
if not self.standardised:
scores_list = self.LINKFINDER.get_links(*objects, score_cutoff=self.cutoff)
# TODO CG: verify the logics of standardised score and add unit tests
@@ -180,14 +180,14 @@ def get_links(
GCF | Spectrum | MolecularFamily, dict[GCF | Spectrum | MolecularFamily, ObjectLink]
] = {}
if obj_type == "gcf":
-logger.debug(
+logger.info(
f"MetcalfScoring: input_type=GCF, result_type=Spec/MolFam, "
f"#inputs={len(objects)}."
)
for scores in scores_list:
# when no links found
if scores.shape[1] == 0:
logger.debug(f'MetcalfScoring: found no "{scores.name}" links')
logger.info(f'MetcalfScoring: found no "{scores.name}" links')
else:
# when links found
for col_index in range(scores.shape[1]):
@@ -202,16 +202,16 @@ def get_links(
link_scores[gcf][met] = ObjectLink(
gcf, met, self, scores.loc["score", col_index]
)
logger.debug(f"MetcalfScoring: found {len(link_scores)} {scores.name} links.")
logger.info(f"MetcalfScoring: found {len(link_scores)} {scores.name} links.")
else:
-logger.debug(
+logger.info(
f"MetcalfScoring: input_type=Spec/MolFam, result_type=GCF, "
f"#inputs={len(objects)}."
)
scores = scores_list[0]
# when no links found
if scores.shape[1] == 0:
-logger.debug(f'MetcalfScoring: found no links "{scores.name}" for input objects')
+logger.info(f'MetcalfScoring: found no links "{scores.name}" for input objects')
else:
for col_index in range(scores.shape[1]):
gcf = self.npl.lookup_gcf(scores.loc["target", col_index])
Expand All @@ -224,10 +224,10 @@ def get_links(
link_scores[met][gcf] = ObjectLink(
met, gcf, self, scores.loc["score", col_index]
)
logger.debug(f"MetcalfScoring: found {len(link_scores)} {scores.name} links.")
logger.info(f"MetcalfScoring: found {len(link_scores)} {scores.name} links.")

link_collection._add_links_from_method(self, link_scores)
logger.debug("MetcalfScoring: completed")
logger.info("MetcalfScoring: completed")
return link_collection

def _calc_standardised_score_met(
Expand All @@ -237,7 +237,7 @@ def _calc_standardised_score_met(
raise ValueError(
"Metcalf mean and std not found. Have you called `MetcalfScoring.setup(npl)`?"
)
logger.debug("Calculating standardised Metcalf scores (met input)")
logger.info("Calculating standardised Metcalf scores (met input)")
raw_score = results[0]
z_scores = []
for col_index in range(raw_score.shape[1]):
@@ -276,7 +276,7 @@ def _calc_standardised_score_gen(
raise ValueError(
"Metcalf mean and std not found. Have you called `MetcalfScoring.setup(npl)`?"
)
logger.debug("Calculating standardised Metcalf scores (gen input)")
logger.info("Calculating standardised Metcalf scores (gen input)")
postprocessed_scores = []
for raw_score in results:
z_scores = []
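The standardised scores referenced in these hunks appear to be z-scores against the cached Metcalf mean and standard deviation (inferred from the "Metcalf mean and std not found" error message; the real code conditions these statistics on strain counts):

```python
import numpy as np

def standardise(raw: np.ndarray, mean: np.ndarray, std: np.ndarray) -> np.ndarray:
    """z-score: standard deviations by which a raw Metcalf score exceeds the mean."""
    return (raw - mean) / std

print(standardise(np.array([8.0]), np.array([5.0]), np.array([1.5])))  # [2.]
```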
