
Commit

change log message from debug to info
CunliangGeng committed May 10, 2024
1 parent 2c2876b commit 4e1a027
Showing 10 changed files with 39 additions and 40 deletions.
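A note on what the level change does, assuming nplinker uses Python's standard `logging` module (the `logger.debug`/`logger.info` calls throughout the diff suggest it does): DEBUG messages are suppressed by handlers configured at INFO level, so promoting a message to INFO makes it visible in a typical default setup. A minimal sketch:

```python
import logging

# Minimal sketch with the standard library, not nplinker's actual logger setup.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("nplinker")

logger.debug("suppressed: DEBUG is below the configured INFO threshold")
logger.info("shown: INFO meets the configured threshold")
```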
2 changes: 1 addition & 1 deletion src/nplinker/class_info/chem_classes.py
@@ -263,7 +263,7 @@ class prediction for a level. When no class is present, instead of Tuple it will

# use canopus_treemap to produce NPClassifier classes
# TODO: probably change when sirius v5 comes out
logger.debug("Using canopus_treemap to get NPC classes")
logger.info("Using canopus_treemap to get NPC classes")
canopus_workspace = Canopus(sirius=self._canopus_dir)
npc_file = os.path.join(self._canopus_dir, "npc_summary.tsv")
canopus_workspace.npcSummary().to_csv(npc_file, sep=sep)
4 changes: 2 additions & 2 deletions src/nplinker/genomics/antismash/podp_antismash_downloader.py
@@ -248,7 +248,7 @@ def _ncbi_genbank_search(genbank_id: str, retry_times: int = 3) -> Tag | Navigab
url = NCBI_LOOKUP_URL.format(genbank_id)
retry = 1
while retry <= retry_times:
logger.debug(f"Looking up GenBank data for {genbank_id} at {url}")
logger.info(f"Looking up GenBank data for {genbank_id} at {url}")
resp = httpx.get(url, follow_redirects=True)
if resp.status_code == httpx.codes.OK:
# the page should contain a <dl> element with class "assembly_summary_new". retrieving
@@ -298,7 +298,7 @@ def _resolve_genbank_accession(genbank_id: str) -> str:

# get rid of any extraneous whitespace
genbank_id = genbank_id.strip()
-logger.debug(f'Parsed GenBank ID to "{genbank_id}"')
+logger.info(f'Parsed GenBank ID to "{genbank_id}"')

# run a search using the GenBank accession ID
try:
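The hunk above logs each lookup attempt from inside a retry loop. A self-contained sketch of that pattern (`fetch_with_retry` is an illustrative name, not nplinker's API; the real function also parses the returned page):

```python
import httpx

def fetch_with_retry(url: str, retry_times: int = 3) -> httpx.Response | None:
    """Retry an HTTP GET up to retry_times, returning the first OK response."""
    for _attempt in range(1, retry_times + 1):
        resp = httpx.get(url, follow_redirects=True)
        if resp.status_code == httpx.codes.OK:
            return resp
    return None
```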
16 changes: 8 additions & 8 deletions src/nplinker/loader.py
@@ -102,7 +102,7 @@ def _load_metabolomics(self):
Strain objects added (i.e. `MolecularFamily._strains` updated). This means only Spectra
objects with updated strains (i.e. `self.spectra`) can be added to MolecularFamily objects.
"""
logger.debug("\nLoading metabolomics data starts...")
logger.info(f"{'='*40}\nLoading metabolomics data starts...")

# Step 1: load all Spectrum objects
raw_spectra = GNPSSpectrumLoader(GNPS_DEFAULT_PATH / GNPS_SPECTRA_FILENAME).spectra
@@ -127,7 +127,7 @@ def _load_metabolomics(self):
self.spectra = spectra_with_strains
self.molfams = mf_with_spec

logger.debug("Loading metabolomics data completed\n")
logger.info("Loading metabolomics data completed\n")
return True

def _load_genomics(self):
@@ -141,10 +141,10 @@ def _load_genomics(self):
added (i.e. `GCF._strains` updated). This means only BGC objects with updated Strain objects
(i.e. `self.bgcs`) can be added to GCF objects.
"""
logger.debug("\nLoading genomics data starts...")
logger.info(f"{'='*40}\nLoading genomics data starts...")

# Step 1: load antismash BGC objects & add strain info
logger.debug("Parsing AntiSMASH directory...")
logger.info("Parsing AntiSMASH directory...")
antismash_bgcs = AntismashBGCLoader(str(defaults.ANTISMASH_DEFAULT_PATH)).get_bgcs()
antismash_bgcs_with_strain, _ = add_strain_to_bgc(self.strains, antismash_bgcs)

@@ -164,10 +164,10 @@ def _load_genomics(self):
# switch depending on found file. prefer V1 if both are found
if bigscape_cluster_file.exists():
loader = BigscapeGCFLoader(bigscape_cluster_file)
logger.debug(f"Loading BigSCAPE cluster file {bigscape_cluster_file}")
logger.info(f"Loading BigSCAPE cluster file {bigscape_cluster_file}")
elif bigscape_db_file.exists():
loader = BigscapeV2GCFLoader(bigscape_db_file)
logger.debug(f"Loading BigSCAPE database file {bigscape_db_file}")
logger.info(f"Loading BigSCAPE database file {bigscape_db_file}")
else:
raise FileNotFoundError(
f"Neither BigSCAPE cluster file {bigscape_cluster_file} nor database file {bigscape_db_file} were found."
@@ -190,7 +190,7 @@ def _load_genomics(self):
self.gcfs = all_gcfs_with_bgc
self.mibig_strains_in_use = mibig_strains_in_use

logger.debug("Loading genomics data completed\n")
logger.info("Loading genomics data completed\n")
return True

@deprecated(reason="To be refactored. It was used in the `self.load` method before.")
@@ -244,7 +244,7 @@ def _load_class_info(self):
chem_classes = ChemClassPredictions(self.canopus_dir, self.molnetenhancer_dir, self._root) # noqa
# if no molfam classes transfer them from spectra (due to old style MN)
if not chem_classes.canopus.molfam_classes and chem_classes.canopus.spectra_classes:
logger.debug("Added chemical compound classes for MFs")
logger.info("Added chemical compound classes for MFs")
chem_classes.canopus.transfer_spec_classes_to_molfams(self.molfams)
# include them in loader
self.chem_classes = chem_classes
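The docstrings in this file describe an ordering constraint: strains are attached to Spectrum objects first, and only strain-annotated spectra may then be added to MolecularFamily objects (likewise for BGCs and GCFs). A runnable sketch of that constraint with stand-in types (these are not nplinker's classes):

```python
from dataclasses import dataclass, field

@dataclass
class Spectrum:
    spectrum_id: str
    strains: set[str] = field(default_factory=set)

@dataclass
class MolecularFamily:
    family_id: str
    spectra: list[Spectrum] = field(default_factory=list)
    strains: set[str] = field(default_factory=set)

    def add_spectrum(self, spectrum: Spectrum) -> None:
        # If strains were not attached to the spectrum first, the
        # family's strain set would silently end up incomplete.
        self.spectra.append(spectrum)
        self.strains |= spectrum.strains

spec = Spectrum("spec1")
spec.strains.add("strain_A")   # attach strains first ...
mf = MolecularFamily("mf1")
mf.add_spectrum(spec)          # ... then add the spectrum to its family
```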
6 changes: 3 additions & 3 deletions src/nplinker/nplinker.py
@@ -69,7 +69,7 @@ def __init__(self):
for name, method in NPLinker.SCORING_METHODS.items():
if len(config_methods) == 0 or name in config_methods:
self._scoring_methods[name] = method
logger.debug(f"Enabled scoring method: {name}")
logger.info(f"Enabled scoring method: {name}")

self._scoring_methods_setup_complete = {
name: False for name in self._scoring_methods.keys()
@@ -279,9 +279,9 @@ def get_links(
if (source, target) in shared_strains:
link.shared_strains = shared_strains[(source, target)]

logger.debug("Finished calculating shared strain information")
logger.info("Finished calculating shared strain information")

logger.debug("Final size of link collection is {}".format(len(link_collection)))
logger.info("Final size of link collection is {}".format(len(link_collection)))
return link_collection

def get_common_strains(
8 changes: 4 additions & 4 deletions src/nplinker/parsers/kcb.py
@@ -45,22 +45,22 @@ def __init__(self, bgcs):
if not os.path.exists(bgc.antismash_file):
raise Exception('KCBJSONParser failed to find file "{}"'.format(bgc.antismash_file))

logger.debug(f"KCBJSONParser({len(bgcs)} BGCs)")
logger.info(f"KCBJSONParser({len(bgcs)} BGCs)")

# find the JSON file: TODO is the assumption of there only being a single .json
# file always going to work? otherwise have to try guessing the name based on
# genome IDs
prefix = os.path.dirname(bgcs[0].antismash_file)
json_files = list(filter(lambda f: f.endswith(".json"), os.listdir(prefix)))
logger.debug("Found {} JSON files in {}".format(len(json_files), prefix))
logger.info("Found {} JSON files in {}".format(len(json_files), prefix))

if len(json_files) == 0:
logger.warning("Unable to find an antiSMASH JSON output file in {}".format(prefix))
self.json_filename = None
return

self.json_filename = os.path.join(prefix, json_files[0])
logger.debug(f"Using JSON file {self.json_filename}")
logger.info(f"Using JSON file {self.json_filename}")

def parse_hits(self):
if self.json_filename is None:
@@ -139,7 +139,7 @@ def parse_hits(self):
if hits is not None:
self.collected_hits.update(hits)

-logger.debug(
+logger.info(
"KCBJSONParser: collected {} total hit entries".format(len(self.collected_hits))
)

1 change: 0 additions & 1 deletion src/nplinker/scoring/iokr/spectrum_filters.py
@@ -16,7 +16,6 @@
import os
import pickle
import numpy
-
# import sys
# sys.path.append('/home/grimur/git/lda')
# from lda.code.formula import Formula
8 changes: 4 additions & 4 deletions src/nplinker/scoring/link_collection.py
@@ -36,14 +36,14 @@ def _add_links_from_method(self, method, object_links):
# only results that appear in both sets

if not self._and_mode:
-logger.debug(
+logger.info(
"Merging {} results from method {} in OR mode".format(
len(object_links), method.name
)
)
self._merge_or_mode(object_links)
else:
-logger.debug(
+logger.info(
"Merging {} results from method {} in AND mode".format(
len(object_links), method.name
)
@@ -98,12 +98,12 @@ def _merge_or_mode(self, object_links):
def filter_no_shared_strains(self):
len_before = len(self._link_data)
self.filter_links(lambda x: len(x.shared_strains) > 0)
logger.debug("filter_no_shared_strains: {} => {}".format(len_before, len(self._link_data)))
logger.info("filter_no_shared_strains: {} => {}".format(len_before, len(self._link_data)))

def filter_sources(self, callable_obj):
len_before = len(self._link_data)
self._link_data = {k: v for k, v in self._link_data.items() if callable_obj(k)}
logger.debug("filter_sources: {} => {}".format(len_before, len(self._link_data)))
logger.info("filter_sources: {} => {}".format(len_before, len(self._link_data)))

def filter_targets(self, callable_obj, sources=None):
to_remove = []
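A sketch of the OR/AND merge semantics logged above (an illustrative simplification; the real LinkCollection merges per-object ObjectLink maps):

```python
def merge_links(existing: dict, incoming: dict, and_mode: bool) -> dict:
    """OR mode keeps links from either method; AND mode keeps only shared ones."""
    if and_mode:
        return {k: v for k, v in incoming.items() if k in existing}
    merged = dict(existing)
    merged.update(incoming)
    return merged

print(merge_links({"a": 1, "b": 2}, {"b": 3, "c": 4}, and_mode=True))   # {'b': 3}
print(merge_links({"a": 1, "b": 2}, {"b": 3, "c": 4}, and_mode=False))  # {'a': 1, 'b': 3, 'c': 4}
```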
8 changes: 4 additions & 4 deletions src/nplinker/scoring/linking/data_links.py
@@ -74,7 +74,7 @@ def __init__(
"""
self._strains = strains

-logger.debug(
+logger.info(
"Create occurrence dataframes: spectra<->strains, gcfs<->strains and mfs<->strains."
)
# DataFrame to store occurrence of gcfs/spectra/mfs with respect to strains
@@ -84,14 +84,14 @@
self.occurrence_mf_strain = self._get_occurrence_mf_strain(mfs, strains)

# DataFrame to store co-occurrence of "spectra<->gcf" or "mfs<->gcf"
logger.debug("Create correlation matrices: spectra<->gcfs.")
logger.info("Create correlation matrices: spectra<->gcfs.")
(
self.cooccurrence_spec_gcf,
self.cooccurrence_spec_notgcf,
self.cooccurrence_notspec_gcf,
self.cooccurrence_notspec_notgcf,
) = self._get_cooccurrence(link_type="spec-gcf")
logger.debug("Create correlation matrices: mol-families<->gcfs.")
logger.info("Create correlation matrices: mol-families<->gcfs.")
(
self.cooccurrence_mf_gcf,
self.cooccurrence_mf_notgcf,
@@ -240,7 +240,7 @@ def _get_cooccurrence(
raise ValueError(
f"Link type {link_type} is not supported. Use 'spec-gcf' or 'mf-gcf' instead."
)
logger.debug(f"Calculating correlation matrices of type: {link_type}")
logger.info(f"Calculating correlation matrices of type: {link_type}")
m1, m2, m3, m4 = calc_correlation_matrix(met_strain_occurrence, self.occurrence_gcf_strain)
df_met_gcf = pd.DataFrame(
m1,
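The four co-occurrence matrices named in this file can be read as products of binary occurrence matrices; a sketch of that idea (inferred from the attribute names, not necessarily the exact implementation of calc_correlation_matrix):

```python
import numpy as np

# Binary occurrence matrices: rows are spectra / GCFs, columns are strains.
spec_strain = np.array([[1, 0, 1],
                        [0, 1, 1]])  # 2 spectra x 3 strains
gcf_strain = np.array([[1, 1, 0],
                       [0, 0, 1]])   # 2 GCFs x 3 strains

# Strain counts shared (or not) between each spectrum/GCF pair.
spec_gcf = spec_strain @ gcf_strain.T                    # strain has both
spec_notgcf = spec_strain @ (1 - gcf_strain).T           # spectrum only
notspec_gcf = (1 - spec_strain) @ gcf_strain.T           # GCF only
notspec_notgcf = (1 - spec_strain) @ (1 - gcf_strain).T  # neither
```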
2 changes: 1 addition & 1 deletion src/nplinker/scoring/linking/link_likelihood.py
@@ -59,7 +59,7 @@ def calculate_likelihoods(self, data_links, type="spec-gcf"):
"Wrong correlation 'type' given. Must be one of 'spec-gcf', 'mf-gcf'..."
)

logger.debug(f"Calculating likelihood matrices of type: {type}")
logger.info(f"Calculating likelihood matrices of type: {type}")
# Calculate likelihood matrices using calc_likelihood_matrix()
(
P_type2_given_type1,
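Judging by names like P_type2_given_type1, the likelihoods appear to be conditional frequencies derived from co-occurrence counts; a hedged sketch of that reading (the actual formula lives in calc_likelihood_matrix and may differ):

```python
import numpy as np

def conditional_likelihood(cooccurrence: np.ndarray, strain_counts: np.ndarray) -> np.ndarray:
    """Estimate P(type2 | type1) as shared-strain count over type1's strain count."""
    return cooccurrence / strain_counts[:, None]

# A spectrum seen in 2 strains, 1 of them shared with the GCF -> 0.5
print(conditional_likelihood(np.array([[1]]), np.array([2])))  # [[0.5]]
```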
24 changes: 12 additions & 12 deletions src/nplinker/scoring/metcalf_scoring.py
@@ -86,7 +86,7 @@ def setup(npl: NPLinker):
]
datalinks, linkfinder = None, None
if os.path.exists(cache_file):
logger.debug("MetcalfScoring.setup loading cached data")
logger.info("MetcalfScoring.setup loading cached data")
cache_data = load_pickled_data(npl, cache_file)
cache_ok = True
if cache_data is not None:
@@ -108,7 +108,7 @@ def setup(npl: NPLinker):
MetcalfScoring.LINKFINDER = LinkFinder()
MetcalfScoring.LINKFINDER.calc_score(MetcalfScoring.DATALINKS, link_type=LINK_TYPES[0])
MetcalfScoring.LINKFINDER.calc_score(MetcalfScoring.DATALINKS, link_type=LINK_TYPES[1])
logger.debug("MetcalfScoring.setup caching results")
logger.info("MetcalfScoring.setup caching results")
save_pickled_data(
(dataset_counts, MetcalfScoring.DATALINKS, MetcalfScoring.LINKFINDER), cache_file
)
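load_pickled_data and save_pickled_data are nplinker helpers; the generic cache-or-compute pattern this setup step implements looks roughly like the following (a sketch, not the helpers' actual signatures):

```python
import os
import pickle

def cached_compute(cache_file: str, compute):
    """Return pickled results if the cache exists; otherwise compute and cache them."""
    if os.path.exists(cache_file):
        with open(cache_file, "rb") as fh:
            return pickle.load(fh)
    result = compute()
    with open(cache_file, "wb") as fh:
        pickle.dump(result, fh)
    return result
```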
@@ -163,7 +163,7 @@ def get_links(
("LinkFinder object not found. Have you called `MetcalfScoring.setup(npl)`?")
)

logger.debug(f"MetcalfScoring: standardised = {self.standardised}")
logger.info(f"MetcalfScoring: standardised = {self.standardised}")
if not self.standardised:
scores_list = self.LINKFINDER.get_links(*objects, score_cutoff=self.cutoff)
# TODO CG: verify the logics of standardised score and add unit tests
@@ -180,14 +180,14 @@ def get_links(
GCF | Spectrum | MolecularFamily, dict[GCF | Spectrum | MolecularFamily, ObjectLink]
] = {}
if obj_type == "gcf":
-logger.debug(
+logger.info(
f"MetcalfScoring: input_type=GCF, result_type=Spec/MolFam, "
f"#inputs={len(objects)}."
)
for scores in scores_list:
# when no links found
if scores.shape[1] == 0:
logger.debug(f'MetcalfScoring: found no "{scores.name}" links')
logger.info(f'MetcalfScoring: found no "{scores.name}" links')
else:
# when links found
for col_index in range(scores.shape[1]):
@@ -202,16 +202,16 @@ def get_links(
link_scores[gcf][met] = ObjectLink(
gcf, met, self, scores.loc["score", col_index]
)
logger.debug(f"MetcalfScoring: found {len(link_scores)} {scores.name} links.")
logger.info(f"MetcalfScoring: found {len(link_scores)} {scores.name} links.")
else:
-logger.debug(
+logger.info(
f"MetcalfScoring: input_type=Spec/MolFam, result_type=GCF, "
f"#inputs={len(objects)}."
)
scores = scores_list[0]
# when no links found
if scores.shape[1] == 0:
-logger.debug(f'MetcalfScoring: found no links "{scores.name}" for input objects')
+logger.info(f'MetcalfScoring: found no links "{scores.name}" for input objects')
else:
for col_index in range(scores.shape[1]):
gcf = self.npl.lookup_gcf(scores.loc["target", col_index])
Expand All @@ -224,10 +224,10 @@ def get_links(
link_scores[met][gcf] = ObjectLink(
met, gcf, self, scores.loc["score", col_index]
)
logger.debug(f"MetcalfScoring: found {len(link_scores)} {scores.name} links.")
logger.info(f"MetcalfScoring: found {len(link_scores)} {scores.name} links.")

link_collection._add_links_from_method(self, link_scores)
logger.debug("MetcalfScoring: completed")
logger.info("MetcalfScoring: completed")
return link_collection

def _calc_standardised_score_met(
Expand All @@ -237,7 +237,7 @@ def _calc_standardised_score_met(
raise ValueError(
"Metcalf mean and std not found. Have you called `MetcalfScoring.setup(npl)`?"
)
logger.debug("Calculating standardised Metcalf scores (met input)")
logger.info("Calculating standardised Metcalf scores (met input)")
raw_score = results[0]
z_scores = []
for col_index in range(raw_score.shape[1]):
@@ -276,7 +276,7 @@ def _calc_standardised_score_gen(
raise ValueError(
"Metcalf mean and std not found. Have you called `MetcalfScoring.setup(npl)`?"
)
logger.debug("Calculating standardised Metcalf scores (gen input)")
logger.info("Calculating standardised Metcalf scores (gen input)")
postprocessed_scores = []
for raw_score in results:
z_scores = []
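The standardised scores referenced in these hunks appear to be z-scores against the cached Metcalf mean and standard deviation (inferred from the "Metcalf mean and std not found" error message; the real code conditions these statistics on strain counts):

```python
import numpy as np

def standardise(raw: np.ndarray, mean: np.ndarray, std: np.ndarray) -> np.ndarray:
    """z-score: standard deviations by which a raw Metcalf score exceeds the mean."""
    return (raw - mean) / std

print(standardise(np.array([8.0]), np.array([5.0]), np.array([1.5])))  # [2.]
```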
