Skip to content

Commit

Permalink
add default output path
Browse files Browse the repository at this point in the history
Add the default output path according to the design of https://nplinker.github.io/nplinker/latest/concepts/working_dir_structure/.
  • Loading branch information
CunliangGeng committed Apr 18, 2024
1 parent 36e638b commit 7d36688
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 5 deletions.
1 change: 1 addition & 0 deletions src/nplinker/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@
# Default directories, all resolved relative to the configured root directory
# (`config.root_dir`).

# The `antismash` directory directly under the root directory.
ANTISMASH_DEFAULT_PATH: Path = config.root_dir / "antismash"
# The `bigscape` directory directly under the root directory.
BIGSCAPE_DEFAULT_PATH: Path = config.root_dir / "bigscape"
# Subdirectory of the `bigscape` directory; presumably where the output of a
# BiG-SCAPE run is written -- TODO confirm against the code that uses it.
BIGSCAPE_RUNNING_OUTPUT_PATH: Path = BIGSCAPE_DEFAULT_PATH / "bigscape_running_output"
# The `output` directory directly under the root directory.
OUTPUT_DEFAULT_PATH: Path = config.root_dir / "output"
13 changes: 9 additions & 4 deletions src/nplinker/scoring/metcalf_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import TYPE_CHECKING
import numpy as np
import pandas as pd
from nplinker.defaults import OUTPUT_DEFAULT_PATH
from nplinker.genomics import GCF
from nplinker.logconfig import LogConfig
from nplinker.metabolomics import MolecularFamily
Expand Down Expand Up @@ -31,11 +32,13 @@ class MetcalfScoring(ScoringMethod):
DATALINKS: The DataLinks object to use for scoring.
LINKFINDER: The LinkFinder object to use for scoring.
NAME: The name of the scoring method. This is set to 'metcalf'.
CACHE: The name of the cache file to use for storing the MetcalfScoring.
"""

DATALINKS = None
LINKFINDER = None
NAME = "metcalf"
CACHE = "cache_metcalf_scoring.pckl"

def __init__(self, npl: NPLinker) -> None:
"""Create a MetcalfScoring object.
Expand Down Expand Up @@ -69,9 +72,8 @@ def setup(npl: NPLinker):
)
)

cache_dir = os.path.join(npl.root_dir, "metcalf")
cache_file = os.path.join(cache_dir, "metcalf_scores.pckl")
os.makedirs(cache_dir, exist_ok=True)
OUTPUT_DEFAULT_PATH.mkdir(exist_ok=True)
cache_file = OUTPUT_DEFAULT_PATH / MetcalfScoring.CACHE

# the metcalf preprocessing can take a long time for large datasets, so it's
# better to cache as the data won't change unless the number of objects does
Expand Down Expand Up @@ -115,7 +117,8 @@ def setup(npl: NPLinker):

# TODO CG: is it needed? remove it if not
@property
def datalinks(self) -> DataLinks:
def datalinks(self) -> DataLinks | None:
"""Get the DataLinks object used for scoring."""
return MetcalfScoring.DATALINKS

def get_links(
Expand Down Expand Up @@ -309,10 +312,12 @@ def _calc_standardised_score_gen(

# TODO CG: refactor this method
def format_data(self, data):
    """Render a score as text for display.

    Args:
        data: The value to format. For metcalf scoring this is a single
            floating point number (the score itself).

    Returns:
        The value formatted as fixed-point text with four decimal places.
    """
    # ".4f" is the same format spec the display has always used: fixed-point
    # notation, four digits after the decimal point.
    return format(data, ".4f")

# TODO CG: refactor this method
def sort(self, objects, reverse=True):
    """Return the given objects ordered by their score for this method.

    Args:
        objects: Iterable of link objects. Each one is indexed with this
            scoring method instance (``obj[self]``) to obtain its sort key.
        reverse: If True (the default) the largest key comes first;
            otherwise the smallest key comes first.

    Returns:
        A new list holding the sorted objects; the input is not modified.
    """
    # Work on a copy so the caller's collection is left untouched.
    ranked = list(objects)
    ranked.sort(key=lambda link: link[self], reverse=reverse)
    return ranked
2 changes: 1 addition & 1 deletion tests/integration/test_nplinker_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def npl() -> NPLinker:
npl.load_data()
# remove cached score results before running tests
root_dir = Path(npl.root_dir)
score_cache = root_dir / "metcalf" / "metcalf_scores.pckl"
score_cache = root_dir / "output" / "cache_metcalf_scoring.pckl"
score_cache.unlink(missing_ok=True)
return npl

Expand Down

0 comments on commit 7d36688

Please sign in to comment.