Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor config loader #203

Merged
merged 8 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ namespaces = true # enable data directory to be identified
[tool.setuptools.package-data]
"nplinker.data" = ["*"]
"nplinker.schemas" = ["*"]
"nplinker" = ["nplinker_default.toml"]

[tool.pytest.ini_options]
minversion = "6.0"
Expand Down
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
beautifulsoup4
biopython
deprecated
dynaconf
httpx
numpy
pandas
Expand All @@ -9,7 +10,5 @@ pyteomics
pytest-lazy-fixture
scipy
sortedcontainers
toml
tqdm
typing_extensions
xdg
191 changes: 35 additions & 156 deletions src/nplinker/config.py
Original file line number Diff line number Diff line change
@@ -1,157 +1,36 @@
import argparse
import os
from collections.abc import Mapping
from shutil import copyfile
import toml
from xdg import XDG_CONFIG_HOME
from .logconfig import LogConfig


try:
from importlib.resources import files
except ImportError:
from importlib_resources import files

logger = LogConfig.getLogger(__name__)


class Args:
    """Command-line argument parsing for NPLinker.

    Arguments are parsed from ``sys.argv`` immediately on construction.
    Command-line values are intended to override values loaded from
    configuration files (see the ``Config`` class).
    """

    def __init__(self):
        # TODO need to finalise these
        self.parser = argparse.ArgumentParser(
            description="nplinker arguments",
            epilog="Note: command-line arguments will override "
            "arguments from configuration files",
        )
        self.parser.add_argument(
            "-c", "--config", help="Path to a .toml configuration file", metavar="path"
        )
        self.parser.add_argument(
            "-d",
            "--dataset.root",
            help='Root path for the dataset to be loaded (or "platform:datasetID" for remote datasets)',
            metavar="root",
        )
        self.parser.add_argument(
            "-l",
            "--loglevel",
            help="Logging verbosity level: DEBUG, INFO, WARNING, ERROR",
            metavar="loglevel",
        )
        self.parser.add_argument(
            "-f", "--logfile", help="Redirect logging from stdout to this file", metavar="logfile"
        )
        self.parser.add_argument(
            "-s",
            "--log_to_stdout",
            help="keep logging to stdout even if --logfile used",
            metavar="log_to_stdout",
        )

        self.parser.add_argument(
            "--bigscape-cutoff", help="BIGSCAPE clustering cutoff threshold", metavar="cutoff"
        )
        self.parser.add_argument(
            "--repro-file", help="Filename to store reproducibility data in", metavar="filename"
        )

        self.args = self.parser.parse_args()

    def get_args(self):
        """Return the parsed arguments as a (possibly nested) dict.

        Options the user did not supply (value ``None``) are omitted so
        they cannot overwrite settings coming from configuration files.
        A dotted option name such as ``dataset.root`` is expanded into
        nested dicts — ``{"dataset": {"root": ...}}`` — the structure the
        ``Config`` class expects.
        """
        args = {}
        for key, value in vars(self.args).items():
            # ignore any params with no value given
            if value is None:
                continue
            # expand dotted names into one nested dict level per part and
            # insert the value at the innermost level; non-dotted names
            # (empty `parents`) are inserted at the top level directly
            *parents, leaf = key.split(".")
            node = args
            for part in parents:
                node = node.setdefault(part, {})
            node[leaf] = value
        return args


class Config:
    """Wrapper for all NPLinker configuration options.

    On construction the per-user default config file (created from the
    template bundled with the package on first run) is loaded, then an
    optional user config file (the ``config`` key of ``config_dict``) is
    applied on top of it, and finally any remaining non-``None`` values
    in ``config_dict`` (typically command-line arguments) override both.
    The merged result is stored in ``self.config``.
    """

    # name of both the bundled template and the per-user config file
    DEFAULT_CONFIG = "nplinker.toml"

    def __init__(self, config_dict):
        """Build and validate the merged configuration.

        Args:
            config_dict: Nested dict of overrides, usually produced by
                ``Args.get_args()``. An optional ``config`` key points at a
                user-supplied .toml file; it is consumed here and removed
                from the dict.

        Raises:
            ValueError: If the merged configuration fails validation
                (see ``_validate``).
        """
        self.default_config_path = os.path.join(XDG_CONFIG_HOME, "nplinker", Config.DEFAULT_CONFIG)
        if not os.path.exists(self.default_config_path):
            logger.debug("Creating default config file")
            os.makedirs(os.path.join(XDG_CONFIG_HOME, "nplinker"), exist_ok=True)
            copyfile(
                files("nplinker").joinpath("data", Config.DEFAULT_CONFIG), self.default_config_path
            )

        # load the default per-user config file, then check for one provided
        # as an argument and if present use it to override the defaults.
        # NOTE: use context managers so the file handles are closed promptly
        # (the previous code passed bare open() results to toml.load, leaking
        # the handles)
        logger.debug("Parsing default config file: {}".format(self.default_config_path))
        with open(self.default_config_path) as default_file:
            config = toml.load(default_file)
        if "config" in config_dict:
            logger.debug("Loading user config {}".format(config_dict["config"]))
            if config_dict["config"] is not None:
                with open(config_dict["config"]) as user_file:
                    user_config = toml.load(user_file)
                config.update(user_config)
            del config_dict["config"]

        # remaining values in the dict should override the existing ones from config files
        # however if running non-interactively, argparse will set values of all non-specified
        # options to None and don't want to wipe out existing settings, so do things this way
        def update(d, u):
            # recursive merge: nested mappings are merged key-by-key;
            # None values are skipped so they never clobber existing settings
            for k, v in u.items():
                if isinstance(v, Mapping):
                    d[k] = update(d.get(k, {}), v)
                elif v is not None:
                    d[k] = v
            return d

        config = update(config, config_dict)
        self._validate(config)
        self.config = config

    def _validate(self, config: dict) -> None:
        """Validates the configuration dictionary to ensure that all required
        fields are present and have valid values.

        As a side effect, sets ``config["dataset"]["platform_id"]``: the ID
        parsed from a ``"platform:<ID>"`` root, or ``""`` for local data.

        Args:
            config (dict): The configuration dictionary to validate.

        Raises:
            ValueError: If the configuration dictionary is missing required
                fields or contains invalid values.
        """
        if "dataset" not in config:
            raise ValueError('Not found config for "dataset".')

        root = config["dataset"].get("root")
        if root is None:
            raise ValueError('Not found config for "root".')

        # a "platform:<ID>" root means the dataset is fetched remotely by ID
        if root.startswith("platform:"):
            config["dataset"]["platform_id"] = root.replace("platform:", "")
            logger.info("Loading from platform project ID %s", config["dataset"]["platform_id"])
        else:
            config["dataset"]["platform_id"] = ""
            logger.info("Loading from local data in directory %s", root)

        antismash = config["dataset"].get("antismash")
        allowed_antismash_formats = ["default", "flat"]
        if antismash is not None:
            if "format" in antismash and antismash["format"] not in allowed_antismash_formats:
                raise ValueError(f'Unknown antismash format: {antismash["format"]}')
from pathlib import Path
from dynaconf import Dynaconf
from dynaconf import Validator


# The Dynaconf library is used for loading NPLinker config file.
# Users can set the config file location via the NPLINKER_CONFIG_FILE environment variable before
# running/importing NPLinker. If not set, we default to 'nplinker.toml' file in the current working
# directory.
# The loaded config data is available by importing this module and accessing the 'config' variable.


# Locate the user's config file; fail fast at import time if it does not exist
# so NPLinker never silently runs on defaults the user did not ask for
user_config_file = os.environ.get("NPLINKER_CONFIG_FILE", "nplinker.toml")
if not os.path.exists(user_config_file):
    raise FileNotFoundError(f"Config file '{user_config_file}' not found")

# Locate the default config file shipped inside the installed package,
# next to this module
default_config_file = Path(__file__).resolve().parent / "nplinker_default.toml"

# Load config files. Per the Dynaconf docs, `preload` files are read before
# `settings_files`, so values in the user's file override the defaults.
config = Dynaconf(settings_files=[user_config_file], preload=[default_config_file])

# Validate config
# Note:
# Validator parameter `required=False` means the setting (e.g. "loglevel") must not exist rather
# than being optional. So don't set the parameter `required` if the key is optional.
validators = [
    Validator("loglevel", is_type_of=str),
    Validator("logfile", is_type_of=str),
    Validator("repro_file", is_type_of=str),
    Validator("log_to_stdout", is_type_of=bool),
]
config.validators.register(*validators)
config.validators.validate()
1 change: 1 addition & 0 deletions src/nplinker/data/nplinker.toml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ repro_file = ""
# of the form: "platform:datasetID". For example, "platform:MSV000079284" would
# load the dataset with ID MSV000079284.
root = "<root directory of dataset>"
platform_id = ""

# you can optionally set the BIGSCAPE clustering cutoff value here. the default value
# is 30, but any of the valid BIGSCAPE clustering thresholds can be used assuming the
Expand Down
25 changes: 12 additions & 13 deletions src/nplinker/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from nplinker.class_info.chem_classes import ChemClassPredictions
from nplinker.class_info.class_matches import ClassMatches
from nplinker.class_info.runcanopus import run_canopus
from nplinker.config import config
from nplinker.genomics import add_bgc_to_gcf
from nplinker.genomics import add_strain_to_bgc
from nplinker.genomics import generate_mappings_genome_id_bgc_id
Expand Down Expand Up @@ -87,27 +88,25 @@ class DatasetLoader:
"Terpene",
]

def __init__(self, config_data):
def __init__(self):
# load the config data
self._config_dataset = config_data["dataset"]
self._config_docker = config_data.get("docker", {})
self._config_webapp = config_data.get("webapp", {})
self._config_antismash = config_data.get("antismash", {})
self._config_overrides = self._config_dataset.get("overrides", {})
self._config_docker = config.get("docker", {})
self._config_webapp = config.get("webapp", {})
self._config_antismash = config.get("antismash", {})
self._config_overrides = config.dataset.get("overrides", {})
# set private attributes
self._antismash_delimiters = self._config_antismash.get(
"antismash_delimiters", self.ANTISMASH_DELIMITERS_DEFAULT
)
self._antismash_ignore_spaces = self._config_antismash.get(
"ignore_spaces", self.ANTISMASH_IGNORE_SPACES_DEFAULT
)
self._bigscape_cutoff = self._config_dataset.get(
"bigscape_cutoff", self.BIGSCAPE_CUTOFF_DEFAULT
)
self._use_mibig = self._config_dataset.get("use_mibig", self.USE_MIBIG_DEFAULT)
self._mibig_version = self._config_dataset.get("mibig_version", self.MIBIG_VERSION_DEFAULT)
self._root = Path(self._config_dataset["root"])
self._platform_id = self._config_dataset["platform_id"]
self._bigscape_cutoff = config.dataset.get("bigscape_cutoff", self.BIGSCAPE_CUTOFF_DEFAULT)
self._use_mibig = config.dataset.get("use_mibig", self.USE_MIBIG_DEFAULT)
self._mibig_version = config.dataset.get("mibig_version", self.MIBIG_VERSION_DEFAULT)
# TODO: the actual value of self._root is set in _start_downloads() method
self._root = Path(config.dataset["root"])
self._platform_id = config.dataset["platform_id"]
self._remote_loading = len(self._platform_id) > 0

# set public attributes
Expand Down
Loading
Loading