Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor config loader #203

Merged
merged 8 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ namespaces = true # enable data directory to be identified
[tool.setuptools.package-data]
"nplinker.data" = ["*"]
"nplinker.schemas" = ["*"]
"nplinker" = ["nplinker_default.toml"]

[tool.pytest.ini_options]
minversion = "6.0"
Expand Down
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
beautifulsoup4
biopython
deprecated
dynaconf
httpx
numpy
pandas
Expand All @@ -9,7 +10,5 @@ pyteomics
pytest-lazy-fixture
scipy
sortedcontainers
toml
tqdm
typing_extensions
xdg
191 changes: 35 additions & 156 deletions src/nplinker/config.py
Original file line number Diff line number Diff line change
@@ -1,157 +1,36 @@
import argparse
import os
from collections.abc import Mapping
from shutil import copyfile
import toml
from xdg import XDG_CONFIG_HOME
from .logconfig import LogConfig


try:
from importlib.resources import files
except ImportError:
from importlib_resources import files

logger = LogConfig.getLogger(__name__)


class Args:
    """Command-line argument parsing for NPLinker.

    Arguments are parsed from ``sys.argv`` immediately on construction.
    Command-line values are intended to override values loaded from
    configuration files (see the ``Config`` class).
    """

    def __init__(self):
        # TODO need to finalise these
        self.parser = argparse.ArgumentParser(
            description="nplinker arguments",
            epilog="Note: command-line arguments will override "
            "arguments from configuration files",
        )
        self.parser.add_argument(
            "-c", "--config", help="Path to a .toml configuration file", metavar="path"
        )
        self.parser.add_argument(
            "-d",
            "--dataset.root",
            help='Root path for the dataset to be loaded (or "platform:datasetID" for remote datasets)',
            metavar="root",
        )
        self.parser.add_argument(
            "-l",
            "--loglevel",
            help="Logging verbosity level: DEBUG, INFO, WARNING, ERROR",
            metavar="loglevel",
        )
        self.parser.add_argument(
            "-f", "--logfile", help="Redirect logging from stdout to this file", metavar="logfile"
        )
        self.parser.add_argument(
            "-s",
            "--log_to_stdout",
            help="keep logging to stdout even if --logfile used",
            metavar="log_to_stdout",
        )

        self.parser.add_argument(
            "--bigscape-cutoff", help="BIGSCAPE clustering cutoff threshold", metavar="cutoff"
        )
        self.parser.add_argument(
            "--repro-file", help="Filename to store reproducibility data in", metavar="filename"
        )

        self.args = self.parser.parse_args()

    def get_args(self):
        """Return the parsed arguments as a (possibly nested) dict.

        Options the user did not supply (value ``None``) are omitted so
        they cannot overwrite settings coming from configuration files.
        A dotted option name such as ``dataset.root`` is expanded into
        nested dicts — ``{"dataset": {"root": ...}}`` — the structure the
        ``Config`` class expects.
        """
        args = {}
        for key, value in vars(self.args).items():
            # ignore any params with no value given
            if value is None:
                continue
            # expand dotted names into one nested dict level per part and
            # insert the value at the innermost level; non-dotted names
            # (empty `parents`) are inserted at the top level directly
            *parents, leaf = key.split(".")
            node = args
            for part in parents:
                node = node.setdefault(part, {})
            node[leaf] = value
        return args


class Config:
    """Wrapper for all NPLinker configuration options.

    On construction the per-user default config file (created from the
    template bundled with the package on first run) is loaded, then an
    optional user config file (the ``config`` key of ``config_dict``) is
    applied on top of it, and finally any remaining non-``None`` values
    in ``config_dict`` (typically command-line arguments) override both.
    The merged result is stored in ``self.config``.
    """

    # name of both the bundled template and the per-user config file
    DEFAULT_CONFIG = "nplinker.toml"

    def __init__(self, config_dict):
        """Build and validate the merged configuration.

        Args:
            config_dict: Nested dict of overrides, usually produced by
                ``Args.get_args()``. An optional ``config`` key points at a
                user-supplied .toml file; it is consumed here and removed
                from the dict.

        Raises:
            ValueError: If the merged configuration fails validation
                (see ``_validate``).
        """
        self.default_config_path = os.path.join(XDG_CONFIG_HOME, "nplinker", Config.DEFAULT_CONFIG)
        if not os.path.exists(self.default_config_path):
            logger.debug("Creating default config file")
            os.makedirs(os.path.join(XDG_CONFIG_HOME, "nplinker"), exist_ok=True)
            copyfile(
                files("nplinker").joinpath("data", Config.DEFAULT_CONFIG), self.default_config_path
            )

        # load the default per-user config file, then check for one provided
        # as an argument and if present use it to override the defaults.
        # NOTE: use context managers so the file handles are closed promptly
        # (the previous code passed bare open() results to toml.load, leaking
        # the handles)
        logger.debug("Parsing default config file: {}".format(self.default_config_path))
        with open(self.default_config_path) as default_file:
            config = toml.load(default_file)
        if "config" in config_dict:
            logger.debug("Loading user config {}".format(config_dict["config"]))
            if config_dict["config"] is not None:
                with open(config_dict["config"]) as user_file:
                    user_config = toml.load(user_file)
                config.update(user_config)
            del config_dict["config"]

        # remaining values in the dict should override the existing ones from config files
        # however if running non-interactively, argparse will set values of all non-specified
        # options to None and don't want to wipe out existing settings, so do things this way
        def update(d, u):
            # recursive merge: nested mappings are merged key-by-key;
            # None values are skipped so they never clobber existing settings
            for k, v in u.items():
                if isinstance(v, Mapping):
                    d[k] = update(d.get(k, {}), v)
                elif v is not None:
                    d[k] = v
            return d

        config = update(config, config_dict)
        self._validate(config)
        self.config = config

    def _validate(self, config: dict) -> None:
        """Validates the configuration dictionary to ensure that all required
        fields are present and have valid values.

        As a side effect, sets ``config["dataset"]["platform_id"]``: the ID
        parsed from a ``"platform:<ID>"`` root, or ``""`` for local data.

        Args:
            config (dict): The configuration dictionary to validate.

        Raises:
            ValueError: If the configuration dictionary is missing required
                fields or contains invalid values.
        """
        if "dataset" not in config:
            raise ValueError('Not found config for "dataset".')

        root = config["dataset"].get("root")
        if root is None:
            raise ValueError('Not found config for "root".')

        # a "platform:<ID>" root means the dataset is fetched remotely by ID
        if root.startswith("platform:"):
            config["dataset"]["platform_id"] = root.replace("platform:", "")
            logger.info("Loading from platform project ID %s", config["dataset"]["platform_id"])
        else:
            config["dataset"]["platform_id"] = ""
            logger.info("Loading from local data in directory %s", root)

        antismash = config["dataset"].get("antismash")
        allowed_antismash_formats = ["default", "flat"]
        if antismash is not None:
            if "format" in antismash and antismash["format"] not in allowed_antismash_formats:
                raise ValueError(f'Unknown antismash format: {antismash["format"]}')
from pathlib import Path
from dynaconf import Dynaconf
from dynaconf import Validator


# The Dynaconf library is used for loading NPLinker config file.
# Users can set the config file location via the NPLINKER_CONFIG_FILE environment variable before
# running/importing NPLinker. If not set, we default to 'nplinker.toml' file in the current working
# directory.
# The loaded config data is available by importing this module and accessing the 'config' variable.


# Locate the user's config file; fail fast at import time if it does not exist
# so NPLinker never silently runs on defaults the user did not ask for
user_config_file = os.environ.get("NPLINKER_CONFIG_FILE", "nplinker.toml")
if not os.path.exists(user_config_file):
    raise FileNotFoundError(f"Config file '{user_config_file}' not found")

# Locate the default config file shipped inside the installed package,
# next to this module
default_config_file = Path(__file__).resolve().parent / "nplinker_default.toml"

# Load config files. Per the Dynaconf docs, `preload` files are read before
# `settings_files`, so values in the user's file override the defaults.
config = Dynaconf(settings_files=[user_config_file], preload=[default_config_file])

# Validate config
# Note:
# Validator parameter `required=False` means the setting (e.g. "loglevel") must not exist rather
# than being optional. So don't set the parameter `required` if the key is optional.
validators = [
    Validator("loglevel", is_type_of=str),
    Validator("logfile", is_type_of=str),
    Validator("repro_file", is_type_of=str),
    Validator("log_to_stdout", is_type_of=bool),
]
config.validators.register(*validators)
config.validators.validate()
1 change: 1 addition & 0 deletions src/nplinker/data/nplinker.toml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ repro_file = ""
# of the form: "platform:datasetID". For example, "platform:MSV000079284" would
# load the dataset with ID MSV000079284.
root = "<root directory of dataset>"
platform_id = ""

# you can optionally set the BIGSCAPE clustering cutoff value here. the default value
# is 30, but any of the valid BIGSCAPE clustering thresholds can be used assuming the
Expand Down
25 changes: 12 additions & 13 deletions src/nplinker/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from nplinker.class_info.chem_classes import ChemClassPredictions
from nplinker.class_info.class_matches import ClassMatches
from nplinker.class_info.runcanopus import run_canopus
from nplinker.config import config
from nplinker.genomics import add_bgc_to_gcf
from nplinker.genomics import add_strain_to_bgc
from nplinker.genomics import generate_mappings_genome_id_bgc_id
Expand Down Expand Up @@ -87,27 +88,25 @@ class DatasetLoader:
"Terpene",
]

def __init__(self, config_data):
def __init__(self):
# load the config data
self._config_dataset = config_data["dataset"]
self._config_docker = config_data.get("docker", {})
self._config_webapp = config_data.get("webapp", {})
self._config_antismash = config_data.get("antismash", {})
self._config_overrides = self._config_dataset.get("overrides", {})
self._config_docker = config.get("docker", {})
self._config_webapp = config.get("webapp", {})
self._config_antismash = config.get("antismash", {})
self._config_overrides = config.dataset.get("overrides", {})
# set private attributes
self._antismash_delimiters = self._config_antismash.get(
"antismash_delimiters", self.ANTISMASH_DELIMITERS_DEFAULT
)
self._antismash_ignore_spaces = self._config_antismash.get(
"ignore_spaces", self.ANTISMASH_IGNORE_SPACES_DEFAULT
)
self._bigscape_cutoff = self._config_dataset.get(
"bigscape_cutoff", self.BIGSCAPE_CUTOFF_DEFAULT
)
self._use_mibig = self._config_dataset.get("use_mibig", self.USE_MIBIG_DEFAULT)
self._mibig_version = self._config_dataset.get("mibig_version", self.MIBIG_VERSION_DEFAULT)
self._root = Path(self._config_dataset["root"])
self._platform_id = self._config_dataset["platform_id"]
self._bigscape_cutoff = config.dataset.get("bigscape_cutoff", self.BIGSCAPE_CUTOFF_DEFAULT)
self._use_mibig = config.dataset.get("use_mibig", self.USE_MIBIG_DEFAULT)
self._mibig_version = config.dataset.get("mibig_version", self.MIBIG_VERSION_DEFAULT)
# TODO: the actual value of self._root is set in _start_downloads() method
self._root = Path(config.dataset["root"])
self._platform_id = config.dataset["platform_id"]
self._remote_loading = len(self._platform_id) > 0

# set public attributes
Expand Down
Loading
Loading