From 31d9365f236996f42daaece44dfad561361f83d8 Mon Sep 17 00:00:00 2001 From: Giulia Crocioni <55382553+gcroci2@users.noreply.github.com> Date: Wed, 21 Jun 2023 10:13:06 +0200 Subject: [PATCH] refactor: move non-download functionalities out of `PODPDownloader` - `_run_bigscape` (#143) * move `_run_biscape` to runbigscape.py and rename to `podp_run_bigscape` * delete `_run_bigscape` and use `podp_run_bigscape` * fix prospector errors * sort libraries * run yapf * remove if __name__ statement from runbigscape and add type hint for extra_params --------- Co-authored-by: Cunliang Geng --- src/nplinker/pairedomics/downloader.py | 27 ++---------- src/nplinker/pairedomics/runbigscape.py | 58 +++++++++++++++++-------- 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/src/nplinker/pairedomics/downloader.py b/src/nplinker/pairedomics/downloader.py index 562a15e9..1ee6942a 100644 --- a/src/nplinker/pairedomics/downloader.py +++ b/src/nplinker/pairedomics/downloader.py @@ -27,7 +27,7 @@ from nplinker.strain_collection import StrainCollection from nplinker.strains import Strain from . import podp_download_and_extract_antismash_data -from .runbigscape import run_bigscape +from .runbigscape import podp_run_bigscape logger = LogConfig.getLogger(__name__) @@ -143,9 +143,7 @@ def _init_folder_structure(self, local_cache): self.project_json_file = os.path.join(self.local_cache, f'{self.gnps_massive_id}.json') - -# CG: download function - + # download function def get(self, do_bigscape, extra_bigscape_parameters, use_mibig, mibig_version): logger.info('Going to download the metabolomics data file') @@ -167,30 +165,13 @@ def get(self, do_bigscape, extra_bigscape_parameters, use_mibig, if use_mibig: self._download_mibig_json(mibig_version) - self._run_bigscape(do_bigscape, extra_bigscape_parameters) + podp_run_bigscape(self.project_file_cache, self.PFAM_PATH, do_bigscape, + extra_bigscape_parameters) def _is_new_gnps_format(self, directory): # TODO this should test for existence of quantification table instead return os.path.exists(os.path.join(directory, 'qiime2_output')) - def _run_bigscape(self, do_bigscape, extra_bigscape_parameters): - # TODO this currently assumes docker environment, allow customisation? - # can check if in container with: https://stackoverflow.com/questions/20010199/how-to-determine-if-a-process-runs-inside-lxc-docker - if not do_bigscape: - logger.info('BiG-SCAPE disabled by configuration, not running it') - return - - logger.info('Running BiG-SCAPE! extra_bigscape_parameters="%s"', - extra_bigscape_parameters) - try: - run_bigscape('bigscape.py', - os.path.join(self.project_file_cache, 'antismash'), - os.path.join(self.project_file_cache, 'bigscape'), - self.PFAM_PATH, extra_bigscape_parameters) - except Exception as e: - logger.warning( - 'Failed to run BiG-SCAPE on antismash data, error was "%s"', e) - def _download_mibig_json(self, version): output_path = os.path.join(self.project_file_cache, 'mibig_json') diff --git a/src/nplinker/pairedomics/runbigscape.py b/src/nplinker/pairedomics/runbigscape.py index db7d2f9c..41d538f6 100644 --- a/src/nplinker/pairedomics/runbigscape.py +++ b/src/nplinker/pairedomics/runbigscape.py @@ -15,6 +15,7 @@ import os import subprocess import sys +from os import PathLike from ..logconfig import LogConfig logger = LogConfig.getLogger(__name__) @@ -23,24 +24,27 @@ # called in context of nplinker Docker image, where bigscape should be available -def run_bigscape(bigscape_py_path, antismash_path, output_path, pfam_path, - extra_params): +def run_bigscape(bigscape_py_path: str | PathLike, + antismash_path: str | PathLike, output_path: str | PathLike, + pfam_path: str | PathLike, extra_params: str): logger.info( - 'run_bigscape: input="{}", output="{}", extra_params={}"'.format( - antismash_path, output_path, extra_params)) + f'run_bigscape: input="{antismash_path}", output="{output_path}", extra_params={extra_params}"' + ) if os.path.exists(os.path.join(output_path, 'completed')): logger.info('BiG-SCAPE appears to have been run already, skipping!') - logger.info('To force re-run, delete {}'.format( - os.path.join(output_path, 'completed'))) + logger.info('To force re-run, delete {%s}', + os.path.join(output_path, 'completed')) return True try: - subprocess.run([bigscape_py_path, '-h'], capture_output=True) + subprocess.run([bigscape_py_path, '-h'], + capture_output=True, + check=True) except Exception as e: raise Exception( - 'Failed to find/run bigscape.py (path={}, err={})'.format( - bigscape_py_path, e)) + f'Failed to find/run bigscape.py (path={bigscape_py_path}, err={e})' + ) from e if not os.path.exists(antismash_path): raise Exception(f'antismash_path "{antismash_path}" does not exist!') @@ -56,9 +60,11 @@ def run_bigscape(bigscape_py_path, antismash_path, output_path, pfam_path, args.extend(extra_params.split(' ')) logger.info(f'BiG-SCAPE command: {args}') - result = subprocess.run(args, stdout=sys.stdout, stderr=sys.stderr) - logger.info('BiG-SCAPE completed with return code {}'.format( - result.returncode)) + result = subprocess.run(args, + stdout=sys.stdout, + stderr=sys.stderr, + check=True) + logger.info(f'BiG-SCAPE completed with return code {result.returncode}') # use subprocess.CompletedProcess.check_returncode() to test if the BiG-SCAPE # process exited successfully. This throws an exception for non-zero returncodes # which will indicate to the PODPDownloader module that something went wrong. @@ -66,14 +72,28 @@ def run_bigscape(bigscape_py_path, antismash_path, output_path, pfam_path, # use presence of this file as a quick way to check if a previous run # finished or not - open(os.path.join(output_path, 'completed'), 'w').close() + with open(os.path.join(output_path, 'completed'), 'w') as f: + f.close() return True -if __name__ == "__main__": - run_bigscape(sys.argv[1], - sys.argv[2], - sys.argv[3], - sys.argv[4], - cutoffs=[0.3]) +def podp_run_bigscape(project_file_cache: str | PathLike, + PFAM_PATH: str | PathLike, do_bigscape: bool, + extra_bigscape_parameters): + # TODO this currently assumes docker environment, allow customisation? + # can check if in container with: https://stackoverflow.com/questions/20010199/how-to-determine-if-a-process-runs-inside-lxc-docker + if not do_bigscape: + logger.info('BiG-SCAPE disabled by configuration, not running it') + return + + logger.info('Running BiG-SCAPE! extra_bigscape_parameters="%s"', + extra_bigscape_parameters) + try: + run_bigscape('bigscape.py', + os.path.join(project_file_cache, 'antismash'), + os.path.join(project_file_cache, 'bigscape'), PFAM_PATH, + extra_bigscape_parameters) + except Exception as e: + logger.warning( + 'Failed to run BiG-SCAPE on antismash data, error was "%s"', e)