Skip to content

Commit

Permalink
refactor: move non-download functionalities out of PODPDownloader -…
Browse files Browse the repository at this point in the history
… `_run_bigscape` (#143)

* move `_run_bigscape` to runbigscape.py and rename to `podp_run_bigscape`

* delete `_run_bigscape` and use `podp_run_bigscape`

* fix prospector errors

* sort libraries

* run yapf

* remove if __name__ statement from runbigscape and add type hint for extra_params

---------

Co-authored-by: Cunliang Geng <c.geng@esciencecenter.nl>
  • Loading branch information
gcroci2 and CunliangGeng committed Jun 21, 2023
1 parent 1188038 commit 31d9365
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 42 deletions.
27 changes: 4 additions & 23 deletions src/nplinker/pairedomics/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from nplinker.strain_collection import StrainCollection
from nplinker.strains import Strain
from . import podp_download_and_extract_antismash_data
from .runbigscape import run_bigscape
from .runbigscape import podp_run_bigscape

logger = LogConfig.getLogger(__name__)

Expand Down Expand Up @@ -143,9 +143,7 @@ def _init_folder_structure(self, local_cache):
self.project_json_file = os.path.join(self.local_cache,
f'{self.gnps_massive_id}.json')


# CG: download function

# download function
def get(self, do_bigscape, extra_bigscape_parameters, use_mibig,
mibig_version):
logger.info('Going to download the metabolomics data file')
Expand All @@ -167,30 +165,13 @@ def get(self, do_bigscape, extra_bigscape_parameters, use_mibig,

if use_mibig:
self._download_mibig_json(mibig_version)
self._run_bigscape(do_bigscape, extra_bigscape_parameters)
podp_run_bigscape(self.project_file_cache, self.PFAM_PATH, do_bigscape,
extra_bigscape_parameters)

def _is_new_gnps_format(self, directory):
# TODO this should test for existence of quantification table instead
return os.path.exists(os.path.join(directory, 'qiime2_output'))

def _run_bigscape(self, do_bigscape, extra_bigscape_parameters):
    """Run BiG-SCAPE on the cached antiSMASH data unless it is disabled.

    The ``antismash`` subfolder of ``self.project_file_cache`` is used as
    input and the ``bigscape`` subfolder as output; ``self.PFAM_PATH`` is
    handed to ``run_bigscape`` as the pfam path.

    Args:
        do_bigscape: When falsy, only a log message is written.
        extra_bigscape_parameters: Passed unchanged to ``run_bigscape``.

    Returns:
        None. Any exception raised while running is logged as a warning
        and not propagated.
    """
    # TODO this currently assumes docker environment, allow customisation?
    # can check if in container with: https://stackoverflow.com/questions/20010199/how-to-determine-if-a-process-runs-inside-lxc-docker
    if do_bigscape:
        logger.info('Running BiG-SCAPE! extra_bigscape_parameters="%s"',
                    extra_bigscape_parameters)
        try:
            # Paths are built inside the try so that a bad cache path is
            # reported through the same warning as a failed run.
            antismash_dir = os.path.join(self.project_file_cache, 'antismash')
            bigscape_dir = os.path.join(self.project_file_cache, 'bigscape')
            run_bigscape('bigscape.py', antismash_dir, bigscape_dir,
                         self.PFAM_PATH, extra_bigscape_parameters)
        except Exception as err:
            logger.warning(
                'Failed to run BiG-SCAPE on antismash data, error was "%s"',
                err)
    else:
        logger.info('BiG-SCAPE disabled by configuration, not running it')

def _download_mibig_json(self, version):
output_path = os.path.join(self.project_file_cache, 'mibig_json')

Expand Down
58 changes: 39 additions & 19 deletions src/nplinker/pairedomics/runbigscape.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import os
import subprocess
import sys
from os import PathLike
from ..logconfig import LogConfig

logger = LogConfig.getLogger(__name__)
Expand All @@ -23,24 +24,27 @@
# called in context of nplinker Docker image, where bigscape should be available


def run_bigscape(bigscape_py_path, antismash_path, output_path, pfam_path,
extra_params):
def run_bigscape(bigscape_py_path: str | PathLike,
antismash_path: str | PathLike, output_path: str | PathLike,
pfam_path: str | PathLike, extra_params: str):
logger.info(
'run_bigscape: input="{}", output="{}", extra_params={}"'.format(
antismash_path, output_path, extra_params))
f'run_bigscape: input="{antismash_path}", output="{output_path}", extra_params={extra_params}"'
)

if os.path.exists(os.path.join(output_path, 'completed')):
logger.info('BiG-SCAPE appears to have been run already, skipping!')
logger.info('To force re-run, delete {}'.format(
os.path.join(output_path, 'completed')))
logger.info('To force re-run, delete {%s}',
os.path.join(output_path, 'completed'))
return True

try:
subprocess.run([bigscape_py_path, '-h'], capture_output=True)
subprocess.run([bigscape_py_path, '-h'],
capture_output=True,
check=True)
except Exception as e:
raise Exception(
'Failed to find/run bigscape.py (path={}, err={})'.format(
bigscape_py_path, e))
f'Failed to find/run bigscape.py (path={bigscape_py_path}, err={e})'
) from e

if not os.path.exists(antismash_path):
raise Exception(f'antismash_path "{antismash_path}" does not exist!')
Expand All @@ -56,24 +60,40 @@ def run_bigscape(bigscape_py_path, antismash_path, output_path, pfam_path,
args.extend(extra_params.split(' '))

logger.info(f'BiG-SCAPE command: {args}')
result = subprocess.run(args, stdout=sys.stdout, stderr=sys.stderr)
logger.info('BiG-SCAPE completed with return code {}'.format(
result.returncode))
result = subprocess.run(args,
stdout=sys.stdout,
stderr=sys.stderr,
check=True)
logger.info(f'BiG-SCAPE completed with return code {result.returncode}')
# use subprocess.CompletedProcess.check_returncode() to test if the BiG-SCAPE
# process exited successfully. This throws an exception for non-zero returncodes
# which will indicate to the PODPDownloader module that something went wrong.
result.check_returncode()

# use presence of this file as a quick way to check if a previous run
# finished or not
open(os.path.join(output_path, 'completed'), 'w').close()
with open(os.path.join(output_path, 'completed'), 'w') as f:
f.close()

return True


if __name__ == "__main__":
run_bigscape(sys.argv[1],
sys.argv[2],
sys.argv[3],
sys.argv[4],
cutoffs=[0.3])
def podp_run_bigscape(project_file_cache: str | PathLike,
                      PFAM_PATH: str | PathLike, do_bigscape: bool,
                      extra_bigscape_parameters):
    """Run BiG-SCAPE on a project's antiSMASH data unless it is disabled.

    Args:
        project_file_cache: Folder whose ``antismash`` subfolder is the
            BiG-SCAPE input and whose ``bigscape`` subfolder is the output.
        PFAM_PATH: Forwarded to ``run_bigscape`` as its pfam path.
        do_bigscape: When falsy, only a log message is written.
        extra_bigscape_parameters: Passed unchanged to ``run_bigscape``.

    Returns:
        None. Any exception raised while running is logged as a warning
        and not propagated.
    """
    # TODO this currently assumes docker environment, allow customisation?
    # can check if in container with: https://stackoverflow.com/questions/20010199/how-to-determine-if-a-process-runs-inside-lxc-docker
    if do_bigscape:
        logger.info('Running BiG-SCAPE! extra_bigscape_parameters="%s"',
                    extra_bigscape_parameters)
        try:
            # Paths are built inside the try so that a bad cache path is
            # reported through the same warning as a failed run.
            antismash_dir = os.path.join(project_file_cache, 'antismash')
            bigscape_dir = os.path.join(project_file_cache, 'bigscape')
            run_bigscape('bigscape.py', antismash_dir, bigscape_dir,
                         PFAM_PATH, extra_bigscape_parameters)
        except Exception as err:
            logger.warning(
                'Failed to run BiG-SCAPE on antismash data, error was "%s"',
                err)
    else:
        logger.info('BiG-SCAPE disabled by configuration, not running it')

0 comments on commit 31d9365

Please sign in to comment.