From 17beacd3f171d572929e12f2995f2e5740bbb6db Mon Sep 17 00:00:00 2001
From: Jonathan Karr
Date: Sun, 21 Mar 2021 13:51:20 -0400
Subject: [PATCH] starting new micro version; improving COMBINE error handling

---
 biosimulators_utils/_version.py     |   2 +-
 biosimulators_utils/combine/exec.py | 300 +++++++++++++++-------------
 2 files changed, 161 insertions(+), 141 deletions(-)

diff --git a/biosimulators_utils/_version.py b/biosimulators_utils/_version.py
index 0d033dff..fbd991f6 100644
--- a/biosimulators_utils/_version.py
+++ b/biosimulators_utils/_version.py
@@ -1 +1 @@
-__version__ = '0.1.40'
+__version__ = '0.1.41'

diff --git a/biosimulators_utils/combine/exec.py b/biosimulators_utils/combine/exec.py
index 96de041c..db906c4c 100644
--- a/biosimulators_utils/combine/exec.py
+++ b/biosimulators_utils/combine/exec.py
@@ -15,8 +15,8 @@
 from ..report.data_model import VariableResults, ReportFormat  # noqa: F401
 from ..sedml.data_model import (SedDocument, Task, Output, Report, DataSet, Plot2D, Curve,  # noqa: F401
                                 Plot3D, Surface, Variable)
-from ..sedml.io import SedmlSimulationReader  # noqa: F401
 from .exceptions import CombineArchiveExecutionError, NoSedmlError
+from .data_model import CombineArchive
 from .io import CombineArchiveReader
 from .utils import get_sedml_contents, get_summary_sedml_contents
 import datetime
@@ -88,151 +88,171 @@ def sed_doc_executer(doc, working_dir, base_out_path, rel_out_path=None,
     Returns:
         :obj:`CombineArchiveLog`: log
     """
-    config = get_config()
-
-    # process arguments
-    if report_formats is None:
-        report_formats = [ReportFormat(format_value) for format_value in config.REPORT_FORMATS]
-
-    if plot_formats is None:
-        plot_formats = [PlotFormat(format_value) for format_value in config.PLOT_FORMATS]
-
-    if bundle_outputs is None:
-        bundle_outputs = config.BUNDLE_OUTPUTS
-
-    if keep_individual_outputs is None:
-        keep_individual_outputs = config.KEEP_INDIVIDUAL_OUTPUTS
-
-    verbose = config.VERBOSE
-
-    # create temporary directory to unpack archive
-    archive_tmp_dir = tempfile.mkdtemp()
-
-    # unpack archive and read metadata
-    archive = CombineArchiveReader.run(archive_filename, archive_tmp_dir)
-
-    # determine files to execute
-    sedml_contents = get_sedml_contents(archive)
-    if not sedml_contents:
-        msg = "COMBINE/OMEX archive '{}' does not contain any executing SED-ML files".format(archive_filename)
-        raise NoSedmlError(msg)
-
-    # print summary of SED documents
-    print(get_summary_sedml_contents(archive, archive_tmp_dir))
-
-    # create output directory
-    if not os.path.isdir(out_dir):
-        os.makedirs(out_dir)
-
-    # initialize status and output
-    supported_features = sed_doc_executer_supported_features
-    logged_features = sed_doc_executer_logged_features
-
-    if SedDocument not in supported_features:
-        supported_features = tuple(list(supported_features) + [SedDocument])
-
-    if SedDocument not in logged_features:
-        logged_features = tuple(list(logged_features) + [SedDocument])
-
-    log = init_combine_archive_log(archive, archive_tmp_dir,
-                                   supported_features=supported_features,
-                                   logged_features=logged_features)
-    log.status = Status.RUNNING
-    log.out_dir = out_dir
-    log.export()
-    start_time = datetime.datetime.now()
-
-    # execute SED-ML files: execute tasks and save output
-    exceptions = []
-    for i_content, content in enumerate(sedml_contents):
-        content_filename = os.path.join(archive_tmp_dir, content.location)
-        content_id = os.path.relpath(content_filename, archive_tmp_dir)
-
-        print('Executing SED-ML file {}: {} ...'.format(i_content, content_id))
-
-        doc_log = log.sed_documents[content_id]
-        doc_log.status = Status.RUNNING
-        doc_log.export()
-
-        with StandardOutputErrorCapturer(relay=verbose) as captured:
-            doc_start_time = datetime.datetime.now()
-            try:
-                working_dir = os.path.dirname(content_filename)
-                sed_doc_executer(content_filename,
-                                 working_dir,
-                                 out_dir,
-                                 os.path.relpath(content_filename, archive_tmp_dir),
-                                 apply_xml_model_changes=apply_xml_model_changes,
-                                 report_formats=report_formats,
-                                 plot_formats=plot_formats,
-                                 log=doc_log,
-                                 indent=1)
-                doc_log.status = Status.SUCCEEDED
-            except Exception as exception:
-                exceptions.append(exception)
-                doc_log.status = Status.FAILED
-                doc_log.exception = exception
-
-        # update status
-        doc_log.output = captured.get_text()
-        doc_log.duration = (datetime.datetime.now() - doc_start_time).total_seconds()
+    with StandardOutputErrorCapturer(relay=True) as archive_captured:
+        config = get_config()
+        verbose = config.VERBOSE
+
+        # initialize status and output
+        supported_features = sed_doc_executer_supported_features
+        logged_features = sed_doc_executer_logged_features
+
+        if SedDocument not in supported_features:
+            supported_features = tuple(list(supported_features) + [SedDocument])
+
+        if SedDocument not in logged_features:
+            logged_features = tuple(list(logged_features) + [SedDocument])
+
+        start_time = datetime.datetime.now()
+
+        # create output directory
+        if bundle_outputs is None:
+            bundle_outputs = config.BUNDLE_OUTPUTS
+
+        if keep_individual_outputs is None:
+            keep_individual_outputs = config.KEEP_INDIVIDUAL_OUTPUTS
+
+        if not os.path.isdir(out_dir):
+            os.makedirs(out_dir)
+
+        # process arguments
+        if report_formats is None:
+            report_formats = [ReportFormat(format_value) for format_value in config.REPORT_FORMATS]
+
+        if plot_formats is None:
+            plot_formats = [PlotFormat(format_value) for format_value in config.PLOT_FORMATS]
+
+        archive = CombineArchive()
+        archive_tmp_dir = None
+        try:
+            # create temporary directory to unpack archive
+            archive_tmp_dir = tempfile.mkdtemp()
+
+            # unpack archive and read metadata
+            archive = CombineArchiveReader.run(archive_filename, archive_tmp_dir)
+
+            # determine files to execute
+            sedml_contents = get_sedml_contents(archive)
+            if not sedml_contents:
+                msg = "COMBINE/OMEX archive '{}' does not contain any executing SED-ML files".format(archive_filename)
+                raise NoSedmlError(msg)
+
+            # print summary of SED documents
+            print(get_summary_sedml_contents(archive, archive_tmp_dir))
+
+        except Exception as exception:
+            log = init_combine_archive_log(archive, archive_tmp_dir,
+                                           supported_features=supported_features,
+                                           logged_features=logged_features)
+            log.status = Status.FAILED
+            log.out_dir = out_dir
+            log.exception = exception
+            log.output = archive_captured.get_text()
+            log.duration = (datetime.datetime.now() - start_time).total_seconds()
+            log.finalize()
+            log.export()
+            raise
+
+        log = init_combine_archive_log(archive, archive_tmp_dir,
+                                       supported_features=supported_features,
+                                       logged_features=logged_features)
+        log.status = Status.RUNNING
+        log.out_dir = out_dir
+        log.export()
+
+        # execute SED-ML files: execute tasks and save output
+        exceptions = []
+        for i_content, content in enumerate(sedml_contents):
+            content_filename = os.path.join(archive_tmp_dir, content.location)
+            content_id = os.path.relpath(content_filename, archive_tmp_dir)
+
+            print('Executing SED-ML file {}: {} ...'.format(i_content, content_id))
+
+            doc_log = log.sed_documents[content_id]
+            doc_log.status = Status.RUNNING
             doc_log.export()
-
-        print('')
-
-    if bundle_outputs:
-        print('Bundling outputs ...')
-
-        # bundle CSV files of reports into zip archive
-        archive_paths = [os.path.join(out_dir, '**', '*.' + format.value) for format in report_formats if format != ReportFormat.h5]
-        archive = build_archive_from_paths(archive_paths, out_dir)
-        if archive.files:
-            ArchiveWriter().run(archive, os.path.join(out_dir, config.REPORTS_PATH))
-
-        # bundle PDF files of plots into zip archive
-        archive_paths = [os.path.join(out_dir, '**', '*.' + format.value) for format in plot_formats]
-        archive = build_archive_from_paths(archive_paths, out_dir)
-        if archive.files:
-            ArchiveWriter().run(archive, os.path.join(out_dir, config.PLOTS_PATH))
-
-    # cleanup temporary files
-    print('Cleaning up ...')
-    if not keep_individual_outputs:
-
-        path_patterns = (
-            [os.path.join(out_dir, '**', '*.' + format.value) for format in report_formats if format != ReportFormat.h5]
-            + [os.path.join(out_dir, '**', '*.' + format.value) for format in plot_formats]
-        )
-        for path_pattern in path_patterns:
-            for path in glob.glob(path_pattern, recursive=True):
-                os.remove(path)
-
-        for dir_path, dir_names, file_names in os.walk(out_dir, topdown=False):
-            for dir_name in list(dir_names):
-                full_dir_name = os.path.join(dir_path, dir_name)
-                if not os.path.isdir(full_dir_name):
-                    dir_names.remove(dir_name)
-                elif not os.listdir(full_dir_name):
-                    # not reachable because directory would
-                    # have already been removed by the iteration for the directory
-                    shutil.rmtree(full_dir_name)  # pragma: no cover
-                    dir_names.remove(dir_name)  # pragma: no cover
-            if not dir_names and not file_names:
-                shutil.rmtree(dir_path)
-
-    shutil.rmtree(archive_tmp_dir)
+            with StandardOutputErrorCapturer(relay=verbose) as doc_captured:
+                doc_start_time = datetime.datetime.now()
+                try:
+                    working_dir = os.path.dirname(content_filename)
+                    sed_doc_executer(content_filename,
+                                     working_dir,
+                                     out_dir,
+                                     os.path.relpath(content_filename, archive_tmp_dir),
+                                     apply_xml_model_changes=apply_xml_model_changes,
+                                     report_formats=report_formats,
+                                     plot_formats=plot_formats,
+                                     log=doc_log,
+                                     indent=1)
+                    doc_log.status = Status.SUCCEEDED
+                except Exception as exception:
+                    exceptions.append(exception)
+                    doc_log.status = Status.FAILED
+                    doc_log.exception = exception
+
+                # update status
+                doc_log.output = doc_captured.get_text()
+                doc_log.duration = (datetime.datetime.now() - doc_start_time).total_seconds()
+                doc_log.export()
+
+            print('')
+
+        if bundle_outputs:
+            print('Bundling outputs ...')
+
+            # bundle CSV files of reports into zip archive
+            archive_paths = [os.path.join(out_dir, '**', '*.' + format.value) for format in report_formats if format != ReportFormat.h5]
+            archive = build_archive_from_paths(archive_paths, out_dir)
+            if archive.files:
+                ArchiveWriter().run(archive, os.path.join(out_dir, config.REPORTS_PATH))
+
+            # bundle PDF files of plots into zip archive
+            archive_paths = [os.path.join(out_dir, '**', '*.' + format.value) for format in plot_formats]
+            archive = build_archive_from_paths(archive_paths, out_dir)
+            if archive.files:
+                ArchiveWriter().run(archive, os.path.join(out_dir, config.PLOTS_PATH))
+
+        # cleanup temporary files
+        print('Cleaning up ...')
+        if not keep_individual_outputs:
+
+            path_patterns = (
+                [os.path.join(out_dir, '**', '*.' + format.value) for format in report_formats if format != ReportFormat.h5]
+                + [os.path.join(out_dir, '**', '*.' + format.value) for format in plot_formats]
+            )
+            for path_pattern in path_patterns:
+                for path in glob.glob(path_pattern, recursive=True):
+                    os.remove(path)
+
+            for dir_path, dir_names, file_names in os.walk(out_dir, topdown=False):
+                for dir_name in list(dir_names):
+                    full_dir_name = os.path.join(dir_path, dir_name)
+                    if not os.path.isdir(full_dir_name):
+                        dir_names.remove(dir_name)
+                    elif not os.listdir(full_dir_name):
+                        # not reachable because directory would
+                        # have already been removed by the iteration for the directory
+                        shutil.rmtree(full_dir_name)  # pragma: no cover
+                        dir_names.remove(dir_name)  # pragma: no cover
+                if not dir_names and not file_names:
+                    shutil.rmtree(dir_path)
+
+        shutil.rmtree(archive_tmp_dir)
+
+        # update status
+        log.status = Status.FAILED if exceptions else Status.SUCCEEDED
+        log.duration = (datetime.datetime.now() - start_time).total_seconds()
+        log.finalize()
+
+        # summarize execution
+        print('')
+        print('============= SUMMARY =============')
+        print(get_summary_combine_archive_log(log))
 
     # update status
-    log.status = Status.FAILED if exceptions else Status.SUCCEEDED
-    log.duration = (datetime.datetime.now() - start_time).total_seconds()
-    log.finalize()
+    log.output = archive_captured.get_text()
     log.export()
 
-    # summarize execution
-    print('')
-    print('============= SUMMARY =============')
-    print(get_summary_combine_archive_log(log))
-
     # raise exceptions
     if exceptions:
         msg = 'The COMBINE/OMEX did not execute successfully:\n\n  {}'.format(
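
Note: the substance of this patch is that archive unpacking and SED-ML discovery now run inside a try/except that records the failure in the log (status, exception, captured output, duration), exports the log, and only then re-raises, so callers always receive a finalized log even when the archive cannot be read. The following is a minimal, self-contained sketch of that pattern; the ExecutionLog class and run() function are hypothetical stand-ins for illustration, not the biosimulators_utils API.

    import datetime
    import tempfile


    class ExecutionLog(object):
        """Hypothetical stand-in for CombineArchiveLog."""

        def __init__(self):
            self.status = 'QUEUED'
            self.exception = None
            self.duration = None

        def export(self):
            # a real log would be serialized to disk here
            print('log: status={}, duration={}s'.format(self.status, self.duration))


    def run(unpack_archive):
        log = ExecutionLog()
        start_time = datetime.datetime.now()
        try:
            tmp_dir = tempfile.mkdtemp()
            unpack_archive(tmp_dir)
        except Exception as exception:
            # the key change: record and export the failure before propagating it
            log.status = 'FAILED'
            log.exception = exception
            log.duration = (datetime.datetime.now() - start_time).total_seconds()
            log.export()
            raise
        log.status = 'RUNNING'
        log.export()
        # ... execute the archive's SED-ML documents, logging each one ...


    if __name__ == '__main__':
        def corrupt_archive(tmp_dir):
            raise ValueError('archive is corrupt')

        try:
            run(corrupt_archive)
        except ValueError:
            pass  # the failure was already logged and exported by run()

The patch applies the same idea at two granularities: an outer StandardOutputErrorCapturer (archive_captured) collects output for the whole archive, while the per-document capturer (doc_captured) scopes each SED-ML file's output to its own document log.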