From 17beacd3f171d572929e12f2995f2e5740bbb6db Mon Sep 17 00:00:00 2001
From: Jonathan Karr
Date: Sun, 21 Mar 2021 13:51:20 -0400
Subject: [PATCH] starting new micro version; improving COMBINE error handling

---
 biosimulators_utils/_version.py     |   2 +-
 biosimulators_utils/combine/exec.py | 300 +++++++++++++++-------------
 2 files changed, 161 insertions(+), 141 deletions(-)

diff --git a/biosimulators_utils/_version.py b/biosimulators_utils/_version.py
index 0d033dff..fbd991f6 100644
--- a/biosimulators_utils/_version.py
+++ b/biosimulators_utils/_version.py
@@ -1 +1 @@
-__version__ = '0.1.40'
+__version__ = '0.1.41'

diff --git a/biosimulators_utils/combine/exec.py b/biosimulators_utils/combine/exec.py
index 96de041c..db906c4c 100644
--- a/biosimulators_utils/combine/exec.py
+++ b/biosimulators_utils/combine/exec.py
@@ -15,8 +15,8 @@
 from ..report.data_model import VariableResults, ReportFormat  # noqa: F401
 from ..sedml.data_model import (SedDocument, Task, Output, Report, DataSet, Plot2D, Curve,  # noqa: F401
                                 Plot3D, Surface, Variable)
-from ..sedml.io import SedmlSimulationReader  # noqa: F401
 from .exceptions import CombineArchiveExecutionError, NoSedmlError
+from .data_model import CombineArchive
 from .io import CombineArchiveReader
 from .utils import get_sedml_contents, get_summary_sedml_contents
 import datetime
@@ -88,151 +88,171 @@ def sed_doc_executer(doc, working_dir, base_out_path, rel_out_path=None,
     Returns:
         :obj:`CombineArchiveLog`: log
     """
-    config = get_config()
-
-    # process arguments
-    if report_formats is None:
-        report_formats = [ReportFormat(format_value) for format_value in config.REPORT_FORMATS]
-
-    if plot_formats is None:
-        plot_formats = [PlotFormat(format_value) for format_value in config.PLOT_FORMATS]
-
-    if bundle_outputs is None:
-        bundle_outputs = config.BUNDLE_OUTPUTS
-
-    if keep_individual_outputs is None:
-        keep_individual_outputs = config.KEEP_INDIVIDUAL_OUTPUTS
-
-    verbose = config.VERBOSE
-
-    # create temporary directory to unpack archive
-    archive_tmp_dir = tempfile.mkdtemp()
-
-    # unpack archive and read metadata
-    archive = CombineArchiveReader.run(archive_filename, archive_tmp_dir)
-
-    # determine files to execute
-    sedml_contents = get_sedml_contents(archive)
-    if not sedml_contents:
-        msg = "COMBINE/OMEX archive '{}' does not contain any executing SED-ML files".format(archive_filename)
-        raise NoSedmlError(msg)
-
-    # print summary of SED documents
-    print(get_summary_sedml_contents(archive, archive_tmp_dir))
-
-    # create output directory
-    if not os.path.isdir(out_dir):
-        os.makedirs(out_dir)
-
-    # initialize status and output
-    supported_features = sed_doc_executer_supported_features
-    logged_features = sed_doc_executer_logged_features
-
-    if SedDocument not in supported_features:
-        supported_features = tuple(list(supported_features) + [SedDocument])
-
-    if SedDocument not in logged_features:
-        logged_features = tuple(list(logged_features) + [SedDocument])
-
-    log = init_combine_archive_log(archive, archive_tmp_dir,
-                                   supported_features=supported_features,
-                                   logged_features=logged_features)
-    log.status = Status.RUNNING
-    log.out_dir = out_dir
-    log.export()
-    start_time = datetime.datetime.now()
-
-    # execute SED-ML files: execute tasks and save output
-    exceptions = []
-    for i_content, content in enumerate(sedml_contents):
-        content_filename = os.path.join(archive_tmp_dir, content.location)
-        content_id = os.path.relpath(content_filename, archive_tmp_dir)
-
-        print('Executing SED-ML file {}: {} ...'.format(i_content, content_id))
-
-        doc_log = log.sed_documents[content_id]
-        doc_log.status = Status.RUNNING
-        doc_log.export()
-
-        with StandardOutputErrorCapturer(relay=verbose) as captured:
-            doc_start_time = datetime.datetime.now()
-            try:
-                working_dir = os.path.dirname(content_filename)
-                sed_doc_executer(content_filename,
-                                 working_dir,
-                                 out_dir,
-                                 os.path.relpath(content_filename, archive_tmp_dir),
-                                 apply_xml_model_changes=apply_xml_model_changes,
-                                 report_formats=report_formats,
-                                 plot_formats=plot_formats,
-                                 log=doc_log,
-                                 indent=1)
-                doc_log.status = Status.SUCCEEDED
-            except Exception as exception:
-                exceptions.append(exception)
-                doc_log.status = Status.FAILED
-                doc_log.exception = exception
-
-        # update status
-        doc_log.output = captured.get_text()
-        doc_log.duration = (datetime.datetime.now() - doc_start_time).total_seconds()
+    with StandardOutputErrorCapturer(relay=True) as archive_captured:
+        config = get_config()
+        verbose = config.VERBOSE
+
+        # initialize status and output
+        supported_features = sed_doc_executer_supported_features
+        logged_features = sed_doc_executer_logged_features
+
+        if SedDocument not in supported_features:
+            supported_features = tuple(list(supported_features) + [SedDocument])
+
+        if SedDocument not in logged_features:
+            logged_features = tuple(list(logged_features) + [SedDocument])
+
+        start_time = datetime.datetime.now()
+
+        # create output directory
+        if bundle_outputs is None:
+            bundle_outputs = config.BUNDLE_OUTPUTS
+
+        if keep_individual_outputs is None:
+            keep_individual_outputs = config.KEEP_INDIVIDUAL_OUTPUTS
+
+        if not os.path.isdir(out_dir):
+            os.makedirs(out_dir)
+
+        # process arguments
+        if report_formats is None:
+            report_formats = [ReportFormat(format_value) for format_value in config.REPORT_FORMATS]
+
+        if plot_formats is None:
+            plot_formats = [PlotFormat(format_value) for format_value in config.PLOT_FORMATS]
+
+        archive = CombineArchive()
+        archive_tmp_dir = None
+        try:
+            # create temporary directory to unpack archive
+            archive_tmp_dir = tempfile.mkdtemp()
+
+            # unpack archive and read metadata
+            archive = CombineArchiveReader.run(archive_filename, archive_tmp_dir)
+
+            # determine files to execute
+            sedml_contents = get_sedml_contents(archive)
+            if not sedml_contents:
+                msg = "COMBINE/OMEX archive '{}' does not contain any executing SED-ML files".format(archive_filename)
+                raise NoSedmlError(msg)
+
+            # print summary of SED documents
+            print(get_summary_sedml_contents(archive, archive_tmp_dir))
+
+        except Exception as exception:
+            log = init_combine_archive_log(archive, archive_tmp_dir,
+                                           supported_features=supported_features,
+                                           logged_features=logged_features)
+            log.status = Status.FAILED
+            log.out_dir = out_dir
+            log.exception = exception
+            log.output = archive_captured.get_text()
+            log.duration = (datetime.datetime.now() - start_time).total_seconds()
+            log.finalize()
+            log.export()
+            raise
+
+        log = init_combine_archive_log(archive, archive_tmp_dir,
+                                       supported_features=supported_features,
+                                       logged_features=logged_features)
+        log.status = Status.RUNNING
+        log.out_dir = out_dir
+        log.export()
+
+        # execute SED-ML files: execute tasks and save output
+        exceptions = []
+        for i_content, content in enumerate(sedml_contents):
+            content_filename = os.path.join(archive_tmp_dir, content.location)
+            content_id = os.path.relpath(content_filename, archive_tmp_dir)
+
+            print('Executing SED-ML file {}: {} ...'.format(i_content, content_id))
+
+            doc_log = log.sed_documents[content_id]
+            doc_log.status = Status.RUNNING
             doc_log.export()
-
-        print('')
-
-    if bundle_outputs:
-        print('Bundling outputs ...')
-
-        # bundle CSV files of reports into zip archive
-        archive_paths = [os.path.join(out_dir, '**', '*.' + format.value) for format in report_formats if format != ReportFormat.h5]
-        archive = build_archive_from_paths(archive_paths, out_dir)
-        if archive.files:
-            ArchiveWriter().run(archive, os.path.join(out_dir, config.REPORTS_PATH))
-
-        # bundle PDF files of plots into zip archive
-        archive_paths = [os.path.join(out_dir, '**', '*.' + format.value) for format in plot_formats]
-        archive = build_archive_from_paths(archive_paths, out_dir)
-        if archive.files:
-            ArchiveWriter().run(archive, os.path.join(out_dir, config.PLOTS_PATH))
-
-    # cleanup temporary files
-    print('Cleaning up ...')
-    if not keep_individual_outputs:
-
-        path_patterns = (
-            [os.path.join(out_dir, '**', '*.' + format.value) for format in report_formats if format != ReportFormat.h5]
-            + [os.path.join(out_dir, '**', '*.' + format.value) for format in plot_formats]
-        )
-        for path_pattern in path_patterns:
-            for path in glob.glob(path_pattern, recursive=True):
-                os.remove(path)
-
-        for dir_path, dir_names, file_names in os.walk(out_dir, topdown=False):
-            for dir_name in list(dir_names):
-                full_dir_name = os.path.join(dir_path, dir_name)
-                if not os.path.isdir(full_dir_name):
-                    dir_names.remove(dir_name)
-                elif not os.listdir(full_dir_name):
-                    # not reachable because directory would
-                    # have already been removed by the iteration for the directory
-                    shutil.rmtree(full_dir_name)  # pragma: no cover
-                    dir_names.remove(dir_name)  # pragma: no cover
-            if not dir_names and not file_names:
-                shutil.rmtree(dir_path)
-
-    shutil.rmtree(archive_tmp_dir)
+            with StandardOutputErrorCapturer(relay=verbose) as doc_captured:
+                doc_start_time = datetime.datetime.now()
+                try:
+                    working_dir = os.path.dirname(content_filename)
+                    sed_doc_executer(content_filename,
+                                     working_dir,
+                                     out_dir,
+                                     os.path.relpath(content_filename, archive_tmp_dir),
+                                     apply_xml_model_changes=apply_xml_model_changes,
+                                     report_formats=report_formats,
+                                     plot_formats=plot_formats,
+                                     log=doc_log,
+                                     indent=1)
+                    doc_log.status = Status.SUCCEEDED
+                except Exception as exception:
+                    exceptions.append(exception)
+                    doc_log.status = Status.FAILED
+                    doc_log.exception = exception
+
+                # update status
+                doc_log.output = doc_captured.get_text()
+                doc_log.duration = (datetime.datetime.now() - doc_start_time).total_seconds()
+                doc_log.export()
+
+            print('')
+
+        if bundle_outputs:
+            print('Bundling outputs ...')
+
+            # bundle CSV files of reports into zip archive
+            archive_paths = [os.path.join(out_dir, '**', '*.' + format.value) for format in report_formats if format != ReportFormat.h5]
+            archive = build_archive_from_paths(archive_paths, out_dir)
+            if archive.files:
+                ArchiveWriter().run(archive, os.path.join(out_dir, config.REPORTS_PATH))
+
+            # bundle PDF files of plots into zip archive
+            archive_paths = [os.path.join(out_dir, '**', '*.' + format.value) for format in plot_formats]
+            archive = build_archive_from_paths(archive_paths, out_dir)
+            if archive.files:
+                ArchiveWriter().run(archive, os.path.join(out_dir, config.PLOTS_PATH))
+
+        # cleanup temporary files
+        print('Cleaning up ...')
+        if not keep_individual_outputs:
+
+            path_patterns = (
+                [os.path.join(out_dir, '**', '*.' + format.value) for format in report_formats if format != ReportFormat.h5]
+                + [os.path.join(out_dir, '**', '*.' + format.value) for format in plot_formats]
+            )
+            for path_pattern in path_patterns:
+                for path in glob.glob(path_pattern, recursive=True):
+                    os.remove(path)
+
+            for dir_path, dir_names, file_names in os.walk(out_dir, topdown=False):
+                for dir_name in list(dir_names):
+                    full_dir_name = os.path.join(dir_path, dir_name)
+                    if not os.path.isdir(full_dir_name):
+                        dir_names.remove(dir_name)
+                    elif not os.listdir(full_dir_name):
+                        # not reachable because directory would
+                        # have already been removed by the iteration for the directory
+                        shutil.rmtree(full_dir_name)  # pragma: no cover
+                        dir_names.remove(dir_name)  # pragma: no cover
+                if not dir_names and not file_names:
+                    shutil.rmtree(dir_path)
+
+        shutil.rmtree(archive_tmp_dir)
+
+        # update status
+        log.status = Status.FAILED if exceptions else Status.SUCCEEDED
+        log.duration = (datetime.datetime.now() - start_time).total_seconds()
+        log.finalize()
+
+        # summarize execution
+        print('')
+        print('============= SUMMARY =============')
+        print(get_summary_combine_archive_log(log))
 
     # update status
-    log.status = Status.FAILED if exceptions else Status.SUCCEEDED
-    log.duration = (datetime.datetime.now() - start_time).total_seconds()
-    log.finalize()
+    log.output = archive_captured.get_text()
     log.export()
 
-    # summarize execution
-    print('')
-    print('============= SUMMARY =============')
-    print(get_summary_combine_archive_log(log))
-
     # raise exceptions
     if exceptions:
         msg = 'The COMBINE/OMEX did not execute successfully:\n\n  {}'.format(
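
Note: the substance of this patch is that archive unpacking and SED-ML discovery now run inside a try/except that records the failure in the log (status, exception, captured output, duration), exports the log, and only then re-raises, so callers always receive a finalized log even when the archive cannot be read. The following is a minimal, self-contained sketch of that pattern; the ExecutionLog class and run() function are hypothetical stand-ins for illustration, not the biosimulators_utils API.

    import datetime
    import tempfile


    class ExecutionLog(object):
        """Hypothetical stand-in for CombineArchiveLog."""

        def __init__(self):
            self.status = 'QUEUED'
            self.exception = None
            self.duration = None

        def export(self):
            # a real log would be serialized to disk here
            print('log: status={}, duration={}s'.format(self.status, self.duration))


    def run(unpack_archive):
        log = ExecutionLog()
        start_time = datetime.datetime.now()
        try:
            tmp_dir = tempfile.mkdtemp()
            unpack_archive(tmp_dir)
        except Exception as exception:
            # the key change: record and export the failure before propagating it
            log.status = 'FAILED'
            log.exception = exception
            log.duration = (datetime.datetime.now() - start_time).total_seconds()
            log.export()
            raise
        log.status = 'RUNNING'
        log.export()
        # ... execute the archive's SED-ML documents, logging each one ...


    if __name__ == '__main__':
        def corrupt_archive(tmp_dir):
            raise ValueError('archive is corrupt')

        try:
            run(corrupt_archive)
        except ValueError:
            pass  # the failure was already logged and exported by run()

The patch applies the same idea at two granularities: an outer StandardOutputErrorCapturer (archive_captured) collects output for the whole archive, while the per-document capturer (doc_captured) scopes each SED-ML file's output to its own document log.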