From 108afc90fa77f29336acbe1707df90334c184305 Mon Sep 17 00:00:00 2001
From: Eivind Jahren
Date: Tue, 30 Jul 2024 12:16:26 +0200
Subject: [PATCH] Move csv_export2 into ert

---
 pyproject.toml                                |   2 -
 src/semeio/workflows/csv_export2/__init__.py  |   0
 .../workflows/csv_export2/csv_export2.py      | 154 -----------
 tests/test_console_scripts.py                 |   1 -
 tests/workflows/csv_export2/__init__.py       |   0
 tests/workflows/csv_export2/conftest.py       |  69 -----
 .../test_ert_integration_errors/csv_data.csv  |  17 --
 .../workflows/csv_export2/test_integration.py | 255 ------------------
 8 files changed, 498 deletions(-)
 delete mode 100644 src/semeio/workflows/csv_export2/__init__.py
 delete mode 100644 src/semeio/workflows/csv_export2/csv_export2.py
 delete mode 100644 tests/workflows/csv_export2/__init__.py
 delete mode 100644 tests/workflows/csv_export2/conftest.py
 delete mode 100644 tests/workflows/csv_export2/snapshots/test_integration/test_ert_integration_errors/csv_data.csv
 delete mode 100644 tests/workflows/csv_export2/test_integration.py

diff --git a/pyproject.toml b/pyproject.toml
index 7bca2fd7..2e01aea6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -49,12 +49,10 @@ repository = "https://github.com/equinor/semeio"
 
 [project.entry-points."ert"]
 semeio_forward_models = "semeio.hook_implementations.forward_models"
-CsvExport2Job = "semeio.workflows.csv_export2.csv_export2"
 AhmAnalysisJob = "semeio.workflows.ahm_analysis.ahmanalysis"
 LocalisationConfigJob = "semeio.workflows.localisation.local_config_script"
 
 [project.entry-points."console_scripts"]
-csv_export2 = "semeio.workflows.csv_export2.csv_export2:cli"
 overburden_timeshift = "semeio.forward_models.scripts.overburden_timeshift:main_entry_point"
 design2params = "semeio.forward_models.scripts.design2params:main_entry_point"
 gendata_rft = "semeio.forward_models.scripts.gendata_rft:main_entry_point"
diff --git a/src/semeio/workflows/csv_export2/__init__.py b/src/semeio/workflows/csv_export2/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/semeio/workflows/csv_export2/csv_export2.py b/src/semeio/workflows/csv_export2/csv_export2.py
deleted file mode 100644
index b916184e..00000000
--- a/src/semeio/workflows/csv_export2/csv_export2.py
+++ /dev/null
@@ -1,154 +0,0 @@
-import argparse
-import sys
-
-import pandas as pd
-from ert import ErtScript, plugin
-from fmu import ensemble
-
-DESCRIPTION = """
-CSV_EXPORT2 will export selected Eclipse summary vectors to a CSV file.
-The vector selection is independent of the ``SUMMARY`` keywords in the
-ert config file.
-
-The CSV file will look like:
-
-======== ==== =========== ==== ======
-ENSEMBLE REAL DATE        FOPR FOPT
-======== ==== =========== ==== ======
-iter-0   0    2020-01-01  800  0
-iter-0   0    2020-02-01  1000 365000
-iter-0   1    2020-01-01  700  0
-iter-0   1    2020-02-01  1100 401500
-======== ==== =========== ==== ======
-
-The time frequency must be chosen. If ``raw``, the original timesteps from
-Eclipse are used, and they will be individual per realization. If ``daily``,
-``weekly``, ``monthly`` or ``yearly`` is chosen, only data at those dates are
-given for all realizations. Rate data (e.g. FOPR) is valid for the given
-dates, but cannot be summed up to cumulative data when time interpolation is
-applied. Cumulative columns (e.g. FOPT) are time-interpolated linearly. See
-the `documentation on fmu-ensemble
-<https://equinor.github.io/fmu-ensemble/>`_
-for more details on rate handling.
-
-Columns are selected by a list of strings, where the wildcard characters ``?``
-(matches exactly one character) and ``*`` (matches zero or more characters) can
-be used to select multiple columns.
-
-A column count above 1000 increases the probability of problems downstream,
-depending on which applications are put into use. The column count depends on
-the combination of wildcards used in this workflow and the actual vectors that
-are requested in the Eclipse DATA file. A wildcard like ``W*`` can in certain
-cases (e.g. Eclipse simulations with 100+ wells) produce thousands of vectors,
-and can then be replaced by something more explicit like ``WOPT* WGPT* WWPT*``.
-"""  # noqa
-
-EXAMPLES = """
-Example
--------
-
-Add a file named e.g. ``ert/bin/workflows/QC_CSVEXPORT2`` with the contents::
-
-  MAKE_DIRECTORY <CASEDIR>/share/summary/
-  EXPORT_RUNPATH * | *
-  CSV_EXPORT2 <RUNPATH_FILE> <CASEDIR>/share/summary/<CASE>.csv monthly F* W* TCPU TIMESTEP
-
-(where ``<CASEDIR>`` typically points to ``/scratch/..``). Adjust all three
-lines to your needs.
-
-``EXPORT_RUNPATH`` in the workflow file is added to ensure all realizations and
-all iterations are included in the RUNPATH file. If you have rerun only a
-subset of your ensemble, the RUNPATH file will only contain those unless this
-statement is included.
-
-Add to your ERT config to have the workflow automatically executed on successful
-runs::
-
-  LOAD_WORKFLOW ../bin/workflows/QC_CSVEXPORT2
-  HOOK_WORKFLOW QC_CSVEXPORT2 POST_SIMULATION
-
-"""  # noqa
-
-
-def csv_exporter(runpathfile, time_index, outputfile, column_keys=None):
-    """Export CSV data (summary and parameters) from an EnsembleSet
-
-    The EnsembleSet is described by a runpathfile which must exist
-    and point to realizations"""
-    ensemble_set = ensemble.EnsembleSet(
-        name="ERT EnsembleSet for CSV_EXPORT2", runpathfile=runpathfile
-    )
-    try:
-        summary = ensemble_set.load_smry(
-            time_index=time_index, column_keys=column_keys
-        )
-        parameters = ensemble_set.parameters
-    except KeyError as exc:
-        raise UserWarning("No data found") from exc
-
-    if not parameters.empty:
-        pd.merge(summary, parameters).to_csv(outputfile, index=False)
-    else:
-        summary.to_csv(outputfile, index=False)
-
-
-class CsvExport2Job(ErtScript):
-    def run(self, *args, **_):
-        main(args)
-
-
-def main(args):
-    parser = csv_export_parser()
-    args = parser.parse_args(args)
-
-    csv_exporter(
-        runpathfile=args.runpathfile,
-        time_index=args.time_index,
-        outputfile=args.outputfile,
-        column_keys=args.column_keys,
-    )
-
-    print(f"{args.time_index} csv-export written to {args.outputfile}")
-
-
-def csv_export_parser():
-    """Set up the argument parser"""
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "runpathfile",
-        type=str,
-        help=(
-            "Path to ERT RUNPATH-file, "
-            "usually the ERT magic variable <RUNPATH_FILE> can be used"
-        ),
-    )
-    parser.add_argument(
-        "outputfile",
-        type=str,
-        help="Path to CSV file to be written. The directory pointed to must exist.",
-    )
-    parser.add_argument(
-        "time_index",
-        type=str,
-        default="monthly",
-        help=(
-            "Time interval specifier for the output. "
" - "This argument is passed on to fmu-ensemble, " - "supported specifiers are 'raw', 'daily', 'weekly', 'monthly' and 'yearly'" - ), - ) - parser.add_argument( - "column_keys", nargs="+", default=None, help="List of summary vector wildcards" - ) - return parser - - -@plugin(name="semeio") -def legacy_ertscript_workflow(config): - workflow = config.add_workflow(CsvExport2Job, "CSV_EXPORT2") - workflow.parser = csv_export_parser - workflow.description = DESCRIPTION - workflow.examples = EXAMPLES - workflow.category = "export" - - -def cli(): - main(sys.argv[1:]) diff --git a/tests/test_console_scripts.py b/tests/test_console_scripts.py index e527c827..e8ef66e2 100644 --- a/tests/test_console_scripts.py +++ b/tests/test_console_scripts.py @@ -5,7 +5,6 @@ @pytest.mark.parametrize( "entry_point", [ - "csv_export2", "overburden_timeshift", "design2params", "gendata_rft", diff --git a/tests/workflows/csv_export2/__init__.py b/tests/workflows/csv_export2/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/workflows/csv_export2/conftest.py b/tests/workflows/csv_export2/conftest.py deleted file mode 100644 index 84046c0c..00000000 --- a/tests/workflows/csv_export2/conftest.py +++ /dev/null @@ -1,69 +0,0 @@ -import os - -import pytest - -NORNE_DIR = os.path.join(os.path.dirname(__file__), "../../test_data/norne") - - -def mock_norne_data(reals, iters, parameters=True): - # pylint: disable=consider-using-f-string - """From a single UNSMRY file, produce arbitrary sized ensembles. - - Summary data will be equivalent over realizations, but the - parameters.txt is made unique. - - Writes realization-*/iter-* file structure in cwd. - - Args: - reals (list): integers with realization indices wanted - iters (list): integers with iter indices wanted - parameters (bool): Whether to write parameters.txt in each runpath - """ - for real in reals: - for iteration in iters: - runpath = os.path.join(f"realization-{real}", f"iter-{iteration}") - - os.makedirs(runpath, exist_ok=True) - - os.symlink( - os.path.join(NORNE_DIR, "NORNE_ATW2013.UNSMRY"), - os.path.join(runpath, f"NORNE_{real}.UNSMRY"), - ) - os.symlink( - os.path.join(NORNE_DIR, "NORNE_ATW2013.SMSPEC"), - os.path.join(runpath, f"NORNE_{real}.SMSPEC"), - ) - if parameters: - with open( - os.path.join(runpath, "parameters.txt"), "w", encoding="utf-8" - ) as p_fileh: - p_fileh.write(f"FOO 1{real}{iteration}") - # Ensure fmu-ensemble does not complain on missing STATUS - with open(os.path.join(runpath, "STATUS"), "w", encoding="utf-8") as file_h: - file_h.write("a:b\na: 09:00:00 .... 
-
-    with open("runpathfile", "w", encoding="utf-8") as file_h:
-        for iteration in iters:
-            for real in reals:
-                runpath = os.path.join(f"realization-{real}", f"iter-{iteration}")
-                file_h.write(f"{real:03d} {runpath} NORNE_{real} {iteration:03d}\n")
-
-
-@pytest.fixture()
-def norne_mocked_ensembleset(setup_tmpdir):
-    # pylint: disable=unused-argument
-    mock_norne_data(reals=[0, 1], iters=[0, 1], parameters=True)
-
-
-@pytest.fixture()
-def norne_mocked_ensembleset_noparams(setup_tmpdir):
-    # pylint: disable=unused-argument
-    mock_norne_data(reals=[0, 1], iters=[0, 1], parameters=False)
-
-
-@pytest.fixture(name="setup_tmpdir")
-def fixture_setup_tmpdir(tmpdir):
-    cwd = os.getcwd()
-    tmpdir.chdir()
-    yield
-    os.chdir(cwd)
diff --git a/tests/workflows/csv_export2/snapshots/test_integration/test_ert_integration_errors/csv_data.csv b/tests/workflows/csv_export2/snapshots/test_integration/test_ert_integration_errors/csv_data.csv
deleted file mode 100644
index 13fef105..00000000
--- a/tests/workflows/csv_export2/snapshots/test_integration/test_ert_integration_errors/csv_data.csv
+++ /dev/null
@@ -1,17 +0,0 @@
-,ENSEMBLE,REAL,DATE,FOPT,FOO
-0,iter-0,0,1997-11-01,0.0,100
-1,iter-0,0,1997-12-01,131841.109375,100
-2,iter-0,0,1998-01-01,427230.78125,100
-3,iter-0,0,1998-02-01,954872.8125,100
-4,iter-0,1,1997-11-01,0.0,110
-5,iter-0,1,1997-12-01,131841.109375,110
-6,iter-0,1,1998-01-01,427230.78125,110
-7,iter-0,1,1998-02-01,954872.8125,110
-8,iter-1,0,1997-11-01,0.0,101
-9,iter-1,0,1997-12-01,131841.109375,101
-10,iter-1,0,1998-01-01,427230.78125,101
-11,iter-1,0,1998-02-01,954872.8125,101
-12,iter-1,1,1997-11-01,0.0,111
-13,iter-1,1,1997-12-01,131841.109375,111
-14,iter-1,1,1998-01-01,427230.78125,111
-15,iter-1,1,1998-02-01,954872.8125,111
diff --git a/tests/workflows/csv_export2/test_integration.py b/tests/workflows/csv_export2/test_integration.py
deleted file mode 100644
index 811592bd..00000000
--- a/tests/workflows/csv_export2/test_integration.py
+++ /dev/null
@@ -1,255 +0,0 @@
-import os
-import shutil
-import subprocess
-from pathlib import Path
-
-import pandas as pd
-import pytest
-import rstcheck_core.checker
-
-from semeio.workflows.csv_export2 import csv_export2
-
-NORNE_VECS = ["FGPT", "FLPT", "FOPT", "FVPT", "FWPT"]
-
-
-@pytest.mark.usefixtures("norne_mocked_ensembleset")
-def test_that_a_not_found_realization_is_skipped():
-    shutil.rmtree("realization-1/iter-1")
-    csv_export2.csv_exporter(
-        runpathfile="runpathfile",
-        time_index="yearly",
-        outputfile="unsmry--yearly.csv",
-        column_keys=["F?PT"],
-    )
-    verify_exported_file(
-        "unsmry--yearly.csv",
-        ["ENSEMBLE", "REAL", "DATE"] + NORNE_VECS + ["FOO"],
-        {
-            ("iter-0", 0),
-            ("iter-0", 1),
-            ("iter-1", 0),
-        },
-    )
-
-
-@pytest.mark.usefixtures("norne_mocked_ensembleset")
-def test_that_a_failed_realization_is_skipped():
-    os.remove("realization-0/iter-1/NORNE_0.SMSPEC")
-    csv_export2.csv_exporter(
-        runpathfile="runpathfile",
-        time_index="yearly",
-        outputfile="unsmry--yearly.csv",
-        column_keys=["F?PT"],
-    )
-    verify_exported_file(
-        "unsmry--yearly.csv",
-        ["ENSEMBLE", "REAL", "DATE"] + NORNE_VECS + ["FOO"],
-        {
-            ("iter-0", 0),
-            ("iter-0", 1),
-            ("iter-1", 1),
-        },
-    )
-
-
-@pytest.mark.usefixtures("norne_mocked_ensembleset")
-def test_that_a_missing_realization_index_is_ok():
-    rp_lines = Path("runpathfile").read_text(encoding="utf-8").splitlines()
-    Path("sliced_runpathfile").write_text(
-        rp_lines[1] + "\n" + rp_lines[3], encoding="utf-8"
-    )
-    csv_export2.csv_exporter(
-        runpathfile="sliced_runpathfile",
time_index="yearly", - outputfile="unsmry--yearly.csv", - column_keys=["F?PT"], - ) - verify_exported_file( - "unsmry--yearly.csv", - ["ENSEMBLE", "REAL", "DATE"] + NORNE_VECS + ["FOO"], - { - ("iter-0", 1), - ("iter-1", 1), - }, - ) - - -@pytest.mark.usefixtures("norne_mocked_ensembleset") -def test_that_iterations_in_runpathfile_cannot_be_defaulted(): - shutil.move("realization-0/iter-0", "real0") - shutil.move("realization-1/iter-0", "real1") - shutil.rmtree("realization-0") - shutil.rmtree("realization-1") - Path("runpathfile").write_text( - "000 real0 NORNE_0\n001 real1 NORNE_1\n", encoding="utf-8" - ) - - with pytest.raises(UserWarning): - csv_export2.csv_exporter( - runpathfile="runpathfile", - time_index="yearly", - outputfile="unsmry--yearly.csv", - column_keys=["F?PT"], - ) - - -def test_empty_file_yields_user_warning(): - with open("empty_file", "a", encoding="utf-8") as empty_file, pytest.raises( - UserWarning, match="No data found" - ): - csv_export2.csv_exporter( - runpathfile=empty_file.name, - time_index="raw", - outputfile="unsmry--yearly.csv", - column_keys=["*"], - ) - - -@pytest.mark.parametrize("input_rst", [csv_export2.DESCRIPTION, csv_export2.EXAMPLES]) -def test_valid_rst(input_rst): - """ - Check that the documentation passed through the plugin system is - valid rst - """ - assert not list(rstcheck_core.checker.check_source(input_rst)) - - -@pytest.mark.usefixtures("norne_mocked_ensembleset") -def test_norne_ensemble(): - csv_export2.csv_exporter( - runpathfile="runpathfile", - time_index="yearly", - outputfile="unsmry--yearly.csv", - column_keys=["F?PT"], - ) - verify_exported_file( - "unsmry--yearly.csv", - ["ENSEMBLE", "REAL", "DATE"] + NORNE_VECS + ["FOO"], - { - ("iter-0", 0), - ("iter-0", 1), - ("iter-1", 0), - ("iter-1", 1), - }, - ) - - -@pytest.mark.usefixtures("norne_mocked_ensembleset_noparams") -def test_norne_ensemble_noparams(): - csv_export2.csv_exporter( - runpathfile="runpathfile", - time_index="yearly", - outputfile="unsmry--yearly.csv", - column_keys=["FOPT"], - ) - verify_exported_file( - "unsmry--yearly.csv", - ["ENSEMBLE", "REAL", "DATE", "FOPT"], - { - ("iter-0", 0), - ("iter-0", 1), - ("iter-1", 0), - ("iter-1", 1), - }, - ) - - -def verify_exported_file(exported_file_name, result_header, result_iter_rel): - """Verify an exported CSV file with respect to: - - * Exactly the set of requested headers is found - * The realizations and iterations that exist must equal - given set of tuples. - - Args: - exported_file_name (str): path to CSV file. - result_header (list of str): The strings required in the header. 
-        result_iter_rel (set): Set of 2-tuples: {(iterstring, realidx)}
-    """
-    dframe = pd.read_csv(exported_file_name)
-    assert set(dframe.columns) == set(result_header)
-    assert (
-        set(dframe[["ENSEMBLE", "REAL"]].itertuples(index=False, name=None))
-        == result_iter_rel
-    )
-
-
-@pytest.mark.ert_integration
-@pytest.mark.usefixtures("norne_mocked_ensembleset")
-def test_ert_integration():
-    """Mock an ERT config and test the workflow"""
-    with open("FOO.DATA", "w", encoding="utf-8") as file_h:
-        file_h.write("--Empty")
-
-    with open("wf_csvexport", "w", encoding="utf-8") as file_h:
-        file_h.write(
-            # This workflow is representing the example in csv_export2.py:
-            "MAKE_DIRECTORY csv_output\n"
-            "EXPORT_RUNPATH * | *\n"  # (not really relevant in mocked case)
-            "CSV_EXPORT2 runpathfile csv_output/data.csv monthly FOPT\n"
-            # Example in documentation uses <RUNPATH_FILE> which is
-            # linked to the RUNPATH keyword that we don't use in this
-            # test (mocking data gets more complex if that is to be used)
-        )
-
-    ert_config = [
-        "ECLBASE FOO.DATA",
-        "QUEUE_SYSTEM LOCAL",
-        "NUM_REALIZATIONS 2",
-        "LOAD_WORKFLOW wf_csvexport",
-        "HOOK_WORKFLOW wf_csvexport PRE_SIMULATION",
-    ]
-
-    ert_config_fname = "test.ert"
-    with open(ert_config_fname, "w", encoding="utf-8") as file_h:
-        file_h.write("\n".join(ert_config))
-
-    subprocess.run(["ert", "test_run", ert_config_fname], check=True)
-
-    assert pd.read_csv("csv_output/data.csv").shape == (16, 5)
-
-
-@pytest.mark.ert_integration
-@pytest.mark.usefixtures("norne_mocked_ensembleset")
-def test_ert_integration_errors(snapshot):
-    """Test CSV_EXPORT2 when the runpathfile points to non-existing realizations
-
-    This test proves that CSV_EXPORT2 happily skips non-existing
-    realizations, but emits a warning that there is no STATUS file.
-    """
-    with open("FOO.DATA", "w", encoding="utf-8") as file_h:
-        file_h.write("--Empty")
-
-    # Append a non-existing realization to the runpathfile:
-    with open("runpathfile", "a", encoding="utf-8") as file_h:
-        file_h.write("002 realization-2/iter-0 NORNE_1 000")
-
-    with open("wf_csvexport", "w", encoding="utf-8") as file_h:
-        file_h.write("CSV_EXPORT2 runpathfile data.csv monthly FOPT\n")
-
-    ert_config = [
-        "ECLBASE FOO.DATA",
-        "QUEUE_SYSTEM LOCAL",
-        "NUM_REALIZATIONS 2",
-        "LOAD_WORKFLOW wf_csvexport",
-        "HOOK_WORKFLOW wf_csvexport PRE_SIMULATION",
-    ]
-
-    ert_config_fname = "test.ert"
-    with open(ert_config_fname, "w", encoding="utf-8") as file_h:
-        file_h.write("\n".join(ert_config))
-
-    subprocess.run(["ert", "test_run", ert_config_fname], check=True)
-
-    log_file = next(Path("logs").glob("ert-log*txt"))
-    ertlog = log_file.read_text(encoding="utf-8")
-
-    assert "No STATUS file" in ertlog
-    assert "realization-2/iter-0" in ertlog
-
-    assert os.path.exists("data.csv")
-    data = pd.read_csv("data.csv")
-    snapshot.assert_match(
-        data.to_csv(lineterminator="\n"),
-        "csv_data.csv",
-    )