From e01755cdcf2757be08d237b2cd4970301d6f927c Mon Sep 17 00:00:00 2001
From: Eivind Jahren
Date: Fri, 16 Aug 2024 15:52:59 +0200
Subject: [PATCH] Revert "Move csv_export2 into ert"

This reverts commit 00c73cbef1e4eed24d18cc08983615906945a935.
---
 pyproject.toml                                 |   3 +
 src/semeio/workflows/csv_export2/__init__.py   |   0
 .../workflows/csv_export2/csv_export2.py       | 154 +++++++++++
 tests/test_console_scripts.py                  |   1 +
 tests/workflows/csv_export2/__init__.py        |   0
 tests/workflows/csv_export2/conftest.py        |  69 +++++
 .../test_ert_integration_errors/csv_data.csv   |  17 ++
 .../workflows/csv_export2/test_integration.py  | 255 ++++++++++++++++++
 8 files changed, 499 insertions(+)
 create mode 100644 src/semeio/workflows/csv_export2/__init__.py
 create mode 100644 src/semeio/workflows/csv_export2/csv_export2.py
 create mode 100644 tests/workflows/csv_export2/__init__.py
 create mode 100644 tests/workflows/csv_export2/conftest.py
 create mode 100644 tests/workflows/csv_export2/snapshots/test_integration/test_ert_integration_errors/csv_data.csv
 create mode 100644 tests/workflows/csv_export2/test_integration.py

diff --git a/pyproject.toml b/pyproject.toml
index 7a2765314..7bca2fd7f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,6 +39,7 @@ dependencies = [
     "scipy",
     "xlrd",
     "pyscal>=0.4.0",
+    "fmu-ensemble>1.6.5",
     "segyio",
     "xtgeo>=2.15",
 ]
@@ -48,10 +49,12 @@ repository = "https://github.com/equinor/semeio"

 [project.entry-points."ert"]
 semeio_forward_models = "semeio.hook_implementations.forward_models"
+CsvExport2Job = "semeio.workflows.csv_export2.csv_export2"
 AhmAnalysisJob = "semeio.workflows.ahm_analysis.ahmanalysis"
 LocalisationConfigJob = "semeio.workflows.localisation.local_config_script"

 [project.entry-points."console_scripts"]
+csv_export2 = "semeio.workflows.csv_export2.csv_export2:cli"
 overburden_timeshift = "semeio.forward_models.scripts.overburden_timeshift:main_entry_point"
 design2params = "semeio.forward_models.scripts.design2params:main_entry_point"
 gendata_rft = "semeio.forward_models.scripts.gendata_rft:main_entry_point"
diff --git a/src/semeio/workflows/csv_export2/__init__.py b/src/semeio/workflows/csv_export2/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/semeio/workflows/csv_export2/csv_export2.py b/src/semeio/workflows/csv_export2/csv_export2.py
new file mode 100644
index 000000000..b916184e2
--- /dev/null
+++ b/src/semeio/workflows/csv_export2/csv_export2.py
@@ -0,0 +1,154 @@
import argparse
import sys

import pandas as pd
from ert import ErtScript, plugin
from fmu import ensemble

DESCRIPTION = """
CSV_EXPORT2 will export selected Eclipse summary vectors to a CSV file.
The vector selection is independent of the ``SUMMARY`` keywords in the
ERT config file.

The CSV file will look like:

======== ==== =========== ==== ======
ENSEMBLE REAL DATE        FOPR FOPT
======== ==== =========== ==== ======
iter-0   0    2020-01-01  800  0
iter-0   0    2020-02-01  1000 365000
iter-0   1    2020-01-01  700  0
iter-0   1    2020-02-01  1100 401500
======== ==== =========== ==== ======

A time frequency must be chosen. If ``raw``, the original timesteps from
Eclipse are used, and they will differ per realization. If ``daily``,
``weekly``, ``monthly`` or ``yearly`` is chosen, data is given at exactly
those dates for all realizations. Rate data (e.g. FOPR) is valid for the
given dates, but cannot be summed to cumulative data after time
interpolation. Cumulative columns (e.g. FOPT) are time-interpolated
linearly. See the fmu-ensemble documentation for more details on rate
handling.

Columns are selected by a list of strings, where the wildcard characters
``?`` (matches exactly one character) and ``*`` (matches zero or more
characters) can be used to select multiple columns.

A column count above 1000 increases the probability of problems downstream,
depending on which applications consume the file. The column count depends
on the combination of wildcards used in this workflow and the actual vectors
requested in the Eclipse DATA file. A wildcard like ``W*`` can in certain
cases (e.g. Eclipse simulations with 100+ wells) produce thousands of
vectors, and should then be replaced by something more explicit like
``WOPT* WGPT* WWPT*``.
"""  # noqa

EXAMPLES = """
Example
-------

Add a file named e.g. ``ert/bin/workflows/QC_CSVEXPORT2`` with the contents::

  MAKE_DIRECTORY <CASEDIR>/share/summary/
  EXPORT_RUNPATH * | *
  CSV_EXPORT2 <RUNPATH_FILE> <CASEDIR>/share/summary/<CASE>.csv monthly F* W* TCPU TIMESTEP

(where ``<CASEDIR>`` typically points to ``/scratch/..``). Adjust all three
lines to your needs.

``EXPORT_RUNPATH`` in the workflow file is added to ensure that all
realizations and all iterations are included in the RUNPATH file. If you
have rerun only a subset of your ensemble, the RUNPATH file will contain
only that subset unless this statement is included.

Add to your ERT config to have the workflow automatically executed on
successful runs::

  LOAD_WORKFLOW ../bin/workflows/QC_CSVEXPORT2
  HOOK_WORKFLOW QC_CSVEXPORT2 POST_SIMULATION
"""  # noqa


def csv_exporter(runpathfile, time_index, outputfile, column_keys=None):
    """Export CSV data (summary and parameters) from an EnsembleSet.

    The EnsembleSet is described by a runpathfile, which must exist
    and point to realizations."""
    ensemble_set = ensemble.EnsembleSet(
        name="ERT EnsembleSet for CSV_EXPORT2", runpathfile=runpathfile
    )
    try:
        summary = ensemble_set.load_smry(
            time_index=time_index, column_keys=column_keys
        )
        parameters = ensemble_set.parameters
    except KeyError as exc:
        raise UserWarning("No data found") from exc

    if not parameters.empty:
        pd.merge(summary, parameters).to_csv(outputfile, index=False)
    else:
        summary.to_csv(outputfile, index=False)


class CsvExport2Job(ErtScript):
    def run(self, *args, **_):
        main(args)


def main(args):
    parser = csv_export_parser()
    args = parser.parse_args(args)

    csv_exporter(
        runpathfile=args.runpathfile,
        time_index=args.time_index,
        outputfile=args.outputfile,
        column_keys=args.column_keys,
    )

    print(f"{args.time_index} csv-export written to {args.outputfile}")


def csv_export_parser():
    """Set up the argument parser"""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "runpathfile",
        type=str,
        help=(
            "Path to ERT RUNPATH-file, "
            "usually the ERT magic variable <RUNPATH_FILE> can be used"
        ),
    )
    parser.add_argument(
        "outputfile",
        type=str,
        help="Path to CSV file to be written. The directory pointed to must exist.",
    )
    parser.add_argument(
        "time_index",
        type=str,
        default="monthly",
        help=(
            "Time interval specifier for the output. "
            "This argument is passed on to fmu-ensemble, "
            "supported specifiers are 'raw', 'daily', 'weekly', 'monthly' and 'yearly'"
        ),
    )
    parser.add_argument(
        "column_keys", nargs="+", default=None, help="List of summary vector wildcards"
    )
    return parser


@plugin(name="semeio")
def legacy_ertscript_workflow(config):
    workflow = config.add_workflow(CsvExport2Job, "CSV_EXPORT2")
    workflow.parser = csv_export_parser
    workflow.description = DESCRIPTION
    workflow.examples = EXAMPLES
    workflow.category = "export"


def cli():
    main(sys.argv[1:])
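For orientation, the exporter can also be driven directly from Python. The
following minimal sketch assumes the runpathfile format used by the tests
below (``<realization> <runpath> <eclbase> <iteration>`` per line); the
paths and vector names are hypothetical::

  # Hedged sketch: hypothetical paths; the runpathfile format is inferred
  # from tests/workflows/csv_export2/conftest.py below.
  from semeio.workflows.csv_export2 import csv_export2

  with open("runpathfile", "w", encoding="utf-8") as file_h:
      file_h.write("000 /scratch/case/realization-0/iter-0 NORNE_0 000\n")
      file_h.write("001 /scratch/case/realization-1/iter-0 NORNE_1 000\n")

  csv_export2.csv_exporter(
      runpathfile="runpathfile",
      time_index="monthly",
      outputfile="unsmry--monthly.csv",
      column_keys=["FOPT", "FOPR"],
  )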
diff --git a/tests/test_console_scripts.py b/tests/test_console_scripts.py
index e8ef66e25..e527c8279 100644
--- a/tests/test_console_scripts.py
+++ b/tests/test_console_scripts.py
@@ -5,6 +5,7 @@
 @pytest.mark.parametrize(
     "entry_point",
     [
+        "csv_export2",
         "overburden_timeshift",
         "design2params",
         "gendata_rft",
diff --git a/tests/workflows/csv_export2/__init__.py b/tests/workflows/csv_export2/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/workflows/csv_export2/conftest.py b/tests/workflows/csv_export2/conftest.py
new file mode 100644
index 000000000..84046c0cc
--- /dev/null
+++ b/tests/workflows/csv_export2/conftest.py
@@ -0,0 +1,69 @@
import os

import pytest

NORNE_DIR = os.path.join(os.path.dirname(__file__), "../../test_data/norne")


def mock_norne_data(reals, iters, parameters=True):
    # pylint: disable=consider-using-f-string
    """From a single UNSMRY file, produce arbitrarily sized ensembles.

    Summary data will be identical across realizations, but parameters.txt
    is made unique per runpath.

    Writes a realization-*/iter-* file structure in cwd.

    Args:
        reals (list): integers with realization indices wanted
        iters (list): integers with iter indices wanted
        parameters (bool): whether to write parameters.txt in each runpath
    """
    for real in reals:
        for iteration in iters:
            runpath = os.path.join(f"realization-{real}", f"iter-{iteration}")

            os.makedirs(runpath, exist_ok=True)

            os.symlink(
                os.path.join(NORNE_DIR, "NORNE_ATW2013.UNSMRY"),
                os.path.join(runpath, f"NORNE_{real}.UNSMRY"),
            )
            os.symlink(
                os.path.join(NORNE_DIR, "NORNE_ATW2013.SMSPEC"),
                os.path.join(runpath, f"NORNE_{real}.SMSPEC"),
            )
            if parameters:
                with open(
                    os.path.join(runpath, "parameters.txt"), "w", encoding="utf-8"
                ) as p_fileh:
                    p_fileh.write(f"FOO 1{real}{iteration}")
            # Ensure fmu-ensemble does not complain on missing STATUS
            with open(os.path.join(runpath, "STATUS"), "w", encoding="utf-8") as file_h:
                file_h.write("a:b\na: 09:00:00 .... 09:00:01")

    with open("runpathfile", "w", encoding="utf-8") as file_h:
        for iteration in iters:
            for real in reals:
                runpath = os.path.join(f"realization-{real}", f"iter-{iteration}")
                file_h.write(f"{real:03d} {runpath} NORNE_{real} {iteration:03d}\n")


@pytest.fixture()
def norne_mocked_ensembleset(setup_tmpdir):
    # pylint: disable=unused-argument
    mock_norne_data(reals=[0, 1], iters=[0, 1], parameters=True)


@pytest.fixture()
def norne_mocked_ensembleset_noparams(setup_tmpdir):
    # pylint: disable=unused-argument
    mock_norne_data(reals=[0, 1], iters=[0, 1], parameters=False)


@pytest.fixture(name="setup_tmpdir")
def fixture_setup_tmpdir(tmpdir):
    cwd = os.getcwd()
    tmpdir.chdir()
    yield
    os.chdir(cwd)
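To make the mocked layout concrete: with ``reals=[0, 1]`` and
``iters=[0, 1]``, the loops above (iteration outermost when writing the
runpathfile) produce exactly these four lines::

  000 realization-0/iter-0 NORNE_0 000
  001 realization-1/iter-0 NORNE_1 000
  000 realization-0/iter-1 NORNE_0 001
  001 realization-1/iter-1 NORNE_1 001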
" + "This argument is passed on to fmu-ensemble, " + "supported specifiers are 'raw', 'daily', 'weekly', 'monthly' and 'yearly'" + ), + ) + parser.add_argument( + "column_keys", nargs="+", default=None, help="List of summary vector wildcards" + ) + return parser + + +@plugin(name="semeio") +def legacy_ertscript_workflow(config): + workflow = config.add_workflow(CsvExport2Job, "CSV_EXPORT2") + workflow.parser = csv_export_parser + workflow.description = DESCRIPTION + workflow.examples = EXAMPLES + workflow.category = "export" + + +def cli(): + main(sys.argv[1:]) diff --git a/tests/test_console_scripts.py b/tests/test_console_scripts.py index e8ef66e25..e527c8279 100644 --- a/tests/test_console_scripts.py +++ b/tests/test_console_scripts.py @@ -5,6 +5,7 @@ @pytest.mark.parametrize( "entry_point", [ + "csv_export2", "overburden_timeshift", "design2params", "gendata_rft", diff --git a/tests/workflows/csv_export2/__init__.py b/tests/workflows/csv_export2/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/workflows/csv_export2/conftest.py b/tests/workflows/csv_export2/conftest.py new file mode 100644 index 000000000..84046c0cc --- /dev/null +++ b/tests/workflows/csv_export2/conftest.py @@ -0,0 +1,69 @@ +import os + +import pytest + +NORNE_DIR = os.path.join(os.path.dirname(__file__), "../../test_data/norne") + + +def mock_norne_data(reals, iters, parameters=True): + # pylint: disable=consider-using-f-string + """From a single UNSMRY file, produce arbitrary sized ensembles. + + Summary data will be equivalent over realizations, but the + parameters.txt is made unique. + + Writes realization-*/iter-* file structure in cwd. + + Args: + reals (list): integers with realization indices wanted + iters (list): integers with iter indices wanted + parameters (bool): Whether to write parameters.txt in each runpath + """ + for real in reals: + for iteration in iters: + runpath = os.path.join(f"realization-{real}", f"iter-{iteration}") + + os.makedirs(runpath, exist_ok=True) + + os.symlink( + os.path.join(NORNE_DIR, "NORNE_ATW2013.UNSMRY"), + os.path.join(runpath, f"NORNE_{real}.UNSMRY"), + ) + os.symlink( + os.path.join(NORNE_DIR, "NORNE_ATW2013.SMSPEC"), + os.path.join(runpath, f"NORNE_{real}.SMSPEC"), + ) + if parameters: + with open( + os.path.join(runpath, "parameters.txt"), "w", encoding="utf-8" + ) as p_fileh: + p_fileh.write(f"FOO 1{real}{iteration}") + # Ensure fmu-ensemble does not complain on missing STATUS + with open(os.path.join(runpath, "STATUS"), "w", encoding="utf-8") as file_h: + file_h.write("a:b\na: 09:00:00 .... 
diff --git a/tests/workflows/csv_export2/test_integration.py b/tests/workflows/csv_export2/test_integration.py
new file mode 100644
index 000000000..811592bd7
--- /dev/null
+++ b/tests/workflows/csv_export2/test_integration.py
@@ -0,0 +1,255 @@
import os
import shutil
import subprocess
from pathlib import Path

import pandas as pd
import pytest
import rstcheck_core.checker

from semeio.workflows.csv_export2 import csv_export2

NORNE_VECS = ["FGPT", "FLPT", "FOPT", "FVPT", "FWPT"]


@pytest.mark.usefixtures("norne_mocked_ensembleset")
def test_that_a_not_found_realization_is_skipped():
    shutil.rmtree("realization-1/iter-1")
    csv_export2.csv_exporter(
        runpathfile="runpathfile",
        time_index="yearly",
        outputfile="unsmry--yearly.csv",
        column_keys=["F?PT"],
    )
    verify_exported_file(
        "unsmry--yearly.csv",
        ["ENSEMBLE", "REAL", "DATE"] + NORNE_VECS + ["FOO"],
        {
            ("iter-0", 0),
            ("iter-0", 1),
            ("iter-1", 0),
        },
    )


@pytest.mark.usefixtures("norne_mocked_ensembleset")
def test_that_a_failed_realization_is_skipped():
    os.remove("realization-0/iter-1/NORNE_0.SMSPEC")
    csv_export2.csv_exporter(
        runpathfile="runpathfile",
        time_index="yearly",
        outputfile="unsmry--yearly.csv",
        column_keys=["F?PT"],
    )
    verify_exported_file(
        "unsmry--yearly.csv",
        ["ENSEMBLE", "REAL", "DATE"] + NORNE_VECS + ["FOO"],
        {
            ("iter-0", 0),
            ("iter-0", 1),
            ("iter-1", 1),
        },
    )


@pytest.mark.usefixtures("norne_mocked_ensembleset")
def test_that_a_missing_realization_index_is_ok():
    rp_lines = Path("runpathfile").read_text(encoding="utf-8").splitlines()
    Path("sliced_runpathfile").write_text(
        rp_lines[1] + "\n" + rp_lines[3], encoding="utf-8"
    )
    csv_export2.csv_exporter(
        runpathfile="sliced_runpathfile",
        time_index="yearly",
        outputfile="unsmry--yearly.csv",
        column_keys=["F?PT"],
    )
    verify_exported_file(
        "unsmry--yearly.csv",
        ["ENSEMBLE", "REAL", "DATE"] + NORNE_VECS + ["FOO"],
        {
            ("iter-0", 1),
            ("iter-1", 1),
        },
    )


@pytest.mark.usefixtures("norne_mocked_ensembleset")
def test_that_iterations_in_runpathfile_cannot_be_defaulted():
    shutil.move("realization-0/iter-0", "real0")
    shutil.move("realization-1/iter-0", "real1")
    shutil.rmtree("realization-0")
    shutil.rmtree("realization-1")
    Path("runpathfile").write_text(
        "000 real0 NORNE_0\n001 real1 NORNE_1\n", encoding="utf-8"
    )

    with pytest.raises(UserWarning):
        csv_export2.csv_exporter(
            runpathfile="runpathfile",
            time_index="yearly",
            outputfile="unsmry--yearly.csv",
            column_keys=["F?PT"],
        )
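For contrast with the four-column runpathfile written by the conftest, the
defaulted three-column line rejected above next to its accepted form::

  000 real0 NORNE_0        (three columns: iteration defaulted, rejected)
  000 real0 NORNE_0 000    (four columns: iteration explicit, accepted)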
def test_empty_file_yields_user_warning():
    with open("empty_file", "a", encoding="utf-8") as empty_file, pytest.raises(
        UserWarning, match="No data found"
    ):
        csv_export2.csv_exporter(
            runpathfile=empty_file.name,
            time_index="raw",
            outputfile="unsmry--yearly.csv",
            column_keys=["*"],
        )


@pytest.mark.parametrize("input_rst", [csv_export2.DESCRIPTION, csv_export2.EXAMPLES])
def test_valid_rst(input_rst):
    """
    Check that the documentation passed through the plugin system is
    valid rst
    """
    assert not list(rstcheck_core.checker.check_source(input_rst))


@pytest.mark.usefixtures("norne_mocked_ensembleset")
def test_norne_ensemble():
    csv_export2.csv_exporter(
        runpathfile="runpathfile",
        time_index="yearly",
        outputfile="unsmry--yearly.csv",
        column_keys=["F?PT"],
    )
    verify_exported_file(
        "unsmry--yearly.csv",
        ["ENSEMBLE", "REAL", "DATE"] + NORNE_VECS + ["FOO"],
        {
            ("iter-0", 0),
            ("iter-0", 1),
            ("iter-1", 0),
            ("iter-1", 1),
        },
    )


@pytest.mark.usefixtures("norne_mocked_ensembleset_noparams")
def test_norne_ensemble_noparams():
    csv_export2.csv_exporter(
        runpathfile="runpathfile",
        time_index="yearly",
        outputfile="unsmry--yearly.csv",
        column_keys=["FOPT"],
    )
    verify_exported_file(
        "unsmry--yearly.csv",
        ["ENSEMBLE", "REAL", "DATE", "FOPT"],
        {
            ("iter-0", 0),
            ("iter-0", 1),
            ("iter-1", 0),
            ("iter-1", 1),
        },
    )


def verify_exported_file(exported_file_name, result_header, result_iter_rel):
    """Verify an exported CSV file with respect to:

    * Exactly the set of requested headers is found
    * The realizations and iterations that exist must equal the
      given set of tuples.

    Args:
        exported_file_name (str): path to CSV file.
        result_header (list of str): the strings required in the header.
        result_iter_rel (set): set of 2-tuples: {(iterstring, realidx)}
    """
    dframe = pd.read_csv(exported_file_name)
    assert set(dframe.columns) == set(result_header)
    assert (
        set(dframe[["ENSEMBLE", "REAL"]].itertuples(index=False, name=None))
        == result_iter_rel
    )
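As a quick illustration of the set comparison in ``verify_exported_file``,
with toy data (assumed values)::

  import pandas as pd

  dframe = pd.DataFrame({"ENSEMBLE": ["iter-0", "iter-0"], "REAL": [0, 1]})
  # index=False, name=None yields plain tuples, one per row
  pairs = set(dframe[["ENSEMBLE", "REAL"]].itertuples(index=False, name=None))
  assert pairs == {("iter-0", 0), ("iter-0", 1)}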
@pytest.mark.ert_integration
@pytest.mark.usefixtures("norne_mocked_ensembleset")
def test_ert_integration():
    """Mock an ERT config and test the workflow"""
    with open("FOO.DATA", "w", encoding="utf-8") as file_h:
        file_h.write("--Empty")

    with open("wf_csvexport", "w", encoding="utf-8") as file_h:
        file_h.write(
            # This workflow is representing the example in csv_export2.py:
            "MAKE_DIRECTORY csv_output\n"
            "EXPORT_RUNPATH * | *\n"  # (not really relevant in mocked case)
            "CSV_EXPORT2 runpathfile csv_output/data.csv monthly FOPT\n"
            # The example in the documentation uses <RUNPATH_FILE>, which is
            # linked to the RUNPATH keyword that we don't use in this
            # test (mocking data gets more complex if that is to be used)
        )

    ert_config = [
        "ECLBASE FOO.DATA",
        "QUEUE_SYSTEM LOCAL",
        "NUM_REALIZATIONS 2",
        "LOAD_WORKFLOW wf_csvexport",
        "HOOK_WORKFLOW wf_csvexport PRE_SIMULATION",
    ]

    ert_config_fname = "test.ert"
    with open(ert_config_fname, "w", encoding="utf-8") as file_h:
        file_h.write("\n".join(ert_config))

    subprocess.run(["ert", "test_run", ert_config_fname], check=True)

    assert pd.read_csv("csv_output/data.csv").shape == (16, 5)


@pytest.mark.ert_integration
@pytest.mark.usefixtures("norne_mocked_ensembleset")
def test_ert_integration_errors(snapshot):
    """Test CSV_EXPORT2 when the runpathfile points to non-existing
    realizations.

    This test proves that CSV_EXPORT2 happily skips non-existing
    realizations, but emits a warning that there is no STATUS file.
    """
    with open("FOO.DATA", "w", encoding="utf-8") as file_h:
        file_h.write("--Empty")

    # Append a non-existing realization to the runpathfile:
    with open("runpathfile", "a", encoding="utf-8") as file_h:
        file_h.write("002 realization-2/iter-0 NORNE_1 000")

    with open("wf_csvexport", "w", encoding="utf-8") as file_h:
        file_h.write("CSV_EXPORT2 runpathfile data.csv monthly FOPT\n")

    ert_config = [
        "ECLBASE FOO.DATA",
        "QUEUE_SYSTEM LOCAL",
        "NUM_REALIZATIONS 2",
        "LOAD_WORKFLOW wf_csvexport",
        "HOOK_WORKFLOW wf_csvexport PRE_SIMULATION",
    ]

    ert_config_fname = "test.ert"
    with open(ert_config_fname, "w", encoding="utf-8") as file_h:
        file_h.write("\n".join(ert_config))

    subprocess.run(["ert", "test_run", ert_config_fname], check=True)

    log_file = next(Path("logs").glob("ert-log*txt"))
    ertlog = log_file.read_text(encoding="utf-8")

    assert "No STATUS file" in ertlog
    assert "realization-2/iter-0" in ertlog

    assert os.path.exists("data.csv")
    data = pd.read_csv("data.csv")
    snapshot.assert_match(
        data.to_csv(lineterminator="\n"),
        "csv_data.csv",
    )
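The restored ``csv_export2`` console script offers the same export outside
ERT. A hedged shell example (runpathfile and output path hypothetical),
following the argument order in ``csv_export_parser`` (runpathfile,
outputfile, time_index, then one or more column wildcards), with the
wildcard quoted to bypass shell globbing::

  csv_export2 runpathfile unsmry--monthly.csv monthly FOPT 'W*'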