From 11328295b06da7a8af68731d4899da9aacdb45c6 Mon Sep 17 00:00:00 2001
From: "Oddvar Lia (ST MSU GEO)" <olia@equinor.com>
Date: Thu, 26 Sep 2024 13:59:32 +0200
Subject: [PATCH] Generated test data is written to tmp directory, added
 subscript doc minimum version

---
 docs/scripts/field_statistics.rst             |   8 +
 ...IELD_PARAM_STATISTICS => FIELD_STATISTICS} |   0
 .../field_statistics/field_statistics.py      | 100 +++++++----
 .../wf_field_param_statistics                 |   1 -
 .../field_statistics/wf_field_statistics      |   2 +
 tests/test_field_statistics.py                | 159 ++++++++++--------
 6 files changed, 164 insertions(+), 106 deletions(-)
 create mode 100644 docs/scripts/field_statistics.rst
 rename src/subscript/field_statistics/{WF_FIELD_PARAM_STATISTICS => FIELD_STATISTICS} (100%)
 delete mode 100644 src/subscript/field_statistics/wf_field_param_statistics
 create mode 100644 src/subscript/field_statistics/wf_field_statistics
diff --git a/docs/scripts/field_statistics.rst b/docs/scripts/field_statistics.rst
new file mode 100644
index 000000000..9baa83529
--- /dev/null
+++ b/docs/scripts/field_statistics.rst
@@ -0,0 +1,8 @@
+FIELD_STATISTICS
+=================
+
+.. argparse::
+   :module: subscript.field_statistics.field_statistics
+   :func: get_parser
+   :prog: field_statistics
+   
\ No newline at end of file
diff --git a/src/subscript/field_statistics/WF_FIELD_PARAM_STATISTICS b/src/subscript/field_statistics/FIELD_STATISTICS
similarity index 100%
rename from src/subscript/field_statistics/WF_FIELD_PARAM_STATISTICS
rename to src/subscript/field_statistics/FIELD_STATISTICS
diff --git a/src/subscript/field_statistics/field_statistics.py b/src/subscript/field_statistics/field_statistics.py
index cc8ba98e5..66105b2f5 100644
--- a/src/subscript/field_statistics/field_statistics.py
+++ b/src/subscript/field_statistics/field_statistics.py
@@ -18,10 +18,12 @@
 import sys
 from pathlib import Path
 
+import ert
 import fmu.config.utilities as utils
 import numpy as np
 import xtgeo
 import yaml
+from ert.config import ErtScript
 
 import subscript
 
@@ -80,7 +82,7 @@
 
   # Example config file for wf_field_param_statistics
 
-field_stat:
+  field_stat:
     # Number of realizations for specified ensemble
     # Required.
     nreal: 100
@@ -139,7 +141,8 @@
         "Volon":   ["phit"]
 
     # Size of ertbox grid for (nx, ny, nz)
-    # Required.
+    # Required if the ERTBOX grid is not found as a file
+    # under rms/output/aps/ERTBOX.EGRID
     ertbox_size: [92, 146, 66]
 
     # Standard deviation estimator.
@@ -163,16 +166,21 @@
 LOAD_WORKFLOW           ../../bin/workflows/wf_field_param_statistics
 
 -- The workflow file to be located under ert/bin/workflows:
-WF_FIELD_PARAM_STATISTICS <FIELD_STAT_CONFIG_FILE>  <CONFIG_PATH>  <SCRATCH>/<USER>/<CASE_DIR>
-
--- The workflow job file to be located under ert/bin/jobs:
--- Workflow job for ERT to calculate
--- - mean and stdev of ensemble of continuous 3D parameters with name <name> saved for geogrid 
---   under <ensemble_path>/realization-*/iter-*/share/results/grids/geogrid--<name>.roff
--- - estimate facies probabilities of discrete 3D parameters with name <name> saved for geogrid
---   under <ensemble_path>/realization-*/iter-*/share/results/grids/geogrid--<name>.roff
+FIELD_STATISTICS -c <FIELD_STAT_CONFIG_FILE>
+                 -p <CONFIG_PATH>
+                 -e <SCRATCH>/<USER>/<CASE_DIR>
+                 -r <RESULT_PATH>
+-- Workflow job for ERT to calculate:
+--   Mean and standard deviations of specified continuous 3D parameters.
+--   Estimate of facies probabilities from discrete 3D parameter for facies.
+-- The input realizations are found under:
+--   <ensemble_path>/realization-*/iter-*/share/results/grids/geogrid--<name>.roff
+-- The output mean and standard deviations and facies probability estimates are saved
+-- under a directory specified by the user.
+-- The first three command line arguments are required, the last one (<RESULT_PATH>)
+-- has default 'share/grid_statistics' under <ensemble_path>.
 INTERNAL   False
-EXECUTABLE  ../scripts/wf_field_param_statistics.py
+EXECUTABLE  ../scripts/field_statistics.py
 
 MIN_ARG   6
 ARG_TYPE    0   STRING
@@ -187,15 +195,20 @@
 
 
 """  # noqa
+DEFAULT_RELATIVE_RESULT_PATH = "share/grid_statistics"
+GLOBAL_VARIABLES_FILE = "../../fmuconfig/output/global_variables.yml"
+ERTBOX_GRID_PATH = "../../rms/output/aps/ERTBOX.EGRID"
 
 
 def main():
     """Invocated from the command line, parsing command line arguments"""
     parser = get_parser()
     args = parser.parse_args()
-
     logger.setLevel(logging.INFO)
+    field_stat(args)
+
 
+def field_stat(args):
     # parse the config file for this script
     if not Path(args.configfile).exists():
         sys.exit("No such file:" + args.configfile)
@@ -207,23 +220,20 @@ def main():
     # Path to FMU project models ert/model directory (ordinary CONFIG PATH in ERT)
     if not Path(args.ertconfigpath).exists():
         sys.exit("No such file:" + args.ertconfigpath)
-    ert_config_path = args.ertconfigpath
+    ert_config_path = Path(args.ertconfigpath)
 
     # Path to ensemble on SCRATCH disk
     if not Path(args.ensemblepath).exists():
         sys.exit("No such file:" + args.ensemblepath)
-    ens_path = args.ensemblepath
+    ens_path = Path(args.ensemblepath)
 
-    # Relative path for result of ensemble statistics calculations
-    # relative to ensemble path on scratch disk
+    # Path for result of ensemble statistics calculations
     # Default path is defined.
-    result_path = "share/grid_statistics"
-    if Path(args.resultpath).exists():
-        result_path = args.resultpath
-
-    glob_var_config_path = (
-        ert_config_path + "/../../fmuconfig/output/global_variables.yml"
-    )
+    relative_result_path = DEFAULT_RELATIVE_RESULT_PATH
+    if args.resultpath:
+        relative_result_path = Path(args.resultpath)
+    result_path = ens_path / relative_result_path
+    glob_var_config_path = ert_config_path / Path(GLOBAL_VARIABLES_FILE)
     cfg_global = utils.yaml_load(glob_var_config_path)["global"]
     keyword = "FACIES_ZONE"
     if keyword in cfg_global:
@@ -232,11 +242,11 @@ def main():
         raise KeyError(f"Missing keyword: {keyword} in {glob_var_config_path}")
 
     # The ERTBOX grid file location in FMU
-    ertbox_path = ert_config_path + "/../../rms/output/aps/ERTBOX.EGRID"
+    ertbox_path = ert_config_path / ERTBOX_GRID_PATH
     ertbox_size = get_ertbox_size(ertbox_path)
     logger.info(f"Config path to FMU project: {ert_config_path}")
     logger.info(f"Ensemble path on scratch disk: {ens_path}")
-    logger.info(f"Result relative path on scratch disk: {result_path}")
+    logger.info(f"Result path on scratch disk: {result_path}")
     logger.info(f"ERTBOX size:  {ertbox_size}")
 
     calc_stats(
@@ -374,13 +384,11 @@ def get_values_in_ertbox(
         )
     if conformity.upper() in ["PROPORTIONAL", "TOP_CONFORM"]:
         ertbox_prop_values[:, :, :nz_zone] = prop_values[:, :, start_layer:end_layer]
-    #        print(f"Top conform or proportional zone: {zone_name}")
     elif conformity.upper() == "BASE_CONFORM":
         start_layer_ertbox = ertbox_size[2] - nz_zone
         ertbox_prop_values[:, :, start_layer_ertbox:] = prop_values[
             :, :, start_layer:end_layer
         ]
-    #        print(f"Base conform zone: {zone_name}")
 
     return ertbox_prop_values
 
@@ -405,7 +413,6 @@ def set_subgrid_names(grid, zone_code_names=None, new_subgrids=None):
 
 
 def write_mean_stdev_nactive(
-    ensemble_path,
     iter_number,
     zone_name,
     param_name,
@@ -414,7 +421,7 @@ def write_mean_stdev_nactive(
     ncount_active_values,
     result_path,
 ):
-    output_path = ensemble_path / Path(result_path)
+    output_path = result_path
     if not output_path.exists():
         # Create the directory
         output_path.mkdir()
@@ -466,7 +473,7 @@ def write_fraction_nactive(
     result_path,
     ncount_active_values=None,
 ):
-    output_path = ensemble_path / Path(result_path)
+    output_path = result_path
     if not output_path.exists():
         # Create the directory
         output_path.mkdir()
@@ -753,7 +760,6 @@ def calc_stats(
                     # Write mean, stdev
                     if calc_mean and calc_stdev:
                         write_mean_stdev_nactive(
-                            ensemble_path,
                             iter_number,
                             zone_name,
                             param_name,
@@ -881,5 +887,37 @@ def calc_stats(
                         logger.info(txt)
 
 
+class FieldStatistics(ErtScript):
+    """This class defines the ERT workflow hook.
+
+    It is constructed to work identically to the command line, except:
+
+      * field_statistics is upper-cased to FIELD_STATISTICS
+      * All option names with double-dash must be enclosed in "" to avoid
+        interference with the ERT comment characters "--".
+    """
+
+    # pylint: disable=too-few-public-methods
+    def run(self, *args):
+        # pylint: disable=no-self-use
+        """Pass the ERT workflow arguments on to the same parser as the command
+        line."""
+        parser = get_parser()
+        parsed_args = parser.parse_args(args)
+        field_stat(parsed_args)
+
+
+@ert.plugin(name="subscript")
+def legacy_ertscript_workflow(config):
+    """A hook for usage of this script in an ERT workflow,
+    using the legacy hook format."""
+
+    workflow = config.add_workflow(FieldStatistics, "FIELD_STATISTICS")
+    workflow.parser = get_parser
+    workflow.description = DESCRIPTION
+    workflow.examples = EXAMPLES
+    workflow.category = CATEGORY
+
+
 if __name__ == "__main__":
     main()
diff --git a/src/subscript/field_statistics/wf_field_param_statistics b/src/subscript/field_statistics/wf_field_param_statistics
deleted file mode 100644
index 96948ff57..000000000
--- a/src/subscript/field_statistics/wf_field_param_statistics
+++ /dev/null
@@ -1 +0,0 @@
-WF_FIELD_PARAM_STATISTICS  -c <FIELD_STAT_CONFIG_FILE>  -p <CONFIG_PATH>  -e <SCRATCH>/<USER>/<CASE_DIR>
diff --git a/src/subscript/field_statistics/wf_field_statistics b/src/subscript/field_statistics/wf_field_statistics
new file mode 100644
index 000000000..c83727b3d
--- /dev/null
+++ b/src/subscript/field_statistics/wf_field_statistics
@@ -0,0 +1,2 @@
+FIELD_STATISTICS  -c <FIELD_STAT_CONFIG_FILE>  -p <CONFIG_PATH>  -e <SCRATCH>/<USER>/<CASE_DIR> -r <RESULT_PATH>
+
diff --git a/tests/test_field_statistics.py b/tests/test_field_statistics.py
index 4f3bec22f..c4303a44a 100644
--- a/tests/test_field_statistics.py
+++ b/tests/test_field_statistics.py
@@ -1,4 +1,5 @@
 # import logging
+import shutil
 from pathlib import Path
 
 import fmu.config.utilities as utils
@@ -21,12 +22,13 @@
 # logger = subscript.getLogger(__name__)
 # logger.setLevel(logging.INFO)
 
-TESTDATA = Path(__file__).absolute().parent / "testdata_field_statistics"
-ENS_PATH = Path(__file__).absolute().parent / "testdata_field_statistics" / "ensemble"
-ERT_CONFIG_PATH = (
-    Path(__file__).absolute().parent / "testdata_field_statistics" / "ert" / "model"
-)
+TESTDATA = Path("testdata_field_statistics")
+ENSEMBLE = Path("ensemble")
 RESULT_PATH = Path("share/grid_statistics")
+ERT_CONFIG_PATH = Path("ert/model")
+DATADIR = Path(__file__).absolute().parent / TESTDATA
+GLOBAL_VARIABLES_FILE = Path("../../fmuconfig/output/global_variables.yml")
+
 
 CONFIG_DICT = {
     "nreal": 10,
@@ -55,22 +57,12 @@
     "ertbox_size": [5, 6, 5],
     "use_population_stdev": False,
 }
-GLOB_VAR_CFG_PATH = ERT_CONFIG_PATH / Path(
-    "../../fmuconfig/output/global_variables.yml"
-)
-CFG_GLOBAL = utils.yaml_load(GLOB_VAR_CFG_PATH)["global"]
-KEYWORD = "FACIES_ZONE"
-if KEYWORD in CFG_GLOBAL:
-    FACIES_PER_ZONE = CFG_GLOBAL[KEYWORD]
-else:
-    raise KeyError(f"Missing keyword: {KEYWORD} in {GLOB_VAR_CFG_PATH}")
-
-
-def make_box_grid(dimensions, grid_name, ens_path):
-    filename = ens_path / Path("share/grid_statistics") / Path(grid_name + ".roff")
-    filename_egrid = (
-        ens_path / Path("share/grid_statistics") / Path(grid_name.upper() + ".EGRID")
-    )
+
+
+def make_box_grid(dimensions, grid_name, result_path):
+    filename = result_path / Path(grid_name + ".roff")
+    filename_egrid = result_path / Path(grid_name.upper() + ".EGRID")
+
     grid = xtgeo.create_box_grid(dimensions)
     grid.name = grid_name
     print(f"Grid name:  {grid.name}")
@@ -116,7 +108,6 @@ def make_ensemble_test_data(
 
     iteration_list = [0, 3]
     zone_code_names = config_dict["zone_code_names"]
-    facies_per_zone = facies_per_zone
     discrete_param_name_per_zone = config_dict["discrete_property_param_per_zone"]
     param_name_per_zone = config_dict["continuous_property_param_per_zone"]
     nreal = 10
@@ -241,11 +232,12 @@ def make_ensemble_test_data(
                         xtgeo_geogrid.set_actnum(xtgeo_active)
                         set_subgrid_names(xtgeo_geogrid, new_subgrids=subgrid_dict)
                         xtgeo_geogrid.to_file(filename_grid, fformat="roff")
-
-    print(
-        "Finished make test data for ensemble for zone "
-        f"{zone_name} for iteration {iter_number}"
-    )
+            if print_info:
+                print(
+                    "Testdata for ensemble for zone "
+                    f"{zone_name} for iteration {iter_number} completed."
+                )
+    print("Finished making testdata ensemble")
 
 
 def assign_values_continuous_param(
@@ -389,9 +381,43 @@ def assign_values_discrete_param(
     return values, all_code_names
 
 
+def make_test_case(tmp_path, config_dict):
+    """Makes a test data set based on the input config_dict"""
+    tmp_testdata_path = tmp_path / TESTDATA
+    shutil.copytree(DATADIR, tmp_testdata_path)
+
+    ens_path = tmp_testdata_path / ENSEMBLE
+    ert_config_path = tmp_testdata_path / ERT_CONFIG_PATH
+    result_path = ens_path / RESULT_PATH
+
+    glob_cfg_path = ert_config_path / GLOBAL_VARIABLES_FILE
+    cfg_global = utils.yaml_load(glob_cfg_path)["global"]
+    keyword = "FACIES_ZONE"
+    if keyword in cfg_global:
+        facies_per_zone = cfg_global[keyword]
+    else:
+        raise KeyError(f"Missing keyword: {keyword} in {glob_cfg_path}")
+
+    (nx, ny, nz) = config_dict["ertbox_size"]
+
+    # Write the ERTBOX grid to file so that the test data can be imported
+    # and visualized in e.g. RMS. Saved in the share directory at the
+    # top of the ensemble directory.
+    make_box_grid((nx, ny, nz), "ERTBOX", result_path)
+
+    # Write the geogrid to file so that the test data can be imported and
+    # visualized in e.g. RMS. Geogrid for the test data has 3 zones, each
+    # with 5 layers. Saved in share directory at top of ensemble directory.
+    make_box_grid((nx, ny, nz * 3), "Geogrid", result_path)
+
+    # Make ensemble of test data
+    make_ensemble_test_data(config_dict, facies_per_zone, nx, ny, nz, ens_path)
+    return facies_per_zone, ens_path, result_path, ert_config_path, (nx, ny, nz)
+
+
 def compare_with_referencedata(ens_path, result_path, print_check=False):
     lines = []
-    file_list = Path(ens_path) / Path(result_path) / Path("referencedata/files.txt")
+    file_list = result_path / Path("referencedata/files.txt")
     with open(file_list, "r") as file:
         lines = file.readlines()
     is_ok = []
@@ -403,10 +429,8 @@ def compare_with_referencedata(ens_path, result_path, print_check=False):
         name = nameinput.strip()
         words = name.split("_")
         if words[0] in ["mean", "stdev", "prob"]:
-            fullfilename = Path(ens_path) / Path(result_path) / Path("ertbox--" + name)
-            reference_filename = (
-                Path(ens_path) / Path(result_path) / Path("referencedata") / Path(name)
-            )
+            fullfilename = result_path / Path("ertbox--" + name)
+            reference_filename = result_path / Path("referencedata") / Path(name)
 
             grid_property = xtgeo.gridproperty_from_file(fullfilename, fformat="roff")
             grid_property_reference = xtgeo.gridproperty_from_file(
@@ -434,33 +458,18 @@ def compare_with_referencedata(ens_path, result_path, print_check=False):
 
 
 @pytest.mark.parametrize(
-    "config_dict, ens_path, ert_config_path, facies_per_zone, result_path",
-    [(CONFIG_DICT, ENS_PATH, ERT_CONFIG_PATH, FACIES_PER_ZONE, RESULT_PATH)],
+    "config_dict",
+    [CONFIG_DICT],
 )
 def test_calc_statistics(
+    tmp_path,
     config_dict,
-    ens_path,
-    ert_config_path,
-    facies_per_zone,
-    result_path,
     ertbox_size=None,
 ):
-    """Main test script"""
-
-    (nx, ny, nz) = config_dict["ertbox_size"]
-
-    # Write file with ERTBOX grid for the purpose to import to visualize
-    # the test data in e.g. RMS. Saved in share directory at
-    # top of ensemble directory
-    make_box_grid((nx, ny, nz), "ERTBOX", ens_path)
-
-    # Write file with geogrid for the purpose to import to visualize
-    # the test data in e.g. RMS". Geogrid for the test data has 3 zones,
-    # each with 5 layers. Saved in share directory at top of ensemble directory
-    make_box_grid((nx, ny, nz * 3), "Geogrid", ens_path)
-
-    # Make ensemble of test data
-    make_ensemble_test_data(config_dict, facies_per_zone, nx, ny, nz, ens_path)
+    # Create testdata for an ensemble to be used
+    facies_per_zone, ens_path, result_path, ert_config_path, ertbox_size = (
+        make_test_case(tmp_path, config_dict)
+    )
 
     # Run the calculations of mean, stdev, prob
     print("Calculate statistics")
@@ -867,41 +876,43 @@ def test_get_specification(
 
 
 @pytest.mark.parametrize(
-    "config_path, ens_path",
-    [
-        (
-            Path(__file__).absolute().parent
-            / "testdata_field_statistics"
-            / "config_example.yml",
-            ENS_PATH,
-        )
-    ],
+    "config_file, config_dict",
+    [(Path("config_example.yml"), CONFIG_DICT)],
 )
-def test_main(config_path, ens_path, print_info=True):
-    # Requires that the test data is already generated by
+def test_main(tmp_path, config_file, config_dict, print_info=True):
     import subprocess
     import sys
 
-    config_file = config_path.as_posix()
+    # First make an ensemble to be used as testdata. This is based on the config_dict
+    _, ens_path, result_path, ert_config_path, _ = make_test_case(tmp_path, config_dict)
+    tmp_testdata_path = tmp_path / TESTDATA
+    config_path = tmp_testdata_path / Path(config_file)
+    ert_config_path = tmp_testdata_path / ERT_CONFIG_PATH
+    ens_path = tmp_testdata_path / ENSEMBLE
+    result_path = ens_path / RESULT_PATH
+
+    # Run the main script as a subprocess
     script_name = Path(__file__).absolute().parent.parent / Path(
         "src/subscript/field_statistics/field_statistics.py"
     )
     if print_info:
         print(f"\nRun script:  {script_name}")
-    remove_file_path = ens_path / Path("share/grid_statistics/ertbox--*.roff")
-    print(f"Remove path:  {remove_file_path.as_posix()}")
+    remove_file_path = result_path / Path("ertbox--*.roff")
     subprocess.run(["rm", "-f", remove_file_path])
     subprocess.run(
         [
             sys.executable,
-            script_name,
+            script_name.as_posix(),
             "-c",
-            config_file,
+            config_path.as_posix(),
             "-p",
-            ERT_CONFIG_PATH,
+            ert_config_path.as_posix(),
             "-e",
-            ENS_PATH,
+            ens_path.as_posix(),
+            "-r",
+            result_path.as_posix(),
         ]
     )
-
-    assert compare_with_referencedata(ENS_PATH, RESULT_PATH, print_check=True)
+    # For this test not to fail, the CONFIG_DICT and the specified
+    # config file in yaml format must define the same setup
+    assert compare_with_referencedata(ens_path, result_path, print_check=True)