equinor · oddvarlia · Nov 7, 2021 · May 21, 2022 · oyvindeide · Jan 17, 2022
diff --git a/semeio/workflows/localisation/local_config_script.py b/semeio/workflows/localisation/local_config_script.py
@@ -1,26 +1,25 @@
-from ert_shared.libres_facade import LibresFacade
 from ert_shared.plugins.plugin_manager import hook_implementation
 
 import semeio.workflows.localisation.local_script_lib as local
 from semeio.communication import SemeioScript
-from semeio.workflows.localisation.localisation_config import LocalisationConfig
+from semeio.workflows.localisation.localisation_config import (
+    LocalisationConfig,
+    get_max_gen_obs_size_for_expansion,
+)
 
 
 class LocalisationConfigJob(SemeioScript):
     def run(self, *args):
         ert = self.ert()
-        facade = LibresFacade(self.ert())
+
         # Clear all correlations
         local.clear_correlations(ert)
 
         # Read yml file with specifications
         config_dict = local.read_localisation_config(args)
 
-        # Get all observations from ert instance
-        obs_keys = [
-            facade.get_observation_key(nr)
-            for nr, _ in enumerate(facade.get_observations())
-        ]
+        expand_gen_obs_max_size = get_max_gen_obs_size_for_expansion(config_dict)
+        obs_keys = local.get_obs_from_ert(ert, expand_gen_obs_max_size)
 
         ert_parameters = local.get_param_from_ert(ert.ensembleConfig())
 
@@ -35,6 +34,7 @@ def run(self, *args):
             ert_parameters.to_dict(),
             ert.getLocalConfig(),
             ert.ensembleConfig(),
+            ert.getObservations(),
             ert.eclConfig().getGrid(),
         )
 
@@ -195,6 +195,51 @@ def run(self, *args):
           scalingfactors: [1.0, 0.5, 0.3]
           smooth_ranges: [2,3]
 
+Example 2:
+------------
+In this example the optional keyword **max_gen_obs_size** is specified.
+The value 1000 means that all observation nodes of type GEN_OBS having at most
+1000 observations are specified in the form::
+
+ nodename:index
+
+where **index** is an integer from 0 to at most 999.
+All GEN_OBS nodes with more than 1000 observations
+are specified in the form nodename only. Individual observations cannot be
+specified for GEN_OBS nodes of any size for performance reasons, e.g. when
+GEN_OBS nodes with seismic data are used.
+
+The first example below (2A) specifies all observations by::
+
+ GENOBS_NODE:*
+
+The second example (2B) has selected a few observations from the
+GENOBS_NODE::
+
+  ["GENOBS_NODE:0","GENOBS_NODE:3","GENOBS_NODE:55"]
+
+Example 2A::
+
+  max_gen_obs_size: 1000
+  log_level: 2
+  correlations:
+    - name: CORR1
+      obs_group:
+         add: ["GENOBS_NODE:*"]
+      param_group:
+         add: ["PARAM_NODE:*"]
+
+Example 2B::
+
+  max_gen_obs_size: 100
+  log_level: 2
+  correlations:
+    - name: CORR1
+      obs_group:
+         add: ["GENOBS_NODE:0","GENOBS_NODE:3","GENOBS_NODE:55"]
+      param_group:
+         add: ["PARAM_NODE:*"]
+
 
 Keywords
 -----------
@@ -212,6 +257,16 @@ def run(self, *args):
       and make it possible to visualise them. Is only relevant when using
       **field_scale** with methods calculating the scaling factors.
 
+:max_gen_obs_size:
+      Specify the max size of observation nodes of type GEN_OBS for which
+      individual observations can be specified. Individual observations are
+      specified by nodename:index where index is the zero-based observation
+      number in the observation file associated with the GEN_OBS type node.
+      The keyword is optional. If not specified or specified with value 0,
+      this means that observations of type GEN_OBS are specified by
+      nodename only. Individual observations cannot be specified in this case,
+      which means that all observations in the GEN_OBS node are used.
+
 :correlations:
       List of specifications of correlation groups. A correlation group
       specify a set of observations and a set of model parameters.
@@ -296,6 +351,13 @@ def run(self, *args):
       The nodename represents all field values for all grid cells in the whole
       3D or 2D grid the field belongs to.
 
+      For observations specified with GENERAL_OBSERVATION keyword in ERT config file,
+      it is possible to specify the observations by either *node_name*
+      or *node_name:index*. Default is to specify by *node_name* only, which means
+      to include all observations from this ERT identifier.
+      The alternative option is to use the keyword **max_gen_obs_size**
+      described above and specify individual observations by *node_name:index*.
+
 :remove:
       For details see the keyword **add:**. The main purpose of **remove** is to
       have a quick and easy way to specify all parameters or observations

diff --git a/semeio/workflows/localisation/local_script_lib.py b/semeio/workflows/localisation/local_script_lib.py
@@ -1,4 +1,5 @@
 # pylint: disable=W0201
+# pylint: disable=C0302
 import math
 import yaml
 import cwrap
@@ -15,13 +16,16 @@
 from ecl.eclfile import Ecl3DKW
 from ecl.ecl_type import EclDataType
 from ecl.grid.ecl_grid import EclGrid
+
 from res.enkf.enums.ert_impl_type_enum import ErtImplType
 from res.enkf.enums.enkf_var_type_enum import EnkfVarType
+from res.enkf import EnkfObservationImplementationType
 
 from semeio.workflows.localisation.localisation_debug_settings import (
     LogLevel,
     debug_print,
 )
+from ert_shared.libres_facade import LibresFacade
 
 
 @dataclass
@@ -170,14 +174,14 @@ def active_index_for_parameter(node_name, param_name, ert_param_dict):
 
 
 def activate_gen_kw_param(
-    model_param_group, node_name, param_list, ert_param_dict, log_level=LogLevel.OFF
+    ministep, node_name, param_list, ert_param_dict, log_level=LogLevel.OFF
 ):
     """
     Activate the selected parameters for the specified node.
     The param_list contains the list of parameters defined in GEN_KW
     for this node to be activated.
     """
-    active_param_list = model_param_group.getActiveList(node_name)
+    active_param_list = ministep.getActiveList(node_name)
     debug_print("Set active parameters", LogLevel.LEVEL2, log_level)
     for param_name in param_list:
         index = active_index_for_parameter(node_name, param_name, ert_param_dict)
@@ -191,15 +195,15 @@ def activate_gen_kw_param(
 
 
 def activate_gen_param(
-    model_param_group, node_name, param_list, data_size, log_level=LogLevel.OFF
+    ministep, node_name, param_list, data_size, log_level=LogLevel.OFF
 ):
     """
     Activate the selected parameters for the specified node.
     The param_list contains a list of names that are integer numbers
     for the parameter indices to be activated for parameters belonging
     to the specified GEN_PARAM node.
     """
-    active_param_list = model_param_group.getActiveList(node_name)
+    active_param_list = ministep.getActiveList(node_name)
     for param_name in param_list:
         index = int(param_name)
         if index < 0 or index >= data_size:
@@ -564,6 +568,7 @@ def add_ministeps(
     ert_param_dict,
     ert_local_config,
     ert_ensemble_config,
+    ert_obs,
     grid_for_field,
 ):
     # pylint: disable-msg=too-many-branches
@@ -579,17 +584,17 @@ def add_ministeps(
     )
 
     for count, corr_spec in enumerate(user_config.correlations):
+
         ministep_name = corr_spec.name
         ministep = ert_local_config.createMinistep(ministep_name)
         debug_print(
             f"Define ministep: {ministep_name}", LogLevel.LEVEL1, user_config.log_level
         )
 
-        param_group_name = ministep_name + "_param_group"
         obs_group_name = ministep_name + "_obs_group"
         obs_group = ert_local_config.createObsdata(obs_group_name)
 
-        obs_list = corr_spec.obs_group.result_items
+        obs_dict = Parameters.from_list(corr_spec.obs_group.result_items).to_dict()
         param_dict = Parameters.from_list(corr_spec.param_group.result_items).to_dict()
 
         # Setup model parameter group
@@ -780,20 +785,40 @@ def add_ministeps(
                         user_config.log_level,
                     )
 
-        # Setup observation group
-        for obs_name in obs_list:
+        # Setup observation group. For GEN_OBS type
+        # the observation specification can be of the form obs_node_name:index
+        # if individual observations from a GEN_OBS node are chosen or
+        # only obs_node_name if all observations in GEN_OBS are active.
+        obs_type = EnkfObservationImplementationType.GEN_OBS
+        key_list_gen_obs = ert_obs.getTypedKeylist(obs_type)
+        for obs_node_name, obs_index_list in obs_dict.items():
+            obs_group.addNode(obs_node_name)
             debug_print(
-                f"Add obs node: {obs_name}", LogLevel.LEVEL2, user_config.log_level
+                f"Add obs node: {obs_node_name}", LogLevel.LEVEL2, user_config.log_level
             )
-            obs_group.addNode(obs_name)
+            if obs_node_name in key_list_gen_obs:
+                # An observation node of type GEN_OBS
+                if len(obs_index_list) > 0:
+                    active_obs_list = obs_group.getActiveList(obs_node_name)
+                    if len(obs_index_list) > 50:
+                        debug_print(
+                            f"More than 50 active obs for {obs_node_name}",
+                            LogLevel.LEVEL3,
+                            user_config.log_level,
+                        )
 
-        # Setup ministep
-        debug_print(
-            f"Attach {param_group_name} to ministep {ministep_name}",
-            LogLevel.LEVEL1,
-            user_config.log_level,
-        )
+                    for string_index in obs_index_list:
+                        index = int(string_index)
+                        if len(obs_index_list) <= 50:
+                            debug_print(
+                                f"Active obs for {obs_node_name}  index: {index}",
+                                LogLevel.LEVEL3,
+                                user_config.log_level,
+                            )
+
+                        active_obs_list.addActiveIndex(index)
 
+        # Setup ministep
         debug_print(
             f"Attach {obs_group_name} to ministep {ministep_name}",
             LogLevel.LEVEL1,
@@ -973,3 +998,31 @@ def write_qc_parameter(
                 grid.write_grdecl(scaling_kw, file)
             # Increase parameter number to define unique parameter name
             cls.scaling_param_number = cls.scaling_param_number + 1
+
+
+def get_obs_from_ert(ert, expand_gen_obs_max_size):
+    """
+    Return the observation keys known to ERT. A GEN_OBS node having at most
+    expand_gen_obs_max_size observations is expanded into one key per
+    observation of the form nodename:index. All other nodes are returned by
+    node name only. A max size of 0 or None turns the expansion off.
+    """
+    facade = LibresFacade(ert)
+    ert_obs = facade.get_observations()
+    if not expand_gen_obs_max_size:
+        # Expansion is off; None counts as 0 so no size is compared with None
+        return [facade.get_observation_key(nr) for nr, _ in enumerate(ert_obs)]
+
+    obs_keys = []
+    for nr, _ in enumerate(ert_obs):
+        key = facade.get_observation_key(nr)
+        if facade.get_impl_type_name_for_obs_key(key) != "GEN_OBS":
+            obs_keys.append(key)
+            continue
+        obs_vector = ert_obs[key]
+        data_size = obs_vector.getNode(obs_vector.activeStep()).getSize()
+        if data_size <= expand_gen_obs_max_size:
+            obs_keys.extend(f"{key}:{index}" for index in range(data_size))
+        else:
+            obs_keys.append(key)
+    return obs_keys
diff --git a/semeio/workflows/localisation/localisation_config.py b/semeio/workflows/localisation/localisation_config.py
@@ -355,6 +355,28 @@ def validate_surface_scale(cls, value):
             )
 
 
+class MaxGenObsSize(PydanticBaseModel):
+    """
+    max_gen_obs_size:  Integer >= 0. Default: 0
+                             If it is > 0, every GEN_OBS node having at most this
+                             number of observations is expanded into the form
+                             nodename:index. The user must specify observations of
+                             such a node in the form nodename:index, or nodename:*
+                             if all observations of the GEN_OBS node are used.
+                             The max_gen_obs_size value is a threshold value.
+                             If a GEN_OBS node has more observations than
+                             max_gen_obs_size specified by the user,
+                             the obs node is not expanded and the
+                             user must specify the obs node only by its
+                             nodename, not in expanded form. Typical use of this is
+                             to let nodes containing a moderate number of observations
+                             be expanded, while nodes having a large number of
+                             observations are not expanded.
+    """
+
+    max_gen_obs_size: Optional[conint(ge=0)] = 0
+
+
 class LocalisationConfig(BaseModel):
     """
     observations:  A list of observations from ERT in format nodename
@@ -367,13 +389,23 @@ class LocalisationConfig(BaseModel):
     log_level:       Integer defining how much log output to write to screen
     write_scaling_factors: Turn on writing calculated scaling parameters to file.
                             Possible values: True/False. Default: False
+    max_gen_obs_size:  Integer defining max size for a GEN_OBS node to
+                            be expanded in the form nodename:index.
+                            If the observation node of type GEN_OBS has more
+                            observations than this number, it can only be specified with
+                            node name which then represents the whole set of
+                            observations for the node.
+                            Possible values: Integers >= 0
+                            Default: 0 which means that GEN_OBS nodes are specified
+                            with node name only.
     """
 
     observations: List[str]
     parameters: List[str]
     correlations: List[CorrelationConfig]
     log_level: Optional[conint(ge=0, le=5)] = 1
     write_scaling_factors: Optional[bool] = False
+    max_gen_obs_size: Optional[conint(ge=0)] = 0
 
     @validator("log_level")
     def validate_log_level(cls, level):
@@ -406,3 +438,9 @@ def _check_specification(items_to_add, items_to_remove, valid_items):
         added_items = added_items.difference(removed_items)
     added_items = list(added_items)
     return sorted(added_items)
+
+
+def get_max_gen_obs_size_for_expansion(config_dict):
+    """Return the validated max_gen_obs_size from config_dict (default 0)."""
+    validated = MaxGenObsSize(**config_dict)
+    return validated.max_gen_obs_size
diff --git a/tests/jobs/conftest.py b/tests/jobs/conftest.py
@@ -40,3 +40,18 @@ def setup_poly_ert(tmpdir, test_data_root):
 
     yield
     os.chdir(cwd)
+
+
+@pytest.fixture()
+def setup_poly_gen_param_ert(tmpdir, test_data_root):
+    """Yield the ResConfig of a poly_gen_param test case copied into tmpdir."""
+    cwd = os.getcwd()
+    tmpdir.chdir()
+    try:
+        test_data_dir = os.path.join(test_data_root, "poly_gen_param")
+        shutil.copytree(test_data_dir, "test_data")
+        os.chdir("test_data")
+        yield ResConfig("poly.ert")
+    finally:
+        # Restore the working directory also when setup or the test fails
+        os.chdir(cwd)