From a8199cf910a39c5bdb4bcb1683bc4fd5a8ddef13 Mon Sep 17 00:00:00 2001
From: "Oddvar Lia (ST MSU GEO)"
Date: Sun, 7 Nov 2021 22:22:02 +0100
Subject: [PATCH] Added option to specify GEN_OBS nodes in the form
 nodename:index

This makes it possible to specify individual observations from a GEN_OBS node
in localisation. Add tests to verify active parameters and active observations
in ministeps.
---
 .../localisation/local_config_script.py       | 112 +++++++--
 .../localisation/local_script_lib.py          | 232 +++++++++++++++++-
 .../localisation/localisation_config.py       |  39 +++
 .../localisation/test_configs/test_config.py  | 107 ++++++++
 tests/jobs/localisation/test_integration.py   | 207 +++++++++++++++-
 5 files changed, 665 insertions(+), 32 deletions(-)

diff --git a/semeio/workflows/localisation/local_config_script.py b/semeio/workflows/localisation/local_config_script.py
index ab0c8dabf..b4081e146 100644
--- a/semeio/workflows/localisation/local_config_script.py
+++ b/semeio/workflows/localisation/local_config_script.py
@@ -1,26 +1,25 @@
-from ert_shared.libres_facade import LibresFacade
 from ert_shared.plugins.plugin_manager import hook_implementation
 
 import semeio.workflows.localisation.local_script_lib as local
 from semeio.communication import SemeioScript
-from semeio.workflows.localisation.localisation_config import LocalisationConfig
+from semeio.workflows.localisation.localisation_config import (
+    LocalisationConfig,
+    get_max_gen_obs_size_for_expansion,
+)
 
 
 class LocalisationConfigJob(SemeioScript):
     def run(self, *args):
         ert = self.ert()
-        facade = LibresFacade(self.ert())
+
         # Clear all correlations
         local.clear_correlations(ert)
 
         # Read yml file with specifications
         config_dict = local.read_localisation_config(args)
 
-        # Get all observations from ert instance
-        obs_keys = [
-            facade.get_observation_key(nr)
-            for nr, _ in enumerate(facade.get_observations())
-        ]
+        expand_gen_obs_max_size = get_max_gen_obs_size_for_expansion(config_dict)
+        obs_keys = local.get_obs_from_ert(ert, expand_gen_obs_max_size)
 
         ert_parameters = local.get_param_from_ert(ert.ensembleConfig())
 
@@ -35,8 +34,17 @@ def run(self, *args):
             ert_parameters.to_dict(),
             ert.getLocalConfig(),
             ert.ensembleConfig(),
+            ert.getObservations(),
             ert.eclConfig().getGrid(),
         )
+        if config.verify_active:
+            local.verify_ministep_active_param(
+                config.correlations,
+                ert.getLocalConfig(),
+                ert.ensembleConfig(),
+                ert_parameters.to_dict(),
+            )
+            local.verify_ministep_active_obs(config.correlations, ert)
 
 
 DESCRIPTION = """
@@ -56,8 +64,9 @@ def run(self, *args):
 
 Features
 ----------
 The following features are implemented:
-  - The user defines groups of model parameters and observations.
+  - The user defines groups of model parameters and observations,
+    called correlation groups or ministeps. It is possible to specify many
+    correlation groups.
   - Wildcard notation can be used to specify a selection of model parameter
     groups and observation groups.
   - For scalar parameters coming from the ERT keywords GEN_KW and GEN_PARAM,
@@ -68,8 +77,9 @@ def run(self, *args):
    value corresponding to a grid cell (i,j,k) in location (x,y,z) is reduced by
    a scaling factor varying by distance from a reference point e.g at a location
    (X,Y,Z), usually specified to be close to an observation group.
-  - Multiple pairs of groups of model parameters and observations can be specified
-    to have active correlations.
+  - A pair of observation and model parameter (obs, param) may appear in only
+    one correlation group, to avoid specifying the same active correlation
+    twice.
 
 Using the localisation setup in ERT
@@ -84,27 +94,35 @@ def run(self, *args):
   - Specify to automatically run the workflow after the initial ensemble is
     created, but before the first update by using the HOOK_WORKFLOW keyword
     with the option PRE_FIRST_UPDATE.
+  - To QC the localisation config file, it is also possible to run the workflow
+    before the initial ensemble is created, but due to limitations in the ERT
+    implementation, GEN_PARAM parameter nodes will then have an empty list of
+    parameters. If GEN_PARAM nodes are used in correlation groups, an error
+    message may appear.
+
 """
 
 EXAMPLES = """
-Example configuration
+Example configurations
 -------------------------
 The configuration file is a YAML format file where pairs of groups of
 observations and groups of model parameters are specified.
-Per default, all correlations between the
-observations from the observation group and model parameters from the model
-parameter group are active and unmodified. All other combinations of pairs of
-observations and model parameters not specified in a correlation group, are inactive
-or set to 0. But it is possible to specify many correlation groups. If a pair of
-observation and model parameter appear multiple times
+By default, all correlations between the observations from the observation
+group and model parameters from the model parameter group are active
+and unmodified. All other combinations of pairs of observations and model
+parameters not specified in a correlation group are inactive, i.e. set to 0.
+It is possible to specify many correlation groups. If a pair of observation
+and model parameter appears multiple times
 (e.g. because they are member of multiple correlation groups),
 an error message is raised.
 It is also possible to scale down correlations that are specified for 3D and
 2D fields.
 
-In the example below, four correlation groups are defined.
+Example 1:
+------------
+In the first example below, four correlation groups are defined.
 The first correlation group is called ``CORR1``  (a user defined name),
 and defines all observations to have active correlation
 with all model parameters starting with ``aps_valysar_grf`` and with ``GEO:PARAM``.
@@ -170,6 +188,50 @@ def run(self, *args):
         active_segments: [ 1,2,4]
         scalingfactors: [1.0, 0.5, 0.3]
 
+Example 2:
+------------
+In this example the optional keyword **max_gen_obs_size** is specified.
+The value 1000 means that all observation nodes of type GEN_OBS with at most
+1000 observations are specified in the form::
+
+  nodename:index
+
+where **index** is an integer from 0 to 999.
+All GEN_OBS nodes with more than 1000 observations
+are specified by nodename only. The reason for not enabling individual
+observation specification for GEN_OBS nodes of arbitrary size is performance,
+e.g. when GEN_OBS nodes containing seismic data are used.
+The first example below (2A) specifies all observations by::
+
+  GENOBS_NODE:*
+
+The second example (2B) selects a few observations from
+GENOBS_NODE::
+
+  ["GENOBS_NODE:0","GENOBS_NODE:3","GENOBS_NODE:55"]
+
+Example 2A::
+
+  max_gen_obs_size: 1000
+  log_level: 2
+  correlations:
+    - name: CORR1
+      obs_group:
+        add: ["GENOBS_NODE:*"]
+      param_group:
+        add: ["PARAM_NODE:*"]
+
+Example 2B::
+
+  max_gen_obs_size: 100
+  log_level: 2
+  correlations:
+    - name: CORR1
+      obs_group:
+        add: ["GENOBS_NODE:0","GENOBS_NODE:3","GENOBS_NODE:55"]
+      param_group:
+        add: ["PARAM_NODE:*"]
+
 
 Keywords
 -----------
@@ -187,6 +249,16 @@ def run(self, *args):
   and make it possible to visualise them. Is only relevant when using
   **field_scale**  with methods calculating the scaling factors.
 
+:max_gen_obs_size:
+  Specify the maximum size of GEN_OBS observation nodes for which individual
+  observations can be specified. Individual observations are specified as
+  nodename:index, where index is the observation number in the observation
+  file associated with the GEN_OBS node.
+  The keyword is optional. If it is not specified, or is specified with the
+  value 0, observations of type GEN_OBS are specified by nodename only.
+  Individual observations cannot be specified in this case, which means that
+  all observations in the GEN_OBS node are used.
+
 :correlations:
   List of specifications of correlation groups. A correlation group
   specify a set of observations and a set of model parameters.
diff --git a/semeio/workflows/localisation/local_script_lib.py b/semeio/workflows/localisation/local_script_lib.py
index 4daa534da..f11d8f9ff 100644
--- a/semeio/workflows/localisation/local_script_lib.py
+++ b/semeio/workflows/localisation/local_script_lib.py
@@ -1,4 +1,5 @@
 # pylint: disable=W0201
+# pylint: disable=C0302
 import math
 import yaml
 import cwrap
@@ -15,13 +16,17 @@
 from ecl.eclfile import Ecl3DKW
 from ecl.ecl_type import EclDataType
 from ecl.grid.ecl_grid import EclGrid
+
 from res.enkf.enums.ert_impl_type_enum import ErtImplType
 from res.enkf.enums.enkf_var_type_enum import EnkfVarType
+from res.enkf.enums.active_mode_enum import ActiveMode
+from res.enkf import EnkfObservationImplementationType
 
 from semeio.workflows.localisation.localisation_debug_settings import (
     LogLevel,
     debug_print,
 )
+from ert_shared.libres_facade import LibresFacade
 
 
 @dataclass
@@ -524,11 +529,12 @@ def add_ministeps(
     ert_param_dict,
     ert_local_config,
     ert_ensemble_config,
+    ert_obs,
     grid_for_field,
 ):
     # pylint: disable-msg=too-many-branches
     # pylint: disable-msg=R0915
-
+    # pylint: disable-msg=R1702
     debug_print("Add all ministeps:", LogLevel.LEVEL1, user_config.log_level)
     ScalingValues.initialize()
     # Read all region files used in correlation groups,
@@ -539,6 +545,7 @@ def add_ministeps(
     )
 
     for count, corr_spec in enumerate(user_config.correlations):
+
         ministep_name = corr_spec.name
         ministep = ert_local_config.createMinistep(ministep_name)
         debug_print(
@@ -551,7 +558,7 @@ def add_ministeps(
 
         obs_group_name = ministep_name + "_obs_group"
         obs_group = ert_local_config.createObsdata(obs_group_name)
-        obs_list = corr_spec.obs_group.result_items
+        obs_dict = Parameters.from_list(corr_spec.obs_group.result_items).to_dict()
         param_dict = Parameters.from_list(corr_spec.param_group.result_items).to_dict()
 
         # Setup model parameter group
@@ -714,12 +721,38 @@ def add_ministeps(
                 user_config.log_level,
             )
 
-        # Setup observation group
-        for obs_name in obs_list:
+        # Setup observation group.
For GEN_OBS type + # the observation specification can be of the form obs_node_name:index + # if individual observations from a GEN_OBS node is chosen or + # only obs_node_name if all observations in GEN_OBS is active. + obs_type = EnkfObservationImplementationType.GEN_OBS + key_list_gen_obs = ert_obs.getTypedKeylist(obs_type) + for obs_node_name, obs_index_list in obs_dict.items(): + obs_group.addNode(obs_node_name) debug_print( - f"Add obs node: {obs_name}", LogLevel.LEVEL2, user_config.log_level + f"Add obs node: {obs_node_name}", LogLevel.LEVEL2, user_config.log_level ) - obs_group.addNode(obs_name) + if obs_node_name in key_list_gen_obs: + # An observation node of type GEN_OBS + if len(obs_index_list) > 0: + active_obs_list = obs_group.getActiveList(obs_node_name) + if len(obs_index_list) > 50: + debug_print( + f"More than 50 active obs for {obs_node_name}", + LogLevel.LEVEL3, + user_config.log_level, + ) + + for string_index in obs_index_list: + index = int(string_index) + if len(obs_index_list) <= 50: + debug_print( + f"Active obs for {obs_node_name} index: {index}", + LogLevel.LEVEL3, + user_config.log_level, + ) + + active_obs_list.addActiveIndex(index) # Setup ministep debug_print( @@ -744,6 +777,165 @@ def add_ministeps( ert_local_config.getUpdatestep().attachMinistep(ministep) +def get_corr_group_spec(correlations_spec_list, name): + found = False + corr_spec = None + for count, corr_spec in enumerate(correlations_spec_list): + corr_group_name = corr_spec.name + if name == corr_group_name: + found = True + break + if not found: + raise ValueError( + f"Can not find correlation group: {name} in user specification." + ) + return corr_spec + + +def verify_ministep_active_param( + corr_spec_list, ert_local_config, ert_ensemble_config, ert_param_dict +): + """ + Script to verify that the local config matches the specified user config for + parameters of type GEN_KW and GEN_PARAM. + Reports mismatch if found and silent if OK. + Used for test purpose. + """ + print("\nVerify ministep setup for active parameters:") + updatestep = ert_local_config.getUpdatestep() + for ministep in updatestep: + print(f"Ministep: {ministep.name()}") + # User specification + corr_spec = get_corr_group_spec(corr_spec_list, ministep.name()) + param_dict = Parameters.from_list(corr_spec.param_group.result_items).to_dict() + + # Data from local config, only one param group in a ministep here. + param_group_name = ministep.name() + "_param_group" + if param_group_name not in ministep: + raise ValueError( + f"For ministep: {ministep.name()} there does not exist " + f"any parameter group : {param_group_name}" + ) + param_group = ministep[param_group_name] + node_names = list(param_group.keys()) + for node_name in node_names: + node = ert_ensemble_config.getNode(node_name) + impl_type = node.getImplementationType() + active_list_obj = param_group.getActiveList(node_name) + if node_name not in param_dict: + raise ValueError( + f"Ministep {ministep.name()} with parameter group " + f"{param_group_name} has node name {node_name} " + "that is not specified." 
+ ) + user_spec_param_list = param_dict[node_name] + + # Check only cases with partly active set of parameter + if active_list_obj.getMode() == ActiveMode.PARTLY_ACTIVE: + if impl_type == ErtImplType.GEN_KW: + spec_index_list = [] + for nr, user_param_name in enumerate(user_spec_param_list): + spec_index_list.append( + active_index_for_parameter( + node_name, user_param_name, ert_param_dict + ) + ) + + elif impl_type == ErtImplType.GEN_DATA: + spec_index_list = [ + int(user_spec_param_list[i]) + for i in range(len(user_spec_param_list)) + ] + + active_index_list = active_list_obj.getActiveIndexList() + spec_index_list.sort() + active_index_list.sort() + print(f" Param node: {node_name}") + print(f" Active param indices (user specified): {spec_index_list}") + print(f" Active param indices (ministep): {active_index_list}") + if len(spec_index_list) != len(active_index_list): + raise ValueError( + f"For ministep: {ministep.name()} the number of " + "active parameters are: " + f"{len(active_index_list)} \n" + "while the specified number of active parameters " + f"are: {len(spec_index_list)}" + ) + err = False + for nr, index in enumerate(active_index_list): + if index != int(spec_index_list[nr]): + err = True + if err: + raise ValueError( + f" For ministep: {ministep.name()} and " + f"parameter node: {node_name}:\n" + "Mismatch between specified active parameters " + f"and active parameters in the ministep.\n" + f"Specified: {spec_index_list}\n" + f"In ministep: {active_index_list}\n" + ) + + +def verify_ministep_active_obs(corr_spec_list, ert): + # pylint: disable=R1702 + """ + Script to verify that the local config matches the specified user config for + active observations. + Reports mismatch if found and silent if OK. + Used for test purpose. + """ + print("\nVerify ministep setup for active observations:") + facade = LibresFacade(ert) + ert_obs = facade.get_observations() + ert_local_config = ert.getLocalConfig() + + updatestep = ert_local_config.getUpdatestep() + for ministep in updatestep: + print(f"Ministep: {ministep.name()}") + # User specification + corr_spec = get_corr_group_spec(corr_spec_list, ministep.name()) + obs_dict = Parameters.from_list(corr_spec.obs_group.result_items).to_dict() + + # Data from local config, only one obs group in a ministep here. 
+ local_obs_data = ministep.getLocalObsData() + for obs_node in local_obs_data: + key = obs_node.key() + impl_type = facade.get_impl_type_name_for_obs_key(key) + if impl_type == "GEN_OBS": + active_list_obj = obs_node.getActiveList() + if active_list_obj.getMode() == ActiveMode.PARTLY_ACTIVE: + obs_vector = ert_obs[key] + # Always 1 timestep for a GEN_OBS + timestep = obs_vector.activeStep() + genobs_node = obs_vector.getNode(timestep) + data_size = genobs_node.getSize() + active_list_obj = obs_node.getActiveList() + active_index_list = active_list_obj.getActiveIndexList() + active_index_list.sort() + + # From user specification + str_list = obs_dict[key] + spec_index_list = [int(str_list[i]) for i in range(len(str_list))] + spec_index_list.sort() + err = False + for nr, index in enumerate(active_index_list): + if index != spec_index_list[nr]: + err = True + if err: + raise ValueError( + f" For ministep: {ministep.name()} and " + f"observation node: {key}:\n" + "Mismatch between specified active observations and " + "active observations defined in the ministep.\n" + f"Specified: {spec_index_list}\n" + f"In ministep: {active_index_list}\n" + ) + print(f" Obs node: {key}") + print(f" Full size of obs node: {data_size}") + print(f" Active obs indices (user specified): {spec_index_list}") + print(f" Active obs indices (ministep): {active_index_list}") + + def clear_correlations(ert): local_config = ert.getLocalConfig() local_config.clear() @@ -856,3 +1048,31 @@ def write_qc_parameter( grid.write_grdecl(scaling_kw, file) # Increase parameter number to define unique parameter name cls.scaling_param_number = cls.scaling_param_number + 1 + + +def get_obs_from_ert(ert, expand_gen_obs_max_size): + facade = LibresFacade(ert) + ert_obs = facade.get_observations() + obs_keys = [] + if expand_gen_obs_max_size == 0: + obs_keys = [facade.get_observation_key(nr) for nr, _ in enumerate(ert_obs)] + return obs_keys + + for nr, _ in enumerate(ert_obs): + key = facade.get_observation_key(nr) + impl_type = facade.get_impl_type_name_for_obs_key(key) + if impl_type == "GEN_OBS": + obs_vector = ert_obs[key] + timestep = obs_vector.activeStep() + obs_node = obs_vector.getNode(timestep) + data_size = obs_node.getSize() + if data_size <= expand_gen_obs_max_size: + obs_key_with_index_list = [ + key + ":" + str(item) for item in range(data_size) + ] + obs_keys.extend(obs_key_with_index_list) + else: + obs_keys.append(key) + else: + obs_keys.append(key) + return obs_keys diff --git a/semeio/workflows/localisation/localisation_config.py b/semeio/workflows/localisation/localisation_config.py index 5e9c7e734..e9078403c 100644 --- a/semeio/workflows/localisation/localisation_config.py +++ b/semeio/workflows/localisation/localisation_config.py @@ -297,6 +297,28 @@ def validate_surface_scale(cls, value): ) +class MaxGenObsSize(PydanticBaseModel): + """ + max_gen_obs_size: Integer >=0. Default: 0 + If it is > 0, it defines that all GEN_OBS observations is + expanded into the form nodename:index. The user + must specify GEN_OBS type observations in + the form nodename:index or nodename:* if + all observations for a GEN_OBS node is used. + The max_gen_obs_size value is a threshold value. + If a GEN_OBS node has more observations than + max_gen_obs_size specified by the user, + the obs node is not expanded and the + user also must specify the obs node only by its + nodename, not in expanded form. 
Typical use of this is + to let nodes containing moderate number of observations + be expanded, while nodes having large number of + observations are not expanded. + """ + + max_gen_obs_size: Optional[conint(ge=0)] = 0 + + class LocalisationConfig(BaseModel): """ observations: A list of observations from ERT in format nodename @@ -309,6 +331,15 @@ class LocalisationConfig(BaseModel): log_level: Integer defining how much log output to write to screen write_scaling_factors: Turn on writing calculated scaling parameters to file. Possible values: True/False. Default: False + max_gen_obs_size: Integer defining max size for a GEN_OBS node to + be expanded in the form nodename:index. + If the observation node of type GEN_OBS has more + observations than this number, it can only be specified with + node name which then represents the whole set of + observations for the node. + Possible values: Integers >= 0 + Default: 0 which means that GEN_OBS nodes are specified + with node name only. """ observations: List[str] @@ -316,6 +347,8 @@ class LocalisationConfig(BaseModel): correlations: List[CorrelationConfig] log_level: Optional[conint(ge=0, le=5)] = 1 write_scaling_factors: Optional[bool] = False + verify_active: Optional[bool] = False + max_gen_obs_size: Optional[conint(ge=0)] = 0 @validator("log_level") def validate_log_level(cls, level): @@ -348,3 +381,9 @@ def _check_specification(items_to_add, items_to_remove, valid_items): added_items = added_items.difference(removed_items) added_items = list(added_items) return sorted(added_items) + + +def get_max_gen_obs_size_for_expansion(config_dict): + tmp_config = MaxGenObsSize(**config_dict) + value = tmp_config.max_gen_obs_size + return value diff --git a/tests/jobs/localisation/test_configs/test_config.py b/tests/jobs/localisation/test_configs/test_config.py index 9d92b831f..56045ffbe 100644 --- a/tests/jobs/localisation/test_configs/test_config.py +++ b/tests/jobs/localisation/test_configs/test_config.py @@ -12,6 +12,15 @@ ERT_OBS = ["OBS1", "OBS2", "OBS11", "OBS22", "OBS12", "OBS13", "OBS14", "OBS3"] +ERT_GEN_OBS = [ + "GENOBSA:0", + "GENOBSA:1", + "GENOBSA:2", + "GENOBSB:0", + "GENOBSB:1", + "GENOBSC:0", +] + ERT_PARAM = [ "PARAM_NODE1:PARAM1", "PARAM_NODE1:PARAM2", @@ -141,6 +150,52 @@ def test_simple_config(param_group_add, expected): assert sorted(conf.correlations[0].param_group.result_items) == sorted(expected) +@pytest.mark.parametrize( + "obs_group_add, obs_group_remove, expected", + [ + ( + "GENOBS*", + [], + [ + "GENOBSA:0", + "GENOBSA:1", + "GENOBSA:2", + "GENOBSB:0", + "GENOBSB:1", + "GENOBSC:0", + ], + ), + ( + ["GENOBSB:*"], + ["GENOBSB:0"], + ["GENOBSB:1"], + ), + ( + ["*"], + ["*B:0"], + ["GENOBSA:0", "GENOBSA:1", "GENOBSA:2", "GENOBSB:1", "GENOBSC:0"], + ), + ], +) +def test_gen_obs_config(obs_group_add, obs_group_remove, expected): + data = { + "log_level": 2, + "max_gen_obs_size": 10, + "correlations": [ + { + "name": "some_name", + "obs_group": { + "add": obs_group_add, + "remove": obs_group_remove, + }, + "param_group": {"add": ["PARAM_NODE1:*"]}, + } + ], + } + conf = LocalisationConfig(observations=ERT_GEN_OBS, parameters=ERT_PARAM, **data) + assert sorted(conf.correlations[0].obs_group.result_items) == sorted(expected) + + @pytest.mark.parametrize( "obs_group_add, param_group_add, param_group_remove, expected_error", [ @@ -716,3 +771,55 @@ def test_missing_keyword_errors_method_gaussian_decay(): } with pytest.raises(ValueError, match=expected_error): LocalisationConfig(observations=["OBS1"], parameters=["PARAM_NODE1"], 
**data) + + +def test_missing_param(): + expected_error = "correlations -> 0 -> param_group -> result_items" + data = { + "log_level": 2, + "correlations": [ + { + "name": "CORR", + "obs_group": { + "add": ["OBS1"], + }, + "param_group": { + "add": ["*"], + "remove": ["*"], + }, + "ref_point": [250, 250], + "field_scale": { + "method": "gaussian_decay", + "main_range": 1000, + }, + }, + ], + } + with pytest.raises(ValueError, match=expected_error): + LocalisationConfig(observations=["OBS1"], parameters=["PARAM_NODE1"], **data) + + +def test_missing_obs(): + expected_error = "correlations -> 0 -> obs_group -> result_items" + data = { + "log_level": 2, + "correlations": [ + { + "name": "CORR", + "obs_group": { + "add": ["OBS1"], + "remove": ["OBS1"], + }, + "param_group": { + "add": ["*"], + }, + "ref_point": [250, 250], + "field_scale": { + "method": "gaussian_decay", + "main_range": 1000, + }, + }, + ], + } + with pytest.raises(ValueError, match=expected_error): + LocalisationConfig(observations=["OBS1"], parameters=["PARAM_NODE1"], **data) diff --git a/tests/jobs/localisation/test_integration.py b/tests/jobs/localisation/test_integration.py index 21f061ab0..34c1243ab 100644 --- a/tests/jobs/localisation/test_integration.py +++ b/tests/jobs/localisation/test_integration.py @@ -2,6 +2,7 @@ import yaml import pytest from res.enkf import EnKFMain, ResConfig + from semeio.workflows.localisation.local_config_script import LocalisationConfigJob from xtgeo.surface.regular_surface import RegularSurface @@ -24,6 +25,7 @@ def test_localisation(setup_ert, obs_group_add, param_group_add, expected): ert = EnKFMain(setup_ert) config = { "log_level": 4, + "verify_active": True, "correlations": [ { "name": "CORR1", @@ -90,6 +92,79 @@ def test_localisation(setup_ert, obs_group_add, param_group_add, expected): assert result == expected_result +def test_localisation_gen_kw(setup_ert): + ert = EnKFMain(setup_ert, verbose=True) + config = { + "log_level": 4, + "verify_active": True, + "max_gen_obs_size": 1000, + "correlations": [ + { + "name": "CORR12", + "obs_group": {"add": ["WPR_DIFF_1:0", "WPR_DIFF_1:3"]}, + "param_group": { + "add": [ + "SNAKE_OIL_PARAM:OP1_PERSISTENCE", + "SNAKE_OIL_PARAM:OP1_OCTAVES", + ], + }, + }, + { + "name": "CORR3", + "obs_group": {"add": "WPR_DIFF_1:2"}, + "param_group": { + "add": "SNAKE_OIL_PARAM:OP1_DIVERGENCE_SCALE", + }, + }, + { + "name": "CORR4", + "obs_group": { + "add": "*", + "remove": ["WPR_DIFF_1:1", "WPR_DIFF_1:0"], + }, + "param_group": { + "add": "SNAKE_OIL_PARAM:OP1_OFFSET", + }, + }, + { + "name": "CORR5", + "obs_group": {"add": "*"}, + "param_group": { + "add": "SNAKE_OIL_PARAM:OP2_PERSISTENCE", + }, + }, + { + "name": "CORR6", + "obs_group": {"add": "*"}, + "param_group": { + "add": "SNAKE_OIL_PARAM:OP2_OCTAVES", + }, + }, + { + "name": "CORR789", + "obs_group": {"add": "*"}, + "param_group": { + "add": [ + "SNAKE_OIL_PARAM:OP2_DIVERGENCE_SCALE", + "SNAKE_OIL_PARAM:OP2_OFFSET", + "SNAKE_OIL_PARAM:BPR_555_PERSISTENCE", + ], + }, + }, + { + "name": "CORR10", + "obs_group": {"add": "*"}, + "param_group": { + "add": "SNAKE_OIL_PARAM:BPR_138_PERSISTENCE", + }, + }, + ], + } + with open("local_config.yaml", "w", encoding="utf-8") as fout: + yaml.dump(config, fout) + LocalisationConfigJob(ert).run("local_config.yaml") + + # This test does not work properly since it is run before initial ensemble is # created and in that case the number of parameters attached to a GEN_PARAM node # is 0. 
@@ -113,6 +188,7 @@ def test_localisation_gen_param( ert = EnKFMain(res_config) config = { "log_level": 2, + "verify_active": True, "correlations": [ { "name": "CORR1", @@ -171,6 +247,7 @@ def test_localisation_surf( ert = EnKFMain(res_config) config = { "log_level": 3, + "verify_active": True, "correlations": [ { "name": "CORR1", @@ -226,7 +303,6 @@ def test_localisation_field1( values = np.zeros((nx, ny, nz), dtype=np.float32) property_field.values = values + 0.1 * n filename = pname + "_" + str(n) + ".roff" - print(f"Write file: {filename}") property_field.to_file(filename, fformat="roff", name=pname) fout.write( @@ -240,6 +316,7 @@ def test_localisation_field1( config = { "log_level": 3, "write_scaling_factors": True, + "verify_active": True, "correlations": [ { "name": "CORR1", @@ -250,7 +327,7 @@ def test_localisation_field1( "add": ["G1", "G2"], }, "field_scale": { - "method": "gaussian_decay", + "method": "exponential_decay", "main_range": 1700, "perp_range": 850, "azimuth": 200, @@ -327,7 +404,6 @@ def create_box_grid_with_inactive_and_active_cells( if has_inactive_values: grid.inactivate_outside(polygon, force_close=True) - print(f" Write file: {output_grid_file}") grid.to_file(output_grid_file, fformat="egrid") return grid @@ -368,7 +444,6 @@ def create_region_parameter(filename, grid): else: values[i, j, k] = 4 region_param.values = values - print(f"Write file: {filename}") region_param.to_file(filename, fformat="grdecl", name=region_param_name) @@ -397,9 +472,8 @@ def create_field_and_scaling_param_and_update_poly_ert( values = np.zeros((nx, ny, nz), dtype=np.float32) property_field.values = values + 0.1 * n filename = property_name + "_" + str(n) + ".roff" - print(f"Write file: {filename}") property_field.to_file(filename, fformat="roff", name=property_name) - print(f"Write file: {scaling_filename}\n") + scaling_field.to_file(scaling_filename, fformat="grdecl", name=scaling_name) fout.write( @@ -431,6 +505,7 @@ def test_localisation_field2(setup_poly_ert): config = { "log_level": 3, "write_scaling_factors": True, + "verify_active": True, "correlations": [ { "name": "CORR_GAUSSIAN", @@ -518,3 +593,123 @@ def test_localisation_field2(setup_poly_ert): with open("local_config.yaml", "w") as fout: yaml.dump(config, fout) LocalisationConfigJob(ert).run("local_config.yaml") + + +def test_localisation_gen_obs( + setup_poly_ert, +): + res_config = ResConfig("poly.ert") + ert = EnKFMain(res_config) + config = { + "log_level": 2, + "max_gen_obs_size": 1000, + "verify_active": True, + "correlations": [ + { + "name": "CORR1", + "obs_group": { + "add": ["POLY_OBS:*"], + }, + "param_group": { + "add": ["*"], + }, + }, + ], + } + with open("local_config_gen_obs.yaml", "w", encoding="utf-8") as fout: + yaml.dump(config, fout) + LocalisationConfigJob(ert).run("local_config_gen_obs.yaml") + expected = {} + expected["CORR1"] = [0, 1, 2, 3, 4] + + ert_local_config = ert.getLocalConfig() + updatestep = ert_local_config.getUpdatestep() + active_indices = {} + for ministep in updatestep: + local_obs_data = ministep.getLocalObsData() + for obs_node in local_obs_data: + active_list_obj = obs_node.getActiveList() + active_indices_list = active_list_obj.getActiveIndexList() + active_indices_list.sort() + active_indices[ministep.name()] = active_indices_list + + assert active_indices == expected + + +@pytest.mark.parametrize( + "obs_group_add1, obs_group_remove1, obs_group_add2, obs_group_remove2, expected", + [ + ( + ["POLY_OBS:0", "POLY_OBS:1", "POLY_OBS:2"], + [], + ["POLY_OBS:3", "POLY_OBS:4"], + 
["POLY_OBS:3"], + { + "CORR1": [0, 1, 2], + "CORR2": [4], + }, + ), + ( + ["POLY_OBS:*"], + ["POLY_OBS:1*", "POLY_OBS:3"], + ["POLY_OBS:3"], + ["POLY_OBS:1"], + { + "CORR1": [0, 2, 4], + "CORR2": [3], + }, + ), + ], +) +def test_localisation_gen_obs2( + setup_poly_ert, + obs_group_add1, + obs_group_remove1, + obs_group_add2, + obs_group_remove2, + expected, +): + res_config = ResConfig("poly.ert") + ert = EnKFMain(res_config) + config = { + "log_level": 2, + "max_gen_obs_size": 1000, + "verify_active": True, + "correlations": [ + { + "name": "CORR1", + "obs_group": { + "add": obs_group_add1, + "remove": obs_group_remove1, + }, + "param_group": { + "add": ["*"], + }, + }, + { + "name": "CORR2", + "obs_group": { + "add": obs_group_add2, + "remove": obs_group_remove2, + }, + "param_group": { + "add": ["*"], + }, + }, + ], + } + with open("local_config_gen_obs2.yaml", "w", encoding="utf-8") as fout: + yaml.dump(config, fout) + LocalisationConfigJob(ert).run("local_config_gen_obs2.yaml") + ert_local_config = ert.getLocalConfig() + updatestep = ert_local_config.getUpdatestep() + active_indices = {} + for ministep in updatestep: + local_obs_data = ministep.getLocalObsData() + for obs_node in local_obs_data: + active_list_obj = obs_node.getActiveList() + active_indices_list = active_list_obj.getActiveIndexList() + active_indices_list.sort() + active_indices[ministep.name()] = active_indices_list + + assert active_indices == expected