Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Localisation: Added option to specify GEN_OBS nodes in the form nodename:index in localisation config file #363

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
78 changes: 70 additions & 8 deletions semeio/workflows/localisation/local_config_script.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
from ert_shared.libres_facade import LibresFacade
from ert_shared.plugins.plugin_manager import hook_implementation

import semeio.workflows.localisation.local_script_lib as local
from semeio.communication import SemeioScript
from semeio.workflows.localisation.localisation_config import LocalisationConfig
from semeio.workflows.localisation.localisation_config import (
LocalisationConfig,
get_max_gen_obs_size_for_expansion,
)


class LocalisationConfigJob(SemeioScript):
def run(self, *args):
ert = self.ert()
facade = LibresFacade(self.ert())

# Clear all correlations
local.clear_correlations(ert)

# Read yml file with specifications
config_dict = local.read_localisation_config(args)

# Get all observations from ert instance
obs_keys = [
facade.get_observation_key(nr)
for nr, _ in enumerate(facade.get_observations())
]
expand_gen_obs_max_size = get_max_gen_obs_size_for_expansion(config_dict)
obs_keys = local.get_obs_from_ert(ert, expand_gen_obs_max_size)

ert_parameters = local.get_param_from_ert(ert.ensembleConfig())

Expand All @@ -35,6 +34,7 @@ def run(self, *args):
ert_parameters.to_dict(),
ert.getLocalConfig(),
ert.ensembleConfig(),
ert.getObservations(),
ert.eclConfig().getGrid(),
)

Expand Down Expand Up @@ -195,6 +195,51 @@ def run(self, *args):
scalingfactors: [1.0, 0.5, 0.3]
smooth_ranges: [2,3]

Example 2:
------------
In this example the optional keyword **max_gen_obs_size** is specified.
The value 1000 means that all observation nodes of type GEN_OBS having at most
1000 observations are specified in the form::

nodename:index

where **index** is an integer from 0 up to the number of observations in the
node minus one (so at most 999 here).
All GEN_OBS nodes with more than 1000 observations
are specified in the form nodename only. Individual observations cannot be
specified for GEN_OBS nodes of arbitrary size for performance reasons, e.g. when
GEN_OBS nodes containing seismic data are used.

The first example below (2A) specifies all observations by::

GENOBS_NODE:*

The second example (2B) has selected a few observations from the
GENOBS_NODE::

["GENOBS_NODE:0","GENOBS_NODE:3","GENOBS_NODE:55"]

Example 2A::

max_gen_obs_size: 1000
log_level: 2
correlations:
- name: CORR1
obs_group:
add: ["GENOBS_NODE:*"]
param_group:
add: ["PARAM_NODE:*"]

Example 2B::

max_gen_obs_size: 100
log_level: 2
correlations:
- name: CORR1
obs_group:
add: ["GENOBS_NODE:0","GENOBS_NODE:3","GENOBS_NODE:55"]
param_group:
add: ["PARAM_NODE:*"]


Keywords
-----------
Expand All @@ -212,6 +257,16 @@ def run(self, *args):
and make it possible to visualise them. Is only relevant when using
**field_scale** with methods calculating the scaling factors.

:max_gen_obs_size:
Specify the max size of GEN_OBS type of observation nodes that
can specify individual observations. Individual observations are specified
by nodename:index where index is the observation number in the
observation file associated with the GEN_OBS type node.
The keyword is optional. If it is not specified, or is specified with value 0,
observations of type GEN_OBS are specified by
nodename only. Individual observations cannot be specified in this case,
which means that all observations in the GEN_OBS node are used.

:correlations:
List of specifications of correlation groups. A correlation group
specify a set of observations and a set of model parameters.
Expand Down Expand Up @@ -296,6 +351,13 @@ def run(self, *args):
The nodename represents all field values for all grid cells in the whole
3D or 2D grid the field belongs to.

For observations specified with GENERAL_OBSERVATION keyword in ERT config file,
it is possible to specify the observations by either *node_name*
or *node_name:index*. The default is to specify by *node_name* only, which
includes all observations from this ERT identifier.
The alternative option is to use the keyword **max_gen_obs_size**
described above and specify individual observations by *node_name:index*.

:remove:
For details see the keyword **add:**. The main purpose of **remove** is to
have a quick and easy way to specify all parameters or observations
Expand Down
85 changes: 69 additions & 16 deletions semeio/workflows/localisation/local_script_lib.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# pylint: disable=W0201
# pylint: disable=C0302
import math
import yaml
import cwrap
Expand All @@ -15,13 +16,16 @@
from ecl.eclfile import Ecl3DKW
from ecl.ecl_type import EclDataType
from ecl.grid.ecl_grid import EclGrid

from res.enkf.enums.ert_impl_type_enum import ErtImplType
from res.enkf.enums.enkf_var_type_enum import EnkfVarType
from res.enkf import EnkfObservationImplementationType

from semeio.workflows.localisation.localisation_debug_settings import (
LogLevel,
debug_print,
)
from ert_shared.libres_facade import LibresFacade


@dataclass
Expand Down Expand Up @@ -170,14 +174,14 @@ def active_index_for_parameter(node_name, param_name, ert_param_dict):


def activate_gen_kw_param(
model_param_group, node_name, param_list, ert_param_dict, log_level=LogLevel.OFF
ministep, node_name, param_list, ert_param_dict, log_level=LogLevel.OFF
):
"""
Activate the selected parameters for the specified node.
The param_list contains the list of parameters defined in GEN_KW
for this node to be activated.
"""
active_param_list = model_param_group.getActiveList(node_name)
active_param_list = ministep.getActiveList(node_name)
debug_print("Set active parameters", LogLevel.LEVEL2, log_level)
for param_name in param_list:
index = active_index_for_parameter(node_name, param_name, ert_param_dict)
Expand All @@ -191,15 +195,15 @@ def activate_gen_kw_param(


def activate_gen_param(
model_param_group, node_name, param_list, data_size, log_level=LogLevel.OFF
ministep, node_name, param_list, data_size, log_level=LogLevel.OFF
):
"""
Activate the selected parameters for the specified node.
The param_list contains a list of names that are integer numbers
for the parameter indices to be activated for parameters belonging
to the specified GEN_PARAM node.
"""
active_param_list = model_param_group.getActiveList(node_name)
active_param_list = ministep.getActiveList(node_name)
for param_name in param_list:
index = int(param_name)
if index < 0 or index >= data_size:
Expand Down Expand Up @@ -564,6 +568,7 @@ def add_ministeps(
ert_param_dict,
ert_local_config,
ert_ensemble_config,
ert_obs,
grid_for_field,
):
# pylint: disable-msg=too-many-branches
Expand All @@ -579,17 +584,17 @@ def add_ministeps(
)

for count, corr_spec in enumerate(user_config.correlations):

ministep_name = corr_spec.name
ministep = ert_local_config.createMinistep(ministep_name)
debug_print(
f"Define ministep: {ministep_name}", LogLevel.LEVEL1, user_config.log_level
)

param_group_name = ministep_name + "_param_group"
obs_group_name = ministep_name + "_obs_group"
obs_group = ert_local_config.createObsdata(obs_group_name)

obs_list = corr_spec.obs_group.result_items
obs_dict = Parameters.from_list(corr_spec.obs_group.result_items).to_dict()
param_dict = Parameters.from_list(corr_spec.param_group.result_items).to_dict()

# Setup model parameter group
Expand Down Expand Up @@ -780,20 +785,40 @@ def add_ministeps(
user_config.log_level,
)

# Setup observation group
for obs_name in obs_list:
# Setup observation group. For GEN_OBS type
# the observation specification can be of the form obs_node_name:index
# if individual observations from a GEN_OBS node is chosen or
# only obs_node_name if all observations in GEN_OBS is active.
obs_type = EnkfObservationImplementationType.GEN_OBS
key_list_gen_obs = ert_obs.getTypedKeylist(obs_type)
for obs_node_name, obs_index_list in obs_dict.items():
obs_group.addNode(obs_node_name)
debug_print(
f"Add obs node: {obs_name}", LogLevel.LEVEL2, user_config.log_level
f"Add obs node: {obs_node_name}", LogLevel.LEVEL2, user_config.log_level
)
obs_group.addNode(obs_name)
if obs_node_name in key_list_gen_obs:
# An observation node of type GEN_OBS
if len(obs_index_list) > 0:
active_obs_list = obs_group.getActiveList(obs_node_name)
if len(obs_index_list) > 50:
oddvarlia marked this conversation as resolved.
Show resolved Hide resolved
debug_print(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would still argue that this should be a documentation issue instead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean that the user should set the number, hardcoded to 50, or that I should drop the debug print out here? I can of course write a sentence in the user doc, but I don't think the users care much about this as long as the output is not too large. Maybe I have misunderstood something?

f"More than 50 active obs for {obs_node_name}",
LogLevel.LEVEL3,
user_config.log_level,
)

# Setup ministep
debug_print(
f"Attach {param_group_name} to ministep {ministep_name}",
LogLevel.LEVEL1,
user_config.log_level,
)
for string_index in obs_index_list:
index = int(string_index)
if len(obs_index_list) <= 50:
oddvarlia marked this conversation as resolved.
Show resolved Hide resolved
debug_print(
f"Active obs for {obs_node_name} index: {index}",
LogLevel.LEVEL3,
user_config.log_level,
)

active_obs_list.addActiveIndex(index)

# Setup ministep
debug_print(
f"Attach {obs_group_name} to ministep {ministep_name}",
LogLevel.LEVEL1,
Expand Down Expand Up @@ -973,3 +998,31 @@ def write_qc_parameter(
grid.write_grdecl(scaling_kw, file)
# Increase parameter number to define unique parameter name
cls.scaling_param_number = cls.scaling_param_number + 1


def get_obs_from_ert(ert, expand_gen_obs_max_size):
    """
    Return the list of observation keys known to the ERT instance.

    Observations of type GEN_OBS may be expanded into one entry per
    individual observation, written as ``nodename:index`` with index running
    from 0 to size - 1. A GEN_OBS node is only expanded when its size is at
    most ``expand_gen_obs_max_size``; larger GEN_OBS nodes, and all
    observations of other types, are returned by node name only.

    The threshold exists to avoid building lists with one entry per
    observation for very large GEN_OBS nodes (e.g. seismic data with
    100.000's of values), while still allowing individual observations to be
    selected from GEN_OBS nodes of moderate size.

    :param ert: The ERT instance, passed on to ``LibresFacade``.
    :param expand_gen_obs_max_size: Max size of a GEN_OBS node to expand.
        A value of 0 or None disables expansion entirely.
    :return: List of observation keys, in the order reported by the facade.
    """
    # NOTE(review): reviewers asked whether the special casing of GEN_OBS can
    # be avoided and, if it cannot, that it is covered by tests.
    facade = LibresFacade(ert)
    ert_obs = facade.get_observations()
    all_keys = [facade.get_observation_key(nr) for nr, _ in enumerate(ert_obs)]

    # The config field is Optional, so None must be handled like 0 here;
    # otherwise the size comparison below would raise TypeError.
    if not expand_gen_obs_max_size:
        return all_keys

    obs_keys = []
    for key in all_keys:
        if facade.get_impl_type_name_for_obs_key(key) != "GEN_OBS":
            obs_keys.append(key)
            continue

        obs_vector = ert_obs[key]
        obs_node = obs_vector.getNode(obs_vector.activeStep())
        data_size = obs_node.getSize()
        if data_size <= expand_gen_obs_max_size:
            obs_keys.extend(f"{key}:{index}" for index in range(data_size))
        else:
            # Too large to expand; refer to the whole node by name.
            obs_keys.append(key)
    return obs_keys
38 changes: 38 additions & 0 deletions semeio/workflows/localisation/localisation_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,28 @@ def validate_surface_scale(cls, value):
)


class MaxGenObsSize(PydanticBaseModel):
    """
    Model holding only the ``max_gen_obs_size`` keyword of the config.

    max_gen_obs_size: Integer >= 0. Default: 0
        Threshold for expanding GEN_OBS observation nodes into the form
        nodename:index.

        If the value is 0, no GEN_OBS node is expanded and all of them
        are specified by nodename only.

        If the value is > 0, every GEN_OBS node with at most this many
        observations is expanded. The user must then specify such a node
        as nodename:index, or as nodename:* when all observations of the
        node are used.

        A GEN_OBS node with more observations than the threshold is not
        expanded, and the user must specify it by nodename only. The
        typical use is to expand nodes with a moderate number of
        observations while leaving nodes with a large number of
        observations unexpanded.
    """

    max_gen_obs_size: Optional[conint(ge=0)] = 0


class LocalisationConfig(BaseModel):
"""
observations: A list of observations from ERT in format nodename
Expand All @@ -367,13 +389,23 @@ class LocalisationConfig(BaseModel):
log_level: Integer defining how much log output to write to screen
write_scaling_factors: Turn on writing calculated scaling parameters to file.
Possible values: True/False. Default: False
max_gen_obs_size: Integer defining max size for a GEN_OBS node to
be expanded in the form nodename:index.
If the observation node of type GEN_OBS has more
observations than this number, it can only be specified with
node name which then represents the whole set of
observations for the node.
Possible values: Integers >= 0
Default: 0 which means that GEN_OBS nodes are specified
with node name only.
"""

observations: List[str]
parameters: List[str]
correlations: List[CorrelationConfig]
log_level: Optional[conint(ge=0, le=5)] = 1
write_scaling_factors: Optional[bool] = False
max_gen_obs_size: Optional[conint(ge=0)] = 0

@validator("log_level")
def validate_log_level(cls, level):
Expand Down Expand Up @@ -406,3 +438,9 @@ def _check_specification(items_to_add, items_to_remove, valid_items):
added_items = added_items.difference(removed_items)
added_items = list(added_items)
return sorted(added_items)


def get_max_gen_obs_size_for_expansion(config_dict):
    """
    Return the ``max_gen_obs_size`` setting of a localisation config dict.

    The dictionary is passed to ``MaxGenObsSize`` so that the value is
    validated before the rest of the configuration is processed.

    :param config_dict: The localisation configuration as a dictionary.
    :return: The threshold as an int; 0 when the keyword is absent or None.
    """
    max_size = MaxGenObsSize(**config_dict).max_gen_obs_size
    # The field is declared Optional, so None is an accepted value. Map it to
    # the default 0 ("no expansion") so callers can always compare against
    # an int.
    return 0 if max_size is None else max_size
15 changes: 15 additions & 0 deletions tests/jobs/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,18 @@ def setup_poly_ert(tmpdir, test_data_root):

yield
os.chdir(cwd)


@pytest.fixture()
def setup_poly_gen_param_ert(tmpdir, test_data_root):
    """
    Provide a ``ResConfig`` for the ``poly_gen_param`` test case.

    The test data directory is copied into ``tmpdir`` as ``test_data`` and
    made the current working directory before ``poly.ert`` is loaded. The
    original working directory is restored afterwards, also when the setup
    itself fails.
    """
    cwd = os.getcwd()
    tmpdir.chdir()
    try:
        test_data_dir = os.path.join(test_data_root, "poly_gen_param")
        shutil.copytree(test_data_dir, "test_data")
        os.chdir("test_data")

        res_config = ResConfig("poly.ert")

        yield res_config
    finally:
        # Runs on normal teardown and when copying/loading raised, so a
        # failing setup does not leave later tests in the temp directory.
        os.chdir(cwd)
Loading