Skip to content

Commit 59a1e10

Browse files
authored
Metadata functionality in EnsembleTableProvider (#1135)
1 parent 7c35594 commit 59a1e10

File tree

7 files changed

+82
-2
lines changed

7 files changed

+82
-2
lines changed

.github/workflows/subsurface.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ jobs:
5555
pip install "scipy<1.9.3" # breaking change in scipy==1.9.3
5656
pip install "pytest<7.2.0"
5757
pip install "pytest-xdist<3.0"
58+
pip install "xtgeo<2.20.2"
5859
pip install .
5960
6061
# Testing against our latest release (including pre-releases)

tests/unit_tests/provider_tests/test_ensemble_table_provider.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pandas as pd
55

66
from webviz_subsurface._providers import (
7+
ColumnMetadata,
78
EnsembleTableProvider,
89
EnsembleTableProviderFactory,
910
)
@@ -58,6 +59,8 @@ def test_synthetic_get_column_data(testdata_folder: Path) -> None:
5859
assert df.shape == (4, 2)
5960
assert df.columns.tolist() == ["REAL", "STR"]
6061

62+
assert model.column_metadata("REAL") is None
63+
6164

6265
def test_create_from_aggregated_csv_file_smry_csv(
6366
testdata_folder: Path, tmp_path: Path
@@ -85,6 +88,10 @@ def test_create_from_aggregated_csv_file_smry_csv(
8588
assert valdf.columns[1] == "YEARS"
8689
assert valdf["REAL"].nunique() == 3
8790

91+
# No metadata in csv files
92+
meta: Optional[ColumnMetadata] = provider.column_metadata("FOPR")
93+
assert meta is None
94+
8895

8996
def test_create_from_per_realization_csv_file(
9097
testdata_folder: Path, tmp_path: Path
@@ -110,6 +117,10 @@ def test_create_from_per_realization_csv_file(
110117
assert valdf["CONIDX"].nunique() == 24
111118
assert sorted(valdf["CONIDX"].unique()) == list(range(1, 25))
112119

120+
# No metadata in csv files
121+
meta: Optional[ColumnMetadata] = provider.column_metadata("CONIDX")
122+
assert meta is None
123+
113124

114125
def test_create_from_per_realization_arrow_file(
115126
testdata_folder: Path, tmp_path: Path
@@ -126,6 +137,11 @@ def test_create_from_per_realization_arrow_file(
126137
assert "FOPT" in valdf.columns
127138
assert valdf["REAL"].nunique() == 100
128139

140+
# Test metadata
141+
meta: Optional[ColumnMetadata] = provider.column_metadata("FOPR")
142+
assert meta is not None
143+
assert meta.unit == "SM3/DAY"
144+
129145

130146
def test_create_from_per_realization_parameter_file(
131147
testdata_folder: Path, tmp_path: Path
@@ -140,6 +156,12 @@ def test_create_from_per_realization_parameter_file(
140156
assert "GLOBVAR:FAULT_SEAL_SCALING" in valdf.columns
141157
assert valdf["REAL"].nunique() == 100
142158

159+
# No metadata in parameter files
160+
meta: Optional[ColumnMetadata] = provider.column_metadata(
161+
"GLOBVAR:FAULT_SEAL_SCALING"
162+
)
163+
assert meta is None
164+
143165

144166
def test_create_provider_set_from_aggregated_csv_file(tmp_path: Path) -> None:
145167
"""This tests importing a csv file with an ensemble column with multiple
@@ -165,3 +187,7 @@ def test_create_provider_set_from_aggregated_csv_file(tmp_path: Path) -> None:
165187
"STOIIP_OIL",
166188
"SOURCE",
167189
}.issubset(set(provider.column_names()))
190+
191+
# No metadata in csv files
192+
meta: Optional[ColumnMetadata] = provider.column_metadata("ZONE")
193+
assert meta is None

webviz_subsurface/_providers/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
SurfaceServer,
2828
)
2929
from .ensemble_table_provider import (
30+
ColumnMetadata,
3031
EnsembleTableProvider,
3132
EnsembleTableProviderFactory,
3233
EnsembleTableProviderImplArrow,
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
from .ensemble_table_provider import EnsembleTableProvider
1+
from .ensemble_table_provider import ColumnMetadata, EnsembleTableProvider
22
from .ensemble_table_provider_factory import EnsembleTableProviderFactory
33
from .ensemble_table_provider_impl_arrow import EnsembleTableProviderImplArrow
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from typing import Optional
2+
3+
import pyarrow as pa
4+
5+
from .ensemble_table_provider import ColumnMetadata
6+
7+
8+
def create_column_metadata_from_field_meta(
9+
field: pa.Field,
10+
) -> Optional[ColumnMetadata]:
11+
"""Create ColumnMetadata from keywords stored in the field's metadata"""
12+
13+
meta_dict = field.metadata
14+
if not meta_dict:
15+
return None
16+
17+
try:
18+
unit_bytestr = meta_dict[b"unit"]
19+
except KeyError:
20+
return ColumnMetadata(unit=None)
21+
22+
return ColumnMetadata(
23+
unit=unit_bytestr.decode("ascii"),
24+
)

webviz_subsurface/_providers/ensemble_table_provider/ensemble_table_provider.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
import abc
2+
from dataclasses import dataclass
23
from typing import List, Optional, Sequence
34

45
import pandas as pd
56

67

8+
@dataclass(frozen=True)
9+
class ColumnMetadata:
10+
unit: Optional[str]
11+
12+
713
class EnsembleTableProvider(abc.ABC):
814
@abc.abstractmethod
915
def column_names(self) -> List[str]:
@@ -18,3 +24,12 @@ def get_column_data(
1824
self, column_names: Sequence[str], realizations: Optional[Sequence[int]] = None
1925
) -> pd.DataFrame:
2026
...
27+
28+
@abc.abstractmethod
29+
def column_metadata(self, column_name: str) -> Optional[ColumnMetadata]:
30+
"""Returns metadata for the specified column.
31+
32+
Returns None if no metadata is found for the column.
33+
Returns an empty ColumnMetadata object if the column has metadata, but
34+
none of it matches the fields defined in ColumnMetadata.
35+
"""

webviz_subsurface/_providers/ensemble_table_provider/ensemble_table_provider_impl_arrow.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
add_per_vector_min_max_to_table_schema_metadata,
1414
find_min_max_for_numeric_table_columns,
1515
)
16-
from .ensemble_table_provider import EnsembleTableProvider
16+
from ._field_metadata import create_column_metadata_from_field_meta
17+
from .ensemble_table_provider import ColumnMetadata, EnsembleTableProvider
1718

1819
# Since PyArrow's actual compute functions are not seen by pylint
1920
# pylint: disable=no-member
@@ -170,6 +171,13 @@ def from_backing_store(
170171

171172
return None
172173

174+
def _get_or_read_schema(self) -> pa.Schema:
175+
if self._cached_reader:
176+
return self._cached_reader.schema
177+
178+
source = pa.memory_map(self._arrow_file_name, "r")
179+
return pa.ipc.RecordBatchFileReader(source).schema
180+
173181
def column_names(self) -> List[str]:
174182
return self._column_names
175183

@@ -215,3 +223,8 @@ def get_column_data(
215223
)
216224

217225
return df
226+
227+
def column_metadata(self, column_name: str) -> Optional[ColumnMetadata]:
228+
schema = self._get_or_read_schema()
229+
field = schema.field(column_name)
230+
return create_column_metadata_from_field_meta(field)

0 commit comments

Comments
 (0)