Skip to content

Commit

Permalink
Improve plotting performance
Browse files Browse the repository at this point in the history
  • Loading branch information
frode-aarstad committed Jan 18, 2024
1 parent 8a6fc5f commit 91ea363
Show file tree
Hide file tree
Showing 12 changed files with 106 additions and 154 deletions.
6 changes: 2 additions & 4 deletions src/ert/dark_storage/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ def get_response_names(ensemble: EnsembleReader) -> List[str]:
def data_for_key(
ensemble: EnsembleReader,
key: str,
realization_index: Optional[int] = None,
) -> pd.DataFrame:
"""Returns a pandas DataFrame with the datapoints for a given key for a
given case. The row index is the realization number, and the columns are an
Expand All @@ -39,10 +38,10 @@ def data_for_key(
if key.startswith("LOG10_"):
key = key[6:]
if key in ensemble.get_summary_keyset():
data = ensemble.load_all_summary_data([key], realization_index)
data = ensemble.load_summary(key)
data = data[key].unstack(level="Date")
elif key in ensemble.get_gen_kw_keyset():
data = ensemble.load_all_gen_kw_data(key.split(":")[0], realization_index)
data = ensemble.load_all_gen_kw_data(key.split(":")[0])
if data.empty:
return pd.DataFrame()
data = data[key].to_frame().dropna()
Expand All @@ -56,7 +55,6 @@ def data_for_key(
data = ensemble.load_gen_data(
key,
report_step,
realization_index,
).T
except (ValueError, KeyError):
return pd.DataFrame()
Expand Down
3 changes: 0 additions & 3 deletions src/ert/dark_storage/endpoints/ensembles.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from fastapi import APIRouter, Body, Depends

from ert.dark_storage import json_schema as js
from ert.dark_storage.common import ensemble_parameter_names, get_response_names
from ert.dark_storage.enkf import get_storage
from ert.storage import StorageAccessor

Expand Down Expand Up @@ -36,8 +35,6 @@ def get_ensemble(
experiment_id=ensemble.experiment_id,
userdata={"name": ensemble.name},
size=ensemble.ensemble_size,
parameter_names=ensemble_parameter_names(storage, ensemble_id),
response_names=get_response_names(ensemble),
child_ensemble_ids=[],
)

Expand Down
2 changes: 0 additions & 2 deletions src/ert/dark_storage/endpoints/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,6 @@ def get_experiment_ensembles(
experiment_id=ens.experiment_id,
userdata={"name": ens.name},
size=ens.ensemble_size,
parameter_names=[],
response_names=[],
child_ensemble_ids=[],
)
for ens in db.get_experiment(experiment_id).ensembles
Expand Down
22 changes: 7 additions & 15 deletions src/ert/dark_storage/endpoints/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from typing import Any, Dict, List, Mapping, Optional, Union
from uuid import UUID, uuid4

import pandas as pd
from fastapi import APIRouter, Body, Depends, File, Header, Request, UploadFile, status
from fastapi.responses import Response
from typing_extensions import Annotated
Expand Down Expand Up @@ -134,10 +133,7 @@ async def post_record_observations(
async def get_record_observations(
*,
res: LibresFacade = DEFAULT_LIBRESFACADE,
db: StorageReader = DEFAULT_STORAGE,
ensemble_id: UUID,
name: str,
realization_index: Optional[int] = None,
) -> List[js.ObservationOut]:
obs_keys = res.observation_keys(name)
obss = observations_for_obs_keys(res, obs_keys)
Expand Down Expand Up @@ -174,15 +170,8 @@ async def get_ensemble_record(
name: str,
ensemble_id: UUID,
accept: Annotated[Union[str, None], Header()] = None,
realization_index: Optional[int] = None,
label: Optional[str] = None,
) -> Any:
dataframe = data_for_key(db.get_ensemble(ensemble_id), name, realization_index)
if realization_index is not None:
# dataframe.loc returns a Series, and when we reconstruct a DataFrame
# from a Series, it defaults to be oriented the wrong way, so we must
# transpose it
dataframe = pd.DataFrame(dataframe.loc[realization_index]).T
dataframe = data_for_key(db.get_ensemble(ensemble_id), name)

media_type = accept if accept is not None else "text/csv"
if media_type == "application/x-parquet":
Expand Down Expand Up @@ -249,15 +238,18 @@ def get_ensemble_responses(
ensemble_id: UUID,
) -> Mapping[str, js.RecordOut]:
response_map: Dict[str, js.RecordOut] = {}

ens = db.get_ensemble(ensemble_id)
name_dict = {}

for obs in res.get_observations():
name_dict[obs.observation_key] = obs.observation_type

for name in ens.get_summary_keyset():
obs_keys = res.observation_keys(name)
response_map[str(name)] = js.RecordOut(
id=UUID(int=0),
name=name,
userdata={"data_origin": "Summary"},
has_observations=len(obs_keys) != 0,
has_observations=name in name_dict,
)

for name in res.get_gen_data_keys():
Expand Down
15 changes: 1 addition & 14 deletions src/ert/dark_storage/json_schema/ensemble.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,18 @@
from typing import Any, List, Mapping, Optional
from uuid import UUID

from pydantic import BaseModel, Field, model_validator
from typing_extensions import Self
from pydantic import BaseModel, Field


class _Ensemble(BaseModel):
size: int
parameter_names: List[str]
response_names: List[str]
active_realizations: List[int] = []


class EnsembleIn(_Ensemble):
update_id: Optional[UUID] = None
userdata: Mapping[str, Any] = {}

@model_validator(mode="after")
def check_names_no_overlap(self) -> Self:
"""
Verify that `parameter_names` and `response_names` don't overlap. Ie, no
record can be both a parameter and a response.
"""
if not set(self.parameter_names).isdisjoint(set(self.response_names)):
raise ValueError("parameters and responses cannot have a name in common")
return self


class EnsembleOut(_Ensemble):
id: UUID
Expand Down
32 changes: 13 additions & 19 deletions src/ert/gui/tools/plot/plot_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,23 +68,6 @@ def _check_response(response: requests.Response):
f"{response.text} from url: {response.url}."
)

def _get_experiments(self) -> dict:
with StorageService.session() as client:
response: requests.Response = client.get(
"/experiments", timeout=self._timeout
)
self._check_response(response)
return response.json()

def _get_ensembles(self, experiement_id) -> List:
with StorageService.session() as client:
response: requests.Response = client.get(
f"/experiments/{experiement_id}/ensembles", timeout=self._timeout
)
self._check_response(response)
response_json = response.json()
return response_json

def all_data_type_keys(self) -> List:
"""Returns a list of all the keys except observation keys.
Expand All @@ -94,9 +77,20 @@ def all_data_type_keys(self) -> List:
the key"""

all_keys = {}

with StorageService.session() as client:
for experiment in self._get_experiments():
for ensemble in self._get_ensembles(experiment["id"]):
response: requests.Response = client.get(
"/experiments", timeout=self._timeout
)
self._check_response(response)

for experiment in response.json():
response: requests.Response = client.get(
f"/experiments/{experiment['id']}/ensembles", timeout=self._timeout
)
self._check_response(response)

for ensemble in response.json():
response: requests.Response = client.get(
f"/ensembles/{ensemble['id']}/responses", timeout=self._timeout
)
Expand Down
7 changes: 4 additions & 3 deletions src/ert/gui/tools/plot/plot_window.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import time
from typing import List

from httpx import RequestError
Expand Down Expand Up @@ -42,12 +43,10 @@
class PlotWindow(QMainWindow):
def __init__(self, config_file, parent):
QMainWindow.__init__(self, parent)

t = time.perf_counter()
logger.info("PlotWindow __init__")

self.setMinimumWidth(850)
self.setMinimumHeight(650)

self.setWindowTitle(f"Plotting - {config_file}")
self.activateWindow()

Expand Down Expand Up @@ -109,6 +108,8 @@ def __init__(self, config_file, parent):
self._data_type_keys_widget.selectDefault()
self._updateCustomizer(current_plot_widget)

logger.info(f"PlotWindow __init__ done. time={time.perf_counter() -t}")

def currentPlotChanged(self):
key_def = self.getSelectedKey()
if key_def is None:
Expand Down
87 changes: 62 additions & 25 deletions src/ert/storage/local_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,24 @@ def get_realization_mask_without_parent_failure(self) -> npt.NDArray[np.bool_]:
)

def get_realization_mask_with_parameters(self) -> npt.NDArray[np.bool_]:
return np.array([self._get_parameter(i) for i in range(self.ensemble_size)])
return np.array(
[
self._all_parameters_exist_for_realization(i)
for i in range(self.ensemble_size)
]
)

def get_realization_mask_with_responses(self) -> npt.NDArray[np.bool_]:
return np.array([self._get_response(i) for i in range(self.ensemble_size)])
def get_realization_mask_with_responses(
self, key: Optional[str] = None
) -> npt.NDArray[np.bool_]:
return np.array(
[
self._all_responses_exist_for_realization(i, key)
for i in range(self.ensemble_size)
]
)

def _get_parameter(self, realization: int) -> bool:
def _all_parameters_exist_for_realization(self, realization: int) -> bool:
if not self.experiment.parameter_configuration:
return False
path = self.mount_point / f"realization-{realization}"
Expand All @@ -117,10 +129,16 @@ def _get_parameter(self, realization: int) -> bool:
for parameter in self.experiment.parameter_configuration
)

def _get_response(self, realization: int) -> bool:
def _all_responses_exist_for_realization(
self, realization: int, key: Optional[str] = None
) -> bool:
if not self.experiment.response_configuration:
return False
path = self.mount_point / f"realization-{realization}"

if key:
return (path / f"{key}.nc").exists()

return all(
(path / f"{response}.nc").exists()
for response in self._filter_response_configuration()
Expand Down Expand Up @@ -180,10 +198,11 @@ def realizations_initialized(self, realizations: List[int]) -> bool:

return all((responses[real] or parameters[real]) for real in realizations)

def get_realization_list_with_responses(self) -> List[int]:
return [
idx for idx, b in enumerate(self.get_realization_mask_with_responses()) if b
]
def get_realization_list_with_responses(
self, key: Optional[str] = None
) -> List[int]:
mask = self.get_realization_mask_with_responses(key)
return np.where(mask)[0].tolist()

def set_failure(
self,
Expand Down Expand Up @@ -221,9 +240,9 @@ def _find_state(realization: int) -> RealizationStorageState:
failure = self.get_failure(realization)
assert failure
return failure.type
if self._get_response(realization):
if self._all_responses_exist_for_realization(realization):
return RealizationStorageState.HAS_DATA
if self._get_parameter(realization):
if self._all_parameters_exist_for_realization(realization):
return RealizationStorageState.INITIALIZED
else:
return RealizationStorageState.UNDEFINED
Expand Down Expand Up @@ -253,20 +272,14 @@ def _get_gen_data_config(self, key: str) -> GenDataConfig:

@deprecated("Check the experiment for registered responses")
def get_gen_data_keyset(self) -> List[str]:
keylist = [
k
for k, v in self.experiment.response_info.items()
if "_ert_kind" in v and v["_ert_kind"] == "GenDataConfig"
]

gen_data_list = []
for key in keylist:
gen_data_config = self._get_gen_data_config(key)
if gen_data_config.report_steps is None:
gen_data_list.append(f"{key}@0")
else:
for report_step in gen_data_config.report_steps:
gen_data_list.append(f"{key}@{report_step}")
for k, v in self.experiment.response_configuration.items():
if isinstance(v, GenDataConfig):
if v.report_steps is None:
gen_data_list.append(f"{k}@0")
else:
for report_step in v.report_steps:
gen_data_list.append(f"{k}@{report_step}")
return sorted(gen_data_list, key=lambda k: k.lower())

@deprecated("Check the experiment for registered parameters")
Expand All @@ -293,7 +306,7 @@ def load_gen_data(
report_step: int,
realization_index: Optional[int] = None,
) -> pd.DataFrame:
realizations = self.get_realization_list_with_responses()
realizations = self.get_realization_list_with_responses(key)
if realization_index is not None:
if realization_index not in realizations:
raise IndexError(f"No such realization {realization_index}")
Expand Down Expand Up @@ -368,6 +381,29 @@ def load_responses(
assert isinstance(response, xr.Dataset)
return response

def load_responses_summary(self, key: str) -> xr.Dataset:
loaded = []
for realization in range(self.ensemble_size):
input_path = self.mount_point / f"realization-{realization}" / "summary.nc"
if input_path.exists():
ds = xr.open_dataset(input_path, engine="scipy")
ds = ds.query(name=f'name=="{key}"')
loaded.append(ds)
return xr.combine_nested(loaded, concat_dim="realization")

def load_summary(self, key: str) -> pd.DataFrame:
try:
df = self.load_responses_summary(key).to_dataframe()
except (ValueError, KeyError):
return pd.DataFrame()

df = df.unstack(level="name")
df.columns = [col[1] for col in df.columns.values]
df.index = df.index.rename(
{"time": "Date", "realization": "Realization"}
).reorder_levels(["Realization", "Date"])
return df

@deprecated("Use load_responses")
def load_all_summary_data(
self,
Expand All @@ -386,6 +422,7 @@ def load_all_summary_data(
df = self.load_responses("summary", tuple(realizations)).to_dataframe()
except (ValueError, KeyError):
return pd.DataFrame()

df = df.unstack(level="name")
df.columns = [col[1] for col in df.columns.values]
df.index = df.index.rename(
Expand Down
8 changes: 4 additions & 4 deletions tests/performance_tests/performance_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,9 @@ def dark_storage_app(monkeypatch):
folder = py.path.local(tempfile.mkdtemp())
make_poly_example(
folder,
"../../test-data/poly_template",
gen_data_count=34,
gen_data_entries=15,
"test-data/poly_template",
gen_data_count=3400,
gen_data_entries=150,
summary_data_entries=100,
reals=200,
summary_data_count=4000,
Expand All @@ -163,7 +163,7 @@ def dark_storage_app(monkeypatch):
sum_obs_every=10,
gen_obs_every=1,
parameter_entries=10,
parameter_count=8,
parameter_count=10,
update_steps=1,
)
print(folder)
Loading

0 comments on commit 91ea363

Please sign in to comment.