Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Give warning if runpath disk space is close to full on ert startup #9193

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 39 additions & 1 deletion src/ert/config/model_config.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,25 @@
from __future__ import annotations

import contextlib
import logging
import os.path
import shutil
from datetime import datetime
from pathlib import Path
from typing import List, Optional, no_type_check

from pydantic import field_validator
from pydantic.dataclasses import dataclass

from .parsing import ConfigDict, ConfigKeys, ConfigValidationError, HistorySource
from ert.shared.status.utils import byte_with_unit

from .parsing import (
ConfigDict,
ConfigKeys,
ConfigValidationError,
ConfigWarning,
HistorySource,
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -37,6 +48,11 @@ def str_to_datetime(date_str: str) -> datetime:
DEFAULT_JOBNAME_FORMAT = "<CONFIG_FILE>-<IENS>"
DEFAULT_ECLBASE_FORMAT = "ECLBASE<IENS>"

# Despite the name, this is a fraction of total disk space (0.97 == 97 %), not a
# percent value: it is compared against used_space / total_space.
FULL_DISK_PERCENTAGE_THRESHOLD = 0.97
MINIMUM_BYTES_LEFT_ON_DISK_THRESHOLD = 200 * 1000**3 # 200 GB
# A low-disk-space warning is issued only when BOTH conditions hold: the free
# space is below MINIMUM_BYTES_LEFT_ON_DISK_THRESHOLD and the used fraction of
# the disk is above FULL_DISK_PERCENTAGE_THRESHOLD.


@dataclass
class ModelConfig:
Expand Down Expand Up @@ -75,6 +91,19 @@ def validate_runpath(cls, runpath_format_string: str) -> str:
f"`{runpath_format_string}`. Valid example: "
f"`{DEFAULT_RUNPATH}` "
)
with contextlib.suppress(Exception):
mount_dir = _get_mount_directory(runpath_format_string)
total_space, used_space, free_space = shutil.disk_usage(mount_dir)
percentage_used = used_space / total_space
if (
percentage_used > FULL_DISK_PERCENTAGE_THRESHOLD
and free_space < MINIMUM_BYTES_LEFT_ON_DISK_THRESHOLD
):
msg = (
f"Low disk space: {byte_with_unit(free_space)} free on {mount_dir !s}."
" Consider freeing up some space to ensure successful simulation runs."
)
ConfigWarning.warn(msg)
return result

@field_validator("jobname_format_string", mode="before")
Expand Down Expand Up @@ -134,3 +163,12 @@ def _replace_runpath_format(format_string: str) -> str:
format_string = format_string.replace("%d", "<IENS>", 1)
format_string = format_string.replace("%d", "<ITER>", 1)
return format_string


def _get_mount_directory(runpath: str) -> Path:
    """Return the closest ancestor of ``runpath`` that is a mount point.

    ``runpath`` is made absolute (relative to the current working directory)
    and its ancestors are walked upwards until ``Path.is_mount()`` reports a
    mount point. The path does not need to exist.

    Args:
        runpath: A (possibly relative, possibly non-existent) path string.

    Returns:
        The first ancestor (or the path itself) that is a mount point, or the
        filesystem root if no ancestor is reported as a mount point.
    """
    path = Path(runpath).absolute()

    # The root is its own parent, so also stop there: without this guard the
    # loop would never terminate if the root were not reported as a mount.
    while not path.is_mount() and path != path.parent:
        path = path.parent

    return path
56 changes: 55 additions & 1 deletion tests/ert/unit_tests/config/test_model_config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from pathlib import Path
from unittest.mock import patch

import pytest

from ert.config import ModelConfig
from ert.config.parsing import ConfigKeys, ConfigValidationError
from ert.config.parsing import ConfigKeys, ConfigValidationError, ConfigWarning


def test_default_model_config_run_path(tmpdir):
Expand Down Expand Up @@ -61,3 +64,54 @@ def test_that_invalid_time_map_file_raises_config_validation_error(tmpdir):

with pytest.raises(ConfigValidationError, match="Could not read timemap file"):
_ = ModelConfig.from_dict({ConfigKeys.TIME_MAP: "time_map.txt"})


@pytest.mark.parametrize(
    "total_space, used_space, to_warn, expected_warning",
    [
        pytest.param(
            10 * 1000**4,  # 10 TB
            9.75 * 1000**4,  # 9.75 TB
            False,
            None,
            # 97.5 % used, but 250 GB is still free.
            id="Low disk space percentage on large disk",
        ),
        pytest.param(
            100 * 1000**3,  # 100 GB
            99 * 1000**3,  # 99 GB
            True,
            "Low disk space: 1.00 GB free on",
            id="Low disk space small disk",
        ),
        pytest.param(
            10 * 1000**5,  # 10 PB
            9.99994 * 1000**5,  # 9.99994 PB
            True,
            "Low disk space: 60.00 GB free on",
            # Unique id: this case is a very large disk, not a small one.
            id="Low disk space large disk",
        ),
        pytest.param(
            100 * 1000**3,  # 100 GB
            75 * 1000**3,  # 75 GB
            False,
            None,
            id="Sufficient disk space",
        ),
    ],
)
def test_warning_when_full_disk(
    tmp_path, recwarn, total_space, used_space, to_warn, expected_warning
):
    """Check when constructing a ModelConfig emits a low-disk-space warning.

    ``shutil.disk_usage`` as seen from ``ert.config.model_config`` is patched
    to return ``(total_space, used_space, total_space - used_space)``. When
    ``to_warn`` is true a ``ConfigWarning`` matching ``expected_warning`` must
    be raised; otherwise no ``ConfigWarning`` may have been recorded.
    """
    (tmp_path / "simulations").mkdir()
    runpath = f"{tmp_path}/simulations/realization-%d/iter-%d"
    with patch(
        "ert.config.model_config.shutil.disk_usage",
        return_value=(total_space, used_space, total_space - used_space),
    ):
        if to_warn:
            with pytest.warns(ConfigWarning, match=expected_warning):
                _ = ModelConfig(num_realizations=1, runpath_format_string=runpath)
        else:
            _ = ModelConfig(num_realizations=1, runpath_format_string=runpath)
            for w in recwarn:
                assert not issubclass(w.category, ConfigWarning)
Loading