Merge pull request #193 from Deltares/fix/GEOLIB-216-models-refactoring
Fix/geolib 216 models refactoring (restored)
wfaustmann authored Aug 12, 2024
2 parents 68398b1 + e5fc380 commit 33ca087
Showing 14 changed files with 201 additions and 211 deletions.
3 changes: 0 additions & 3 deletions geolib/__init__.py
@@ -3,6 +3,3 @@
"""

__version__ = "2.3.0"

from . import utils
from .models import *
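With the wildcard re-export removed from the package root, downstream code now imports models from `geolib.models` explicitly. A minimal sketch of the new import style (hypothetical caller code, using names this diff shows `geolib.models` exporting):

from geolib.models import BaseModelList, DSettlementModel

model = DSettlementModel()  # any concrete model re-exported by geolib.models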
3 changes: 2 additions & 1 deletion geolib/models/__init__.py
@@ -3,7 +3,8 @@
"""

from .base_model_structure import BaseDataClass, BaseModelStructure # isort:skip
from .base_model import BaseModel, BaseModelList
from .base_model import BaseModel
from .base_model_list import BaseModelList
from .dfoundations import DFoundationsModel
from .dsettlement import DSettlementModel
from .dsheetpiling import DSheetPilingModel
29 changes: 29 additions & 0 deletions geolib/models/base_data_class.py
@@ -0,0 +1,29 @@
from pydantic import BaseModel

from geolib._compat import IS_PYDANTIC_V2

if IS_PYDANTIC_V2:
from pydantic import ConfigDict

from .meta import MetaData

settings = MetaData()


class BaseDataClass(BaseModel):
"""Base class for *all* pydantic classes in GEOLib."""

if IS_PYDANTIC_V2:
model_config = ConfigDict(
validate_assignment=True,
arbitrary_types_allowed=True,
validate_default=True,
extra=settings.extra_fields,
)
else:

class Config:
validate_assignment = True
arbitrary_types_allowed = True
validate_all = True
extra = settings.extra_fields
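Since `BaseDataClass` now lives in its own module, the validation behaviour it configures can be shown in isolation. A minimal sketch, assuming a hypothetical subclass and field name:

from geolib.models.base_data_class import BaseDataClass

class AnchorSettings(BaseDataClass):  # hypothetical example class
    length: float = 1.0

s = AnchorSettings()
s.length = 2.5       # validate_assignment: re-validated on mutation
# s.length = "long"  # would raise a pydantic ValidationError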
133 changes: 8 additions & 125 deletions geolib/models/base_model.py
@@ -5,11 +5,9 @@
"""
import abc
import logging
import os
from abc import abstractmethod, abstractproperty
from pathlib import Path, PosixPath, WindowsPath
from subprocess import Popen, run
from types import CoroutineType
from abc import abstractmethod
from pathlib import Path
from subprocess import run
from typing import List, Optional, Type, Union

import requests
@@ -24,10 +22,9 @@

from geolib.errors import CalculationError
from geolib.models import BaseDataClass

from .base_model_structure import BaseModelStructure
from .meta import MetaData
from .parsers import BaseParserProvider
from geolib.models.base_model_structure import BaseModelStructure
from geolib.models.meta import MetaData
from geolib.models.parsers import BaseParserProvider

logger = logging.getLogger(__name__)
meta = MetaData()
@@ -162,7 +159,8 @@ def console_flags(self) -> List[str]:
def console_flags_post(self) -> List[str]:
return []

@abstractproperty
@property
@abstractmethod
def parser_provider_type(self) -> Type[BaseParserProvider]:
"""Returns the parser provider type of the current concrete class.
@@ -218,121 +216,6 @@ def set_meta_property(self, key: str, value: str) -> None:
raise ValueError(f"Metadata property {key} does not exist.")


class BaseModelList(BaseDataClass):
"""Hold multiple models that can be executed in parallel.
Note that all models need to have a unique filename,
otherwise they will overwrite each other. This also helps with
identifying them later."""

models: List[BaseModel]
errors: List[str] = []

def execute(
self,
calculation_folder: DirectoryPath,
timeout_in_seconds: int = meta.timeout,
nprocesses: Optional[int] = os.cpu_count(),
) -> "BaseModelList":
"""Execute all models in this class in parallel.
We split the list into separate folders and call a batch process on each folder.
Note that the order of models will change.
"""

# manual check as remote execution could result in zero models
if len(self.models) == 0:
raise ValueError("Can't execute with zero models.")

lead_model = self.models[0]
processes = []
output_models = []
errors = []

# Divide the models over n processes and make sure to copy them to prevent aliasing
split_models = [self.models[i::nprocesses] for i in range(nprocesses)]
for i, models in enumerate(split_models):
if len(models) == 0:
continue
unique_folder = calculation_folder / str(i)
unique_folder.mkdir(parents=True, exist_ok=True)

for model in models:
fn = unique_folder / model.filename.name
model.serialize(fn.resolve())

executable = meta.console_folder / lead_model.default_console_path
if not executable.exists():
logger.error(
f"Please make sure the `geolib.env` file points to the console folder. GEOLib now can't find it at `{executable}`"
)
raise CalculationError(
-1, f"Console executable not found at {executable}."
)

process = Popen(
[str(executable)] + lead_model.console_flags + [str(i)],
cwd=str(calculation_folder.resolve()),
)
processes.append(process)

# Wait for all processes to be done
for process in processes:
logger.debug(f"Executed with {process.args}")
process.wait(timeout=timeout_in_seconds)

# Iterate over the models
for i, models in enumerate(split_models):
for model in models:
model = model.copy(deep=True) # prevent aliasing
output_filename = output_filename_from_input(model)
if output_filename.exists():
try:
model.parse(output_filename)
output_models.append(model)

except ValidationError:
logger.warning(
f"Ouput file generated but parsing of {output_filename.name} failed."
)
error = model.get_error_context()
errors.append(error)
else:
logger.warning(
f"Model @ {output_filename.name} failed. Please check the .err file and batchlog.txt in its folder."
)
error = model.get_error_context()
errors.append(error)

return self.__class__(models=output_models, errors=errors)

def execute_remote(self, endpoint: HttpUrl) -> "BaseModelList":
"""Execute all models in this class in parallel on a remote endpoint.
Note that the order of models will change.
"""
lead_model = self.models[0]

response = requests.post(
requests.compat.urljoin(
endpoint, f"calculate/{lead_model.__class__.__name__.lower()}s"
),
data="[" + ",".join((model.json() for model in self.models)) + "]",
auth=HTTPBasicAuth(meta.gl_username, meta.gl_password),
)
if response.status_code == 200:
models = response.json()["models"]
errors = response.json()["errors"]
stripped_models = []
for model in models:
# remove possibly invalid external metadata
model.get("meta", {}).pop("console_folder", None)
stripped_models.append(lead_model.__class__(**model))
return self.__class__(models=stripped_models, errors=errors)
else:
raise CalculationError(response.status_code, response.text)


def output_filename_from_input(model: BaseModel, extension: str = None) -> Path:
if not extension:
extension = model.parser_provider_type().output_parsers[-1].suffix_list[0]
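With `abstractproperty` (deprecated since Python 3.3) replaced by stacking `@property` on `@abstractmethod`, a concrete model satisfies the contract roughly as below. The model and provider class names are assumptions for illustration, not taken from this diff:

from typing import Type

from geolib.models.base_model import BaseModel
from geolib.models.parsers import BaseParserProvider

class MyModel(BaseModel):  # hypothetical concrete model
    @property
    def parser_provider_type(self) -> Type[BaseParserProvider]:
        return MyParserProvider  # assumed concrete BaseParserProvider subclass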
131 changes: 131 additions & 0 deletions geolib/models/base_model_list.py
@@ -0,0 +1,131 @@
import logging
import os
from subprocess import Popen
from typing import List, Optional

import requests
from pydantic import DirectoryPath, HttpUrl, ValidationError
from requests.auth import HTTPBasicAuth

from geolib.errors import CalculationError
from geolib.models.meta import MetaData
from geolib.models.base_data_class import BaseDataClass
from geolib.models.base_model import BaseModel, output_filename_from_input

logger = logging.getLogger(__name__)
meta = MetaData()


class BaseModelList(BaseDataClass):
"""Hold multiple models that can be executed in parallel.
Note that all models need to have a unique filename,
otherwise they will overwrite each other. This also helps with
identifying them later."""

models: List[BaseModel]
errors: List[str] = []

def execute(
self,
calculation_folder: DirectoryPath,
timeout_in_seconds: int = meta.timeout,
nprocesses: Optional[int] = os.cpu_count(),
) -> "BaseModelList":
"""Execute all models in this class in parallel.
We split the list into separate folders and call a batch process on each folder.
Note that the order of models will change.
"""

# manual check as remote execution could result in zero models
if len(self.models) == 0:
raise ValueError("Can't execute with zero models.")

lead_model = self.models[0]
processes = []
output_models = []
errors = []

# Divide the models over n processes and make sure to copy them to prevent aliasing
split_models = [self.models[i::nprocesses] for i in range(nprocesses)]
for i, models in enumerate(split_models):
if len(models) == 0:
continue
unique_folder = calculation_folder / str(i)
unique_folder.mkdir(parents=True, exist_ok=True)

for model in models:
fn = unique_folder / model.filename.name
model.serialize(fn.resolve())

executable = meta.console_folder / lead_model.default_console_path
if not executable.exists():
logger.error(
f"Please make sure the `geolib.env` file points to the console folder. GEOLib now can't find it at `{executable}`"
)
raise CalculationError(
-1, f"Console executable not found at {executable}."
)

process = Popen(
[str(executable)] + lead_model.console_flags + [str(i)],
cwd=str(calculation_folder.resolve()),
)
processes.append(process)

# Wait for all processes to be done
for process in processes:
logger.debug(f"Executed with {process.args}")
process.wait(timeout=timeout_in_seconds)

# Iterate over the models
for models in split_models:
for model in models:
model = model.copy(deep=True) # prevent aliasing
output_filename = output_filename_from_input(model)
if output_filename.exists():
try:
model.parse(output_filename)
output_models.append(model)

except ValidationError:
logger.warning(
f"Ouput file generated but parsing of {output_filename.name} failed."
)
error = model.get_error_context()
errors.append(error)
else:
logger.warning(
f"Model @ {output_filename.name} failed. Please check the .err file and batchlog.txt in its folder."
)
error = model.get_error_context()
errors.append(error)

return self.__class__(models=output_models, errors=errors)

def execute_remote(self, endpoint: HttpUrl) -> "BaseModelList":
"""Execute all models in this class in parallel on a remote endpoint.
Note that the order of models will change.
"""
lead_model = self.models[0]

response = requests.post(
requests.compat.urljoin(
endpoint, f"calculate/{lead_model.__class__.__name__.lower()}s"
),
data="[" + ",".join((model.json() for model in self.models)) + "]",
auth=HTTPBasicAuth(meta.gl_username, meta.gl_password),
)
if response.status_code == 200:
models = response.json()["models"]
errors = response.json()["errors"]
stripped_models = []
for model in models:
# remove possibly invalid external metadata
model.get("meta", {}).pop("console_folder", None)
stripped_models.append(lead_model.__class__(**model))
return self.__class__(models=stripped_models, errors=errors)
else:
raise CalculationError(response.status_code, response.text)
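A hedged usage sketch for the relocated `BaseModelList`; the model instances, folder name, and process count are assumptions. The calculation folder must already exist (`DirectoryPath` validates that), filenames must be unique, and results come back reordered:

from pathlib import Path

from geolib.models import BaseModelList

batch = BaseModelList(models=[model_a, model_b])  # models prepared elsewhere
results = batch.execute(Path("calculations"), nprocesses=2)
print(len(results.models), results.errors)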
30 changes: 1 addition & 29 deletions geolib/models/base_model_structure.py
@@ -1,37 +1,9 @@
import abc
from math import isfinite

from pydantic import BaseModel
from geolib.models.base_data_class import BaseDataClass

from geolib._compat import IS_PYDANTIC_V2

if IS_PYDANTIC_V2:
from pydantic import ConfigDict

from .meta import MetaData
from .validators import BaseValidator

settings = MetaData()


class BaseDataClass(BaseModel):
"""Base class for *all* pydantic classes in GEOLib."""

if IS_PYDANTIC_V2:
model_config = ConfigDict(
validate_assignment=True,
arbitrary_types_allowed=True,
validate_default=True,
extra=settings.extra_fields,
)
else:

class Config:
validate_assignment = True
arbitrary_types_allowed = True
validate_all = True
extra = settings.extra_fields


class BaseModelStructure(BaseDataClass, abc.ABC):
@property