Skip to content

Commit

Permalink
minor changes in yaml docstring and code
Browse files Browse the repository at this point in the history
  • Loading branch information
rizac committed Sep 27, 2023
1 parent 82cb0ae commit ca60f41
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 62 deletions.
2 changes: 1 addition & 1 deletion egsim/smtk/flatfile/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""flatfile pandas root module"""
"""flatfile root module"""

from io import IOBase, TextIOBase, TextIOWrapper
from datetime import date, datetime
Expand Down
13 changes: 5 additions & 8 deletions egsim/smtk/flatfile/columns.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,26 @@
"""
module containing all column metadata information stored in the associated
YAML file
flatfile columns functions. see associated YAML file for info
"""
from __future__ import annotations

import re
from datetime import datetime, date
from enum import Enum, ReprEnum
from os.path import join, dirname
from typing import Union, Any
from collections.abc import Iterable

import numpy as np
import pandas as pd
from pandas.core.arrays import PandasArray
from pandas.core.dtypes.base import ExtensionDtype
from typing import Union, Any, Iterable

# try to speed up yaml.safe_load (https://pyyaml.org/wiki/PyYAMLDocumentation):
from yaml import load as yaml_load

try:
from yaml import CSafeLoader as SafeLoader # faster, if available
except ImportError:
from yaml import SafeLoader # same as using yaml.safe_load

import numpy as np
import pandas as pd


class ColumnType(Enum):
"""Flatfile column type"""
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
"""flatfile pandas module"""
"""flatfile functions for residuals analysis"""
from collections.abc import Collection, Iterable

from pandas.core.indexes.numeric import IntegerIndex
from scipy.interpolate import interp1d

from typing import Union # , Iterable
from typing import Union

import numpy as np
import pandas as pd
from scipy.interpolate import interp1d
from openquake.hazardlib.gsim.base import GMPE
from openquake.hazardlib.scalerel import PeerMSR
from openquake.hazardlib.contexts import RuptureContext

from .columns import (get_rupture_params, get_all_names_of,
get_intensity_measures, MissingColumn,
from .columns import (get_all_names_of, get_intensity_measures, MissingColumn,
InvalidDataInColumn, InvalidColumnName, ConflictingColumns)
from .. import (get_SA_period, get_imts_defined_for, get_distances_required_by,
get_rupture_params_required_by, get_sites_params_required_by)
Expand Down Expand Up @@ -59,9 +54,9 @@ def get_station_id_column_names(flatfile: pd.DataFrame) -> list[str, ...]:
return col_names


def setup_flatfile_for_residuals(flatfile: pd.DataFrame,
gsims: Collection[GMPE],
imts: Collection[str]) -> pd.DataFrame:
def get_flatfile_for_residual_analysis(flatfile: pd.DataFrame,
gsims: Collection[GMPE],
imts: Collection[str]) -> pd.DataFrame:
"""Return a new dataframe with all columns required to compute residuals
from the given models (`gsim`) and intensity measures (`imts`) given with
periods, when needed (e.g. "SA(0.2)")
Expand All @@ -76,11 +71,11 @@ def setup_flatfile_for_residuals(flatfile: pd.DataFrame,
# concat all new dataframes in this list, then return a new one from it:
new_dataframes = []
# prepare the flatfile for the required imts:
imts_flatfile = setup_flatfile_for_imts(flatfile, imts)
imts_flatfile = get_required_imts(flatfile, imts)
if not imts_flatfile.empty:
new_dataframes.append(imts_flatfile)
# prepare the flatfile for the required ground motion properties:
props_flatfile = setup_flatfile_for_gsims(flatfile, gsims)
props_flatfile = get_required_ground_motion_properties(flatfile, gsims)
if not props_flatfile.empty:
new_dataframes.append(props_flatfile)

Expand All @@ -90,7 +85,7 @@ def setup_flatfile_for_residuals(flatfile: pd.DataFrame,
return pd.concat(new_dataframes, axis=1)


def setup_flatfile_for_imts(flatfile: pd.DataFrame, imts: Collection[str]) -> pd.DataFrame:
def get_required_imts(flatfile: pd.DataFrame, imts: Collection[str]) -> pd.DataFrame:
"""Return a new dataframe with all columns required to compute residuals
for the given intensity measures (`imts`) given with
periods, when needed (e.g. "SA(0.2)")
Expand All @@ -114,16 +109,15 @@ def setup_flatfile_for_imts(flatfile: pd.DataFrame, imts: Collection[str]) -> pd
new_dataframes.append(flatfile[sorted(non_sa_imts)])
# prepare the flatfile for SA (create new columns by interpolation if necessary):
if sa_imts:
sa_dataframe = setup_flatfile_for_sa(flatfile, sa_imts)
sa_dataframe = get_required_sa(flatfile, sa_imts)
if not sa_dataframe.empty:
new_dataframes.append(sa_dataframe)
if not new_dataframes:
return pd.DataFrame(columns=flatfile.columns) # empty dataframe
return pd.concat(new_dataframes, axis=1)


def setup_flatfile_for_sa(flatfile: pd.DataFrame, sa_imts: Iterable[str]) \
-> pd.DataFrame:
def get_required_sa(flatfile: pd.DataFrame, sa_imts: Iterable[str]) -> pd.DataFrame:
"""Return a new Dataframe with the SA columns defined in `sa_imts`
The returned DataFrame will have all strings supplied in `sa_imts` as columns,
with relative values copied (or inferred via interpolation) from the given flatfile
Expand Down Expand Up @@ -175,20 +169,20 @@ def setup_flatfile_for_sa(flatfile: pd.DataFrame, sa_imts: Iterable[str]) \
return new_flatfile


def setup_flatfile_for_gsims(flatfile: pd.DataFrame,
def get_required_ground_motion_properties(flatfile: pd.DataFrame,
gsims: Iterable[GMPE]) -> pd.DataFrame:
"""Return a new dataframe with all columns required to compute residuals
from the given models (`gsim`), i.e. all columns denoting ground motion
properties required by the passed models
"""
props_flatfile = pd.DataFrame(index=flatfile.index)
for prop in get_required_ground_motion_properties(gsims):
for prop in get_required_ground_motion_property_names(gsims):
props_flatfile[prop] = \
get_ground_motion_property_values(flatfile, prop)
return props_flatfile


def get_required_ground_motion_properties(gsims: Union[GMPE, Iterable[GMPE]]) \
def get_required_ground_motion_property_names(gsims: Union[GMPE, Iterable[GMPE]]) \
-> set[str]:
"""Return a Python set containing the required ground motion properties
(rupture or sites parameter, distance measure, all as `str`) for the given
Expand Down
60 changes: 28 additions & 32 deletions egsim/smtk/residuals/__init__.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,24 @@
"""
Residuals module
"""

from __future__ import annotations # https://peps.python.org/pep-0563/
from collections.abc import Iterable, Collection

from pandas import RangeIndex
from typing import Union

from math import sqrt

import numpy as np
import pandas as pd
from pandas import RangeIndex
from pandas.core.indexes.numeric import IntegerIndex

from scipy.special import erf

from openquake.hazardlib.gsim.base import GMPE
from openquake.hazardlib import imt, const
from openquake.hazardlib.contexts import RuptureContext

from .. import check_gsim_list, get_gsim_name, get_SA_period #, convert_accel_units
from ..flatfile.preparation import (get_event_id_column_names,
get_station_id_column_names,
setup_flatfile_for_residuals)
from .. import check_gsim_list, get_gsim_name, get_SA_period
from ..flatfile.residuals import (get_event_id_column_names,
get_station_id_column_names,
get_flatfile_for_residual_analysis)
from ..flatfile.columns import InvalidColumn, MissingColumn, get_rupture_params


Expand Down Expand Up @@ -64,18 +60,18 @@ def get_residuals(gsims: Iterable[str], imts: Collection[str],
(according to Equation 9 of Scherbaum et al (2004)) should be computed
"""
gsims = check_gsim_list(gsims)
flatfile2 = setup_flatfile_for_residuals(flatfile, gsims.values(), imts)
flatfile_r = get_flatfile_for_residual_analysis(flatfile, gsims.values(), imts)
# copy event columns (raises if columns not found):
ev_cols = get_event_id_column_names(flatfile)
flatfile2[ev_cols] = flatfile[ev_cols]
flatfile_r[ev_cols] = flatfile[ev_cols]
# copy station columns (for the moment not used, so skip if no station columns)
try:
st_cols = get_station_id_column_names(flatfile)
flatfile2[st_cols] = flatfile[st_cols]
flatfile_r[st_cols] = flatfile[st_cols]
except InvalidColumn:
pass
# compute residuals:
residuals = calculate_flatfile_residuals(gsims, imts, flatfile2,
residuals = calculate_flatfile_residuals(gsims, imts, flatfile_r,
normalise=normalise)
# concatenate expected in flatfile (add new columns):
flatfile[list(residuals.columns)] = residuals
Expand All @@ -88,7 +84,7 @@ def calculate_flatfile_residuals(gsims: dict[str, GMPE], imts: Iterable[str],
flatfile: pd.DataFrame, normalise=True) -> pd.DataFrame:
residuals:pd.DataFrame = pd.DataFrame(index=flatfile.index)
imts = list(imts)
# computget the observations (compute the log for all once here):
# compute the observations (compute the log for all once here):
observations = pd.DataFrame(index=flatfile.index,
columns=imts,
data=np.log(flatfile[imts]))
Expand All @@ -108,6 +104,23 @@ def calculate_flatfile_residuals(gsims: dict[str, GMPE], imts: Iterable[str],
return residuals


def yield_event_contexts(flatfile: pd.DataFrame) -> Iterable[EventContext]:
    """Group the given flatfile by event and yield one `EventContext`
    per event"""
    # ensure a default integer index (0 .. row_count-1) so that every row
    # has a unique int id (note: modifies the passed flatfile in place):
    if not isinstance(flatfile.index, RangeIndex):
        flatfile.reset_index(drop=True, inplace=True)

    # group rows by the event id column or, when no id column is found,
    # by the event spatio-temporal coordinates (see
    # `get_event_id_column_names` for details):
    group_cols = get_event_id_column_names(flatfile)
    for _ev_id, event_df in flatfile.groupby(group_cols):
        if event_df.empty:  # for safety ...
            continue
        yield EventContext(event_df)


class EventContext(RuptureContext):
"""A RuptureContext accepting a flatfile (pandas DataFrame) as input"""

Expand Down Expand Up @@ -151,23 +164,6 @@ def __getattr__(self, column_name):
return values


def yield_event_contexts(flatfile: pd.DataFrame) -> Iterable[EventContext]:
    """Yield one `EventContext` object for each event found in `flatfile`"""
    if not isinstance(flatfile.index, RangeIndex):
        # give each row a unique int id from 0 to row_count-1
        # (in place: the caller's dataframe index is reset):
        flatfile.reset_index(drop=True, inplace=True)

    # columns identifying an event: the event id or, when no id column
    # exists, the event spatio-temporal coordinates:
    key_columns = get_event_id_column_names(flatfile)

    for _, event_rows in flatfile.groupby(key_columns):
        if not event_rows.empty:  # for safety ...
            yield EventContext(event_rows)


def calculate_expected_motions(gsims: Iterable[GMPE], imts: Iterable[str],
ctx: EventContext) -> pd.DataFrame:
"""
Expand Down

0 comments on commit ca60f41

Please sign in to comment.