
Remove attribute-style accesses to (Geo)DataFrames columns and xarray Datasets variables and attributes #939

Merged

merged 14 commits into from Sep 9, 2024
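The pattern applied throughout this diff is mechanical: every `gdf.column` attribute access becomes an explicit `gdf["column"]` lookup. A minimal sketch of why bracket access is the safer idiom (the column names here are illustrative, not taken from CLIMADA):

```python
import pandas as pd

df = pd.DataFrame({"value": [1.0, 2.0], "unit": ["USD", "USD"]})

# Attribute access happens to work while the column name is "free":
assert df.value.equals(df["value"])

# But a column named like an existing DataFrame member is shadowed:
df["values"] = [3.0, 4.0]
print(type(df.values))     # <class 'numpy.ndarray'>: the property wins
print(type(df["values"]))  # <class 'pandas.core.series.Series'>

# Attribute assignment cannot create a column either; it silently sets a
# plain Python attribute instead (pandas emits a UserWarning), so bracket
# assignment is the reliable way to add or overwrite a column:
df["new_col"] = [5.0, 6.0]
```

Bracket access also keeps working for column names that are not valid Python identifiers, which attribute access can never reach.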
2 changes: 1 addition & 1 deletion climada/engine/forecast.py
@@ -186,7 +186,7 @@ def __init__(
if exposure_name is None:
try:
self.exposure_name = u_coord.country_to_iso(
exposure.gdf.region_id.unique()[0], "name"
exposure.gdf["region_id"].unique()[0], "name"
)
except (KeyError, AttributeError):
self.exposure_name = "custom"
62 changes: 31 additions & 31 deletions climada/engine/impact.py
@@ -243,8 +243,8 @@
date = hazard.date,
frequency = hazard.frequency,
frequency_unit = hazard.frequency_unit,
coord_exp = np.stack([exposures.gdf.latitude.values,
exposures.gdf.longitude.values],
coord_exp = np.stack([exposures.gdf['latitude'].values,
exposures.gdf['longitude'].values],
axis=1),
crs = exposures.crs,
unit = exposures.value_unit,
@@ -1081,25 +1081,25 @@
# pylint: disable=no-member
LOGGER.info('Reading %s', file_name)
imp_df = pd.read_csv(file_name)
imp = cls(haz_type=imp_df.haz_type[0])
imp.unit = imp_df.unit[0]
imp.tot_value = imp_df.tot_value[0]
imp.aai_agg = imp_df.aai_agg[0]
imp.event_id = imp_df.event_id[~np.isnan(imp_df.event_id)].values
imp = cls(haz_type=imp_df['haz_type'][0])
imp.unit = imp_df['unit'][0]
imp.tot_value = imp_df['tot_value'][0]
imp.aai_agg = imp_df['aai_agg'][0]
imp.event_id = imp_df['event_id'][~np.isnan(imp_df['event_id'])].values
num_ev = imp.event_id.size
imp.event_name = imp_df.event_name[:num_ev].values.tolist()
imp.date = imp_df.event_date[:num_ev].values
imp.at_event = imp_df.at_event[:num_ev].values
imp.frequency = imp_df.event_frequency[:num_ev].values
imp.frequency_unit = imp_df.frequency_unit[0] if 'frequency_unit' in imp_df \
imp.event_name = imp_df['event_name'][:num_ev].values.tolist()
imp.date = imp_df['event_date'][:num_ev].values
imp.at_event = imp_df['at_event'][:num_ev].values
imp.frequency = imp_df['event_frequency'][:num_ev].values
imp.frequency_unit = imp_df['frequency_unit'][0] if 'frequency_unit' in imp_df \
else DEF_FREQ_UNIT
imp.eai_exp = imp_df.eai_exp[~np.isnan(imp_df.eai_exp)].values
imp.eai_exp = imp_df['eai_exp'][~np.isnan(imp_df['eai_exp'])].values
num_exp = imp.eai_exp.size
imp.coord_exp = np.zeros((num_exp, 2))
imp.coord_exp[:, 0] = imp_df.exp_lat[:num_exp]
imp.coord_exp[:, 1] = imp_df.exp_lon[:num_exp]
imp.coord_exp[:, 0] = imp_df['exp_lat'][:num_exp]
imp.coord_exp[:, 1] = imp_df['exp_lon'][:num_exp]
try:
imp.crs = u_coord.to_crs_user_input(imp_df.exp_crs.values[0])
imp.crs = u_coord.to_crs_user_input(imp_df['exp_crs'].values[0])
except AttributeError:
imp.crs = DEF_CRS
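A note on the `~np.isnan(...)` indexing in from_csv above: per-event and per-exposure arrays of different lengths share one flat CSV table, so the shorter columns come back NaN-padded and are trimmed again on read. A small self-contained sketch of that pattern (the file contents are made up):

```python
import io

import numpy as np
import pandas as pd

# Two "columns" of different length share one CSV; pandas pads the
# shorter one with NaN, and ~np.isnan(...) recovers its true extent.
csv = io.StringIO("event_id,eai_exp\n1,0.7\n2,\n3,\n")
imp_df = pd.read_csv(csv)

event_id = imp_df["event_id"][~np.isnan(imp_df["event_id"])].values
eai_exp = imp_df["eai_exp"][~np.isnan(imp_df["eai_exp"])].values
print(event_id)  # [1. 2. 3.]
print(eai_exp)   # [0.7]
```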

@@ -1129,23 +1129,23 @@
dfr = pd.read_excel(file_name)
imp = cls(haz_type=str(dfr['haz_type'][0]))

imp.unit = dfr.unit[0]
imp.tot_value = dfr.tot_value[0]
imp.aai_agg = dfr.aai_agg[0]
imp.unit = dfr['unit'][0]
imp.tot_value = dfr['tot_value'][0]
imp.aai_agg = dfr['aai_agg'][0]

imp.event_id = dfr.event_id[~np.isnan(dfr.event_id.values)].values
imp.event_name = dfr.event_name[:imp.event_id.size].values
imp.date = dfr.event_date[:imp.event_id.size].values
imp.frequency = dfr.event_frequency[:imp.event_id.size].values
imp.frequency_unit = dfr.frequency_unit[0] if 'frequency_unit' in dfr else DEF_FREQ_UNIT
imp.at_event = dfr.at_event[:imp.event_id.size].values
imp.event_id = dfr['event_id'][~np.isnan(dfr['event_id'].values)].values
imp.event_name = dfr['event_name'][:imp.event_id.size].values
imp.date = dfr['event_date'][:imp.event_id.size].values
imp.frequency = dfr['event_frequency'][:imp.event_id.size].values
imp.frequency_unit = dfr['frequency_unit'][0] if 'frequency_unit' in dfr else DEF_FREQ_UNIT
imp.at_event = dfr['at_event'][:imp.event_id.size].values

imp.eai_exp = dfr.eai_exp[~np.isnan(dfr.eai_exp.values)].values
imp.eai_exp = dfr['eai_exp'][~np.isnan(dfr['eai_exp'].values)].values
imp.coord_exp = np.zeros((imp.eai_exp.size, 2))
imp.coord_exp[:, 0] = dfr.exp_lat.values[:imp.eai_exp.size]
imp.coord_exp[:, 1] = dfr.exp_lon.values[:imp.eai_exp.size]
imp.coord_exp[:, 0] = dfr['exp_lat'].values[:imp.eai_exp.size]
imp.coord_exp[:, 1] = dfr['exp_lon'].values[:imp.eai_exp.size]
try:
imp.crs = u_coord.to_csr_user_input(dfr.exp_crs.values[0])
imp.crs = u_coord.to_csr_user_input(dfr['exp_crs'].values[0])
except AttributeError:
imp.crs = DEF_CRS

@@ -1324,14 +1324,14 @@
np.array([haz.intensity.max() for haz in haz_list]).max()]

if 'vmin' not in args_exp:
args_exp['vmin'] = exp.gdf.value.values.min()
args_exp['vmin'] = exp.gdf['value'].values.min()

if 'vmin' not in args_imp:
args_imp['vmin'] = np.array([imp.eai_exp.min() for imp in imp_list
if imp.eai_exp.size]).min()

if 'vmax' not in args_exp:
args_exp['vmax'] = exp.gdf.value.values.max()
args_exp['vmax'] = exp.gdf['value'].values.max()

Check warning on line 1334 in climada/engine/impact.py (Jenkins - WCR / Code Coverage): Lines 1327-1334 are not covered by tests.

if 'vmax' not in args_imp:
args_imp['vmax'] = np.array([imp.eai_exp.max() for imp in imp_list
14 changes: 7 additions & 7 deletions climada/engine/impact_calc.py
@@ -154,8 +154,8 @@ def impact(self, save_mat=True, assign_centroids=True,
exp_gdf.size, self.n_events)
imp_mat_gen = self.imp_mat_gen(exp_gdf, impf_col)

insured = ('cover' in exp_gdf and exp_gdf.cover.max() >= 0) \
or ('deductible' in exp_gdf and exp_gdf.deductible.max() > 0)
insured = ('cover' in exp_gdf and exp_gdf['cover'].max() >= 0) \
or ('deductible' in exp_gdf and exp_gdf['deductible'].max() > 0)
if insured:
LOGGER.info("cover and/or deductible columns detected,"
" going to calculate insured impact")
@@ -253,8 +253,8 @@ def minimal_exp_gdf(self, impf_col, assign_centroids, ignore_cover, ignore_deduc
" Run 'exposures.assign_centroids()' beforehand or set"
" 'assign_centroids' to 'True'")
mask = (
(self.exposures.gdf.value.values == self.exposures.gdf.value.values) # value != NaN
& (self.exposures.gdf.value.values != 0) # value != 0
(self.exposures.gdf['value'].values == self.exposures.gdf['value'].values)  # value != NaN
& (self.exposures.gdf['value'].values != 0) # value != 0
& (self.exposures.gdf[self.hazard.centr_exp_col].values >= 0) # centroid assigned
)
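The `values == values` comparison in the mask above is a deliberate NaN filter: IEEE 754 NaN compares unequal to everything, including itself. A minimal demonstration:

```python
import numpy as np

values = np.array([1.0, np.nan, 0.0, 2.0])

not_nan = values == values      # NaN != NaN, so this is False only at NaNs
mask = not_nan & (values != 0)  # additionally drop zero-value exposures
print(values[mask])             # [1. 2.]
```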

@@ -320,7 +320,7 @@ def _chunk_exp_idx(haz_size, idx_exp_impf):
)
idx_exp_impf = (exp_gdf[impf_col].values == impf_id).nonzero()[0]
for exp_idx in _chunk_exp_idx(self.hazard.size, idx_exp_impf):
exp_values = exp_gdf.value.values[exp_idx]
exp_values = exp_gdf['value'].values[exp_idx]
cent_idx = exp_gdf[self.hazard.centr_exp_col].values[exp_idx]
yield (
self.impact_matrix(exp_values, cent_idx, impf),
@@ -363,10 +363,10 @@ def insured_mat_gen(self, imp_mat_gen, exp_gdf, impf_col):
haz_type=self.hazard.haz_type,
fun_id=impf_id)
if 'deductible' in exp_gdf:
deductible = exp_gdf.deductible.values[exp_idx]
deductible = exp_gdf['deductible'].values[exp_idx]
mat = self.apply_deductible_to_mat(mat, deductible, self.hazard, cent_idx, impf)
if 'cover' in exp_gdf:
cover = exp_gdf.cover.values[exp_idx]
cover = exp_gdf['cover'].values[exp_idx]
mat = self.apply_cover_to_mat(mat, cover)
yield (mat, exp_idx)
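For orientation, a rough sketch of what applying a cover amounts to: per-exposure impacts are capped at the covered amount. This mirrors the idea behind apply_cover_to_mat, not its exact CLIMADA implementation:

```python
import numpy as np
from scipy import sparse

# impact matrix: rows are events, columns are exposure points
mat = sparse.csr_matrix(np.array([[100.0, 5.0], [40.0, 80.0]]))
cover = np.array([50.0, 60.0])  # one cap per exposure column

# element-wise minimum caps each column at its cover
cap = sparse.csr_matrix(np.broadcast_to(cover, mat.shape))
print(mat.minimum(cap).toarray())  # [[50.  5.], [40. 60.]]
```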

89 changes: 46 additions & 43 deletions climada/engine/impact_data.py
@@ -355,119 +355,120 @@
'Date_start_EM_ordinal', 'Disaster_name',
'EM_ID', 'ibtracsID', 'allocation_level',
'possible_track', 'possible_track_all'])
lookup.hit_country = data.ISO
lookup.Date_start_EM = data.Date_start_clean
lookup.Disaster_name = data.Disaster_name
lookup.EM_ID = data.Disaster_No
lookup['hit_country'] = data['ISO']
lookup['Date_start_EM'] = data['Date_start_clean']
lookup['Disaster_name'] = data['Disaster_name']
lookup['EM_ID'] = data['Disaster_No']
lookup = lookup.reset_index(drop=True)
# create ordinals
for i in range(0, len(data.Date_start_clean.values)):
lookup.Date_start_EM_ordinal[i] = datetime.toordinal(
datetime.strptime(lookup.Date_start_EM.values[i], '%Y-%m-%d'))
for i in range(0, len(data['Date_start_clean'].values)):
lookup['Date_start_EM_ordinal'][i] = datetime.toordinal(
datetime.strptime(lookup['Date_start_EM'].values[i], '%Y-%m-%d'))
# ordinals to numeric
lookup.Date_start_EM_ordinal = pd.to_numeric(lookup.Date_start_EM_ordinal)
lookup['Date_start_EM_ordinal'] = pd.to_numeric(lookup['Date_start_EM_ordinal'])
# select time
emdat_start = datetime.toordinal(datetime.strptime(start, '%Y-%m-%d'))
emdat_end = datetime.toordinal(datetime.strptime(end, '%Y-%m-%d'))

lookup = lookup[lookup.Date_start_EM_ordinal.values > emdat_start]
lookup = lookup[lookup.Date_start_EM_ordinal.values < emdat_end]
lookup = lookup[lookup['Date_start_EM_ordinal'].values > emdat_start]
lookup = lookup[lookup['Date_start_EM_ordinal'].values < emdat_end]

return lookup
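The ordinal conversion above turns ISO date strings into integer day counts, so "hazard starts at most delta_t days before the EMdat entry" in emdat_possible_hit below becomes plain integer arithmetic. A minimal sketch:

```python
from datetime import datetime

date_start_em = datetime.toordinal(datetime.strptime("2017-09-05", "%Y-%m-%d"))
date_start_track = datetime.toordinal(datetime.strptime("2017-09-01", "%Y-%m-%d"))

delta_t = 7  # days
print(0 <= date_start_em - date_start_track < delta_t)  # True: possible hit
```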


# Function to relate EM disaster to IBtrack using hit countries and time
def emdat_possible_hit(lookup, hit_countries, delta_t):
"""relate EM disaster to hazard using hit countries and time

Parameters
----------
lookup : pd.DataFrame
to relate EMdatID to hazard
hit_countries : pd.DataFrame
with dates and tracks of hazards per hit country
delta_t :
max time difference of start of EMdat event and hazard


Returns
-------
list with possible hits
"""
# lookup: PD dataframe that relates EMdatID to an IBtracsID
# tracks: processed IBtracks with info which track hit which country
# delta_t: time difference of start of EMdat and IBrtacks
possible_hit_all = []
for i in range(0, len(lookup.EM_ID.values)):
for i in range(0, len(lookup['EM_ID'].values)):
possible_hit = []
country_tracks = hit_countries[
hit_countries['hit_country'] == lookup.hit_country.values[i]]
for j in range(0, len(country_tracks.Date_start.values)):
if (lookup.Date_start_EM_ordinal.values[i] - country_tracks.Date_start.values[j]) < \
delta_t and (lookup.Date_start_EM_ordinal.values[i] -
country_tracks.Date_start.values[j]) >= 0:
possible_hit.append(country_tracks.ibtracsID.values[j])
hit_countries['hit_country'] == lookup['hit_country'].values[i]]
for j in range(0, len(country_tracks['Date_start'].values)):
if (lookup['Date_start_EM_ordinal'].values[i] -
country_tracks['Date_start'].values[j]) < \
delta_t and (lookup['Date_start_EM_ordinal'].values[i] -
country_tracks['Date_start'].values[j]) >= 0:
possible_hit.append(country_tracks['ibtracsID'].values[j])
possible_hit_all.append(possible_hit)

return possible_hit_all


# function to check if EM_ID has been assigned already
def match_em_id(lookup, poss_hit):
"""function to check if EM_ID has been assigned already and combine possible hits

Parameters
----------
lookup : pd.dataframe
to relate EMdatID to hazard
poss_hit : list
with possible hits

Returns
-------
list
with all possible hits per EMdat ID
"""
possible_hit_all = []
for i in range(0, len(lookup.EM_ID.values)):
for i in range(0, len(lookup['EM_ID'].values)):
possible_hit = []
# lookup without line i
#lookup_match = lookup.drop(i)
lookup_match = lookup
# loop over lookup and check if the EMdat ID is the same
for i_match in range(0, len(lookup_match.EM_ID.values)):
if lookup.EM_ID.values[i] == lookup_match.EM_ID.values[i_match]:
for i_match in range(0, len(lookup_match['EM_ID'].values)):
if lookup['EM_ID'].values[i] == lookup_match['EM_ID'].values[i_match]:
possible_hit.append(poss_hit[i])
possible_hit_all.append(possible_hit)
return possible_hit_all


def assign_track_to_em(lookup, possible_tracks_1, possible_tracks_2, level):
"""function to assign a hazard to an EMdat event
to get some confidene into the procedure, hazards get only assigned
if there is no other hazard occuring at a bigger time interval in that country
Thus a track of possible_tracks_1 gets only assigned if there are no other
tracks in possible_tracks_2.
The confidence can be expressed with a certainty level

Parameters
----------
lookup : pd.DataFrame
to relate EMdatID to hazard
possible_tracks_1 : list
list of possible hits with smaller time horizon
possible_tracks_2 : list
list of possible hits with larger time horizon
level : int
level of confidence

Returns
-------
pd.DataFrame
lookup with assigned tracks and possible hits
"""

for i, _ in enumerate(possible_tracks_1):

Check warning on line 470 in climada/engine/impact_data.py (Jenkins - WCR / Pylint): too-many-nested-blocks, LOW: Too many nested blocks (6/5). Used when a function or a method has too many nested blocks; this makes the code less understandable and maintainable.
if np.isnan(lookup.allocation_level.values[i]):
if np.isnan(lookup['allocation_level'].values[i]):
number_emdat_id = len(possible_tracks_1[i])
# print(number_emdat_id)
for j in range(0, number_emdat_id):
@@ -479,14 +480,15 @@
if all(possible_tracks_1[i][0] == possible_tracks_1[i][k]
for k in range(0, len(possible_tracks_1[i]))):
# check that track ID has not been assigned to that country already
ctry_lookup = lookup[lookup['hit_country'] == lookup.hit_country.values[i]]
if possible_tracks_1[i][0][0] not in ctry_lookup.ibtracsID.values:
lookup.ibtracsID.values[i] = possible_tracks_1[i][0][0]
lookup.allocation_level.values[i] = level
ctry_lookup = lookup[lookup['hit_country']
== lookup['hit_country'].values[i]]
if possible_tracks_1[i][0][0] not in ctry_lookup['ibtracsID'].values:
lookup['ibtracsID'].values[i] = possible_tracks_1[i][0][0]
lookup['allocation_level'].values[i] = level
elif possible_tracks_1[i][j] != []:
lookup.possible_track.values[i] = possible_tracks_1[i]
lookup['possible_track'].values[i] = possible_tracks_1[i]
else:
lookup.possible_track_all.values[i] = possible_tracks_1[i]
lookup['possible_track_all'].values[i] = possible_tracks_1[i]
return lookup
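Reduced to its core, the rule above assigns a track only when every candidate in the narrow time window is one and the same track; ambiguous cases are parked in possible_track for manual inspection. Illustrative values with hypothetical track IDs:

```python
# hits for one EMdat event within the narrow time window
candidates = [["2005236N23285"], ["2005236N23285"]]  # hypothetical IBTrACS IDs

if candidates and all(c == candidates[0] for c in candidates):
    assigned = candidates[0][0]  # unambiguous: take the single track ID
else:
    assigned = None              # ambiguous: record as possible_track only
print(assigned)                  # 2005236N23285
```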


@@ -507,13 +509,13 @@
# merge checkset and lookup
check = pd.merge(checkset, lookup[['hit_country', 'EM_ID', 'ibtracsID']],
on=['hit_country', 'EM_ID'])
check_size = len(check.ibtracsID.values)
# not assigned values
not_assigned = check.ibtracsID.isnull().sum(axis=0)
check_size = len(check['ibtracsID'].values)
# not assigned values
not_assigned = check['ibtracsID'].isnull().sum(axis=0)
# correct assigned values
correct = sum(check.ibtracsID.values == check.IBtracsID_checked.values)
correct = sum(check['ibtracsID'].values == check['IBtracsID_checked'].values)
# wrongly assigned values
wrong = len(check.ibtracsID.values) - not_assigned - correct
wrong = len(check['ibtracsID'].values) - not_assigned - correct

Check warning on line 518 in climada/engine/impact_data.py (Jenkins - WCR / Code Coverage): Lines 358-518 are not covered by tests.
print('%.1f%% tracks assigned correctly, %.1f%% wrongly, %.1f%% not assigned'
% (correct / check_size * 100,
wrong / check_size * 100,
@@ -707,7 +709,7 @@
List of names of countries impacted by the disaster (sub-)types
"""
df_data = clean_emdat_df(emdat_file_csv, hazard=hazard, year_range=year_range)
countries_iso3a = list(df_data.ISO.unique())
countries_iso3a = list(df_data['ISO'].unique())
countries_names = list()
for iso3a in countries_iso3a:
try:
@@ -800,26 +802,27 @@
year_range=year_range, target_version=version)

df_data[imp_str + " scaled"] = scale_impact2refyear(df_data[imp_str].values,
df_data.Year.values, df_data.ISO.values,
df_data['Year'].values,
df_data['ISO'].values,
reference_year=reference_year)

def country_df(df_data):
for data_iso in df_data.ISO.unique():
for data_iso in df_data['ISO'].unique():
country = u_coord.country_to_iso(data_iso, "alpha3")

df_country = df_data.loc[df_data.ISO == country]
df_country = df_data.loc[df_data['ISO'] == country]
if not df_country.size:
continue

# Retrieve impact data for all years
all_years = np.arange(min(df_data.Year), max(df_data.Year) + 1)
all_years = np.arange(min(df_data['Year']), max(df_data['Year']) + 1)
data_out = pd.DataFrame.from_records(
[
(
year,
np.nansum(df_country[df_country.Year.isin([year])][imp_str]),
np.nansum(df_country[df_country['Year'].isin([year])][imp_str]),
np.nansum(
df_country[df_country.Year.isin([year])][
df_country[df_country['Year'].isin([year])][
imp_str + " scaled"
]
),
@@ -894,13 +897,13 @@
df_data['year'] = df_data['Year']
df_data['reference_year'] = reference_year
df_data['impact'] = df_data[imp_str]
df_data['impact_scaled'] = scale_impact2refyear(df_data[imp_str].values, df_data.Year.values,
df_data.ISO.values,
df_data['impact_scaled'] = scale_impact2refyear(df_data[imp_str].values, df_data['Year'].values,
df_data['ISO'].values,
reference_year=reference_year)
df_data['region_id'] = np.nan
for country in df_data.ISO.unique():
for country in df_data['ISO'].unique():
try:
df_data.loc[df_data.ISO == country, 'region_id'] = \
df_data.loc[df_data['ISO'] == country, 'region_id'] = \
u_coord.country_to_iso(country, "numeric")
except LookupError:
LOGGER.warning('ISO3alpha code not found in iso_country: %s', country)
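The region_id loop above resolves each ISO3 code once per country and leaves unknown codes as NaN. A reduced, self-contained version of the same pattern (the mapping dict stands in for u_coord.country_to_iso):

```python
import logging

import numpy as np
import pandas as pd

LOGGER = logging.getLogger(__name__)
ISO3_TO_NUMERIC = {"CHE": 756, "USA": 840}  # illustrative subset

df_data = pd.DataFrame({"ISO": ["CHE", "USA", "CHE", "XYZ"]})
df_data["region_id"] = np.nan
for country in df_data["ISO"].unique():
    try:
        df_data.loc[df_data["ISO"] == country, "region_id"] = ISO3_TO_NUMERIC[country]
    except LookupError:  # KeyError is a LookupError, mirroring the source
        LOGGER.warning("ISO3alpha code not found in iso_country: %s", country)
print(df_data)
```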
6 changes: 3 additions & 3 deletions climada/engine/test/test_impact.py
@@ -111,7 +111,7 @@ def test_from_eih_pass(self):
np.testing.assert_array_almost_equal(imp.at_event, fake_at_event)
np.testing.assert_array_almost_equal(
imp.coord_exp,
np.stack([exp.gdf.latitude.values, exp.gdf.longitude.values], axis=1)
np.stack([exp.gdf['latitude'].values, exp.gdf['longitude'].values], axis=1)
)

def test_pyproj_crs(self):
@@ -513,7 +513,7 @@ def test_local_exceedance_imp_pass(self):
impact_rp = impact.local_exceedance_imp(return_periods=(10, 40))

self.assertIsInstance(impact_rp, np.ndarray)
self.assertEqual(impact_rp.size, 2 * ent.exposures.gdf.value.size)
self.assertEqual(impact_rp.size, 2 * ent.exposures.gdf['value'].size)
self.assertAlmostEqual(np.max(impact_rp), 2916964966.388219, places=5)
self.assertAlmostEqual(np.min(impact_rp), 444457580.131494, places=5)

@@ -941,7 +941,7 @@ def test_match_centroids(self):
fake_aai_agg = np.sum(fake_eai_exp)
imp = Impact.from_eih(exp, HAZ, fake_at_event, fake_eai_exp, fake_aai_agg)
imp_centr = imp.match_centroids(HAZ)
np.testing.assert_array_equal(imp_centr, exp.gdf.centr_TC)
np.testing.assert_array_equal(imp_centr, exp.gdf['centr_TC'])


class TestImpactH5IO(unittest.TestCase):
Expand Down
Loading