Remove attribute-style accesses to (Geo)DataFrame columns and xarray Dataset variables and attributes (#939)

* Remove attribute-style access, first draft

* Remove attribute-style accesses from engine

* Further removal of attribute-style accesses

* Remove attribute-style accesses in hazard module

* Remove attribute-style accesses in util

* Remove attribute-style accesses in storm_europe and correct mistake

* Mention policy in coding conventions

* Fix linter issues

* Remove files that were mistakenly committed

* Remove some more missed instances of attribute-style access

* Remove attr-style accesses in tests

* Remove some missed accesses to dataset attrs

---------

Co-authored-by: luseverin <luca.severino@usys.ethz.ch>
Co-authored-by: emanuel-schmid <schmide@ethz.ch>
3 people authored Sep 9, 2024
1 parent 36f4735 commit 2ba8be9
Showing 38 changed files with 959 additions and 937 deletions.
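For context, the pitfalls this commit removes are easy to reproduce. The sketch below is illustrative only (made-up frame and names, not code from the commit):

import pandas as pd
import xarray as xr

df = pd.DataFrame({"value": [1.0, 2.0]})

df["value"]  # always the column
df.value     # works only while the name collides with nothing

# A column shadowed by a DataFrame method is unreachable by attribute:
df["count"] = [3, 4]
df.count     # <bound method DataFrame.count>, not the column

# Attribute-style assignment does not create a column; pandas emits a
# UserWarning and sets a plain instance attribute instead:
df.new_col = [5, 6]
"new_col" in df.columns  # False
df["new_col"] = [5, 6]   # creates the column as intended

# xarray has the same ambiguity: ds.foo may resolve to a variable,
# a coordinate, or an attribute, while the explicit forms cannot clash:
ds = xr.Dataset({"wind": ("time", [10.0, 12.0])}, attrs={"units": "m/s"})
ds["wind"]         # data variable
ds.attrs["units"]  # attribute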
2 changes: 1 addition & 1 deletion climada/engine/forecast.py
@@ -186,7 +186,7 @@ def __init__(
if exposure_name is None:
try:
self.exposure_name = u_coord.country_to_iso(
exposure.gdf.region_id.unique()[0], "name"
exposure.gdf["region_id"].unique()[0], "name"
)
except (KeyError, AttributeError):
self.exposure_name = "custom"
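A note on the except clause above: the two access styles fail differently on a missing column, so catching both exception types keeps the fallback intact regardless of spelling. A minimal sketch with a made-up frame:

import pandas as pd

gdf = pd.DataFrame({"value": [1.0]})  # no 'region_id' column

try:
    gdf["region_id"]   # bracket access raises KeyError
except KeyError:
    pass
try:
    gdf.region_id      # attribute access raises AttributeError
except AttributeError:
    pass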
62 changes: 31 additions & 31 deletions climada/engine/impact.py
@@ -243,8 +243,8 @@ def from_eih(cls, exposures, hazard, at_event, eai_exp, aai_agg, imp_mat=None):
date = hazard.date,
frequency = hazard.frequency,
frequency_unit = hazard.frequency_unit,
coord_exp = np.stack([exposures.gdf.latitude.values,
exposures.gdf.longitude.values],
coord_exp = np.stack([exposures.gdf['latitude'].values,
exposures.gdf['longitude'].values],
axis=1),
crs = exposures.crs,
unit = exposures.value_unit,
@@ -1081,25 +1081,25 @@ def from_csv(cls, file_name):
# pylint: disable=no-member
LOGGER.info('Reading %s', file_name)
imp_df = pd.read_csv(file_name)
imp = cls(haz_type=imp_df.haz_type[0])
imp.unit = imp_df.unit[0]
imp.tot_value = imp_df.tot_value[0]
imp.aai_agg = imp_df.aai_agg[0]
imp.event_id = imp_df.event_id[~np.isnan(imp_df.event_id)].values
imp = cls(haz_type=imp_df['haz_type'][0])
imp.unit = imp_df['unit'][0]
imp.tot_value = imp_df['tot_value'][0]
imp.aai_agg = imp_df['aai_agg'][0]
imp.event_id = imp_df['event_id'][~np.isnan(imp_df['event_id'])].values
num_ev = imp.event_id.size
imp.event_name = imp_df.event_name[:num_ev].values.tolist()
imp.date = imp_df.event_date[:num_ev].values
imp.at_event = imp_df.at_event[:num_ev].values
imp.frequency = imp_df.event_frequency[:num_ev].values
imp.frequency_unit = imp_df.frequency_unit[0] if 'frequency_unit' in imp_df \
imp.event_name = imp_df['event_name'][:num_ev].values.tolist()
imp.date = imp_df['event_date'][:num_ev].values
imp.at_event = imp_df['at_event'][:num_ev].values
imp.frequency = imp_df['event_frequency'][:num_ev].values
imp.frequency_unit = imp_df['frequency_unit'][0] if 'frequency_unit' in imp_df \
else DEF_FREQ_UNIT
imp.eai_exp = imp_df.eai_exp[~np.isnan(imp_df.eai_exp)].values
imp.eai_exp = imp_df['eai_exp'][~np.isnan(imp_df['eai_exp'])].values
num_exp = imp.eai_exp.size
imp.coord_exp = np.zeros((num_exp, 2))
imp.coord_exp[:, 0] = imp_df.exp_lat[:num_exp]
imp.coord_exp[:, 1] = imp_df.exp_lon[:num_exp]
imp.coord_exp[:, 0] = imp_df['exp_lat'][:num_exp]
imp.coord_exp[:, 1] = imp_df['exp_lon'][:num_exp]
try:
imp.crs = u_coord.to_crs_user_input(imp_df.exp_crs.values[0])
imp.crs = u_coord.to_crs_user_input(imp_df['exp_crs'].values[0])
except AttributeError:
imp.crs = DEF_CRS
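The NaN masks above exist because from_csv reads arrays of different lengths back out of one flat table, where shorter columns were NaN-padded on write. A sketch of the idiom with made-up values:

import numpy as np
import pandas as pd

# Two logical arrays of different lengths, NaN-padded into one table:
df = pd.DataFrame({"event_id": [1.0, 2.0, 3.0],
                   "eai_exp": [0.5, np.nan, np.nan]})

event_id = df["event_id"][~np.isnan(df["event_id"])].values  # array([1., 2., 3.])
eai_exp = df["eai_exp"][~np.isnan(df["eai_exp"])].values     # array([0.5])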

@@ -1129,23 +1129,23 @@ def from_excel(cls, file_name):
dfr = pd.read_excel(file_name)
imp = cls(haz_type=str(dfr['haz_type'][0]))

imp.unit = dfr.unit[0]
imp.tot_value = dfr.tot_value[0]
imp.aai_agg = dfr.aai_agg[0]
imp.unit = dfr['unit'][0]
imp.tot_value = dfr['tot_value'][0]
imp.aai_agg = dfr['aai_agg'][0]

imp.event_id = dfr.event_id[~np.isnan(dfr.event_id.values)].values
imp.event_name = dfr.event_name[:imp.event_id.size].values
imp.date = dfr.event_date[:imp.event_id.size].values
imp.frequency = dfr.event_frequency[:imp.event_id.size].values
imp.frequency_unit = dfr.frequency_unit[0] if 'frequency_unit' in dfr else DEF_FREQ_UNIT
imp.at_event = dfr.at_event[:imp.event_id.size].values
imp.event_id = dfr['event_id'][~np.isnan(dfr['event_id'].values)].values
imp.event_name = dfr['event_name'][:imp.event_id.size].values
imp.date = dfr['event_date'][:imp.event_id.size].values
imp.frequency = dfr['event_frequency'][:imp.event_id.size].values
imp.frequency_unit = dfr['frequency_unit'][0] if 'frequency_unit' in dfr else DEF_FREQ_UNIT
imp.at_event = dfr['at_event'][:imp.event_id.size].values

imp.eai_exp = dfr.eai_exp[~np.isnan(dfr.eai_exp.values)].values
imp.eai_exp = dfr['eai_exp'][~np.isnan(dfr['eai_exp'].values)].values
imp.coord_exp = np.zeros((imp.eai_exp.size, 2))
imp.coord_exp[:, 0] = dfr.exp_lat.values[:imp.eai_exp.size]
imp.coord_exp[:, 1] = dfr.exp_lon.values[:imp.eai_exp.size]
imp.coord_exp[:, 0] = dfr['exp_lat'].values[:imp.eai_exp.size]
imp.coord_exp[:, 1] = dfr['exp_lon'].values[:imp.eai_exp.size]
try:
imp.crs = u_coord.to_crs_user_input(dfr.exp_crs.values[0])
imp.crs = u_coord.to_crs_user_input(dfr['exp_crs'].values[0])
except AttributeError:
imp.crs = DEF_CRS
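Both readers guard optional columns with `'frequency_unit' in ...`; membership on a DataFrame tests column labels, so this works as an existence check. A short sketch, where the default string is an assumption standing in for DEF_FREQ_UNIT:

import pandas as pd

dfr = pd.DataFrame({"unit": ["USD"]})

"unit" in dfr            # True, membership tests column labels
"frequency_unit" in dfr  # False, the optional column is absent

freq_unit = dfr["frequency_unit"][0] if "frequency_unit" in dfr else "1/year"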

@@ -1324,14 +1324,14 @@ def video_direct_impact(exp, impf_set, haz_list, file_name='',
np.array([haz.intensity.max() for haz in haz_list]).max()]

if 'vmin' not in args_exp:
args_exp['vmin'] = exp.gdf.value.values.min()
args_exp['vmin'] = exp.gdf['value'].values.min()

if 'vmin' not in args_imp:
args_imp['vmin'] = np.array([imp.eai_exp.min() for imp in imp_list
if imp.eai_exp.size]).min()

if 'vmax' not in args_exp:
args_exp['vmax'] = exp.gdf.value.values.max()
args_exp['vmax'] = exp.gdf['value'].values.max()

[Check warning, Jenkins - WCR / Code Coverage, climada/engine/impact.py line 1334: lines 1327-1334 are not covered by tests]

if 'vmax' not in args_imp:
args_imp['vmax'] = np.array([imp.eai_exp.max() for imp in imp_list
14 changes: 7 additions & 7 deletions climada/engine/impact_calc.py
@@ -154,8 +154,8 @@ def impact(self, save_mat=True, assign_centroids=True,
exp_gdf.size, self.n_events)
imp_mat_gen = self.imp_mat_gen(exp_gdf, impf_col)

insured = ('cover' in exp_gdf and exp_gdf.cover.max() >= 0) \
or ('deductible' in exp_gdf and exp_gdf.deductible.max() > 0)
insured = ('cover' in exp_gdf and exp_gdf['cover'].max() >= 0) \
or ('deductible' in exp_gdf and exp_gdf['deductible'].max() > 0)
if insured:
LOGGER.info("cover and/or deductible columns detected,"
" going to calculate insured impact")
@@ -253,8 +253,8 @@ def minimal_exp_gdf(self, impf_col, assign_centroids, ignore_cover, ignore_deduc
" Run 'exposures.assign_centroids()' beforehand or set"
" 'assign_centroids' to 'True'")
mask = (
(self.exposures.gdf.value.values == self.exposures.gdf.value.values) # value != NaN
& (self.exposures.gdf.value.values != 0) # value != 0
(self.exposures.gdf['value'].values == self.exposures.gdf['value'].values)  # value != NaN
& (self.exposures.gdf['value'].values != 0) # value != 0
& (self.exposures.gdf[self.hazard.centr_exp_col].values >= 0) # centroid assigned
)
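The first term of the mask relies on IEEE 754 semantics: NaN is the only float that compares unequal to itself, so `value == value` is equivalent to `~np.isnan(value)`. Illustrative sketch:

import numpy as np

values = np.array([1.0, np.nan, 0.0, 2.0])

# NaN != NaN filters missing values; the second term drops
# zero-value points, mirroring the mask in minimal_exp_gdf:
mask = (values == values) & (values != 0)
values[mask]  # array([1., 2.])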

@@ -320,7 +320,7 @@ def _chunk_exp_idx(haz_size, idx_exp_impf):
)
idx_exp_impf = (exp_gdf[impf_col].values == impf_id).nonzero()[0]
for exp_idx in _chunk_exp_idx(self.hazard.size, idx_exp_impf):
exp_values = exp_gdf.value.values[exp_idx]
exp_values = exp_gdf['value'].values[exp_idx]
cent_idx = exp_gdf[self.hazard.centr_exp_col].values[exp_idx]
yield (
self.impact_matrix(exp_values, cent_idx, impf),
@@ -363,10 +363,10 @@ def insured_mat_gen(self, imp_mat_gen, exp_gdf, impf_col):
haz_type=self.hazard.haz_type,
fun_id=impf_id)
if 'deductible' in exp_gdf:
deductible = exp_gdf.deductible.values[exp_idx]
deductible = exp_gdf['deductible'].values[exp_idx]
mat = self.apply_deductible_to_mat(mat, deductible, self.hazard, cent_idx, impf)
if 'cover' in exp_gdf:
cover = exp_gdf.cover.values[exp_idx]
cover = exp_gdf['cover'].values[exp_idx]
mat = self.apply_cover_to_mat(mat, cover)
yield (mat, exp_idx)

89 changes: 46 additions & 43 deletions climada/engine/impact_data.py
@@ -355,23 +355,23 @@ def create_lookup(emdat_data, start, end, disaster_subtype='Tropical cyclone'):
'Date_start_EM_ordinal', 'Disaster_name',
'EM_ID', 'ibtracsID', 'allocation_level',
'possible_track', 'possible_track_all'])
lookup.hit_country = data.ISO
lookup.Date_start_EM = data.Date_start_clean
lookup.Disaster_name = data.Disaster_name
lookup.EM_ID = data.Disaster_No
lookup['hit_country'] = data['ISO']
lookup['Date_start_EM'] = data['Date_start_clean']
lookup['Disaster_name'] = data['Disaster_name']
lookup['EM_ID'] = data['Disaster_No']
lookup = lookup.reset_index(drop=True)
# create ordinals
for i in range(0, len(data.Date_start_clean.values)):
lookup.Date_start_EM_ordinal[i] = datetime.toordinal(
datetime.strptime(lookup.Date_start_EM.values[i], '%Y-%m-%d'))
for i in range(0, len(data['Date_start_clean'].values)):
lookup['Date_start_EM_ordinal'][i] = datetime.toordinal(
datetime.strptime(lookup['Date_start_EM'].values[i], '%Y-%m-%d'))
# ordinals to numeric
lookup.Date_start_EM_ordinal = pd.to_numeric(lookup.Date_start_EM_ordinal)
lookup['Date_start_EM_ordinal'] = pd.to_numeric(lookup['Date_start_EM_ordinal'])
# select time
emdat_start = datetime.toordinal(datetime.strptime(start, '%Y-%m-%d'))
emdat_end = datetime.toordinal(datetime.strptime(end, '%Y-%m-%d'))

lookup = lookup[lookup.Date_start_EM_ordinal.values > emdat_start]
lookup = lookup[lookup.Date_start_EM_ordinal.values < emdat_end]
lookup = lookup[lookup['Date_start_EM_ordinal'].values > emdat_start]
lookup = lookup[lookup['Date_start_EM_ordinal'].values < emdat_end]

return lookup
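As an aside, the element-wise toordinal loop in create_lookup has a vectorized equivalent; a sketch under the assumption, consistent with the code above, that the column holds '%Y-%m-%d' strings:

import pandas as pd

# Stand-in for the lookup frame built above
lookup = pd.DataFrame({"Date_start_EM": ["2017-08-25", "2017-09-06"]})

lookup["Date_start_EM_ordinal"] = (
    pd.to_datetime(lookup["Date_start_EM"], format="%Y-%m-%d")
    .map(lambda d: d.toordinal())
)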

@@ -397,15 +397,16 @@ def emdat_possible_hit(lookup, hit_countries, delta_t):
# tracks: processed IBTrACS with info which track hit which country
# delta_t: time difference of start of EMdat and IBTrACS
possible_hit_all = []
for i in range(0, len(lookup.EM_ID.values)):
for i in range(0, len(lookup['EM_ID'].values)):
possible_hit = []
country_tracks = hit_countries[
hit_countries['hit_country'] == lookup.hit_country.values[i]]
for j in range(0, len(country_tracks.Date_start.values)):
if (lookup.Date_start_EM_ordinal.values[i] - country_tracks.Date_start.values[j]) < \
delta_t and (lookup.Date_start_EM_ordinal.values[i] -
country_tracks.Date_start.values[j]) >= 0:
possible_hit.append(country_tracks.ibtracsID.values[j])
hit_countries['hit_country'] == lookup['hit_country'].values[i]]
for j in range(0, len(country_tracks['Date_start'].values)):
if (lookup['Date_start_EM_ordinal'].values[i] -
country_tracks['Date_start'].values[j]) < \
delta_t and (lookup['Date_start_EM_ordinal'].values[i] -
country_tracks['Date_start'].values[j]) >= 0:
possible_hit.append(country_tracks['ibtracsID'].values[j])
possible_hit_all.append(possible_hit)

return possible_hit_all
@@ -428,14 +429,14 @@ def match_em_id(lookup, poss_hit):
with all possible hits per EMdat ID
"""
possible_hit_all = []
for i in range(0, len(lookup.EM_ID.values)):
for i in range(0, len(lookup['EM_ID'].values)):
possible_hit = []
# lookup without line i
#lookup_match = lookup.drop(i)
lookup_match = lookup
# Loop over check if EM dat ID is the same
for i_match in range(0, len(lookup_match.EM_ID.values)):
if lookup.EM_ID.values[i] == lookup_match.EM_ID.values[i_match]:
for i_match in range(0, len(lookup_match['EM_ID'].values)):
if lookup['EM_ID'].values[i] == lookup_match['EM_ID'].values[i_match]:
possible_hit.append(poss_hit[i])
possible_hit_all.append(possible_hit)
return possible_hit_all
@@ -467,7 +468,7 @@ def assign_track_to_em(lookup, possible_tracks_1, possible_tracks_2, level):
"""

for i, _ in enumerate(possible_tracks_1):

[Check warning, Jenkins - WCR / Pylint, climada/engine/impact_data.py line 470: too-many-nested-blocks, LOW: Too many nested blocks (6/5). Raised when a function or a method has too many nested blocks, which makes the code less understandable and maintainable.]
if np.isnan(lookup.allocation_level.values[i]):
if np.isnan(lookup['allocation_level'].values[i]):
number_emdat_id = len(possible_tracks_1[i])
# print(number_emdat_id)
for j in range(0, number_emdat_id):
Expand All @@ -479,14 +480,15 @@ def assign_track_to_em(lookup, possible_tracks_1, possible_tracks_2, level):
if all(possible_tracks_1[i][0] == possible_tracks_1[i][k]
for k in range(0, len(possible_tracks_1[i]))):
# check that track ID has not been assigned to that country already
ctry_lookup = lookup[lookup['hit_country'] == lookup.hit_country.values[i]]
if possible_tracks_1[i][0][0] not in ctry_lookup.ibtracsID.values:
lookup.ibtracsID.values[i] = possible_tracks_1[i][0][0]
lookup.allocation_level.values[i] = level
ctry_lookup = lookup[lookup['hit_country']
== lookup['hit_country'].values[i]]
if possible_tracks_1[i][0][0] not in ctry_lookup['ibtracsID'].values:
lookup['ibtracsID'].values[i] = possible_tracks_1[i][0][0]
lookup['allocation_level'].values[i] = level
elif possible_tracks_1[i][j] != []:
lookup.possible_track.values[i] = possible_tracks_1[i]
lookup['possible_track'].values[i] = possible_tracks_1[i]
else:
lookup.possible_track_all.values[i] = possible_tracks_1[i]
lookup['possible_track_all'].values[i] = possible_tracks_1[i]
return lookup
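One subtlety the bracket conversion preserves: assignments such as `lookup['ibtracsID'].values[i] = ...` write through the Series' underlying NumPy array and mutate the frame in place, whereas chained item assignment (`lookup['ibtracsID'][i] = ...`) may operate on a copy and trigger SettingWithCopyWarning. Sketch with a made-up track ID:

import pandas as pd

lookup = pd.DataFrame({"ibtracsID": [None, None]})

# Writes through the underlying array of a NumPy-backed (here object
# dtype) column; extension dtypes may hand back a copy instead:
lookup["ibtracsID"].values[0] = "1980999N99999"  # hypothetical ID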


@@ -507,13 +509,13 @@ def check_assigned_track(lookup, checkset):
# merge checkset and lookup
check = pd.merge(checkset, lookup[['hit_country', 'EM_ID', 'ibtracsID']],
on=['hit_country', 'EM_ID'])
check_size = len(check.ibtracsID.values)
# not assigned values
not_assigned = check.ibtracsID.isnull().sum(axis=0)
check_size = len(check['ibtracsID'].values)
# not assigned values
not_assigned = check['ibtracsID'].isnull().sum(axis=0)
# correct assigned values
correct = sum(check.ibtracsID.values == check.IBtracsID_checked.values)
correct = sum(check['ibtracsID'].values == check['IBtracsID_checked'].values)
# wrongly assigned values
wrong = len(check.ibtracsID.values) - not_assigned - correct
wrong = len(check['ibtracsID'].values) - not_assigned - correct

[Check warning, Jenkins - WCR / Code Coverage, climada/engine/impact_data.py line 518: lines 358-518 are not covered by tests]
print('%.1f%% tracks assigned correctly, %.1f%% wrongly, %.1f%% not assigned'
% (correct / check_size * 100,
wrong / check_size * 100,
@@ -707,7 +709,7 @@ def emdat_countries_by_hazard(emdat_file_csv, hazard=None, year_range=None):
List of names of countries impacted by the disaster (sub-)types
"""
df_data = clean_emdat_df(emdat_file_csv, hazard=hazard, year_range=year_range)
countries_iso3a = list(df_data.ISO.unique())
countries_iso3a = list(df_data['ISO'].unique())
countries_names = list()
for iso3a in countries_iso3a:
try:
@@ -800,26 +802,27 @@ def emdat_impact_yearlysum(emdat_file_csv, countries=None, hazard=None, year_ran
year_range=year_range, target_version=version)

df_data[imp_str + " scaled"] = scale_impact2refyear(df_data[imp_str].values,
df_data.Year.values, df_data.ISO.values,
df_data['Year'].values,
df_data['ISO'].values,
reference_year=reference_year)

def country_df(df_data):
for data_iso in df_data.ISO.unique():
for data_iso in df_data['ISO'].unique():
country = u_coord.country_to_iso(data_iso, "alpha3")

df_country = df_data.loc[df_data.ISO == country]
df_country = df_data.loc[df_data['ISO'] == country]
if not df_country.size:
continue

# Retrieve impact data for all years
all_years = np.arange(min(df_data.Year), max(df_data.Year) + 1)
all_years = np.arange(min(df_data['Year']), max(df_data['Year']) + 1)
data_out = pd.DataFrame.from_records(
[
(
year,
np.nansum(df_country[df_country.Year.isin([year])][imp_str]),
np.nansum(df_country[df_country['Year'].isin([year])][imp_str]),
np.nansum(
df_country[df_country.Year.isin([year])][
df_country[df_country['Year'].isin([year])][
imp_str + " scaled"
]
),
@@ -894,13 +897,13 @@ def emdat_impact_event(emdat_file_csv, countries=None, hazard=None, year_range=N
df_data['year'] = df_data['Year']
df_data['reference_year'] = reference_year
df_data['impact'] = df_data[imp_str]
df_data['impact_scaled'] = scale_impact2refyear(df_data[imp_str].values, df_data.Year.values,
df_data.ISO.values,
df_data['impact_scaled'] = scale_impact2refyear(df_data[imp_str].values, df_data['Year'].values,
df_data['ISO'].values,
reference_year=reference_year)
df_data['region_id'] = np.nan
for country in df_data.ISO.unique():
for country in df_data['ISO'].unique():
try:
df_data.loc[df_data.ISO == country, 'region_id'] = \
df_data.loc[df_data['ISO'] == country, 'region_id'] = \
u_coord.country_to_iso(country, "numeric")
except LookupError:
LOGGER.warning('ISO3alpha code not found in iso_country: %s', country)
6 changes: 3 additions & 3 deletions climada/engine/test/test_impact.py
@@ -111,7 +111,7 @@ def test_from_eih_pass(self):
np.testing.assert_array_almost_equal(imp.at_event, fake_at_event)
np.testing.assert_array_almost_equal(
imp.coord_exp,
np.stack([exp.gdf.latitude.values, exp.gdf.longitude.values], axis=1)
np.stack([exp.gdf['latitude'].values, exp.gdf['longitude'].values], axis=1)
)

def test_pyproj_crs(self):
@@ -513,7 +513,7 @@ def test_local_exceedance_imp_pass(self):
impact_rp = impact.local_exceedance_imp(return_periods=(10, 40))

self.assertIsInstance(impact_rp, np.ndarray)
self.assertEqual(impact_rp.size, 2 * ent.exposures.gdf.value.size)
self.assertEqual(impact_rp.size, 2 * ent.exposures.gdf['value'].size)
self.assertAlmostEqual(np.max(impact_rp), 2916964966.388219, places=5)
self.assertAlmostEqual(np.min(impact_rp), 444457580.131494, places=5)

@@ -941,7 +941,7 @@ def test_match_centroids(self):
fake_aai_agg = np.sum(fake_eai_exp)
imp = Impact.from_eih(exp, HAZ, fake_at_event, fake_eai_exp, fake_aai_agg)
imp_centr = imp.match_centroids(HAZ)
np.testing.assert_array_equal(imp_centr, exp.gdf.centr_TC)
np.testing.assert_array_equal(imp_centr, exp.gdf['centr_TC'])


class TestImpactH5IO(unittest.TestCase):