
Remove attribute-style accesses to (Geo)DataFrames columns and xarray Datasets variables and attributes #939

Merged

merged 14 commits into from Sep 9, 2024
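The pattern applied throughout this diff is mechanical: every `gdf.column` attribute access becomes an explicit `gdf["column"]` lookup. A minimal sketch of why bracket access is the safer idiom (the column names here are illustrative, not taken from CLIMADA):

```python
import pandas as pd

df = pd.DataFrame({"value": [1.0, 2.0], "unit": ["USD", "USD"]})

# Attribute access happens to work while the column name is "free":
assert df.value.equals(df["value"])

# But a column named like an existing DataFrame member is shadowed:
df["values"] = [3.0, 4.0]
print(type(df.values))     # <class 'numpy.ndarray'>: the property wins
print(type(df["values"]))  # <class 'pandas.core.series.Series'>

# Attribute assignment cannot create a column either; it silently sets a
# plain Python attribute instead (pandas emits a UserWarning), so bracket
# assignment is the reliable way to add or overwrite a column:
df["new_col"] = [5.0, 6.0]
```

Bracket access also keeps working for column names that are not valid Python identifiers, which attribute access can never reach.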
2 changes: 1 addition & 1 deletion climada/engine/forecast.py
@@ -186,7 +186,7 @@ def __init__(
if exposure_name is None:
try:
self.exposure_name = u_coord.country_to_iso(
exposure.gdf.region_id.unique()[0], "name"
exposure.gdf["region_id"].unique()[0], "name"
)
except (KeyError, AttributeError):
self.exposure_name = "custom"
62 changes: 31 additions & 31 deletions climada/engine/impact.py
@@ -243,8 +243,8 @@
date = hazard.date,
frequency = hazard.frequency,
frequency_unit = hazard.frequency_unit,
coord_exp = np.stack([exposures.gdf.latitude.values,
exposures.gdf.longitude.values],
coord_exp = np.stack([exposures.gdf['latitude'].values,
exposures.gdf['longitude'].values],
axis=1),
crs = exposures.crs,
unit = exposures.value_unit,
@@ -1081,25 +1081,25 @@
# pylint: disable=no-member
LOGGER.info('Reading %s', file_name)
imp_df = pd.read_csv(file_name)
imp = cls(haz_type=imp_df.haz_type[0])
imp.unit = imp_df.unit[0]
imp.tot_value = imp_df.tot_value[0]
imp.aai_agg = imp_df.aai_agg[0]
imp.event_id = imp_df.event_id[~np.isnan(imp_df.event_id)].values
imp = cls(haz_type=imp_df['haz_type'][0])
imp.unit = imp_df['unit'][0]
imp.tot_value = imp_df['tot_value'][0]
imp.aai_agg = imp_df['aai_agg'][0]
imp.event_id = imp_df['event_id'][~np.isnan(imp_df['event_id'])].values
num_ev = imp.event_id.size
imp.event_name = imp_df.event_name[:num_ev].values.tolist()
imp.date = imp_df.event_date[:num_ev].values
imp.at_event = imp_df.at_event[:num_ev].values
imp.frequency = imp_df.event_frequency[:num_ev].values
imp.frequency_unit = imp_df.frequency_unit[0] if 'frequency_unit' in imp_df \
imp.event_name = imp_df['event_name'][:num_ev].values.tolist()
imp.date = imp_df['event_date'][:num_ev].values
imp.at_event = imp_df['at_event'][:num_ev].values
imp.frequency = imp_df['event_frequency'][:num_ev].values
imp.frequency_unit = imp_df['frequency_unit'][0] if 'frequency_unit' in imp_df \
else DEF_FREQ_UNIT
imp.eai_exp = imp_df.eai_exp[~np.isnan(imp_df.eai_exp)].values
imp.eai_exp = imp_df['eai_exp'][~np.isnan(imp_df['eai_exp'])].values
num_exp = imp.eai_exp.size
imp.coord_exp = np.zeros((num_exp, 2))
imp.coord_exp[:, 0] = imp_df.exp_lat[:num_exp]
imp.coord_exp[:, 1] = imp_df.exp_lon[:num_exp]
imp.coord_exp[:, 0] = imp_df['exp_lat'][:num_exp]
imp.coord_exp[:, 1] = imp_df['exp_lon'][:num_exp]
try:
imp.crs = u_coord.to_crs_user_input(imp_df.exp_crs.values[0])
imp.crs = u_coord.to_crs_user_input(imp_df['exp_crs'].values[0])
except AttributeError:
imp.crs = DEF_CRS
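A note on the `~np.isnan(...)` indexing in from_csv above: per-event and per-exposure arrays of different lengths share one flat CSV table, so the shorter columns come back NaN-padded and are trimmed again on read. A small self-contained sketch of that pattern (the file contents are made up):

```python
import io

import numpy as np
import pandas as pd

# Two "columns" of different length share one CSV; pandas pads the
# shorter one with NaN, and ~np.isnan(...) recovers its true extent.
csv = io.StringIO("event_id,eai_exp\n1,0.7\n2,\n3,\n")
imp_df = pd.read_csv(csv)

event_id = imp_df["event_id"][~np.isnan(imp_df["event_id"])].values
eai_exp = imp_df["eai_exp"][~np.isnan(imp_df["eai_exp"])].values
print(event_id)  # [1. 2. 3.]
print(eai_exp)   # [0.7]
```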

@@ -1129,23 +1129,23 @@
dfr = pd.read_excel(file_name)
imp = cls(haz_type=str(dfr['haz_type'][0]))

imp.unit = dfr.unit[0]
imp.tot_value = dfr.tot_value[0]
imp.aai_agg = dfr.aai_agg[0]
imp.unit = dfr['unit'][0]
imp.tot_value = dfr['tot_value'][0]
imp.aai_agg = dfr['aai_agg'][0]

imp.event_id = dfr.event_id[~np.isnan(dfr.event_id.values)].values
imp.event_name = dfr.event_name[:imp.event_id.size].values
imp.date = dfr.event_date[:imp.event_id.size].values
imp.frequency = dfr.event_frequency[:imp.event_id.size].values
imp.frequency_unit = dfr.frequency_unit[0] if 'frequency_unit' in dfr else DEF_FREQ_UNIT
imp.at_event = dfr.at_event[:imp.event_id.size].values
imp.event_id = dfr['event_id'][~np.isnan(dfr['event_id'].values)].values
imp.event_name = dfr['event_name'][:imp.event_id.size].values
imp.date = dfr['event_date'][:imp.event_id.size].values
imp.frequency = dfr['event_frequency'][:imp.event_id.size].values
imp.frequency_unit = dfr['frequency_unit'][0] if 'frequency_unit' in dfr else DEF_FREQ_UNIT
imp.at_event = dfr['at_event'][:imp.event_id.size].values

imp.eai_exp = dfr.eai_exp[~np.isnan(dfr.eai_exp.values)].values
imp.eai_exp = dfr['eai_exp'][~np.isnan(dfr['eai_exp'].values)].values
imp.coord_exp = np.zeros((imp.eai_exp.size, 2))
imp.coord_exp[:, 0] = dfr.exp_lat.values[:imp.eai_exp.size]
imp.coord_exp[:, 1] = dfr.exp_lon.values[:imp.eai_exp.size]
imp.coord_exp[:, 0] = dfr['exp_lat'].values[:imp.eai_exp.size]
imp.coord_exp[:, 1] = dfr['exp_lon'].values[:imp.eai_exp.size]
try:
imp.crs = u_coord.to_csr_user_input(dfr.exp_crs.values[0])
imp.crs = u_coord.to_csr_user_input(dfr['exp_crs'].values[0])
except AttributeError:
imp.crs = DEF_CRS

@@ -1324,14 +1324,14 @@
np.array([haz.intensity.max() for haz in haz_list]).max()]

if 'vmin' not in args_exp:
args_exp['vmin'] = exp.gdf.value.values.min()
args_exp['vmin'] = exp.gdf['value'].values.min()

if 'vmin' not in args_imp:
args_imp['vmin'] = np.array([imp.eai_exp.min() for imp in imp_list
if imp.eai_exp.size]).min()

if 'vmax' not in args_exp:
args_exp['vmax'] = exp.gdf.value.values.max()
args_exp['vmax'] = exp.gdf['value'].values.max()

Check warning on line 1334 in climada/engine/impact.py (Jenkins - WCR / Code Coverage): Lines 1327-1334 are not covered by tests.

if 'vmax' not in args_imp:
args_imp['vmax'] = np.array([imp.eai_exp.max() for imp in imp_list
14 changes: 7 additions & 7 deletions climada/engine/impact_calc.py
@@ -154,8 +154,8 @@ def impact(self, save_mat=True, assign_centroids=True,
exp_gdf.size, self.n_events)
imp_mat_gen = self.imp_mat_gen(exp_gdf, impf_col)

insured = ('cover' in exp_gdf and exp_gdf.cover.max() >= 0) \
or ('deductible' in exp_gdf and exp_gdf.deductible.max() > 0)
insured = ('cover' in exp_gdf and exp_gdf['cover'].max() >= 0) \
or ('deductible' in exp_gdf and exp_gdf['deductible'].max() > 0)
if insured:
LOGGER.info("cover and/or deductible columns detected,"
" going to calculate insured impact")
@@ -253,8 +253,8 @@ def minimal_exp_gdf(self, impf_col, assign_centroids, ignore_cover, ignore_deduc
" Run 'exposures.assign_centroids()' beforehand or set"
" 'assign_centroids' to 'True'")
mask = (
(self.exposures.gdf.value.values == self.exposures.gdf.value.values) # value != NaN
& (self.exposures.gdf.value.values != 0) # value != 0
(self.exposures.gdf['value'].values == self.exposures.gdf['value'].values)  # value != NaN
& (self.exposures.gdf['value'].values != 0) # value != 0
& (self.exposures.gdf[self.hazard.centr_exp_col].values >= 0) # centroid assigned
)
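The `values == values` comparison in the mask above is a deliberate NaN filter: IEEE 754 NaN compares unequal to everything, including itself. A minimal demonstration:

```python
import numpy as np

values = np.array([1.0, np.nan, 0.0, 2.0])

not_nan = values == values      # NaN != NaN, so this is False only at NaNs
mask = not_nan & (values != 0)  # additionally drop zero-value exposures
print(values[mask])             # [1. 2.]
```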

@@ -320,7 +320,7 @@ def _chunk_exp_idx(haz_size, idx_exp_impf):
)
idx_exp_impf = (exp_gdf[impf_col].values == impf_id).nonzero()[0]
for exp_idx in _chunk_exp_idx(self.hazard.size, idx_exp_impf):
exp_values = exp_gdf.value.values[exp_idx]
exp_values = exp_gdf['value'].values[exp_idx]
cent_idx = exp_gdf[self.hazard.centr_exp_col].values[exp_idx]
yield (
self.impact_matrix(exp_values, cent_idx, impf),
@@ -363,10 +363,10 @@ def insured_mat_gen(self, imp_mat_gen, exp_gdf, impf_col):
haz_type=self.hazard.haz_type,
fun_id=impf_id)
if 'deductible' in exp_gdf:
deductible = exp_gdf.deductible.values[exp_idx]
deductible = exp_gdf['deductible'].values[exp_idx]
mat = self.apply_deductible_to_mat(mat, deductible, self.hazard, cent_idx, impf)
if 'cover' in exp_gdf:
cover = exp_gdf.cover.values[exp_idx]
cover = exp_gdf['cover'].values[exp_idx]
mat = self.apply_cover_to_mat(mat, cover)
yield (mat, exp_idx)
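For orientation, a rough sketch of what applying a cover amounts to: per-exposure impacts are capped at the covered amount. This mirrors the idea behind apply_cover_to_mat, not its exact CLIMADA implementation:

```python
import numpy as np
from scipy import sparse

# impact matrix: rows are events, columns are exposure points
mat = sparse.csr_matrix(np.array([[100.0, 5.0], [40.0, 80.0]]))
cover = np.array([50.0, 60.0])  # one cap per exposure column

# element-wise minimum caps each column at its cover
cap = sparse.csr_matrix(np.broadcast_to(cover, mat.shape))
print(mat.minimum(cap).toarray())  # [[50.  5.], [40. 60.]]
```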

89 changes: 46 additions & 43 deletions climada/engine/impact_data.py
@@ -355,119 +355,120 @@
'Date_start_EM_ordinal', 'Disaster_name',
'EM_ID', 'ibtracsID', 'allocation_level',
'possible_track', 'possible_track_all'])
lookup.hit_country = data.ISO
lookup.Date_start_EM = data.Date_start_clean
lookup.Disaster_name = data.Disaster_name
lookup.EM_ID = data.Disaster_No
lookup['hit_country'] = data['ISO']
lookup['Date_start_EM'] = data['Date_start_clean']
lookup['Disaster_name'] = data['Disaster_name']
lookup['EM_ID'] = data['Disaster_No']
lookup = lookup.reset_index(drop=True)
# create ordinals
for i in range(0, len(data.Date_start_clean.values)):
lookup.Date_start_EM_ordinal[i] = datetime.toordinal(
datetime.strptime(lookup.Date_start_EM.values[i], '%Y-%m-%d'))
for i in range(0, len(data['Date_start_clean'].values)):
lookup['Date_start_EM_ordinal'][i] = datetime.toordinal(
datetime.strptime(lookup['Date_start_EM'].values[i], '%Y-%m-%d'))
# ordinals to numeric
lookup.Date_start_EM_ordinal = pd.to_numeric(lookup.Date_start_EM_ordinal)
lookup['Date_start_EM_ordinal'] = pd.to_numeric(lookup['Date_start_EM_ordinal'])
# select time
emdat_start = datetime.toordinal(datetime.strptime(start, '%Y-%m-%d'))
emdat_end = datetime.toordinal(datetime.strptime(end, '%Y-%m-%d'))

lookup = lookup[lookup.Date_start_EM_ordinal.values > emdat_start]
lookup = lookup[lookup.Date_start_EM_ordinal.values < emdat_end]
lookup = lookup[lookup['Date_start_EM_ordinal'].values > emdat_start]
lookup = lookup[lookup['Date_start_EM_ordinal'].values < emdat_end]

return lookup
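The ordinal conversion above turns ISO date strings into integer day counts, so "hazard starts at most delta_t days before the EMdat entry" in emdat_possible_hit below becomes plain integer arithmetic. A minimal sketch:

```python
from datetime import datetime

date_start_em = datetime.toordinal(datetime.strptime("2017-09-05", "%Y-%m-%d"))
date_start_track = datetime.toordinal(datetime.strptime("2017-09-01", "%Y-%m-%d"))

delta_t = 7  # days
print(0 <= date_start_em - date_start_track < delta_t)  # True: possible hit
```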


# Function to relate EM disaster to IBtrack using hit countries and time
def emdat_possible_hit(lookup, hit_countries, delta_t):
"""relate EM disaster to hazard using hit countries and time

Parameters
----------
lookup : pd.DataFrame
to relate EMdatID to hazard
hit_countries : pd.DataFrame
with dates and tracks of hazards per hit country
delta_t :
max time difference of start of EMdat event and hazard


Returns
-------
list with possible hits
"""
# lookup: PD dataframe that relates EMdatID to an IBtracsID
# tracks: processed IBtracks with info which track hit which country
# delta_t: time difference of start of EMdat and IBrtacks
possible_hit_all = []
for i in range(0, len(lookup.EM_ID.values)):
for i in range(0, len(lookup['EM_ID'].values)):
possible_hit = []
country_tracks = hit_countries[
hit_countries['hit_country'] == lookup.hit_country.values[i]]
for j in range(0, len(country_tracks.Date_start.values)):
if (lookup.Date_start_EM_ordinal.values[i] - country_tracks.Date_start.values[j]) < \
delta_t and (lookup.Date_start_EM_ordinal.values[i] -
country_tracks.Date_start.values[j]) >= 0:
possible_hit.append(country_tracks.ibtracsID.values[j])
hit_countries['hit_country'] == lookup['hit_country'].values[i]]
for j in range(0, len(country_tracks['Date_start'].values)):
if (lookup['Date_start_EM_ordinal'].values[i] -
country_tracks['Date_start'].values[j]) < \
delta_t and (lookup['Date_start_EM_ordinal'].values[i] -
country_tracks['Date_start'].values[j]) >= 0:
possible_hit.append(country_tracks['ibtracsID'].values[j])
possible_hit_all.append(possible_hit)

return possible_hit_all


# function to check if EM_ID has been assigned already
def match_em_id(lookup, poss_hit):
"""function to check if EM_ID has been assigned already and combine possible hits

Parameters
----------
lookup : pd.dataframe
to relate EMdatID to hazard
poss_hit : list
with possible hits

Returns
-------
list
with all possible hits per EMdat ID
"""
possible_hit_all = []
for i in range(0, len(lookup.EM_ID.values)):
for i in range(0, len(lookup['EM_ID'].values)):
possible_hit = []
# lookup without line i
#lookup_match = lookup.drop(i)
lookup_match = lookup
# loop over lookup and check if the EMdat ID is the same
for i_match in range(0, len(lookup_match.EM_ID.values)):
if lookup.EM_ID.values[i] == lookup_match.EM_ID.values[i_match]:
for i_match in range(0, len(lookup_match['EM_ID'].values)):
if lookup['EM_ID'].values[i] == lookup_match['EM_ID'].values[i_match]:
possible_hit.append(poss_hit[i])
possible_hit_all.append(possible_hit)
return possible_hit_all


def assign_track_to_em(lookup, possible_tracks_1, possible_tracks_2, level):
"""function to assign a hazard to an EMdat event
to get some confidene into the procedure, hazards get only assigned
if there is no other hazard occuring at a bigger time interval in that country
Thus a track of possible_tracks_1 gets only assigned if there are no other
tracks in possible_tracks_2.
The confidence can be expressed with a certainty level

Parameters
----------
lookup : pd.DataFrame
to relate EMdatID to hazard
possible_tracks_1 : list
list of possible hits with smaller time horizon
possible_tracks_2 : list
list of possible hits with larger time horizon
level : int
level of confidence

Returns
-------
pd.DataFrame
lookup with assigned tracks and possible hits
"""

for i, _ in enumerate(possible_tracks_1):

Check warning on line 470 in climada/engine/impact_data.py (Jenkins - WCR / Pylint): too-many-nested-blocks, LOW: Too many nested blocks (6/5). Used when a function or a method has too many nested blocks; this makes the code less understandable and maintainable.
if np.isnan(lookup.allocation_level.values[i]):
if np.isnan(lookup['allocation_level'].values[i]):
number_emdat_id = len(possible_tracks_1[i])
# print(number_emdat_id)
for j in range(0, number_emdat_id):
@@ -479,14 +480,15 @@
if all(possible_tracks_1[i][0] == possible_tracks_1[i][k]
for k in range(0, len(possible_tracks_1[i]))):
# check that track ID has not been assigned to that country already
ctry_lookup = lookup[lookup['hit_country'] == lookup.hit_country.values[i]]
if possible_tracks_1[i][0][0] not in ctry_lookup.ibtracsID.values:
lookup.ibtracsID.values[i] = possible_tracks_1[i][0][0]
lookup.allocation_level.values[i] = level
ctry_lookup = lookup[lookup['hit_country']
== lookup['hit_country'].values[i]]
if possible_tracks_1[i][0][0] not in ctry_lookup['ibtracsID'].values:
lookup['ibtracsID'].values[i] = possible_tracks_1[i][0][0]
lookup['allocation_level'].values[i] = level
elif possible_tracks_1[i][j] != []:
lookup.possible_track.values[i] = possible_tracks_1[i]
lookup['possible_track'].values[i] = possible_tracks_1[i]
else:
lookup.possible_track_all.values[i] = possible_tracks_1[i]
lookup['possible_track_all'].values[i] = possible_tracks_1[i]
return lookup
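Reduced to its core, the rule above assigns a track only when every candidate in the narrow time window is one and the same track; ambiguous cases are parked in possible_track for manual inspection. Illustrative values with hypothetical track IDs:

```python
# hits for one EMdat event within the narrow time window
candidates = [["2005236N23285"], ["2005236N23285"]]  # hypothetical IBTrACS IDs

if candidates and all(c == candidates[0] for c in candidates):
    assigned = candidates[0][0]  # unambiguous: take the single track ID
else:
    assigned = None              # ambiguous: record as possible_track only
print(assigned)                  # 2005236N23285
```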


@@ -507,13 +509,13 @@
# merge checkset and lookup
check = pd.merge(checkset, lookup[['hit_country', 'EM_ID', 'ibtracsID']],
on=['hit_country', 'EM_ID'])
check_size = len(check.ibtracsID.values)
# not assigned values
not_assigned = check.ibtracsID.isnull().sum(axis=0)
check_size = len(check['ibtracsID'].values)
# not assigned values
not_assigned = check['ibtracsID'].isnull().sum(axis=0)
# correct assigned values
correct = sum(check.ibtracsID.values == check.IBtracsID_checked.values)
correct = sum(check['ibtracsID'].values == check['IBtracsID_checked'].values)
# wrongly assigned values
wrong = len(check.ibtracsID.values) - not_assigned - correct
wrong = len(check['ibtracsID'].values) - not_assigned - correct

Check warning on line 518 in climada/engine/impact_data.py (Jenkins - WCR / Code Coverage): Lines 358-518 are not covered by tests.
print('%.1f%% tracks assigned correctly, %.1f%% wrongly, %.1f%% not assigned'
% (correct / check_size * 100,
wrong / check_size * 100,
@@ -707,7 +709,7 @@
List of names of countries impacted by the disaster (sub-)types
"""
df_data = clean_emdat_df(emdat_file_csv, hazard=hazard, year_range=year_range)
countries_iso3a = list(df_data.ISO.unique())
countries_iso3a = list(df_data['ISO'].unique())
countries_names = list()
for iso3a in countries_iso3a:
try:
@@ -800,26 +802,27 @@
year_range=year_range, target_version=version)

df_data[imp_str + " scaled"] = scale_impact2refyear(df_data[imp_str].values,
df_data.Year.values, df_data.ISO.values,
df_data['Year'].values,
df_data['ISO'].values,
reference_year=reference_year)

def country_df(df_data):
for data_iso in df_data.ISO.unique():
for data_iso in df_data['ISO'].unique():
country = u_coord.country_to_iso(data_iso, "alpha3")

df_country = df_data.loc[df_data.ISO == country]
df_country = df_data.loc[df_data['ISO'] == country]
if not df_country.size:
continue

# Retrieve impact data for all years
all_years = np.arange(min(df_data.Year), max(df_data.Year) + 1)
all_years = np.arange(min(df_data['Year']), max(df_data['Year']) + 1)
data_out = pd.DataFrame.from_records(
[
(
year,
np.nansum(df_country[df_country.Year.isin([year])][imp_str]),
np.nansum(df_country[df_country['Year'].isin([year])][imp_str]),
np.nansum(
df_country[df_country.Year.isin([year])][
df_country[df_country['Year'].isin([year])][
imp_str + " scaled"
]
),
@@ -894,13 +897,13 @@
df_data['year'] = df_data['Year']
df_data['reference_year'] = reference_year
df_data['impact'] = df_data[imp_str]
df_data['impact_scaled'] = scale_impact2refyear(df_data[imp_str].values, df_data.Year.values,
df_data.ISO.values,
df_data['impact_scaled'] = scale_impact2refyear(df_data[imp_str].values, df_data['Year'].values,
df_data['ISO'].values,
reference_year=reference_year)
df_data['region_id'] = np.nan
for country in df_data.ISO.unique():
for country in df_data['ISO'].unique():
try:
df_data.loc[df_data.ISO == country, 'region_id'] = \
df_data.loc[df_data['ISO'] == country, 'region_id'] = \
u_coord.country_to_iso(country, "numeric")
except LookupError:
LOGGER.warning('ISO3alpha code not found in iso_country: %s', country)
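The region_id loop above resolves each ISO3 code once per country and leaves unknown codes as NaN. A reduced, self-contained version of the same pattern (the mapping dict stands in for u_coord.country_to_iso):

```python
import logging

import numpy as np
import pandas as pd

LOGGER = logging.getLogger(__name__)
ISO3_TO_NUMERIC = {"CHE": 756, "USA": 840}  # illustrative subset

df_data = pd.DataFrame({"ISO": ["CHE", "USA", "CHE", "XYZ"]})
df_data["region_id"] = np.nan
for country in df_data["ISO"].unique():
    try:
        df_data.loc[df_data["ISO"] == country, "region_id"] = ISO3_TO_NUMERIC[country]
    except LookupError:  # KeyError is a LookupError, mirroring the source
        LOGGER.warning("ISO3alpha code not found in iso_country: %s", country)
print(df_data)
```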
6 changes: 3 additions & 3 deletions climada/engine/test/test_impact.py
@@ -111,7 +111,7 @@ def test_from_eih_pass(self):
np.testing.assert_array_almost_equal(imp.at_event, fake_at_event)
np.testing.assert_array_almost_equal(
imp.coord_exp,
np.stack([exp.gdf.latitude.values, exp.gdf.longitude.values], axis=1)
np.stack([exp.gdf['latitude'].values, exp.gdf['longitude'].values], axis=1)
)

def test_pyproj_crs(self):
@@ -513,7 +513,7 @@ def test_local_exceedance_imp_pass(self):
impact_rp = impact.local_exceedance_imp(return_periods=(10, 40))

self.assertIsInstance(impact_rp, np.ndarray)
self.assertEqual(impact_rp.size, 2 * ent.exposures.gdf.value.size)
self.assertEqual(impact_rp.size, 2 * ent.exposures.gdf['value'].size)
self.assertAlmostEqual(np.max(impact_rp), 2916964966.388219, places=5)
self.assertAlmostEqual(np.min(impact_rp), 444457580.131494, places=5)

@@ -941,7 +941,7 @@ def test_match_centroids(self):
fake_aai_agg = np.sum(fake_eai_exp)
imp = Impact.from_eih(exp, HAZ, fake_at_event, fake_eai_exp, fake_aai_agg)
imp_centr = imp.match_centroids(HAZ)
np.testing.assert_array_equal(imp_centr, exp.gdf.centr_TC)
np.testing.assert_array_equal(imp_centr, exp.gdf['centr_TC'])


class TestImpactH5IO(unittest.TestCase):
Expand Down
Loading