Fixed errors and deprecation warnings from Pandas 2.2.2

gherka · Sep 2, 2024 · 283ef50
1 parent 5f1b147
commit 283ef50
Show file tree

Hide file tree

Showing 13 changed files with 2,943 additions and 2,931 deletions.
diff --git a/exhibit/core/constraints.py b/exhibit/core/constraints.py
@@ -210,11 +210,20 @@ def process_custom_constraints(self, custom_constraints):
                         _kwargs = kwargs_dict.get(action, {})
                         _kwargs.update(spec_action_kwargs)
 
-                        # overwrite the original DF row IDs with the adjusted ones
-                        output_df.loc[cc_filter_idx] = action_func(
+                        # because the result of the action can be a different dtype compared
+                        # to the original (like int to float, particularly involving NULLs)
+                        # we need to capture the resultant dtype first, and then cast the 
+                        # original df to match it to avoid Pandas errors.
+                        action_df = action_func(
                             output_df, cc_filter_idx, target_str,
                             cc_partitions, **_kwargs)
+
+                        action_dtypes = action_df.dtypes
 
+                        output_df = output_df.astype(action_dtypes)
+
+                        # overwrite the original DF row IDs with the adjusted ones
+                        output_df.loc[cc_filter_idx] = action_df
         return output_df
 
     def adjust_dataframe_to_fit_constraint(self, anon_df, basic_constraint):
@@ -1231,12 +1240,13 @@ def shift_distribution(
 
                 final_result.append(new_series)
                 continue
-
+
+        # return the DF, matching the dtypes of the original (relevant for dates)
         new_df = pd.concat(
             final_result + 
             [df.loc[filter_idx, [x for x in df.columns if x not in target_cols]]],
             axis=1
-        ).reindex(columns=df.columns)
+        ).reindex(columns=df.columns).astype(df.dtypes)
 
         return new_df
 

diff --git a/exhibit/core/exhibit.py b/exhibit/core/exhibit.py
@@ -344,9 +344,10 @@ def execute_spec(self):
                             )
 
             if col in geo_action_targets:
-                # add placeholders to avoid errors when generating missing data
+                # add float placeholders to avoid errors when generating missing data
                 geo_cols = [f"{col}_latitude", f"{col}_longitude"]
-                anon_df[geo_cols] = 0
+                # use 0.0 to ensure column dtype is float so that we could null them later
+                anon_df[geo_cols] = 0.0
                 continue
 
             h3_table_name = self.spec_dict["columns"][col]["h3_table"]
@@ -444,7 +445,7 @@ def execute_spec(self):
                         anon_df[derived_col] = generate_derived_column(anon_df, derived_def)
                         break             
             # change the missing data placeholder back to NAs
-            anon_df.loc[:, cat_cols] = anon_df.loc[:, cat_cols].applymap(
+            anon_df.loc[:, cat_cols] = anon_df.loc[:, cat_cols].map(
             lambda x: np.nan if x == MISSING_DATA_STR else x)
 
         #8) GENERATE DERIVED COLUMNS IF ANY ARE SPECIFIED