Skip to content

Commit

Permalink
Fixed errors and deprecation warnings from Pandas 2.2.2
Browse files Browse the repository at this point in the history
  • Loading branch information
gherka committed Sep 2, 2024
1 parent 5f1b147 commit 283ef50
Show file tree
Hide file tree
Showing 13 changed files with 2,943 additions and 2,931 deletions.
18 changes: 14 additions & 4 deletions exhibit/core/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,20 @@ def process_custom_constraints(self, custom_constraints):
_kwargs = kwargs_dict.get(action, {})
_kwargs.update(spec_action_kwargs)

- # overwrite the original DF row IDs with the adjusted ones
- output_df.loc[cc_filter_idx] = action_func(
+ # because the result of the action can be a different dtype compared
+ # to the original (like int to float, particularly involving NULLs)
+ # we need to capture the resultant dtype first, and then cast the
+ # original df to match it to avoid Pandas errors.
+ action_df = action_func(
  output_df, cc_filter_idx, target_str,
  cc_partitions, **_kwargs)

+ action_dtypes = action_df.dtypes
+
+ output_df = output_df.astype(action_dtypes)
+
+ # overwrite the original DF row IDs with the adjusted ones
+ output_df.loc[cc_filter_idx] = action_df
return output_df

def adjust_dataframe_to_fit_constraint(self, anon_df, basic_constraint):
Expand Down Expand Up @@ -1231,12 +1240,13 @@ def shift_distribution(

final_result.append(new_series)
  continue
-
+
+ # return the DF, matching the dtypes of the original (relevant for dates)
  new_df = pd.concat(
  final_result +
  [df.loc[filter_idx, [x for x in df.columns if x not in target_cols]]],
  axis=1
- ).reindex(columns=df.columns)
+ ).reindex(columns=df.columns).astype(df.dtypes)

return new_df

Expand Down
7 changes: 4 additions & 3 deletions exhibit/core/exhibit.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,10 @@ def execute_spec(self):
)

if col in geo_action_targets:
- # add placeholders to avoid errors when generating missing data
+ # add float placeholders to avoid errors when generating missing data
  geo_cols = [f"{col}_latitude", f"{col}_longitude"]
- anon_df[geo_cols] = 0
+ # use 0.0 to ensure column dtype is float so that we could null them later
+ anon_df[geo_cols] = 0.0
continue

h3_table_name = self.spec_dict["columns"][col]["h3_table"]
Expand Down Expand Up @@ -444,7 +445,7 @@ def execute_spec(self):
anon_df[derived_col] = generate_derived_column(anon_df, derived_def)
break
# change the missing data placeholder back to NAs
- anon_df.loc[:, cat_cols] = anon_df.loc[:, cat_cols].applymap(
+ anon_df.loc[:, cat_cols] = anon_df.loc[:, cat_cols].map(
  lambda x: np.nan if x == MISSING_DATA_STR else x)

#8) GENERATE DERIVED COLUMNS IF ANY ARE SPECIFIED
Expand Down
Loading

0 comments on commit 283ef50

Please sign in to comment.