Skip to content

Commit

Permalink
Added download of synthetic aggregates (#91)
Browse files (browse the repository at this point in the history)
Co-authored-by: Darren Edge <darren.edge@example.com>
  • Loading branch information
darrenedge and Darren Edge authored Feb 7, 2025
1 parent 28f600f commit 9124483
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 6 deletions.
1 change: 1 addition & 0 deletions app/workflows/anonymize_case_data/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def create_session(self, prefix):
self.anonymize_last_synthetic_file_name = SessionVariable("", prefix)
self.anonymize_last_aggregate_file_name = SessionVariable("", prefix)
self.anonymize_synthetic_df = SessionVariable(pd.DataFrame(), prefix)
self.anonymize_synthetic_aggregate_df = SessionVariable(pd.DataFrame(), prefix)
self.anonymize_aggregate_df = SessionVariable(pd.DataFrame(), prefix)
self.anonymize_epsilon = SessionVariable(12.0, prefix)
# self.anonymize_sen_agg_rep = SessionVariable(pd.DataFrame(), prefix)
Expand Down
22 changes: 16 additions & 6 deletions app/workflows/anonymize_case_data/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def create(sv: ds_variables.SessionVariables, workflow: None):
)
sv.anonymize_synthetic_df.value = acd.synthetic_df
sv.anonymize_aggregate_df.value = acd.aggregate_df
sv.anonymize_synthetic_aggregate_df.value = acd.synthetic_aggregate_df
sv.anonymize_delta.value = f"{acd.delta:.2e}"
if epsilon > 12:
st.warning(
Expand Down Expand Up @@ -209,12 +210,21 @@ def create(sv: ds_variables.SessionVariables, workflow: None):
use_container_width=True,
height=700,
)
- st.download_button(
- "Download Synthetic data",
- data=sv.anonymize_synthetic_df.value.to_csv(index=False),
- file_name="synthetic_data.csv",
- mime="text/csv",
- )
+ ca, cb = st.columns([4,5])
+ with ca:
+ st.download_button(
+ "Download Synthetic data",
+ data=sv.anonymize_synthetic_df.value.to_csv(index=False),
+ file_name="synthetic_data.csv",
+ mime="text/csv",
+ )
+ with cb:
+ st.download_button(
+ "Download Synthetic aggregates",
+ data=sv.anonymize_synthetic_aggregate_df.value.to_csv(index=False),
+ file_name="synthetic_aggregate_data.csv",
+ mime="text/csv",
+ )

with queries_tab:
if (
Expand Down
14 changes: 14 additions & 0 deletions intelligence_toolkit/anonymize_case_data/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def __init__(self) -> None:
self.delta = 0
self.sensitive_df = pd.DataFrame()
self.aggregate_df = pd.DataFrame()
self.synthetic_aggregate_df = pd.DataFrame()
self.synthetic_df = pd.DataFrame()
self.aggregate_error_report = pd.DataFrame()
self.synthetic_error_report = pd.DataFrame()
Expand Down Expand Up @@ -175,6 +176,19 @@ def anonymize_case_data(
self.aggregate_df = self.aggregate_df.sort_values(
by=["protected_count"], ascending=False
)

self.synthetic_aggregate_df = pd.DataFrame(
data=synthetic_aggregates.items(),
columns=["selections", "protected_count"],
)
self.synthetic_aggregate_df.loc[len(self.synthetic_aggregate_df)] = [
"record_count",
self.protected_number_of_records,
]
self.synthetic_aggregate_df = self.synthetic_aggregate_df.sort_values(
by=["protected_count"], ascending=False
)

self.aggregate_error_report = ErrorReport(
sensitive_aggregates_parsed, dp_aggregates_parsed
).gen()
Expand Down

0 comments on commit 9124483

Please sign in to comment.