Skip to content

Commit 5122b24

Browse files
committed
new JSON output format
1 parent b998d3b commit 5122b24

File tree

4 files changed

+93
-64
lines changed

4 files changed

+93
-64
lines changed

acro/acro.py

+16 -10
Original file line number | Diff line number | Diff line change
@@ -271,11 +271,10 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals
271271
mask.replace({0: False, 1: True}, inplace=True)
272272
masks[name] = mask
273273

274-
# build the properties dictionary
275-
properties: dict = {"method": "crosstab", "suppressed": self.suppress}
276-
utils.update_table_properties(masks, properties)
274+
# build the sdc dictionary
275+
sdc: dict = utils.get_table_sdc(masks, self.suppress)
277276
# get the status and summary
278-
status, summary = utils.get_summary(properties)
277+
status, summary = utils.get_summary(sdc)
279278
# apply the suppression
280279
safe_table, outcome = utils.apply_suppression(table, masks)
281280
if self.suppress:
@@ -284,7 +283,8 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals
284283
self.results.add(
285284
status=status,
286285
output_type="table",
287-
properties=properties,
286+
properties={"method": "crosstab"},
287+
sdc=sdc,
288288
command=command,
289289
summary=summary,
290290
outcome=outcome,
@@ -410,11 +410,10 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals
410410
data, values, index, columns, aggfunc=agg
411411
)
412412

413-
# build the properties dictionary
414-
properties: dict = {"method": "pivot_table", "suppressed": self.suppress}
415-
utils.update_table_properties(masks, properties)
413+
# build the sdc dictionary
414+
sdc: dict = utils.get_table_sdc(masks, self.suppress)
416415
# get the status and summary
417-
status, summary = utils.get_summary(properties)
416+
status, summary = utils.get_summary(sdc)
418417
# apply the suppression
419418
safe_table, outcome = utils.apply_suppression(table, masks)
420419
if self.suppress:
@@ -423,7 +422,8 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals
423422
self.results.add(
424423
status=status,
425424
output_type="table",
426-
properties=properties,
425+
properties={"method": "pivot_table"},
426+
sdc=sdc,
427427
command=command,
428428
summary=summary,
429429
outcome=outcome,
@@ -503,6 +503,7 @@ def ols( # pylint: disable=too-many-locals
503503
status=status,
504504
output_type="regression",
505505
properties={"method": "ols", "dof": dof},
506+
sdc={},
506507
command=command,
507508
summary=summary,
508509
outcome=DataFrame(),
@@ -566,6 +567,7 @@ def olsr( # pylint: disable=too-many-locals,keyword-arg-before-vararg
566567
status=status,
567568
output_type="regression",
568569
properties={"method": "olsr", "dof": dof},
570+
sdc={},
569571
command=command,
570572
summary=summary,
571573
outcome=DataFrame(),
@@ -614,6 +616,7 @@ def logit( # pylint: disable=too-many-arguments,too-many-locals
614616
status=status,
615617
output_type="regression",
616618
properties={"method": "logit", "dof": dof},
619+
sdc={},
617620
command=command,
618621
summary=summary,
619622
outcome=DataFrame(),
@@ -677,6 +680,7 @@ def logitr( # pylint: disable=too-many-locals,keyword-arg-before-vararg
677680
status=status,
678681
output_type="regression",
679682
properties={"method": "logitr", "dof": dof},
683+
sdc={},
680684
command=command,
681685
summary=summary,
682686
outcome=DataFrame(),
@@ -725,6 +729,7 @@ def probit( # pylint: disable=too-many-arguments,too-many-locals
725729
status=status,
726730
output_type="regression",
727731
properties={"method": "probit", "dof": dof},
732+
sdc={},
728733
command=command,
729734
summary=summary,
730735
outcome=DataFrame(),
@@ -788,6 +793,7 @@ def probitr( # pylint: disable=too-many-locals,keyword-arg-before-vararg
788793
status=status,
789794
output_type="regression",
790795
properties={"method": "probitr", "dof": dof},
796+
sdc={},
791797
command=command,
792798
summary=summary,
793799
outcome=DataFrame(),

acro/record.py

+39 -15
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,8 @@ class Record: # pylint: disable=too-many-instance-attributes,too-few-public-met
6969
Type of output, e.g., "regression"
7070
properties : dict
7171
Dictionary containing structured output data.
72+
sdc : dict
73+
Dictionary containing SDC results.
7274
command : str
7375
String representation of the operation performed.
7476
summary : str
@@ -91,6 +93,7 @@ def __init__( # pylint: disable=too-many-arguments
9193
status: str,
9294
output_type: str,
9395
properties: dict,
96+
sdc: dict,
9497
command: str,
9598
summary: str,
9699
outcome: DataFrame,
@@ -109,6 +112,8 @@ def __init__( # pylint: disable=too-many-arguments
109112
Type of output, e.g., "regression"
110113
properties : dict
111114
Dictionary containing structured output data.
115+
sdc : dict
116+
Dictionary containing SDC results.
112117
command : str
113118
String representation of the operation performed.
114119
summary : str
@@ -124,6 +129,7 @@ def __init__( # pylint: disable=too-many-arguments
124129
self.status: str = status
125130
self.output_type: str = output_type
126131
self.properties: dict = properties
132+
self.sdc: dict = sdc
127133
self.command: str = command
128134
self.summary: str = summary
129135
self.outcome: DataFrame = outcome
@@ -182,6 +188,7 @@ def __str__(self) -> str:
182188
f"status: {self.status}\n"
183189
f"type: {self.output_type}\n"
184190
f"properties: {self.properties}\n"
191+
f"sdc: {self.sdc}\n"
185192
f"command: {self.command}\n"
186193
f"summary: {self.summary}\n"
187194
f"outcome: {self.outcome}\n"
@@ -205,6 +212,7 @@ def add( # pylint: disable=too-many-arguments
205212
status: str,
206213
output_type: str,
207214
properties: dict,
215+
sdc: dict,
208216
command: str,
209217
summary: str,
210218
outcome: DataFrame,
@@ -221,6 +229,8 @@ def add( # pylint: disable=too-many-arguments
221229
Type of output, e.g., "regression"
222230
properties : dict
223231
Dictionary containing structured output data.
232+
sdc : dict
233+
Dictionary containing SDC results.
224234
command : str
225235
String representation of the operation performed.
226236
summary : str
@@ -237,6 +247,7 @@ def add( # pylint: disable=too-many-arguments
237247
status=status,
238248
output_type=output_type,
239249
properties=properties,
250+
sdc=sdc,
240251
command=command,
241252
summary=summary,
242253
outcome=outcome,
@@ -319,6 +330,7 @@ def add_custom(self, filename: str, comment: str | None = None) -> None:
319330
status="review",
320331
output_type="custom",
321332
properties={},
333+
sdc={},
322334
command="custom",
323335
summary="review",
324336
outcome=DataFrame(),
@@ -439,18 +451,22 @@ def finalise_json(self, path: str) -> None:
439451
"status": val.status,
440452
"type": val.output_type,
441453
"properties": val.properties,
454+
"files": [],
455+
"outcome": json.loads(val.outcome.to_json()),
442456
"command": val.command,
443457
"summary": val.summary,
444-
"outcome": json.loads(val.outcome.to_json()),
445-
"output": val.serialize_output(path),
446458
"timestamp": val.timestamp,
447459
"comments": val.comments,
448460
"exception": val.exception,
449461
}
462+
files: list[str] = val.serialize_output(path)
463+
for file in files:
464+
outputs[key]["files"].append({"name": file, "sdc": val.sdc})
465+
450466
results: dict = {"version": __version__, "results": outputs}
451467
filename: str = os.path.normpath(f"{path}/results.json")
452-
with open(filename, "w", newline="", encoding="utf-8") as file:
453-
json.dump(results, file, indent=4, sort_keys=False)
468+
with open(filename, "w", newline="", encoding="utf-8") as handle:
469+
json.dump(results, handle, indent=4, sort_keys=False)
454470

455471
def finalise_excel(self, path: str) -> None:
456472
"""Writes outputs to an excel spreadsheet.
@@ -539,21 +555,29 @@ def load_records(path: str) -> Records:
539555
"""
540556
records = Records()
541557
filename = os.path.normpath(f"{path}/results.json")
542-
with open(filename, newline="", encoding="utf-8") as file:
543-
data = json.load(file)
558+
with open(filename, newline="", encoding="utf-8") as handle:
559+
data = json.load(handle)
544560
if data["version"] != __version__: # pragma: no cover
545561
raise ValueError("error loading output")
546562
for key, val in data["results"].items():
563+
files: list[dict] = val["files"]
564+
filenames: list = []
565+
sdcs: list = []
566+
for file in files:
567+
filenames.append(file["name"])
568+
sdcs.append(file["sdc"])
547569
records.results[key] = Record(
548-
val["uid"],
549-
val["status"],
550-
val["type"],
551-
val["properties"],
552-
val["command"],
553-
val["summary"],
554-
load_outcome(val["outcome"]),
555-
load_output(path, val["output"]),
556-
val["comments"],
570+
uid=val["uid"],
571+
status=val["status"],
572+
output_type=val["type"],
573+
properties=val["properties"],
574+
sdc=sdcs[0],
575+
command=val["command"],
576+
summary=val["summary"],
577+
outcome=load_outcome(val["outcome"]),
578+
output=load_output(path, filenames),
579+
comments=val["comments"],
557580
)
581+
records.results[key].exception = val["exception"]
558582
records.results[key].timestamp = val["timestamp"]
559583
return records

acro/utils.py

+30 -28
Original file line number | Diff line number | Diff line change
@@ -218,44 +218,45 @@ def apply_suppression(
218218
return safe_df, outcome_df
219219

220220

221-
def update_table_properties(masks: dict[str, DataFrame], properties: dict) -> None:
222-
"""Updates the properties dictionary using the suppression masks.
221+
def get_table_sdc(masks: dict[str, DataFrame], suppress: bool) -> dict:
222+
"""Returns the SDC dictionary using the suppression masks.
223223
224224
Parameters
225225
----------
226226
masks : dict[str, DataFrame]
227227
Dictionary of tables specifying suppression masks for application.
228-
properties : dict
229-
Properties of the SDC checks.
228+
suppress : bool
229+
Whether suppression has been applied.
230230
"""
231231
# summary of cells to be suppressed
232-
properties["negative"] = 0
233-
properties["missing"] = 0
234-
properties["threshold"] = 0
235-
properties["p-ratio"] = 0
236-
properties["nk-rule"] = 0
232+
sdc: dict = {"summary": {"suppressed": suppress}, "cells": {}}
233+
sdc["summary"]["negative"] = 0
234+
sdc["summary"]["missing"] = 0
235+
sdc["summary"]["threshold"] = 0
236+
sdc["summary"]["p-ratio"] = 0
237+
sdc["summary"]["nk-rule"] = 0
237238
for name, mask in masks.items():
238-
properties[name] = int(mask.to_numpy().sum())
239+
sdc["summary"][name] = int(mask.to_numpy().sum())
239240
# positions of cells to be suppressed
240-
properties["sdc"] = {}
241-
properties["sdc"]["negative"] = []
242-
properties["sdc"]["missing"] = []
243-
properties["sdc"]["threshold"] = []
244-
properties["sdc"]["p-ratio"] = []
245-
properties["sdc"]["nk-rule"] = []
241+
sdc["cells"]["negative"] = []
242+
sdc["cells"]["missing"] = []
243+
sdc["cells"]["threshold"] = []
244+
sdc["cells"]["p-ratio"] = []
245+
sdc["cells"]["nk-rule"] = []
246246
for name, mask in masks.items():
247247
true_positions = np.column_stack(np.where(mask.values))
248248
for pos in true_positions:
249249
row_index, col_index = pos
250-
properties["sdc"][name].append([int(row_index), int(col_index)])
250+
sdc["cells"][name].append([int(row_index), int(col_index)])
251+
return sdc
251252

252253

253-
def get_summary(properties: dict) -> tuple[str, str]:
254+
def get_summary(sdc: dict) -> tuple[str, str]:
254255
"""Returns the status and summary of the suppression masks.
255256
256257
Parameters
257258
----------
258-
properties : dict
259+
sdc : dict
259260
Properties of the SDC checks.
260261
261262
Returns
@@ -267,22 +268,23 @@ def get_summary(properties: dict) -> tuple[str, str]:
267268
"""
268269
status: str = "pass"
269270
summary: str = ""
270-
sup: str = "suppressed" if properties["suppressed"] else "may need suppressing"
271-
if properties["negative"]:
271+
sdc_summary = sdc["summary"]
272+
sup: str = "suppressed" if sdc_summary["suppressed"] else "may need suppressing"
273+
if sdc_summary["negative"] > 0:
272274
summary += "negative values found"
273275
status = "review"
274-
elif properties["missing"]:
276+
elif sdc_summary["missing"] > 0:
275277
summary += "missing values found"
276278
status = "review"
277279
else:
278-
if properties["threshold"] > 0:
279-
summary += f"threshold: {properties['threshold']} cells {sup}; "
280+
if sdc_summary["threshold"] > 0:
281+
summary += f"threshold: {sdc_summary['threshold']} cells {sup}; "
280282
status = "fail"
281-
if properties["p-ratio"] > 0:
282-
summary += f"p-ratio: {properties['p-ratio']} cells {sup}; "
283+
if sdc_summary["p-ratio"] > 0:
284+
summary += f"p-ratio: {sdc_summary['p-ratio']} cells {sup}; "
283285
status = "fail"
284-
if properties["nk-rule"] > 0:
285-
summary += f"nk-rule: {properties['nk-rule']} cells {sup}; "
286+
if sdc_summary["nk-rule"] > 0:
287+
summary += f"nk-rule: {sdc_summary['nk-rule']} cells {sup}; "
286288
status = "fail"
287289
if summary != "":
288290
summary = f"{status}; {summary}"

test/test_initial.py

+8 -11
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ def test_crosstab_threshold(data, acro):
6767
output = acro.results.get_index(0)
6868
total_nan: int = output.output[0]["R/G"].isnull().sum()
6969
assert total_nan == 6
70-
positions = output.properties["sdc"]["threshold"]
70+
positions = output.sdc["cells"]["threshold"]
7171
for pos in positions:
7272
row, col = pos
7373
assert np.isnan(output.output[0].iloc[row, col])
@@ -287,11 +287,17 @@ def test_finalise_json(data, acro):
287287
loaded: Records = load_records(PATH)
288288
orig = result.get_index(0)
289289
read = loaded.get_index(0)
290+
print("*****************************")
291+
print(orig)
292+
print("*****************************")
293+
print(read)
294+
print("*****************************")
290295
# check equal
291296
assert orig.uid == read.uid
292297
assert orig.status == read.status
293298
assert orig.output_type == read.output_type
294299
assert orig.properties == read.properties
300+
assert orig.sdc == read.sdc
295301
assert orig.command == read.command
296302
assert orig.summary == read.summary
297303
assert orig.comments == read.comments
@@ -301,16 +307,7 @@ def test_finalise_json(data, acro):
301307
with open(os.path.normpath(f"{PATH}/results.json"), encoding="utf-8") as file:
302308
json_data = json.load(file)
303309
results: dict = json_data["results"]
304-
assert results[orig.uid]["output"][0] == f"{orig.uid}_0.csv"
305-
# regression check: the outcome fields are dicts not strings
306-
assert results[orig.uid]["outcome"]["R/G"] == {
307-
"2010": "threshold; ",
308-
"2011": "threshold; ",
309-
"2012": "threshold; ",
310-
"2013": "threshold; ",
311-
"2014": "threshold; ",
312-
"2015": "threshold; ",
313-
}
310+
assert results[orig.uid]["files"][0]["name"] == f"{orig.uid}_0.csv"
314311

315312

316313
def test_rename_output(data, acro):

0 commit comments

Comments
 (0)