Skip to content

Commit 4602179

Browse files
committed
updating pivot_table
1 parent d5c70b5 commit 4602179

File tree

3 files changed

+69
-0
lines changed

3 files changed

+69
-0
lines changed

CHANGELOG.md

+9
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,15 @@
33
## Development
44

55
Changes:
6+
* Update table suppression when totals are true for pivot table ([#165](https://github.com/AI-SDC/ACRO/pull/165))
7+
* Fix the problem of shape mismatch when there are two columns and the aggfunc is count or sum ([#167](https://github.com/AI-SDC/ACRO/pull/167))
8+
* Remove all files and folders created during testing ([#168](https://github.com/AI-SDC/ACRO/pull/168))
9+
* Create an example notebook with simple examples of acro ([#170](https://github.com/AI-SDC/ACRO/pull/170))
10+
* Add support for histogram ([#176](https://github.com/AI-SDC/ACRO/pull/176))
11+
* Add inherited members from acro_tables and acro_regression to the sphinx docs ([#177](https://github.com/AI-SDC/ACRO/pull/177))
12+
* Update the R help function ([#178](https://github.com/AI-SDC/ACRO/pull/178))
13+
* Update the finalise function to check the provided folder name and ask for a new one if it already exists ([#179](https://github.com/AI-SDC/ACRO/pull/179))
14+
* Add histogram and survival analysis to R ([#182](https://github.com/AI-SDC/ACRO/pull/182))
615

716
## Version 0.4.3 (Sep 22, 2023)
817

acro/acro_tables.py

+27
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,9 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals
244244
(hierarchical indexes) on the index and columns of the result
245245
DataFrame.
246246
247+
To provide consistent behaviour with different aggregation functions,
248+
'empty' rows or columns, i.e. those that are all NaN or 0 (count, sum), are removed.
249+
247250
Parameters
248251
----------
249252
data : DataFrame
@@ -307,6 +310,29 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals
307310
sort,
308311
)
309312

313+
# delete empty rows and columns from table
314+
deleted_rows = []
315+
deleted_cols = []
316+
# define empty columns and rows using boolean masks: a column/row is treated as empty when its sum equals 0
317+
empty_cols_mask = table.sum(axis=0) == 0
318+
empty_rows_mask = table.sum(axis=1) == 0
319+
320+
deleted_cols = list(table.columns[empty_cols_mask])
321+
table = table.loc[:, ~empty_cols_mask]
322+
deleted_rows = list(table.index[empty_rows_mask])
323+
table = table.loc[~empty_rows_mask, :]
324+
325+
# create a message with the names of the deleted columns and rows
326+
comments = []
327+
if deleted_cols:
328+
msg_cols = ", ".join(str(col) for col in deleted_cols)
329+
comments.append(f"Empty columns: {msg_cols} were deleted.")
330+
if deleted_rows:
331+
msg_rows = ", ".join(str(row) for row in deleted_rows)
332+
comments.append(f"Empty rows: {msg_rows} were deleted.")
333+
if comments:
334+
logger.info(" ".join(comments))
335+
310336
# suppression masks to apply based on the following checks
311337
masks: dict[str, DataFrame] = {}
312338

@@ -387,6 +413,7 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals
387413
summary=summary,
388414
outcome=outcome,
389415
output=[table],
416+
comments=comments,
390417
)
391418
return table
392419

test/test_initial.py

+33
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,39 @@ def test_pivot_table_cols(data, acro):
169169
shutil.rmtree(PATH)
170170

171171

172+
def test_pivot_table_with_aggfunc_sum(data, acro):
173+
"""Test the pivot table with two columns and aggfunc sum."""
174+
acro = ACRO(suppress=False)
175+
_ = acro.pivot_table(
176+
data,
177+
index="year",
178+
columns=["grant_type", "survivor"],
179+
values="inc_grants",
180+
aggfunc="sum",
181+
)
182+
_ = acro.pivot_table(
183+
data,
184+
index=["grant_type", "survivor"],
185+
columns="year",
186+
values="inc_grants",
187+
aggfunc="sum",
188+
)
189+
acro.add_exception("output_0", "Let me have it")
190+
acro.add_exception("output_1", "I need this output")
191+
results: Records = acro.finalise(PATH)
192+
output_0 = results.get_index(0)
193+
output_1 = results.get_index(1)
194+
comment_0 = (
195+
"Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted."
196+
)
197+
comment_1 = (
198+
"Empty rows: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted."
199+
)
200+
assert output_0.comments == [comment_0]
201+
assert output_1.comments == [comment_1]
202+
shutil.rmtree(PATH)
203+
204+
172205
def test_ols(data, acro):
173206
"""Ordinary Least Squares test."""
174207
new_df = data[["inc_activity", "inc_grants", "inc_donations", "total_costs"]]

0 commit comments

Comments
 (0)