From 4a4bc6479100495d9f1e855e75f1584a81aa0ca1 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Tue, 27 Aug 2024 08:15:11 -0400
Subject: [PATCH] fix(pivot-wider): handle the case of empty `id_cols` (#9912)

---
 ibis/backends/tests/test_generic.py | 48 +++++++++++++++++++++++++++++
 ibis/expr/types/relations.py        | 29 ++++++++++++++++-
 2 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py
index 6e0118458087..52e514ef8bbd 100644
--- a/ibis/backends/tests/test_generic.py
+++ b/ibis/backends/tests/test_generic.py
@@ -2453,3 +2453,51 @@ def test_union_generates_predictable_aliases(con):
     expr = ibis.union(sub1, sub2)
     df = con.execute(expr)
     assert len(df) == 2
+
+
+@pytest.mark.parametrize("id_cols", [s.none(), [], s.c()])
+def test_pivot_wider_empty_id_columns(con, backend, id_cols, monkeypatch):
+    monkeypatch.setattr(ibis.options, "default_backend", con)
+    data = pd.DataFrame(
+        {
+            "id": range(10),
+            "actual": [0, 1, 1, 0, 0, 1, 0, 0, 0, 1],
+            "prediction": [1, 0, 0, 1, 0, 0, 0, 0, 0, 1],
+        }
+    )
+    t = ibis.memtable(data)
+    expr = t.mutate(
+        outcome=(
+            ibis.case()
+            .when((_["actual"] == 0) & (_["prediction"] == 0), "TN")
+            .when((_["actual"] == 0) & (_["prediction"] == 1), "FP")
+            .when((_["actual"] == 1) & (_["prediction"] == 0), "FN")
+            .when((_["actual"] == 1) & (_["prediction"] == 1), "TP")
+            .end()
+        )
+    )
+    expr = expr.pivot_wider(
+        id_cols=id_cols,
+        names_from="outcome",
+        values_from="outcome",
+        values_agg=_.count(),
+        names_sort=True,
+    )
+    result = expr.to_pandas()
+    expected = pd.DataFrame({"FN": [3], "FP": [2], "TN": [4], "TP": [1]})
+    backend.assert_frame_equal(result, expected)
+
+
+@pytest.mark.notyet(
+    ["mysql", "risingwave", "impala", "mssql", "druid", "exasol", "oracle", "flink"],
+    raises=com.OperationNotDefinedError,
+    reason="backend doesn't support Arbitrary agg",
+)
+def test_simple_pivot_wider(con, backend, monkeypatch):
+    monkeypatch.setattr(ibis.options, "default_backend", con)
+    data = pd.DataFrame({"outcome": ["yes", "no"], "counted": [3, 4]})
+    t = ibis.memtable(data)
+    expr = t.pivot_wider(names_from="outcome", values_from="counted", names_sort=True)
+    result = expr.to_pandas()
+    expected = pd.DataFrame({"no": [4], "yes": [3]})
+    backend.assert_frame_equal(result, expected)
diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py
index 7a05c1dd87a2..99609707325f 100644
--- a/ibis/expr/types/relations.py
+++ b/ibis/expr/types/relations.py
@@ -4075,6 +4075,27 @@ def pivot_wider(
         │     … │       … │     … │      … │     … │       … │     … │     … │ … │
         └───────┴─────────┴───────┴────────┴───────┴─────────┴───────┴───────┴───┘
 
+        You can do simple transpose-like operations using `pivot_wider`
+
+        >>> t = ibis.memtable(dict(outcome=["yes", "no"], counted=[3, 4]))
+        >>> t
+        ┏━━━━━━━━━┳━━━━━━━━━┓
+        ┃ outcome ┃ counted ┃
+        ┡━━━━━━━━━╇━━━━━━━━━┩
+        │ string  │ int64   │
+        ├─────────┼─────────┤
+        │ yes     │       3 │
+        │ no      │       4 │
+        └─────────┴─────────┘
+        >>> t.pivot_wider(names_from="outcome", values_from="counted", names_sort=True)
+        ┏━━━━━━━┳━━━━━━━┓
+        ┃ no    ┃ yes   ┃
+        ┡━━━━━━━╇━━━━━━━┩
+        │ int64 │ int64 │
+        ├───────┼───────┤
+        │     4 │     3 │
+        └───────┴───────┘
+
         Fill missing pivoted values using `values_fill`
 
         >>> fish_encounters.pivot_wider(
@@ -4411,7 +4432,13 @@ def pivot_wider(
                 key = names_sep.join(filter(None, key_components))
                 aggs[key] = arg if values_fill is None else arg.coalesce(values_fill)
 
-        return self.group_by(id_cols).aggregate(**aggs)
+        grouping_keys = id_cols.expand(self)
+
+        # no id columns, so do an ungrouped aggregation
+        if not grouping_keys:
+            return self.aggregate(**aggs)
+
+        return self.group_by(*grouping_keys).aggregate(**aggs)
 
     def relocate(
         self,