patch: group by n_unique #917

Merged (13 commits) on Sep 6, 2024
7 changes: 7 additions & 0 deletions narwhals/_arrow/group_by.py
@@ -15,6 +15,12 @@
from narwhals._arrow.expr import ArrowExpr
from narwhals._arrow.typing import IntoArrowExpr

POLARS_TO_ARROW_AGGREGATIONS = {
"n_unique": "count_distinct",
"std": "stddev",
"var": "variance",

Member Author:
Found these other two cases while reading the pyarrow docs on grouped table aggregations (https://arrow.apache.org/docs/python/compute.html#py-grouped-aggrs); see the sketch after this file's diff.

}


class ArrowGroupBy:
def __init__(self, df: ArrowDataFrame, keys: list[str]) -> None:
@@ -112,6 +118,7 @@ def agg_arrow(
raise AssertionError(msg)

function_name = remove_prefix(expr._function_name, "col->")
function_name = POLARS_TO_ARROW_AGGREGATIONS.get(function_name, function_name)
for root_name, output_name in zip(expr._root_names, expr._output_names):
if function_name != "len":
simple_aggregations[output_name] = (
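
For anyone following along, a minimal sketch (with made-up data, outside narwhals) of the pyarrow aggregation names this mapping targets:

import pyarrow as pa

# Made-up table, purely to illustrate the aggregation names that
# POLARS_TO_ARROW_AGGREGATIONS maps n_unique/std/var onto.
tbl = pa.table({"a": [1, 1, 3, 3], "b": [4, 4, 6, 7]})

result = tbl.group_by("a").aggregate(
    [
        ("b", "count_distinct"),  # Polars n_unique
        ("b", "stddev"),          # Polars std
        ("b", "variance"),        # Polars var
    ]
)
# Produces columns named b_count_distinct, b_stddev and b_variance
# alongside the group key "a".
print(result.to_pydict())
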
23 changes: 18 additions & 5 deletions narwhals/_dask/group_by.py
@@ -10,12 +10,27 @@
from narwhals.utils import remove_prefix

if TYPE_CHECKING:
import dask.dataframe as dd

from narwhals._dask.dataframe import DaskLazyFrame
from narwhals._dask.expr import DaskExpr
from narwhals._dask.typing import IntoDaskExpr

POLARS_TO_PANDAS_AGGREGATIONS = {

def n_unique() -> dd.Aggregation:
import dask.dataframe as dd # ignore-banned-import

return dd.Aggregation(
name="nunique",
chunk=lambda s: s.apply(lambda x: list(set(x))),
agg=lambda s0: s0.obj.groupby(level=list(range(s0.obj.index.nlevels))).sum(),
finalize=lambda s1: s1.apply(lambda final: len(set(final))),
)

Member Author:
This comes from the dask documentation itself.
For some reason, the nunique keyword only works in

df_dd.groupby("a").b.nunique()

but not in the agg context (see the sketch after this thread).

Member:
Thanks for digging this up in the dask docs!

I think this is way more complex than it needs to be; I've pushed something much simpler which seems to work.
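
To illustrate the distinction above, a small self-contained sketch (with made-up data) of how the chunk/agg/finalize recipe is consumed; it mirrors the diff as shown here, not the simpler version pushed later:

import pandas as pd
import dask.dataframe as dd  # ignore-banned-import

# Same chunk/agg/finalize recipe as in the diff above.
n_unique_agg = dd.Aggregation(
    name="nunique",
    # per partition: collect the distinct values of each group into a list
    chunk=lambda s: s.apply(lambda x: list(set(x))),
    # across partitions: concatenate those lists group-wise
    agg=lambda s0: s0.obj.groupby(level=list(range(s0.obj.index.nlevels))).sum(),
    # finally: count the distinct values in the concatenated lists
    finalize=lambda s1: s1.apply(lambda final: len(set(final))),
)

ddf = dd.from_pandas(pd.DataFrame({"a": [1, 1, 3], "b": [4, 4, 6]}), npartitions=2)

# The plain nunique keyword works on the series-level groupby...
print(ddf.groupby("a").b.nunique().compute())

# ...but, per the comment above, not inside .agg(), where a
# dd.Aggregation object is passed instead:
print(ddf.groupby("a").agg({"b": n_unique_agg}).compute())
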



POLARS_TO_DASK_AGGREGATIONS = {
"len": "size",
"n_unique": n_unique(),
}


@@ -93,7 +108,7 @@ def agg_dask(
msg = "Safety assertion failed, please report a bug to https://github.com/narwhals-dev/narwhals/issues"
raise AssertionError(msg)

function_name = POLARS_TO_PANDAS_AGGREGATIONS.get(
function_name = POLARS_TO_DASK_AGGREGATIONS.get(
expr._function_name, expr._function_name
)
for output_name in expr._output_names:
@@ -108,9 +123,7 @@
raise AssertionError(msg)

function_name = remove_prefix(expr._function_name, "col->")
function_name = POLARS_TO_PANDAS_AGGREGATIONS.get(
function_name, function_name
)
function_name = POLARS_TO_DASK_AGGREGATIONS.get(function_name, function_name)
for root_name, output_name in zip(expr._root_names, expr._output_names):
simple_aggregations[output_name] = (root_name, function_name)
try:
1 change: 1 addition & 0 deletions narwhals/_pandas_like/group_by.py
@@ -21,6 +21,7 @@

POLARS_TO_PANDAS_AGGREGATIONS = {
"len": "size",
"n_unique": "nunique",
}


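
As a point of reference, a minimal sketch (with made-up data) of the pandas call that the "nunique" string resolves to:

import pandas as pd

df = pd.DataFrame({"a": [1, 1, 3], "b": [4, 4, 6]})

# The Polars-style n_unique aggregation becomes pandas' "nunique" string:
print(df.groupby("a").agg({"b": "nunique"}))
# Each group of "a" has exactly one distinct value of "b" here.
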
11 changes: 11 additions & 0 deletions tests/test_group_by.py
@@ -102,6 +102,17 @@ def test_group_by_len(constructor: Any) -> None:
compare_dicts(result, expected)


def test_group_by_n_unique(constructor: Any) -> None:
result = (
nw.from_native(constructor(data))
.group_by("a")
.agg(nw.col("b").n_unique())
.sort("a")
)
expected = {"a": [1, 3], "b": [1, 1]}
compare_dicts(result, expected)


def test_group_by_empty_result_pandas() -> None:
df_any = pd.DataFrame({"a": [1, 2, 3], "b": [4, 3, 2]})
df = nw.from_native(df_any, eager_only=True)
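
As a closing usage sketch, the public API exercised by the test above, run against a plain pandas frame; the module-level data fixture is not shown in this diff, so the frame below is a hypothetical stand-in consistent with expected:

import pandas as pd
import narwhals as nw

# Hypothetical data: two groups of "a", each with one distinct "b".
df = pd.DataFrame({"a": [1, 1, 3], "b": [4, 4, 6]})

result = (
    nw.from_native(df, eager_only=True)
    .group_by("a")
    .agg(nw.col("b").n_unique())
    .sort("a")
)
# Expected to match {"a": [1, 3], "b": [1, 1]}.
print(result.to_native())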