Skip to content

Commit

Permalink
dask lazy eval
Browse files Browse the repository at this point in the history
  • Loading branch information
FBruzzesi committed Sep 6, 2024
1 parent 5b07223 commit 055334a
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions narwhals/_dask/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def n_unique() -> dd.Aggregation:

POLARS_TO_DASK_AGGREGATIONS = {
"len": "size",
"n_unique": n_unique(),
"n_unique": n_unique,
}


Expand Down Expand Up @@ -100,7 +100,7 @@ def agg_dask(
break

if all_simple_aggs:
simple_aggregations: dict[str, tuple[str, str]] = {}
simple_aggregations: dict[str, tuple[str, str | dd.Aggregation]] = {}
for expr in exprs:
if expr._depth == 0:
# e.g. agg(nw.len()) # noqa: ERA001
Expand All @@ -124,6 +124,10 @@ def agg_dask(

function_name = remove_prefix(expr._function_name, "col->")
function_name = POLARS_TO_DASK_AGGREGATIONS.get(function_name, function_name)

# deal with n_unique case in a "lazy" mode to not depend on dask globally
function_name = function_name() if callable(function_name) else function_name

for root_name, output_name in zip(expr._root_names, expr._output_names):
simple_aggregations[output_name] = (root_name, function_name)
try:
Expand Down

0 comments on commit 055334a

Please sign in to comment.