Skip to content

Commit

Permalink
feat: arrow and dask expr clip method (#729)
Browse files Browse the repository at this point in the history
* feat: arrow and dask clip

* merge main, add returns_scalar
  • Loading branch information
FBruzzesi authored Aug 7, 2024
1 parent 465a1c7 commit e3f9259
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 18 deletions.
7 changes: 7 additions & 0 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,13 @@ def quantile(
def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    """Delegate ``gather_every`` to the series-level implementation.

    Args:
        n: Forwarded to the series method as the ``n`` keyword argument.
        offset: Forwarded to the series method as the ``offset`` keyword
            argument; defaults to ``0``.

    Returns:
        Whatever ``reuse_series_implementation`` builds for this expression.
    """
    forwarded = {"n": n, "offset": offset}
    return reuse_series_implementation(self, "gather_every", **forwarded)

def clip(
    self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None
) -> Self:
    """Delegate ``clip`` to the series-level implementation.

    Args:
        lower_bound: Forwarded as the ``lower_bound`` keyword argument.
        upper_bound: Forwarded as the ``upper_bound`` keyword argument.

    Returns:
        Whatever ``reuse_series_implementation`` builds for this expression.
    """
    bounds = {"lower_bound": lower_bound, "upper_bound": upper_bound}
    return reuse_series_implementation(self, "clip", **bounds)

def over(self: Self, keys: list[str]) -> Self:
def func(df: ArrowDataFrame) -> list[ArrowSeries]:
if self._output_names is None:
Expand Down
12 changes: 12 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,18 @@ def quantile(
def gather_every(self: Self, n: int, offset: int = 0) -> Self:
    """Return a new series with every ``n``-th element, starting at ``offset``.

    Args:
        n: Step between the selected positions.
        offset: Position of the first selected element; defaults to ``0``.

    Returns:
        The sliced native series wrapped through ``_from_native_series``.
    """
    stride = slice(offset, None, n)
    picked = self._native_series[stride]
    return self._from_native_series(picked)

def clip(
    self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None
) -> Self:
    """Limit the values of the series to ``[lower_bound, upper_bound]``.

    Args:
        lower_bound: Smallest value allowed. ``None`` leaves the lower side
            unbounded.
        upper_bound: Largest value allowed. ``None`` leaves the upper side
            unbounded.

    Returns:
        A new series wrapping the clipped native array.
    """
    pa = get_pyarrow()
    pc = get_pyarrow_compute()

    arr = self._native_series
    # Only apply the bounds that were actually supplied. Because a missing
    # bound is skipped here, the kernels can run with ``skip_nulls=False`` so
    # that null elements of the series stay null instead of being replaced by
    # the bound (the default ``skip_nulls=True`` would return the bound for
    # every null element).
    if lower_bound is not None:
        arr = pc.max_element_wise(
            arr, pa.scalar(lower_bound, type=arr.type), skip_nulls=False
        )
    if upper_bound is not None:
        arr = pc.min_element_wise(
            arr, pa.scalar(upper_bound, type=arr.type), skip_nulls=False
        )

    return self._from_native_series(arr)

@property
def shape(self) -> tuple[int]:
    """One-element tuple holding the length of the native series."""
    n_rows = len(self._native_series)
    return (n_rows,)
Expand Down
11 changes: 11 additions & 0 deletions narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,17 @@ def fill_null(self, value: Any) -> DaskExpr:
returns_scalar=False,
)

def clip(
    self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None
) -> Self:
    """Build an expression that clips its input to the given bounds.

    Args:
        lower_bound: Passed to the native ``clip`` call as ``lower``.
        upper_bound: Passed to the native ``clip`` call as ``upper``.

    Returns:
        The result of ``self._from_call`` registered under the name
        ``"clip"`` with ``returns_scalar=False``.
    """

    def _clip(_input: Any, _lower: Any, _upper: Any) -> Any:
        # Calls the input's own ``clip`` with keyword bounds.
        return _input.clip(lower=_lower, upper=_upper)

    return self._from_call(
        _clip,
        "clip",
        lower_bound,
        upper_bound,
        returns_scalar=False,
    )

@property
def str(self: Self) -> DaskExprStringNamespace:
    """Namespace object wrapping this expression (``DaskExprStringNamespace``)."""
    namespace = DaskExprStringNamespace(self)
    return namespace
Expand Down
6 changes: 3 additions & 3 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1868,11 +1868,11 @@ def clip(
│ i64 │
╞═════╡
│ -1 │
1
1
│ -1 │
3
3
│ -1 │
3
3
└─────┘
"""
return self.__class__(
Expand Down
6 changes: 3 additions & 3 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,11 +634,11 @@ def clip(
Series: '' [i64]
[
-1
1
1
-1
3
3
-1
3
3
]
"""
return self._from_compliant_series(
Expand Down
35 changes: 23 additions & 12 deletions tests/expr_and_series/clip_test.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,35 @@
from typing import Any

import pytest

import narwhals.stable.v1 as nw
from tests.utils import compare_dicts


# Variant of the expression-level clip test that also takes the pytest
# `request` fixture.
# NOTE(review): a second `test_clip` definition follows immediately, so this
# looks like the pre-change side of the diff — confirm.
def test_clip(request: Any, constructor: Any) -> None:
# Mark the test as an expected failure when the constructor's string form
# mentions "pyarrow_table" or "dask".
if "pyarrow_table" in str(constructor) or "dask" in str(constructor):
request.applymarker(pytest.mark.xfail)
# Expression-level clip test: builds a frame with column "a" and checks
# lower-only, upper-only and two-sided clipping via `nw.col("a").clip(...)`.
def test_clip(constructor: Any) -> None:
df = nw.from_native(constructor({"a": [1, 2, 3, -4, 5]}))
# NOTE(review): the next two assignments are overwritten below before being
# compared; they look like the pre-change lines of the diff — confirm.
result = df.select(b=nw.col("a").clip(3, 5))
expected = {"b": [3, 3, 3, 3, 5]}
# One output column per bound combination.
result = df.select(
lower_only=nw.col("a").clip(lower_bound=3),
upper_only=nw.col("a").clip(upper_bound=4),
both=nw.col("a").clip(3, 4),
)
expected = {
"lower_only": [3, 3, 3, 3, 5],
"upper_only": [1, 2, 3, -4, 4],
"both": [3, 3, 3, 3, 4],
}
compare_dicts(result, expected)


# Variant of the series-level clip test that also takes the pytest `request`
# fixture.
# NOTE(review): a second `test_clip_series` definition follows immediately, so
# this looks like the pre-change side of the diff — confirm.
def test_clip_series(request: Any, constructor_eager: Any) -> None:
# Mark the test as an expected failure when the constructor's string form
# mentions "pyarrow_table".
if "pyarrow_table" in str(constructor_eager):
request.applymarker(pytest.mark.xfail)
# Series-level clip test: builds an eager frame with column "a" and checks
# lower-only, upper-only and two-sided clipping via `df["a"].clip(...)`.
def test_clip_series(constructor_eager: Any) -> None:
df = nw.from_native(constructor_eager({"a": [1, 2, 3, -4, 5]}), eager_only=True)
# NOTE(review): the next two assignments are overwritten below before being
# compared; they look like the pre-change lines of the diff — confirm.
result = {"b": df["a"].clip(3, 5)}
expected = {"b": [3, 3, 3, 3, 5]}
# One entry per bound combination.
result = {
"lower_only": df["a"].clip(lower_bound=3),
"upper_only": df["a"].clip(upper_bound=4),
"both": df["a"].clip(3, 4),
}

expected = {
"lower_only": [3, 3, 3, 3, 5],
"upper_only": [1, 2, 3, -4, 4],
"both": [3, 3, 3, 3, 4],
}
compare_dicts(result, expected)

0 comments on commit e3f9259

Please sign in to comment.