From e3f92592b9d8966fa7cb4f9f5addc5bd61c3a598 Mon Sep 17 00:00:00 2001 From: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> Date: Wed, 7 Aug 2024 20:28:19 +0200 Subject: [PATCH] feat: arrow and dask expr `clip` method (#729) * feat: arrow and dask clip * merge main, add returns_scalar --- narwhals/_arrow/expr.py | 7 ++++++ narwhals/_arrow/series.py | 12 ++++++++++ narwhals/_dask/expr.py | 11 ++++++++++ narwhals/expr.py | 6 ++--- narwhals/series.py | 6 ++--- tests/expr_and_series/clip_test.py | 35 ++++++++++++++++++++---------- 6 files changed, 59 insertions(+), 18 deletions(-) diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index a539040f0e..1b8360a715 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -287,6 +287,13 @@ def quantile( def gather_every(self: Self, n: int, offset: int = 0) -> Self: return reuse_series_implementation(self, "gather_every", n=n, offset=offset) + def clip( + self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None + ) -> Self: + return reuse_series_implementation( + self, "clip", lower_bound=lower_bound, upper_bound=upper_bound + ) + def over(self: Self, keys: list[str]) -> Self: def func(df: ArrowDataFrame) -> list[ArrowSeries]: if self._output_names is None: diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 3c13be2db3..75fe0f6320 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -587,6 +587,18 @@ def quantile( def gather_every(self: Self, n: int, offset: int = 0) -> Self: return self._from_native_series(self._native_series[offset::n]) + def clip( + self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None + ) -> Self: + pa = get_pyarrow() + pc = get_pyarrow_compute() + + arr = self._native_series + arr = pc.max_element_wise(arr, pa.scalar(lower_bound, type=arr.type)) + arr = pc.min_element_wise(arr, pa.scalar(upper_bound, type=arr.type)) + + return self._from_native_series(arr) + @property def shape(self) -> tuple[int]: return (len(self._native_series),) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index e10ca92162..9986145e87 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -317,6 +317,17 @@ def fill_null(self, value: Any) -> DaskExpr: returns_scalar=False, ) + def clip( + self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None + ) -> Self: + return self._from_call( + lambda _input, _lower, _upper: _input.clip(lower=_lower, upper=_upper), + "clip", + lower_bound, + upper_bound, + returns_scalar=False, + ) + @property def str(self: Self) -> DaskExprStringNamespace: return DaskExprStringNamespace(self) diff --git a/narwhals/expr.py b/narwhals/expr.py index 27185a6eca..3a299a2d29 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -1868,11 +1868,11 @@ def clip( │ i64 │ ╞═════╡ │ -1 │ - │ 1 │ + │ 1 │ │ -1 │ - │ 3 │ + │ 3 │ │ -1 │ - │ 3 │ + │ 3 │ └─────┘ """ return self.__class__( diff --git a/narwhals/series.py b/narwhals/series.py index b8e98cd745..53041f1f41 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -634,11 +634,11 @@ def clip( Series: '' [i64] [ -1 - 1 + 1 -1 - 3 + 3 -1 - 3 + 3 ] """ return self._from_compliant_series( diff --git a/tests/expr_and_series/clip_test.py b/tests/expr_and_series/clip_test.py index 94b54272eb..909b153b75 100644 --- a/tests/expr_and_series/clip_test.py +++ b/tests/expr_and_series/clip_test.py @@ -1,24 +1,35 @@ from typing import Any -import pytest - import narwhals.stable.v1 as nw from tests.utils import compare_dicts -def test_clip(request: Any, constructor: Any) -> None: - if "pyarrow_table" in str(constructor) or "dask" in str(constructor): - request.applymarker(pytest.mark.xfail) +def test_clip(constructor: Any) -> None: df = nw.from_native(constructor({"a": [1, 2, 3, -4, 5]})) - result = df.select(b=nw.col("a").clip(3, 5)) - expected = {"b": [3, 3, 3, 3, 5]} + result = df.select( + lower_only=nw.col("a").clip(lower_bound=3), + upper_only=nw.col("a").clip(upper_bound=4), + both=nw.col("a").clip(3, 4), + ) + expected = { + "lower_only": [3, 3, 3, 3, 5], + "upper_only": [1, 2, 3, -4, 4], + "both": [3, 3, 3, 3, 4], + } compare_dicts(result, expected) -def test_clip_series(request: Any, constructor_eager: Any) -> None: - if "pyarrow_table" in str(constructor_eager): - request.applymarker(pytest.mark.xfail) +def test_clip_series(constructor_eager: Any) -> None: df = nw.from_native(constructor_eager({"a": [1, 2, 3, -4, 5]}), eager_only=True) - result = {"b": df["a"].clip(3, 5)} - expected = {"b": [3, 3, 3, 3, 5]} + result = { + "lower_only": df["a"].clip(lower_bound=3), + "upper_only": df["a"].clip(upper_bound=4), + "both": df["a"].clip(3, 4), + } + + expected = { + "lower_only": [3, 3, 3, 3, 5], + "upper_only": [1, 2, 3, -4, 4], + "both": [3, 3, 3, 3, 4], + } compare_dicts(result, expected)