feat: arrow and dask expr clip method (#729)

* feat: arrow and dask clip
* merge main, add returns_scalar
narwhals-dev · Aug 7, 2024 · e3f9259 · e3f9259
1 parent 465a1c7
commit e3f9259
Show file tree

Hide file tree

Showing 6 changed files with 59 additions and 18 deletions.
diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py
@@ -287,6 +287,13 @@ def quantile(
     def gather_every(self: Self, n: int, offset: int = 0) -> Self:
         return reuse_series_implementation(self, "gather_every", n=n, offset=offset)
 
+    def clip(
+        self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None
+    ) -> Self:
+        """Clip values by delegating to the series-level ``clip``.
+
+        Both bounds are forwarded unchanged, as keyword arguments, through
+        ``reuse_series_implementation``.
+        """
+        bounds = {"lower_bound": lower_bound, "upper_bound": upper_bound}
+        return reuse_series_implementation(self, "clip", **bounds)
+
     def over(self: Self, keys: list[str]) -> Self:
         def func(df: ArrowDataFrame) -> list[ArrowSeries]:
             if self._output_names is None:

diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
@@ -587,6 +587,18 @@ def quantile(
     def gather_every(self: Self, n: int, offset: int = 0) -> Self:
         return self._from_native_series(self._native_series[offset::n])
 
+    def clip(
+        self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None
+    ) -> Self:
+        """Limit values to the range ``[lower_bound, upper_bound]``.
+
+        A bound left as ``None`` is not applied, and null elements stay null.
+
+        Arguments:
+            lower_bound: Smallest value allowed in the result, or ``None``.
+            upper_bound: Largest value allowed in the result, or ``None``.
+
+        Returns:
+            The result of ``_from_native_series`` applied to the clipped array.
+        """
+        pa = get_pyarrow()
+        pc = get_pyarrow_compute()
+
+        arr = self._native_series
+        # `skip_nulls=False` makes a null element propagate to the output; with
+        # the default (`True`) it would be replaced by the bound. That setting
+        # would also turn a null bound into an all-null result, so a missing
+        # bound is skipped rather than passed as a null scalar.
+        if lower_bound is not None:
+            arr = pc.max_element_wise(
+                arr, pa.scalar(lower_bound, type=arr.type), skip_nulls=False
+            )
+        if upper_bound is not None:
+            arr = pc.min_element_wise(
+                arr, pa.scalar(upper_bound, type=arr.type), skip_nulls=False
+            )
+
+        return self._from_native_series(arr)
+
     @property
     def shape(self) -> tuple[int]:
         return (len(self._native_series),)

diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py
@@ -317,6 +317,17 @@ def fill_null(self, value: Any) -> DaskExpr:
             returns_scalar=False,
         )
 
+    def clip(
+        self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None
+    ) -> Self:
+        """Clip values through the ``clip`` method of the wrapped input.
+
+        The callback given to ``_from_call`` invokes
+        ``.clip(lower=..., upper=...)`` on its input; ``lower_bound`` and
+        ``upper_bound`` are supplied to ``_from_call`` positionally.
+        """
+
+        def _clip(_input: Any, _lower: Any, _upper: Any) -> Any:
+            return _input.clip(lower=_lower, upper=_upper)
+
+        return self._from_call(
+            _clip,
+            "clip",
+            lower_bound,
+            upper_bound,
+            returns_scalar=False,
+        )
+
     @property
     def str(self: Self) -> DaskExprStringNamespace:
         return DaskExprStringNamespace(self)

diff --git a/narwhals/expr.py b/narwhals/expr.py
@@ -1868,11 +1868,11 @@ def clip(
             │ i64 │
             ╞═════╡
             │ -1  │
-            │ 1   │
+            │  1  │
             │ -1  │
-            │ 3   │
+            │  3  │
             │ -1  │
-            │ 3   │
+            │  3  │
             └─────┘
         """
         return self.__class__(

diff --git a/narwhals/series.py b/narwhals/series.py
@@ -634,11 +634,11 @@ def clip(
             Series: '' [i64]
             [
                -1
-               1
+                1
                -1
-               3
+                3
                -1
-               3
+                3
             ]
         """
         return self._from_compliant_series(

diff --git a/tests/expr_and_series/clip_test.py b/tests/expr_and_series/clip_test.py
@@ -1,24 +1,35 @@
 from typing import Any
 
-import pytest
-
 import narwhals.stable.v1 as nw
 from tests.utils import compare_dicts
 
 
-def test_clip(request: Any, constructor: Any) -> None:
-    if "pyarrow_table" in str(constructor) or "dask" in str(constructor):
-        request.applymarker(pytest.mark.xfail)
+def test_clip(constructor: Any) -> None:
+    """Check ``nw.col("a").clip`` with a lower bound only, an upper bound only, and both."""
     df = nw.from_native(constructor({"a": [1, 2, 3, -4, 5]}))
-    result = df.select(b=nw.col("a").clip(3, 5))
-    expected = {"b": [3, 3, 3, 3, 5]}
+    # One output column per bound combination, all computed from column "a".
+    result = df.select(
+        lower_only=nw.col("a").clip(lower_bound=3),
+        upper_only=nw.col("a").clip(upper_bound=4),
+        both=nw.col("a").clip(3, 4),
+    )
+    expected = {
+        "lower_only": [3, 3, 3, 3, 5],
+        "upper_only": [1, 2, 3, -4, 4],
+        "both": [3, 3, 3, 3, 4],
+    }
     compare_dicts(result, expected)
 
 
-def test_clip_series(request: Any, constructor_eager: Any) -> None:
-    if "pyarrow_table" in str(constructor_eager):
-        request.applymarker(pytest.mark.xfail)
+def test_clip_series(constructor_eager: Any) -> None:
+    """Check ``df["a"].clip`` with a lower bound only, an upper bound only, and both."""
     df = nw.from_native(constructor_eager({"a": [1, 2, 3, -4, 5]}), eager_only=True)
-    result = {"b": df["a"].clip(3, 5)}
-    expected = {"b": [3, 3, 3, 3, 5]}
+    # One entry per bound combination, each clipping the same column "a".
+    result = {
+        "lower_only": df["a"].clip(lower_bound=3),
+        "upper_only": df["a"].clip(upper_bound=4),
+        "both": df["a"].clip(3, 4),
+    }
+
+    expected = {
+        "lower_only": [3, 3, 3, 3, 5],
+        "upper_only": [1, 2, 3, -4, 4],
+        "both": [3, 3, 3, 3, 4],
+    }
     compare_dicts(result, expected)
-Original file line number
+Diff line change
@@ Expand Up / @@ -634,11 +634,11 @@ def clip( @@
                 Series: '' [i64]
                 [
                    -1
                    -1
                    -1
                 ]
             """
             return self._from_compliant_series(
@@ Expand Down @@