From e3f92592b9d8966fa7cb4f9f5addc5bd61c3a598 Mon Sep 17 00:00:00 2001
From: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com>
Date: Wed, 7 Aug 2024 20:28:19 +0200
Subject: [PATCH] feat: arrow and dask expr `clip` method (#729)

* feat: arrow and dask clip

* merge main, add returns_scalar
---
 narwhals/_arrow/expr.py            |  7 ++++++
 narwhals/_arrow/series.py          | 12 ++++++++++
 narwhals/_dask/expr.py             | 11 ++++++++++
 narwhals/expr.py                   |  6 ++---
 narwhals/series.py                 |  6 ++---
 tests/expr_and_series/clip_test.py | 35 ++++++++++++++++++++----------
 6 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py
index a539040f0e..1b8360a715 100644
--- a/narwhals/_arrow/expr.py
+++ b/narwhals/_arrow/expr.py
@@ -287,6 +287,13 @@ def quantile(
     def gather_every(self: Self, n: int, offset: int = 0) -> Self:
         return reuse_series_implementation(self, "gather_every", n=n, offset=offset)
 
+    def clip(
+        self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None
+    ) -> Self:
+        """Clip to the given bounds by reusing the series-level `clip`."""
+        bounds = {"lower_bound": lower_bound, "upper_bound": upper_bound}
+        return reuse_series_implementation(self, "clip", **bounds)
+
     def over(self: Self, keys: list[str]) -> Self:
         def func(df: ArrowDataFrame) -> list[ArrowSeries]:
             if self._output_names is None:
diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
index 3c13be2db3..75fe0f6320 100644
--- a/narwhals/_arrow/series.py
+++ b/narwhals/_arrow/series.py
@@ -587,6 +587,35 @@ def quantile(
     def gather_every(self: Self, n: int, offset: int = 0) -> Self:
         return self._from_native_series(self._native_series[offset::n])
 
+    def clip(
+        self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None
+    ) -> Self:
+        """Limit values to the interval `[lower_bound, upper_bound]`.
+
+        Arguments:
+            lower_bound: Smallest allowed value; `None` leaves values unbounded below.
+            upper_bound: Largest allowed value; `None` leaves values unbounded above.
+
+        Returns:
+            A new series with out-of-range values replaced by the nearest bound.
+            Null elements stay null.
+        """
+        pa = get_pyarrow()
+        pc = get_pyarrow_compute()
+
+        arr = self._native_series
+        # `skip_nulls=False` makes a null element yield null instead of the bound,
+        # so a missing bound has to be skipped explicitly rather than passed as a
+        # null scalar.
+        if lower_bound is not None:
+            lower = pa.scalar(lower_bound, type=arr.type)
+            arr = pc.max_element_wise(arr, lower, skip_nulls=False)
+        if upper_bound is not None:
+            upper = pa.scalar(upper_bound, type=arr.type)
+            arr = pc.min_element_wise(arr, upper, skip_nulls=False)
+
+        return self._from_native_series(arr)
+
     @property
     def shape(self) -> tuple[int]:
         return (len(self._native_series),)
diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py
index e10ca92162..9986145e87 100644
--- a/narwhals/_dask/expr.py
+++ b/narwhals/_dask/expr.py
@@ -317,6 +317,17 @@ def fill_null(self, value: Any) -> DaskExpr:
             returns_scalar=False,
         )
 
+    def clip(
+        self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None
+    ) -> Self:
+        """Clip values to the given bounds by calling the input's own `clip`."""
+
+        def _clip(_input: Any, _lower: Any, _upper: Any) -> Any:
+            return _input.clip(lower=_lower, upper=_upper)
+
+        bounds = (lower_bound, upper_bound)
+        return self._from_call(_clip, "clip", *bounds, returns_scalar=False)
+
     @property
     def str(self: Self) -> DaskExprStringNamespace:
         return DaskExprStringNamespace(self)
diff --git a/narwhals/expr.py b/narwhals/expr.py
index 27185a6eca..3a299a2d29 100644
--- a/narwhals/expr.py
+++ b/narwhals/expr.py
@@ -1868,11 +1868,11 @@ def clip(
             │ i64 │
             ╞═════╡
             │ -1  │
-            │ 1   │
+            │  1  │
             │ -1  │
-            │ 3   │
+            │  3  │
             │ -1  │
-            │ 3   │
+            │  3  │
             └─────┘
         """
         return self.__class__(
diff --git a/narwhals/series.py b/narwhals/series.py
index b8e98cd745..53041f1f41 100644
--- a/narwhals/series.py
+++ b/narwhals/series.py
@@ -634,11 +634,11 @@ def clip(
             Series: '' [i64]
             [
                -1
-               1
+                1
                -1
-               3
+                3
                -1
-               3
+                3
             ]
         """
         return self._from_compliant_series(
diff --git a/tests/expr_and_series/clip_test.py b/tests/expr_and_series/clip_test.py
index 94b54272eb..909b153b75 100644
--- a/tests/expr_and_series/clip_test.py
+++ b/tests/expr_and_series/clip_test.py
@@ -1,24 +1,35 @@
 from typing import Any
 
-import pytest
-
 import narwhals.stable.v1 as nw
 from tests.utils import compare_dicts
 
 
-def test_clip(request: Any, constructor: Any) -> None:
-    if "pyarrow_table" in str(constructor) or "dask" in str(constructor):
-        request.applymarker(pytest.mark.xfail)
+def test_clip(constructor: Any) -> None:
     df = nw.from_native(constructor({"a": [1, 2, 3, -4, 5]}))
-    result = df.select(b=nw.col("a").clip(3, 5))
-    expected = {"b": [3, 3, 3, 3, 5]}
+    # Clip with only a lower bound, only an upper bound, and both together.
+    col = nw.col("a")
+    result = df.select(
+        lower_only=col.clip(lower_bound=3),
+        upper_only=col.clip(upper_bound=4),
+        both=col.clip(3, 4),
+    )
+    lower_only, upper_only = [3, 3, 3, 3, 5], [1, 2, 3, -4, 4]
+    both = [3, 3, 3, 3, 4]
+    expected = {"lower_only": lower_only, "upper_only": upper_only, "both": both}
     compare_dicts(result, expected)
 
 
-def test_clip_series(request: Any, constructor_eager: Any) -> None:
-    if "pyarrow_table" in str(constructor_eager):
-        request.applymarker(pytest.mark.xfail)
+def test_clip_series(constructor_eager: Any) -> None:
     df = nw.from_native(constructor_eager({"a": [1, 2, 3, -4, 5]}), eager_only=True)
-    result = {"b": df["a"].clip(3, 5)}
-    expected = {"b": [3, 3, 3, 3, 5]}
+    # Clip the eager series with a lower bound only, an upper bound only, and both.
+    series = df["a"]
+    result = {
+        "lower_only": series.clip(lower_bound=3),
+        "upper_only": series.clip(upper_bound=4),
+        "both": series.clip(3, 4),
+    }
+    lower_only, upper_only = [3, 3, 3, 3, 5], [1, 2, 3, -4, 4]
+    both = [3, 3, 3, 3, 4]
+
+    expected = {"lower_only": lower_only, "upper_only": upper_only, "both": both}
     compare_dicts(result, expected)