diff --git a/docs/api-reference/expr.md b/docs/api-reference/expr.md index d37a1266fa..cc1290a855 100644 --- a/docs/api-reference/expr.md +++ b/docs/api-reference/expr.md @@ -33,6 +33,7 @@ - null_count - n_unique - over + - pipe - quantile - round - sample diff --git a/docs/api-reference/series.md b/docs/api-reference/series.md index 011e43571c..2479196a2b 100644 --- a/docs/api-reference/series.md +++ b/docs/api-reference/series.md @@ -37,6 +37,7 @@ - name - null_count - n_unique + - pipe - quantile - round - sample diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 324eb636e5..f8a12a399e 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -1999,8 +1999,8 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: starting from a offset of 1: >>> @nw.narwhalify - ... def func(df_any): - ... return df_any.gather_every(n=2, offset=1) + ... def func(df): + ... return df.gather_every(n=2, offset=1) >>> func(df_pd) a b @@ -3299,8 +3299,8 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: starting from a offset of 1: >>> @nw.narwhalify - ... def func(df_any): - ... return df_any.gather_every(n=2, offset=1) + ... def func(df): + ... return df.gather_every(n=2, offset=1) >>> func(df_pd) a b diff --git a/narwhals/expr.py b/narwhals/expr.py index e51bd2670e..2a00b6dada 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -77,6 +77,47 @@ def alias(self, name: str) -> Self: """ return self.__class__(lambda plx: self._call(plx).alias(name)) + def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Self: + """ + Pipe function call. + + Examples: + >>> import polars as pl + >>> import pandas as pd + >>> import narwhals as nw + >>> data = {"a": [1, 2, 3, 4]} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + + Lets define a library-agnostic function: + + >>> @nw.narwhalify + ... def func(df): + ... 
return df.select(nw.col("a").pipe(lambda x: x + 1)) + + We can then pass any supported library: + + >>> func(df_pd) + a + 0 2 + 1 3 + 2 4 + 3 5 + >>> func(df_pl) + shape: (4, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 2 │ + │ 3 │ + │ 4 │ + │ 5 │ + └─────┘ + """ + return function(self, *args, **kwargs) + def cast( self, dtype: Any, @@ -1309,13 +1350,13 @@ def is_duplicated(self) -> Self: We can then pass either pandas or Polars to `func`: - >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pd) a b 0 True True 1 False True 2 False False 3 True False - >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pl) shape: (4, 2) ┌───────┬───────┐ │ a ┆ b │ @@ -1350,13 +1391,13 @@ def is_unique(self) -> Self: We can then pass either pandas or Polars to `func`: - >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pd) a b 0 False False 1 True False 2 True True 3 False True - >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pl) shape: (4, 2) ┌───────┬───────┐ │ a ┆ b │ @@ -1431,13 +1472,13 @@ def is_first_distinct(self) -> Self: We can then pass either pandas or Polars to `func`: - >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pd) a b 0 True True 1 True False 2 True True 3 False True - >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pl) shape: (4, 2) ┌───────┬───────┐ │ a ┆ b │ @@ -1471,13 +1512,13 @@ def is_last_distinct(self) -> Self: We can then pass either pandas or Polars to `func`: - >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pd) a b 0 False False 1 True True 2 True True 3 True True - >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pl) shape: (4, 2) ┌───────┬───────┐ │ a ┆ b │ @@ -1524,11 +1565,11 @@ def quantile( We can then pass either pandas or Polars to `func`: - >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE - a b + >>> func(df_pd) + a b 0 24.5 74.5 - >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pl) shape: (1, 2) 
┌──────┬──────┐ │ a ┆ b │ @@ -1566,12 +1607,12 @@ def head(self, n: int = 10) -> Self: We can then pass either pandas or Polars to `func`: - >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pd) a 0 0 1 1 2 2 - >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pl) shape: (3, 1) ┌─────┐ │ a │ @@ -1610,12 +1651,12 @@ def tail(self, n: int = 10) -> Self: We can then pass either pandas or Polars to `func`: - >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE - a + >>> func(df_pd) + a 7 7 8 8 9 9 - >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pl) shape: (3, 1) ┌─────┐ │ a │ @@ -1662,12 +1703,12 @@ def round(self, decimals: int = 0) -> Self: We can then pass either pandas or Polars to `func`: - >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pd) a 0 1.1 1 2.6 2 3.9 - >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pl) shape: (3, 1) ┌─────┐ │ a │ @@ -1707,10 +1748,10 @@ def len(self) -> Self: We can then pass either pandas or Polars to `func`: - >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE - a1 a2 - 0 2 1 - >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pd) + a1 a2 + 0 2 1 + >>> func(df_pl) shape: (1, 2) ┌─────┬─────┐ │ a1 ┆ a2 │ @@ -1801,7 +1842,7 @@ def clip( 0 2 1 2 2 3 - >>> func_lower(df_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func_lower(df_pl) shape: (3, 1) ┌─────┐ │ s │ @@ -1826,7 +1867,7 @@ def clip( 0 1 1 2 2 2 - >>> func_upper(df_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func_upper(df_pl) shape: (3, 1) ┌─────┐ │ s │ @@ -1860,7 +1901,7 @@ def clip( 3 3 4 -1 5 3 - >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pl) shape: (6, 1) ┌─────┐ │ s │ @@ -1868,11 +1909,11 @@ def clip( │ i64 │ ╞═════╡ │ -1 │ - │ 1 │ + │ 1 │ │ -1 │ - │ 3 │ + │ 3 │ │ -1 │ - │ 3 │ + │ 3 │ └─────┘ """ return self.__class__( @@ -2162,7 +2203,7 @@ def slice(self, offset: int, length: int | None = None) -> Expr: 2 papaya ya 3 dragonfruit onf - >>> func(df_pl) # doctest: 
+NORMALIZE_WHITESPACE + >>> func(df_pl) shape: (4, 2) ┌─────────────┬──────────┐ │ s ┆ s_sliced │ @@ -2181,14 +2222,14 @@ def slice(self, offset: int, length: int | None = None) -> Expr: ... def func(df): ... return df.with_columns(s_sliced=nw.col("s").str.slice(-3)) - >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pd) s s_sliced 0 pear ear 1 None None 2 papaya aya 3 dragonfruit uit - >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pl) shape: (4, 2) ┌─────────────┬──────────┐ │ s ┆ s_sliced │ @@ -2375,8 +2416,8 @@ def to_uppercase(self) -> Expr: We can then pass either pandas or Polars to `func`: - >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE - fruits upper_col + >>> func(df_pd) + fruits upper_col 0 apple APPLE 1 mango MANGO 2 None None @@ -2416,13 +2457,13 @@ def to_lowercase(self) -> Expr: We can then pass either pandas or Polars to `func`: - >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE - fruits lower_col - 0 APPLE apple - 1 MANGO mango - 2 None None + >>> func(df_pd) + fruits lower_col + 0 APPLE apple + 1 MANGO mango + 2 None None - >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func(df_pl) shape: (3, 2) ┌────────┬───────────┐ │ fruits ┆ lower_col │ @@ -2453,32 +2494,35 @@ def date(self) -> Expr: >>> import polars as pl >>> from datetime import datetime >>> import narwhals as nw - >>> dates = [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)] - >>> s_pd = pd.Series(dates).convert_dtypes( + >>> data = {"a": [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)]} + >>> df_pd = pd.DataFrame(data).convert_dtypes( ... dtype_backend="pyarrow" ... ) # doctest:+SKIP - >>> s_pl = pl.Series(dates) + >>> df_pl = pl.DataFrame(data) We define a library agnostic function: >>> @nw.narwhalify - ... def func(s): - ... return s.dt.date() + ... def func(df): + ... 
return df.select(nw.col("a").dt.date()) We can then pass either pandas or Polars to `func`: - >>> func(s_pd) # doctest:+SKIP - 0 2012-01-07 - 1 2023-03-10 - dtype: date32[day][pyarrow] + >>> func(df_pd) # doctest:+SKIP + a + 0 2012-01-07 + 1 2023-03-10 - >>> func(s_pl) # doctest: +NORMALIZE_WHITESPACE - shape: (2,) - Series: '' [date] - [ - 2012-01-07 - 2023-03-10 - ] + >>> func(df_pl) + shape: (2, 1) + ┌────────────┐ + │ a │ + │ --- │ + │ date │ + ╞════════════╡ + │ 2012-01-07 │ + │ 2023-03-10 │ + └────────────┘ """ return self._expr.__class__(lambda plx: self._expr._call(plx).dt.date()) diff --git a/narwhals/series.py b/narwhals/series.py index 8027cacf6e..76dd71e625 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING from typing import Any +from typing import Callable from typing import Literal from typing import Sequence from typing import overload @@ -100,6 +101,44 @@ def _from_compliant_series(self, series: Any) -> Self: level=self._level, ) + def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Self: + """ + Pipe function call. + + Examples: + >>> import polars as pl + >>> import pandas as pd + >>> import narwhals as nw + >>> s_pd = pd.Series([1, 2, 3, 4]) + >>> s_pl = pl.Series([1, 2, 3, 4]) + + Let's define a function to pipe into + >>> @nw.narwhalify + ... def func(s): + ... 
return s.pipe(lambda x: x + 2) + + Now apply it to the series + + >>> func(s_pd) + 0 3 + 1 4 + 2 5 + 3 6 + dtype: int64 + >>> func(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (4,) + Series: '' [i64] + [ + 3 + 4 + 5 + 6 + ] + + + """ + return function(self, *args, **kwargs) + def __repr__(self) -> str: # pragma: no cover header = " Narwhals Series " length = len(header) @@ -139,9 +178,9 @@ def len(self) -> int: We can then pass either pandas or Polars to `func`: - >>> func(s_pd) # doctest: +NORMALIZE_WHITESPACE + >>> func(s_pd) 3 - >>> func(s_pl) # doctest: +NORMALIZE_WHITESPACE + >>> func(s_pl) 3 """ return len(self._compliant_series) diff --git a/tests/expr_and_series/pipe_test.py b/tests/expr_and_series/pipe_test.py new file mode 100644 index 0000000000..eb2430041c --- /dev/null +++ b/tests/expr_and_series/pipe_test.py @@ -0,0 +1,20 @@ +from typing import Any + +import narwhals as nw +from tests.utils import compare_dicts + +input_list = {"a": [2, 4, 6, 8]} +expected = [4, 16, 36, 64] + + +def test_pipe_expr(constructor: Any) -> None: + df = nw.from_native(constructor(input_list)) + e = df.select(nw.col("a").pipe(lambda x: x**2)) + compare_dicts(e, {"a": expected}) + + +def test_pipe_series( + constructor_eager: Any, +) -> None: + s = nw.from_native(constructor_eager(input_list), eager_only=True)["a"] + assert s.pipe(lambda x: x**2).to_list() == expected