
Commit 26ed283

Merge branch 'main' into feat/selectors-by-datetime

2 parents: 6b0b006 + 79098f1


72 files changed: +441 -207 lines

narwhals/_arrow/namespace.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -441,7 +441,7 @@ def __call__(self: Self, df: ArrowDataFrame) -> Sequence[ArrowSeries]:
         except TypeError:
             # `self._then_value` is a scalar and can't be converted to an expression
             value_series = plx._create_series_from_scalar(
-                self._then_value, reference_series=condition
+                self._then_value, reference_series=condition.alias("literal")
             )

         condition_native, value_series_native = broadcast_series(
```
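The `"literal"` alias fixes the name of the column produced when a bare scalar is passed to `then`. A minimal sketch of the user-facing behaviour this targets, assuming a pyarrow-backed narwhals DataFrame and the Polars-style output naming the change aims for:

```python
import narwhals as nw
import pyarrow as pa

df = nw.from_native(pa.table({"a": [1, 2, 3]}))
# A scalar `then` value has no name of its own; with this change the
# resulting column should be called "literal" rather than inheriting
# the condition's name.
result = df.select(nw.when(nw.col("a") > 1).then(5))
print(result.to_native().column_names)  # expected: ['literal']
```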

narwhals/_dask/namespace.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -411,8 +411,8 @@ def __call__(self, df: DaskLazyFrame) -> Sequence[dx.Series]:

         if is_scalar:
             _df = condition.to_frame("a")
-            _df["tmp"] = value_sequence[0]
-            value_series = _df["tmp"]
+            _df["literal"] = value_sequence[0]
+            value_series = _df["literal"]
         else:
             value_series = value_sequence
```
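Same rename on the Dask path: the temporary broadcast column is now called `literal`, so the resulting series carries that name. A small sketch of the broadcasting trick itself, with illustrative data rather than the narwhals API:

```python
import dask.dataframe as dd
import pandas as pd

condition = dd.from_pandas(pd.Series([True, False, True]), npartitions=1)
_df = condition.to_frame("a")
_df["literal"] = 5             # scalar is broadcast across the index
value_series = _df["literal"]  # a series named "literal", aligned with `condition`
print(value_series.compute().tolist())  # [5, 5, 5]
```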

narwhals/_duckdb/expr_dt.py

Lines changed: 52 additions & 0 deletions
```diff
@@ -130,3 +130,55 @@ def date(self) -> DuckDBExpr:
             "date",
             returns_scalar=self._compliant_expr._returns_scalar,
         )
+
+    def total_minutes(self) -> DuckDBExpr:
+        from duckdb import ConstantExpression
+        from duckdb import FunctionExpression
+
+        return self._compliant_expr._from_call(
+            lambda _input: FunctionExpression(
+                "datepart", ConstantExpression("minute"), _input
+            ),
+            "total_minutes",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def total_seconds(self) -> DuckDBExpr:
+        from duckdb import ConstantExpression
+        from duckdb import FunctionExpression
+
+        return self._compliant_expr._from_call(
+            lambda _input: 60
+            * FunctionExpression("datepart", ConstantExpression("minute"), _input)
+            + FunctionExpression("datepart", ConstantExpression("second"), _input),
+            "total_seconds",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def total_milliseconds(self) -> DuckDBExpr:
+        from duckdb import ConstantExpression
+        from duckdb import FunctionExpression
+
+        return self._compliant_expr._from_call(
+            lambda _input: 60_000
+            * FunctionExpression("datepart", ConstantExpression("minute"), _input)
+            + FunctionExpression("datepart", ConstantExpression("millisecond"), _input),
+            "total_milliseconds",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def total_microseconds(self) -> DuckDBExpr:
+        from duckdb import ConstantExpression
+        from duckdb import FunctionExpression
+
+        return self._compliant_expr._from_call(
+            lambda _input: 60_000_000
+            * FunctionExpression("datepart", ConstantExpression("minute"), _input)
+            + FunctionExpression("datepart", ConstantExpression("microsecond"), _input),
+            "total_microseconds",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def total_nanoseconds(self) -> DuckDBExpr:
+        msg = "`total_nanoseconds` is not implemented for DuckDB"
+        raise NotImplementedError(msg)
```
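These helpers build on DuckDB's `datepart`, which extracts a single component from an interval; the `total_*` variants then recombine the minute and sub-minute parts arithmetically. A standalone sketch of the `total_seconds` arithmetic in plain DuckDB SQL (not the narwhals API; the interval value is illustrative):

```python
import duckdb

# 95 seconds normalises to 1 minute 35 seconds inside an INTERVAL, so
# total seconds must be reassembled as 60 * minutes + seconds.
print(
    duckdb.sql(
        """
        SELECT
            datepart('minute', INTERVAL 95 SECOND) AS minutes,
            datepart('second', INTERVAL 95 SECOND) AS seconds,
            60 * datepart('minute', INTERVAL 95 SECOND)
              + datepart('second', INTERVAL 95 SECOND) AS total_seconds
        """
    ).fetchall()
)  # expected: [(1, 35, 95)]
```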

narwhals/_duckdb/namespace.py

Lines changed: 11 additions & 6 deletions
```diff
@@ -248,22 +248,27 @@ def __call__(self, df: DuckDBLazyFrame) -> Sequence[duckdb.Expression]:
             value = parse_into_expr(self._then_value, namespace=plx)(df)[0]
         except TypeError:
             # `self._otherwise_value` is a scalar and can't be converted to an expression
-            value = ConstantExpression(self._then_value)
+            value = ConstantExpression(self._then_value).alias("literal")
         value = cast("duckdb.Expression", value)
+        value_name = get_column_name(df, value)

         if self._otherwise_value is None:
-            return [CaseExpression(condition=condition, value=value)]
+            return [CaseExpression(condition=condition, value=value).alias(value_name)]
         try:
             otherwise_expr = parse_into_expr(self._otherwise_value, namespace=plx)
         except TypeError:
             # `self._otherwise_value` is a scalar and can't be converted to an expression
             return [
-                CaseExpression(condition=condition, value=value).otherwise(
-                    ConstantExpression(self._otherwise_value)
-                )
+                CaseExpression(condition=condition, value=value)
+                .otherwise(ConstantExpression(self._otherwise_value))
+                .alias(value_name)
             ]
         otherwise = otherwise_expr(df)[0]
-        return [CaseExpression(condition=condition, value=value).otherwise(otherwise)]
+        return [
+            CaseExpression(condition=condition, value=value)
+            .otherwise(otherwise)
+            .alias(value_name)
+        ]

     def then(self, value: DuckDBExpr | Any) -> DuckDBThen:
         self._then_value = value
```
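The output name is captured via `get_column_name` before the `CaseExpression` is built, and every branch now re-attaches it with `.alias(...)`; otherwise DuckDB would label the output with the rendered `CASE WHEN ...` text. A minimal sketch with duckdb's Python expression API (illustrative data; `ColumnExpression` is used here for the condition, which this commit builds elsewhere):

```python
import duckdb
from duckdb import CaseExpression, ColumnExpression, ConstantExpression

rel = duckdb.sql("SELECT * FROM (VALUES (1), (2), (3)) t(a)")
expr = (
    CaseExpression(
        condition=ColumnExpression("a") > ConstantExpression(1),
        value=ConstantExpression(5),
    )
    .otherwise(ConstantExpression(0))
    .alias("literal")  # without this, the column name is the CASE expression text
)
print(rel.select(expr).fetchall())  # expected: [(0,), (5,), (5,)]
```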

narwhals/_pandas_like/namespace.py

Lines changed: 1 addition & 2 deletions
```diff
@@ -467,13 +467,12 @@ def __call__(self, df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]:
         except TypeError:
             # `self._then_value` is a scalar and can't be converted to an expression
             value_series = plx._create_series_from_scalar(
-                self._then_value, reference_series=condition
+                self._then_value, reference_series=condition.alias("literal")
             )

         condition_native, value_series_native = broadcast_align_and_extract_native(
             condition, value_series
         )
-
         if self._otherwise_value is None:
             return [
                 value_series._from_native_series(
```

narwhals/_pandas_like/utils.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -128,7 +128,7 @@ def broadcast_align_and_extract_native(
         s = rhs._native_series
         return (
             lhs._native_series,
-            s.__class__(s.iloc[0], index=lhs_index, dtype=s.dtype),
+            s.__class__(s.iloc[0], index=lhs_index, dtype=s.dtype, name=rhs.name),
         )
     if lhs.len() == 1:
         # broadcast
```
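With `name=rhs.name`, a length-1 right-hand series keeps its name when it is rebuilt against the left-hand index. The same construction in plain pandas, with illustrative values standing in for the narwhals internals:

```python
import pandas as pd

s = pd.Series([5], name="literal")   # stand-in for rhs._native_series
lhs_index = pd.RangeIndex(3)         # index of the longer, left-hand operand
broadcast = s.__class__(s.iloc[0], index=lhs_index, dtype=s.dtype, name=s.name)
print(broadcast.name, broadcast.tolist())  # literal [5, 5, 5]
```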

narwhals/_spark_like/expr.py

Lines changed: 5 additions & 0 deletions
```diff
@@ -7,6 +7,7 @@
 from typing import Sequence

 from narwhals._expression_parsing import infer_new_root_output_names
+from narwhals._spark_like.expr_dt import SparkLikeExprDateTimeNamespace
 from narwhals._spark_like.expr_name import SparkLikeExprNameNamespace
 from narwhals._spark_like.expr_str import SparkLikeExprStringNamespace
 from narwhals._spark_like.utils import get_column_name
@@ -541,3 +542,7 @@ def str(self: Self) -> SparkLikeExprStringNamespace:
     @property
     def name(self: Self) -> SparkLikeExprNameNamespace:
         return SparkLikeExprNameNamespace(self)
+
+    @property
+    def dt(self: Self) -> SparkLikeExprDateTimeNamespace:
+        return SparkLikeExprDateTimeNamespace(self)
```

narwhals/_spark_like/expr_dt.py

Lines changed: 135 additions & 0 deletions
```diff
@@ -0,0 +1,135 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pyspark.sql import Column
+    from typing_extensions import Self
+
+    from narwhals._spark_like.expr import SparkLikeExpr
+
+
+class SparkLikeExprDateTimeNamespace:
+    def __init__(self: Self, expr: SparkLikeExpr) -> None:
+        self._compliant_expr = expr
+
+    def date(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.to_date,
+            "date",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def year(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.year,
+            "year",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def month(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.month,
+            "month",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def day(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.day,
+            "day",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def hour(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.hour,
+            "hour",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def minute(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.minute,
+            "minute",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def second(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.second,
+            "second",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def millisecond(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        def _millisecond(_input: Column) -> Column:
+            return F.floor((F.unix_micros(_input) % 1_000_000) / 1000)
+
+        return self._compliant_expr._from_call(
+            _millisecond,
+            "millisecond",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def microsecond(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        def _microsecond(_input: Column) -> Column:
+            return F.unix_micros(_input) % 1_000_000
+
+        return self._compliant_expr._from_call(
+            _microsecond,
+            "microsecond",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def nanosecond(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        def _nanosecond(_input: Column) -> Column:
+            return (F.unix_micros(_input) % 1_000_000) * 1000
+
+        return self._compliant_expr._from_call(
+            _nanosecond,
+            "nanosecond",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def ordinal_day(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.dayofyear,
+            "ordinal_day",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def weekday(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        def _weekday(_input: Column) -> Column:
+            # PySpark's dayofweek returns 1-7 for Sunday-Saturday
+            return (F.dayofweek(_input) + 6) % 7
+
+        return self._compliant_expr._from_call(
+            _weekday,
+            "weekday",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
```
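The sub-second accessors all derive from `F.unix_micros`, i.e. microseconds since the Unix epoch. A pure-Python check of that arithmetic, with an illustrative timestamp and no Spark session required:

```python
from datetime import datetime, timedelta, timezone

ts = datetime(2024, 1, 1, 12, 30, 45, 123456, tzinfo=timezone.utc)
epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
unix_micros = (ts - epoch) // timedelta(microseconds=1)  # what F.unix_micros returns

print(unix_micros % 1_000_000)            # microsecond: 123456
print((unix_micros % 1_000_000) // 1000)  # millisecond: 123 (mirrors F.floor(... / 1000))
print((unix_micros % 1_000_000) * 1000)   # nanosecond: 123456000 (microsecond resolution)
```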

narwhals/_spark_like/expr_str.py

Lines changed: 54 additions & 0 deletions
```diff
@@ -1,6 +1,7 @@
 from __future__ import annotations

 from typing import TYPE_CHECKING
+from typing import overload

 if TYPE_CHECKING:
     from pyspark.sql import Column
@@ -128,3 +129,56 @@ def to_lowercase(self: Self) -> SparkLikeExpr:
             "to_lowercase",
             returns_scalar=self._compliant_expr._returns_scalar,
         )
+
+    def to_datetime(self: Self, format: str | None) -> SparkLikeExpr:  # noqa: A002
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            lambda _input: F.to_timestamp(
+                F.replace(_input, F.lit("T"), F.lit(" ")),
+                format=strptime_to_pyspark_format(format),
+            ),
+            "to_datetime",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+
+@overload
+def strptime_to_pyspark_format(format: None) -> None: ...
+
+
+@overload
+def strptime_to_pyspark_format(format: str) -> str: ...
+
+
+def strptime_to_pyspark_format(format: str | None) -> str | None:  # noqa: A002
+    """Converts a Python strptime datetime format string to a PySpark datetime format string."""
+    # Mapping from Python strptime format to PySpark format
+    if format is None:
+        return None
+
+    # see https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+    # and https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
+    format_mapping = {
+        "%Y": "y",  # Year with century
+        "%y": "y",  # Year without century
+        "%m": "M",  # Month
+        "%d": "d",  # Day of the month
+        "%H": "H",  # Hour (24-hour clock) 0-23
+        "%I": "h",  # Hour (12-hour clock) 1-12
+        "%M": "m",  # Minute
+        "%S": "s",  # Second
+        "%f": "S",  # Microseconds -> Milliseconds
+        "%p": "a",  # AM/PM
+        "%a": "E",  # Abbreviated weekday name
+        "%A": "E",  # Full weekday name
+        "%j": "D",  # Day of the year
+        "%z": "Z",  # Timezone offset
+        "%s": "X",  # Unix timestamp
+    }
+
+    # Replace Python format specifiers with PySpark specifiers
+    pyspark_format = format
+    for py_format, spark_format in format_mapping.items():
+        pyspark_format = pyspark_format.replace(py_format, spark_format)
+    return pyspark_format.replace("T", " ")
```
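A quick trace of `strptime_to_pyspark_format`, importable from the module added above assuming a narwhals checkout that includes this commit. Note the function's final `.replace("T", " ")` rewrites the format string, while the `F.replace` inside `to_datetime` rewrites the values themselves:

```python
from narwhals._spark_like.expr_str import strptime_to_pyspark_format

print(strptime_to_pyspark_format("%Y-%m-%dT%H:%M:%S"))  # y-M-d H:m:s
print(strptime_to_pyspark_format(None))                 # None
```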
