Commit 973b499

feat: add Expr.dt methods to PySpark (#1835)

Parent: e7ca81e

3 files changed: +140 −2 lines

narwhals/_spark_like/expr.py

Lines changed: 5 additions & 0 deletions
@@ -7,6 +7,7 @@
 from typing import Sequence

 from narwhals._expression_parsing import infer_new_root_output_names
+from narwhals._spark_like.expr_dt import SparkLikeExprDateTimeNamespace
 from narwhals._spark_like.expr_name import SparkLikeExprNameNamespace
 from narwhals._spark_like.expr_str import SparkLikeExprStringNamespace
 from narwhals._spark_like.utils import get_column_name
@@ -541,3 +542,7 @@ def str(self: Self) -> SparkLikeExprStringNamespace:
     @property
     def name(self: Self) -> SparkLikeExprNameNamespace:
         return SparkLikeExprNameNamespace(self)
+
+    @property
+    def dt(self: Self) -> SparkLikeExprDateTimeNamespace:
+        return SparkLikeExprDateTimeNamespace(self)
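With the dt property wired up, the new namespace is reachable through narwhals' public API. A minimal usage sketch (not part of the commit): it assumes spark_df is a pyspark.sql.DataFrame with a timestamp column "ts".

import narwhals as nw

# `spark_df` is an assumed PySpark DataFrame with a timestamp column "ts".
df = nw.from_native(spark_df)

# Each accessor below dispatches to the new SparkLikeExprDateTimeNamespace.
result = df.select(
    nw.col("ts").dt.year().alias("year"),
    nw.col("ts").dt.day().alias("day"),
    nw.col("ts").dt.weekday().alias("weekday"),
)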

narwhals/_spark_like/expr_dt.py

Lines changed: 135 additions & 0 deletions
@@ -0,0 +1,135 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pyspark.sql import Column
+    from typing_extensions import Self
+
+    from narwhals._spark_like.expr import SparkLikeExpr
+
+
+class SparkLikeExprDateTimeNamespace:
+    def __init__(self: Self, expr: SparkLikeExpr) -> None:
+        self._compliant_expr = expr
+
+    def date(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.to_date,
+            "date",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def year(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.year,
+            "year",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def month(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.month,
+            "month",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def day(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.dayofmonth,
+            "day",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def hour(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.hour,
+            "hour",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def minute(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.minute,
+            "minute",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def second(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.second,
+            "second",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def millisecond(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        def _millisecond(_input: Column) -> Column:
+            return F.floor((F.unix_micros(_input) % 1_000_000) / 1000)
+
+        return self._compliant_expr._from_call(
+            _millisecond,
+            "millisecond",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def microsecond(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        def _microsecond(_input: Column) -> Column:
+            return F.unix_micros(_input) % 1_000_000
+
+        return self._compliant_expr._from_call(
+            _microsecond,
+            "microsecond",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def nanosecond(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        def _nanosecond(_input: Column) -> Column:
+            return (F.unix_micros(_input) % 1_000_000) * 1000
+
+        return self._compliant_expr._from_call(
+            _nanosecond,
+            "nanosecond",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def ordinal_day(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        return self._compliant_expr._from_call(
+            F.dayofyear,
+            "ordinal_day",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
+
+    def weekday(self: Self) -> SparkLikeExpr:
+        from pyspark.sql import functions as F  # noqa: N812
+
+        def _weekday(_input: Column) -> Column:
+            # PySpark's dayofweek returns 1-7 for Sunday-Saturday
+            return (F.dayofweek(_input) + 6) % 7
+
+        return self._compliant_expr._from_call(
+            _weekday,
+            "weekday",
+            returns_scalar=self._compliant_expr._returns_scalar,
+        )
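All three sub-second accessors derive from F.unix_micros, the number of microseconds since the Unix epoch. Taking it modulo 1_000_000 isolates the microseconds within the current second; flooring a division by 1000 scales that to milliseconds, and multiplying by 1000 scales it to nanoseconds (PySpark timestamps carry microsecond precision, so the last three digits of the nanosecond value are always zero). The same arithmetic in plain Python, using an illustrative epoch value that is not from the commit:

# What F.unix_micros might return for an instant whose sub-second part is .789123
unix_micros = 1_705_322_096_789_123

sub_second_us = unix_micros % 1_000_000  # microsecond() -> 789123
millis = sub_second_us // 1000           # millisecond(): floor(789123 / 1000) -> 789
nanos = sub_second_us * 1000             # nanosecond() -> 789123000

assert (sub_second_us, millis, nanos) == (789_123, 789, 789_123_000)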

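The weekday helper remaps PySpark's dayofweek, which numbers days 1 (Sunday) through 7 (Saturday). Adding 6 and reducing modulo 7 shifts Monday through Saturday onto 1 through 6, with Sunday wrapping around to 0. A plain-Python check of the shift (illustrative, not from the commit):

# PySpark's dayofweek convention: Sunday=1, ..., Saturday=7.
spark_dayofweek = {"Sun": 1, "Mon": 2, "Tue": 3, "Wed": 4, "Thu": 5, "Fri": 6, "Sat": 7}

# Apply the same (d + 6) % 7 shift used in _weekday above.
shifted = {name: (d + 6) % 7 for name, d in spark_dayofweek.items()}

assert shifted == {"Sun": 0, "Mon": 1, "Tue": 2, "Wed": 3, "Thu": 4, "Fri": 5, "Sat": 6}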
tests/expr_and_series/dt/datetime_attributes_test.py

Lines changed: 0 additions & 2 deletions
@@ -49,8 +49,6 @@ def test_datetime_attributes(
         request.applymarker(pytest.mark.xfail)
     if attribute == "date" and "cudf" in str(constructor):
         request.applymarker(pytest.mark.xfail)
-    if "pyspark" in str(constructor):
-        request.applymarker(pytest.mark.xfail)

     df = nw.from_native(constructor(data))
     result = df.select(getattr(nw.col("a").dt, attribute)())
