-
Notifications
You must be signed in to change notification settings - Fork 150
feat: add DuckDB join_asof
#1860
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
9a80c09
d2b382b
e50f5ee
d64d682
1e2ce0e
069d4f9
173a442
64702ad
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -260,6 +260,53 @@ def join( | |
res = rel.select(", ".join(select)).set_alias(original_alias) | ||
return self._from_native_frame(res) | ||
|
||
def join_asof( | ||
self: Self, | ||
other: Self, | ||
*, | ||
left_on: str | None = None, | ||
right_on: str | None = None, | ||
by_left: list[str] | None = None, | ||
by_right: list[str] | None = None, | ||
strategy: Literal["backward", "forward", "nearest"] = "backward", | ||
suffix: str = "_right", | ||
) -> Self: | ||
import duckdb | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we can do this at the top of the file |
||
|
||
lhs = self._native_frame | ||
rhs = other._native_frame | ||
conditions = [] | ||
if by_left is not None and by_right is not None: | ||
conditions += [ | ||
f'lhs."{left}" = rhs."{right}"' for left, right in zip(by_left, by_right) | ||
] | ||
else: | ||
by_left = by_right = [] | ||
if strategy == "backward": | ||
conditions += [f'lhs."{left_on}" >= rhs."{right_on}"'] | ||
elif strategy == "forward": | ||
conditions += [f'lhs."{left_on}" <= rhs."{right_on}"'] | ||
else: | ||
msg = "Only 'backward' and 'forward' strategies are currently supported for DuckDB" | ||
raise NotImplementedError(msg) | ||
condition = " and ".join(conditions) | ||
select = [f'lhs."{x}"' for x in lhs.columns] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🤔 does `select = ['lhs.*']` work? |
||
for col in rhs.columns: | ||
if col in lhs.columns and ( | ||
right_on is None or col not in [right_on, *by_right] | ||
): | ||
select.append(f'rhs."{col}" as "{col}{suffix}"') | ||
elif right_on is None or col not in [right_on, *by_right]: | ||
select.append(col) | ||
query = f""" | ||
SELECT {",".join(select)} | ||
FROM lhs | ||
ASOF LEFT JOIN rhs | ||
ON {condition} | ||
""" # noqa: S608 | ||
res = duckdb.sql(query) | ||
return self._from_native_frame(res) | ||
|
||
def collect_schema(self: Self) -> dict[str, DType]: | ||
return { | ||
column_name: native_to_narwhals_dtype(str(duckdb_dtype), self._version) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice one! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Really minor, but can we avoid setting the default at the compliant level? So, in `narwhals.dataframe.py`, we can have `suffix: str = "_right"`, but in the internal methods in `_arrow` / `_duckdb` / `_pandas_like` / etc., we can just have `suffix: str`, to make sure that we're always passing it down?