Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: duckdb join was failing if column names contained spaces #1775

Merged
merged 3 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions narwhals/_duckdb/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,24 +243,24 @@ def join(
assert right_on is not None # noqa: S101

conditions = [
f"lhs.{left} = rhs.{right}" for left, right in zip(left_on, right_on)
f'lhs."{left}" = rhs."{right}"' for left, right in zip(left_on, right_on)
]
condition = " and ".join(conditions)
rel = self._native_frame.set_alias("lhs").join(
other._native_frame.set_alias("rhs"), condition=condition, how=how
)

if how in ("inner", "left", "cross"):
select = [f"lhs.{x}" for x in self._native_frame.columns]
select = [f'lhs."{x}"' for x in self._native_frame.columns]
for col in other._native_frame.columns:
if col in self._native_frame.columns and (
right_on is None or col not in right_on
):
select.append(f"rhs.{col} as {col}{suffix}")
select.append(f'rhs."{col}" as "{col}{suffix}"')
elif right_on is None or col not in right_on:
select.append(col)
else: # semi
select = [f"lhs.{x}" for x in self._native_frame.columns]
select = ["lhs.*"]

res = rel.select(", ".join(select)).set_alias(original_alias)
return self._from_native_frame(res)
Expand Down Expand Up @@ -317,9 +317,9 @@ def sort(
result = self._native_frame.order(
",".join(
(
f"{col} {desc} nulls last"
f'"{col}" {desc} nulls last'
if nulls_last
else f"{col} {desc} nulls first"
else f'"{col}" {desc} nulls first'
for col, desc in zip(flat_by, descending_str)
)
)
Expand Down
44 changes: 22 additions & 22 deletions tests/frame/join_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_inner_join_two_keys(constructor: Constructor) -> None:
data = {
"antananarivo": [1, 3, 2],
"bob": [4, 4, 6],
"zorro": [7.0, 8, 9],
"zor ro": [7.0, 8, 9],
"index": [0, 1, 2],
}
df = nw.from_native(constructor(data))
Expand All @@ -37,9 +37,9 @@ def test_inner_join_two_keys(constructor: Constructor) -> None:
expected = {
"antananarivo": [1, 3, 2],
"bob": [4, 4, 6],
"zorro": [7.0, 8, 9],
"zor ro": [7.0, 8, 9],
"index": [0, 1, 2],
"zorro_right": [7.0, 8, 9],
"zor ro_right": [7.0, 8, 9],
}
assert_equal_data(result, expected)
assert_equal_data(result_on, expected)
Expand All @@ -49,7 +49,7 @@ def test_inner_join_single_key(constructor: Constructor) -> None:
data = {
"antananarivo": [1, 3, 2],
"bob": [4, 4, 6],
"zorro": [7.0, 8, 9],
"zor ro": [7.0, 8, 9],
"index": [0, 1, 2],
}
df = nw.from_native(constructor(data))
Expand All @@ -66,10 +66,10 @@ def test_inner_join_single_key(constructor: Constructor) -> None:
expected = {
"antananarivo": [1, 3, 2],
"bob": [4, 4, 6],
"zorro": [7.0, 8, 9],
"zor ro": [7.0, 8, 9],
"index": [0, 1, 2],
"bob_right": [4, 4, 6],
"zorro_right": [7.0, 8, 9],
"zor ro_right": [7.0, 8, 9],
}
assert_equal_data(result, expected)
assert_equal_data(result_on, expected)
Expand Down Expand Up @@ -99,7 +99,7 @@ def test_suffix(constructor: Constructor, how: str, suffix: str) -> None:
data = {
"antananarivo": [1, 3, 2],
"bob": [4, 4, 6],
"zorro": [7.0, 8, 9],
"zor ro": [7.0, 8, 9],
}
df = nw.from_native(constructor(data))
df_right = df
Expand All @@ -111,7 +111,7 @@ def test_suffix(constructor: Constructor, how: str, suffix: str) -> None:
suffix=suffix,
)
result_cols = result.collect_schema().names()
assert result_cols == ["antananarivo", "bob", "zorro", f"zorro{suffix}"]
assert result_cols == ["antananarivo", "bob", "zor ro", f"zor ro{suffix}"]


@pytest.mark.parametrize("suffix", ["_right", "_custom_suffix"])
Expand Down Expand Up @@ -151,13 +151,13 @@ def test_cross_join_non_pandas() -> None:
(
["antananarivo", "bob"],
(nw.col("bob") < 5),
{"antananarivo": [2], "bob": [6], "zorro": [9]},
{"antananarivo": [2], "bob": [6], "zor ro": [9]},
),
(["bob"], (nw.col("bob") < 5), {"antananarivo": [2], "bob": [6], "zorro": [9]}),
(["bob"], (nw.col("bob") < 5), {"antananarivo": [2], "bob": [6], "zor ro": [9]}),
(
["bob"],
(nw.col("bob") > 5),
{"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7.0, 8.0]},
{"antananarivo": [1, 3], "bob": [4, 4], "zor ro": [7.0, 8.0]},
),
],
)
Expand All @@ -170,7 +170,7 @@ def test_anti_join(
) -> None:
if "duckdb" in str(constructor):
request.applymarker(pytest.mark.xfail)
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]}
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))
other = df.filter(filter_expr)
result = df.join(other, how="anti", left_on=join_key, right_on=join_key) # type: ignore[arg-type]
Expand All @@ -183,22 +183,22 @@ def test_anti_join(
(
"antananarivo",
(nw.col("bob") > 5),
{"antananarivo": [2], "bob": [6], "zorro": [9]},
{"antananarivo": [2], "bob": [6], "zor ro": [9]},
),
(
["antananarivo"],
(nw.col("bob") > 5),
{"antananarivo": [2], "bob": [6], "zorro": [9]},
{"antananarivo": [2], "bob": [6], "zor ro": [9]},
),
(
["bob"],
(nw.col("bob") < 5),
{"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7, 8]},
{"antananarivo": [1, 3], "bob": [4, 4], "zor ro": [7, 8]},
),
(
["antananarivo", "bob"],
(nw.col("bob") < 5),
{"antananarivo": [1, 3], "bob": [4, 4], "zorro": [7, 8]},
{"antananarivo": [1, 3], "bob": [4, 4], "zor ro": [7, 8]},
),
],
)
Expand All @@ -208,7 +208,7 @@ def test_semi_join(
filter_expr: nw.Expr,
expected: dict[str, list[Any]],
) -> None:
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]}
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))
other = df.filter(filter_expr)
result = df.join(other, how="semi", left_on=join_key, right_on=join_key).sort( # type: ignore[arg-type]
Expand All @@ -219,7 +219,7 @@ def test_semi_join(

@pytest.mark.parametrize("how", ["right", "full"])
def test_join_not_implemented(constructor: Constructor, how: str) -> None:
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]}
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))

with pytest.raises(
Expand Down Expand Up @@ -333,7 +333,7 @@ def test_left_join_overlapping_column(constructor: Constructor) -> None:

@pytest.mark.parametrize("how", ["inner", "left", "semi", "anti"])
def test_join_keys_exceptions(constructor: Constructor, how: str) -> None:
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]}
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))

with pytest.raises(
Expand Down Expand Up @@ -538,7 +538,7 @@ def test_joinasof_by(
def test_joinasof_not_implemented(
constructor: Constructor, strategy: Literal["backward", "forward"]
) -> None:
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]}
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))

with pytest.raises(
Expand All @@ -554,7 +554,7 @@ def test_joinasof_not_implemented(


def test_joinasof_keys_exceptions(constructor: Constructor) -> None:
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]}
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))

with pytest.raises(
Expand Down Expand Up @@ -595,7 +595,7 @@ def test_joinasof_keys_exceptions(constructor: Constructor) -> None:


def test_joinasof_by_exceptions(constructor: Constructor) -> None:
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zorro": [7.0, 8, 9]}
data = {"antananarivo": [1, 3, 2], "bob": [4, 4, 6], "zor ro": [7.0, 8, 9]}
df = nw.from_native(constructor(data))
with pytest.raises(
ValueError,
Expand Down
16 changes: 8 additions & 8 deletions tests/frame/sort_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@


def test_sort(constructor: Constructor) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
data = {"an tan": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
df = nw.from_native(constructor(data))
result = df.sort("a", "b")
result = df.sort("an tan", "b")
expected = {
"a": [1, 2, 3],
"an tan": [1, 2, 3],
"b": [4, 6, 4],
"z": [7.0, 9.0, 8.0],
}
assert_equal_data(result, expected)
result = df.sort("a", "b", descending=[True, False])
result = df.sort("an tan", "b", descending=[True, False])
expected = {
"a": [3, 2, 1],
"an tan": [3, 2, 1],
"b": [4, 6, 4],
"z": [8.0, 9.0, 7.0],
}
Expand All @@ -29,14 +29,14 @@ def test_sort(constructor: Constructor) -> None:
@pytest.mark.parametrize(
("nulls_last", "expected"),
[
(True, {"a": [0, 2, 0, -1], "b": [3, 2, 1, None]}),
(False, {"a": [-1, 0, 2, 0], "b": [None, 3, 2, 1]}),
(True, {"antan desc": [0, 2, 0, -1], "b": [3, 2, 1, None]}),
(False, {"antan desc": [-1, 0, 2, 0], "b": [None, 3, 2, 1]}),
],
)
def test_sort_nulls(
constructor: Constructor, *, nulls_last: bool, expected: dict[str, float]
) -> None:
data = {"a": [0, 0, 2, -1], "b": [1, 3, 2, None]}
data = {"antan desc": [0, 0, 2, -1], "b": [1, 3, 2, None]}
df = nw.from_native(constructor(data))
result = df.sort("b", descending=True, nulls_last=nulls_last)
assert_equal_data(result, expected)
Loading