Skip to content

Commit 3939e61

Browse files
authored
test(tpcds): add queries 28-63 (#9736)
1 parent bd59fc0 commit 3939e61

File tree

15 files changed

+1933
-28
lines changed

15 files changed

+1933
-28
lines changed

.codespellrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[codespell]
22
# local codespell matches `./docs`, pre-commit codespell matches `docs`
3-
skip = *.lock,.direnv,.git,./docs/_freeze,./docs/_output/**,./docs/_inv/**,docs/_freeze/**,*.svg,*.css,*.html,*.js,ibis/backends/tests/tpc/queries/duckdb/ds/44.sql
3+
skip = *.lock,.direnv,.git,./docs/_freeze,./docs/_output/**,./docs/_inv/**,docs/_freeze/**,*.svg,*.css,*.html,*.js,ibis/backends/tests/tpc/queries/duckdb/ds/*.sql
44
ignore-regex = \b(i[if]f|I[IF]F|AFE|alls)\b
55
builtin = clear,rare,names
66
ignore-words-list = tim,notin,ang

ibis/backends/tests/tpc/README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# TPC queries with Ibis
2+
3+
These tests check correctness against backends that are able to run
4+
some of the TPC-H and TPC-DS queries.
5+
6+
The text queries are assumed to be correct and, if transpiled
7+
correctly, to produce the same results as the written Ibis expression.
8+
9+
**This is the assertion being made in these tests.**
10+
11+
The ground truth SQL text is taken from
12+
[DuckDB](https://github.com/duckdb/duckdb/tree/main/extension/tpcds/dsdgen/queries)
13+
and transpiled using SQLGlot to the dialect of whatever backend is under test.
14+
15+
Some queries are altered from the upstream DuckDB repo to have static column
16+
names and to cast strings that are dates explicitly to dates so that pedantic
17+
engines like Trino will accept these queries. These alterations do not change
18+
the computed results of the queries.
19+
20+
ClickHouse is a bit odd in that queries that contain a cross join filtered by an
21+
`OR` whose operands all share a common condition will effectively never finish.
22+
This is probably a bug in ClickHouse.
23+
24+
For that case, the queries for ClickHouse have been minimally rewritten to pass
25+
by extracting the common join condition out into a single `AND` operand.

ibis/backends/tests/tpc/conftest.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,18 @@ def tpc_test(suite_name: Literal["h", "ds"], *, result_is_empty=False):
5252
def inner(test: Callable[..., ir.Table]):
5353
name = f"tpc{suite_name}"
5454

55-
@getattr(pytest.mark, name)
55+
# so that clickhouse doesn't run forever when we hit one of its weird cross
56+
# join performance black holes
57+
#
58+
# trino can sometimes take a while as well, especially in CI
59+
#
60+
# func_only=True doesn't include the fixture setup time in the duration
61+
# of the test run, which is important since backends can take a hugely
62+
# variable amount of time to load all the TPC-$WHATEVER tables.
63+
@pytest.mark.timeout(60, func_only=True)
5664
@pytest.mark.usefixtures("backend")
5765
@pytest.mark.xdist_group(name)
66+
@getattr(pytest.mark, name)
5867
@functools.wraps(test)
5968
def wrapper(*args, backend, **kwargs):
6069
backend_name = backend.name()
@@ -94,17 +103,25 @@ def wrapper(*args, backend, **kwargs):
94103

95104
assert result_expr._find_backend(use_default=False) is backend.connection
96105
result = backend.connection.to_pandas(result_expr)
97-
assert (result_is_empty and result.empty) or not result.empty
106+
107+
assert (result_is_empty and result.empty) or (
108+
not result_is_empty and not result.empty
109+
)
98110

99111
expected = expected_expr.to_pandas()
100112

101113
assert len(expected.columns) == len(result.columns)
102-
assert all(r in e.lower() for r, e in zip(result.columns, expected.columns))
114+
assert all(
115+
r.lower() in e.lower() for r, e in zip(result.columns, expected.columns)
116+
)
103117

104118
expected.columns = result.columns
105119

106120
expected = PandasData.convert_table(expected, result_expr.schema())
107-
assert (result_is_empty and expected.empty) or not expected.empty
121+
122+
assert (result_is_empty and expected.empty) or (
123+
not result_is_empty and not expected.empty
124+
)
108125

109126
assert len(expected) == len(result)
110127
assert result.columns.tolist() == expected.columns.tolist()

0 commit comments

Comments
 (0)