diff --git a/ibis/backends/sql/compilers/base.py b/ibis/backends/sql/compilers/base.py index 74a39d3f9db1..a9d0f71ae9a0 100644 --- a/ibis/backends/sql/compilers/base.py +++ b/ibis/backends/sql/compilers/base.py @@ -267,6 +267,9 @@ class SQLGlotCompiler(abc.ABC): copy_func_args: bool = False """Whether to copy function arguments when generating SQL.""" + supports_qualify: bool = False + """Whether the backend supports the QUALIFY clause.""" + NAN: ClassVar[sge.Expression] = sge.Cast( this=sge.convert("NaN"), to=sge.DataType(this=sge.DataType.Type.DOUBLE) ) @@ -1249,15 +1252,21 @@ def _cleanup_names(self, exprs: Mapping[str, sge.Expression]): else: yield value.as_(name, quoted=self.quoted, copy=False) - def visit_Select(self, op, *, parent, selections, predicates, sort_keys): + def visit_Select(self, op, *, parent, selections, predicates, qualified, sort_keys): # if we've constructed a useless projection return the parent relation - if not selections and not predicates and not sort_keys: + if not (selections or predicates or qualified or sort_keys): return parent result = parent if selections: - if op.is_star_selection(): + # if there are `qualify` predicates then sqlglot adds a hidden + # column to implement the functionality if the dialect doesn't + # support it + # + # using STAR in that case would lead to an extra column, so in that + # case we have to spell out the columns + if op.is_star_selection() and (not qualified or self.supports_qualify): fields = [STAR] else: fields = self._cleanup_names(selections) @@ -1266,6 +1275,9 @@ def visit_Select(self, op, *, parent, selections, predicates, sort_keys): if predicates: result = result.where(*predicates, copy=False) + if qualified: + result = result.qualify(*qualified, copy=False) + if sort_keys: result = result.order_by(*sort_keys, copy=False) diff --git a/ibis/backends/sql/compilers/bigquery/__init__.py b/ibis/backends/sql/compilers/bigquery/__init__.py index 96bfc46cafae..35f71b44fdc3 100644 --- a/ibis/backends/sql/compilers/bigquery/__init__.py +++ b/ibis/backends/sql/compilers/bigquery/__init__.py @@ -112,6 +112,8 @@ class BigQueryCompiler(SQLGlotCompiler): *SQLGlotCompiler.rewrites, ) + supports_qualify = True + UNSUPPORTED_OPS = ( ops.DateDiff, ops.ExtractAuthority, diff --git a/ibis/backends/sql/compilers/clickhouse.py b/ibis/backends/sql/compilers/clickhouse.py index b4a6dd671898..d1f3db494f2e 100644 --- a/ibis/backends/sql/compilers/clickhouse.py +++ b/ibis/backends/sql/compilers/clickhouse.py @@ -42,6 +42,8 @@ class ClickHouseCompiler(SQLGlotCompiler): agg = ClickhouseAggGen() + supports_qualify = True + UNSUPPORTED_OPS = ( ops.RowID, ops.CumeDist, diff --git a/ibis/backends/sql/compilers/duckdb.py b/ibis/backends/sql/compilers/duckdb.py index af5a0757abe1..2ff6e99d41c7 100644 --- a/ibis/backends/sql/compilers/duckdb.py +++ b/ibis/backends/sql/compilers/duckdb.py @@ -42,6 +42,8 @@ class DuckDBCompiler(SQLGlotCompiler): agg = AggGen(supports_filter=True, supports_order_by=True) + supports_qualify = True + LOWERED_OPS = { ops.Sample: None, ops.StringSlice: None, diff --git a/ibis/backends/sql/compilers/mssql.py b/ibis/backends/sql/compilers/mssql.py index e16e969abfc0..d55a24b54c8f 100644 --- a/ibis/backends/sql/compilers/mssql.py +++ b/ibis/backends/sql/compilers/mssql.py @@ -477,9 +477,9 @@ def visit_All(self, op, *, arg, where): arg = self.if_(where, arg, NULL) return sge.Min(this=arg) - def visit_Select(self, op, *, parent, selections, predicates, sort_keys): + def visit_Select(self, op, *, parent, selections, predicates, qualified, sort_keys): # if we've constructed a useless projection return the parent relation - if not selections and not predicates and not sort_keys: + if not (selections or predicates or qualified or sort_keys): return parent result = parent @@ -492,6 +492,9 @@ def visit_Select(self, op, *, parent, selections, predicates, sort_keys): if predicates: result = result.where(*predicates, copy=True) + if qualified: + result = result.qualify(*qualified, copy=True) + if sort_keys: result = result.order_by(*sort_keys, copy=False) diff --git a/ibis/backends/sql/compilers/snowflake.py b/ibis/backends/sql/compilers/snowflake.py index e1929bd57d35..2aff5ab5512b 100644 --- a/ibis/backends/sql/compilers/snowflake.py +++ b/ibis/backends/sql/compilers/snowflake.py @@ -45,6 +45,7 @@ class SnowflakeCompiler(SQLGlotCompiler): dialect = Snowflake type_mapper = SnowflakeType no_limit_value = NULL + supports_qualify = True agg = AggGen(supports_order_by=True) diff --git a/ibis/backends/sql/dialects.py b/ibis/backends/sql/dialects.py index 499a76e75daa..217fdf34e1e2 100644 --- a/ibis/backends/sql/dialects.py +++ b/ibis/backends/sql/dialects.py @@ -368,7 +368,13 @@ def _create_sql(self, expression: sge.Create) -> str: sge.Stddev: rename_func("stddev_pop"), sge.ApproxDistinct: rename_func("approx_count_distinct"), sge.Create: _create_sql, - sge.Select: transforms.preprocess([transforms.eliminate_semi_and_anti_joins]), + sge.Select: transforms.preprocess( + [ + transforms.eliminate_semi_and_anti_joins, + transforms.eliminate_distinct_on, + transforms.eliminate_qualify, + ] + ), sge.GroupConcat: rename_func("listagg"), } diff --git a/ibis/backends/sql/rewrites.py b/ibis/backends/sql/rewrites.py index 7e01999144ec..067c9bf70b91 100644 --- a/ibis/backends/sql/rewrites.py +++ b/ibis/backends/sql/rewrites.py @@ -51,6 +51,7 @@ class Select(ops.Relation): parent: ops.Relation selections: FrozenDict[str, ops.Value] = {} predicates: VarTuple[ops.Value[dt.Boolean]] = () + qualified: VarTuple[ops.Value[dt.Boolean]] = () sort_keys: VarTuple[ops.SortKey] = () def is_star_selection(self): @@ -99,10 +100,26 @@ def project_to_select(_, **kwargs): return Select(_.parent, selections=_.values) +def partition_predicates(predicates): + qualified = [] + unqualified = [] + + for predicate in predicates: + if predicate.find(ops.WindowFunction, filter=ops.Value): + qualified.append(predicate) + else: + unqualified.append(predicate) + + return unqualified, qualified + + @replace(p.Filter) def filter_to_select(_, **kwargs): """Convert a Filter node to a Select node.""" - return Select(_.parent, selections=_.values, predicates=_.predicates) + predicates, qualified = partition_predicates(_.predicates) + return Select( + _.parent, selections=_.values, predicates=predicates, qualified=qualified + ) @replace(p.Sort) @@ -233,6 +250,9 @@ def merge_select_select(_, **kwargs): predicates = tuple(p.replace(subs, filter=ops.Value) for p in _.predicates) unique_predicates = toolz.unique(_.parent.predicates + predicates) + qualified = tuple(p.replace(subs, filter=ops.Value) for p in _.qualified) + unique_qualified = toolz.unique(_.parent.qualified + qualified) + sort_keys = tuple(s.replace(subs, filter=ops.Value) for s in _.sort_keys) sort_key_exprs = {s.expr for s in sort_keys} parent_sort_keys = tuple( @@ -244,6 +264,7 @@ def merge_select_select(_, **kwargs): _.parent.parent, selections=selections, predicates=unique_predicates, + qualified=unique_qualified, sort_keys=unique_sort_keys, ) return result if complexity(result) <= complexity(_) else _ diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/bigquery/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/bigquery/out.sql new file mode 100644 index 000000000000..97b7a1428416 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/bigquery/out.sql @@ -0,0 +1,12 @@ +SELECT + * +FROM ( + SELECT + `t0`.`x`, + SUM(`t0`.`x`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `y` + FROM `t` AS `t0` +) AS `t1` +WHERE + `t1`.`y` <= 37 +QUALIFY + AVG(`t1`.`x`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/clickhouse/out.sql new file mode 100644 index 000000000000..f6bf7bdeb354 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/clickhouse/out.sql @@ -0,0 +1,12 @@ +SELECT + * +FROM ( + SELECT + "t0"."x" AS "x", + SUM("t0"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "y" + FROM "t" AS "t0" +) AS "t1" +WHERE + "t1"."y" <= 37 +QUALIFY + isNotNull(AVG("t1"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/datafusion/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/datafusion/out.sql new file mode 100644 index 000000000000..704c730d726a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/datafusion/out.sql @@ -0,0 +1,19 @@ +SELECT + "x", + "y" +FROM ( + SELECT + "t1"."x", + "t1"."y", + AVG("t1"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS _w + FROM ( + SELECT + "t0"."x", + SUM("t0"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "y" + FROM "t" AS "t0" + ) AS "t1" + WHERE + "t1"."y" <= 37 +) AS _t +WHERE + _w IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/druid/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/druid/out.sql new file mode 100644 index 000000000000..704c730d726a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/druid/out.sql @@ -0,0 +1,19 @@ +SELECT + "x", + "y" +FROM ( + SELECT + "t1"."x", + "t1"."y", + AVG("t1"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS _w + FROM ( + SELECT + "t0"."x", + SUM("t0"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "y" + FROM "t" AS "t0" + ) AS "t1" + WHERE + "t1"."y" <= 37 +) AS _t +WHERE + _w IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/duckdb/out.sql new file mode 100644 index 000000000000..a9bfa1070567 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/duckdb/out.sql @@ -0,0 +1,12 @@ +SELECT + * +FROM ( + SELECT + "t0"."x", + SUM("t0"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "y" + FROM "t" AS "t0" +) AS "t1" +WHERE + "t1"."y" <= 37 +QUALIFY + AVG("t1"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/exasol/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/exasol/out.sql new file mode 100644 index 000000000000..88c5c84cce6d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/exasol/out.sql @@ -0,0 +1,19 @@ +SELECT + "x", + "y" +FROM ( + SELECT + "t1"."x", + "t1"."y", + AVG("t1"."x") OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS _w + FROM ( + SELECT + "t0"."x", + SUM("t0"."x") OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "y" + FROM "t" AS "t0" + ) AS "t1" + WHERE + "t1"."y" <= 37 +) AS _t +WHERE + _w IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/flink/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/flink/out.sql new file mode 100644 index 000000000000..2b27fa484bf1 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/flink/out.sql @@ -0,0 +1,13 @@ +SELECT + `t1`.`x`, + `t1`.`y` +FROM ( + SELECT + `t0`.`x`, + SUM(`t0`.`x`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `y` + FROM `t` AS `t0` +) AS `t1` +WHERE + `t1`.`y` <= 37 +QUALIFY + AVG(`t1`.`x`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/impala/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/impala/out.sql new file mode 100644 index 000000000000..787083fbab6a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/impala/out.sql @@ -0,0 +1,19 @@ +SELECT + `x`, + `y` +FROM ( + SELECT + `t1`.`x`, + `t1`.`y`, + AVG(`t1`.`x`) OVER (ORDER BY NULL ASC) AS _w + FROM ( + SELECT + `t0`.`x`, + SUM(`t0`.`x`) OVER (ORDER BY NULL ASC) AS `y` + FROM `t` AS `t0` + ) AS `t1` + WHERE + `t1`.`y` <= 37 +) AS _t +WHERE + _w IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/mssql/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/mssql/out.sql new file mode 100644 index 000000000000..256fa05c07ab --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/mssql/out.sql @@ -0,0 +1,19 @@ +SELECT + [x], + [y] +FROM ( + SELECT + [t1].[x] AS [x], + [t1].[y] AS [y], + AVG([t1].[x]) OVER (ORDER BY CASE WHEN [t1].[x] IS NULL THEN 1 ELSE 0 END, [t1].[x] ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS _w + FROM ( + SELECT + [t0].[x], + SUM([t0].[x]) OVER (ORDER BY CASE WHEN [t0].[x] IS NULL THEN 1 ELSE 0 END, [t0].[x] ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS [y] + FROM [t] AS [t0] + ) AS [t1] + WHERE + [t1].[y] <= 37 +) AS _t +WHERE + _w IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/mysql/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/mysql/out.sql new file mode 100644 index 000000000000..42f560a982a8 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/mysql/out.sql @@ -0,0 +1,19 @@ +SELECT + `x`, + `y` +FROM ( + SELECT + `t1`.`x`, + `t1`.`y`, + AVG(`t1`.`x`) OVER (ORDER BY CASE WHEN NULL IS NULL THEN 1 ELSE 0 END, NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS _w + FROM ( + SELECT + `t0`.`x`, + SUM(`t0`.`x`) OVER (ORDER BY CASE WHEN NULL IS NULL THEN 1 ELSE 0 END, NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `y` + FROM `t` AS `t0` + ) AS `t1` + WHERE + `t1`.`y` <= 37 +) AS _t +WHERE + _w IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/oracle/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/oracle/out.sql new file mode 100644 index 000000000000..dce34d3769d2 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/oracle/out.sql @@ -0,0 +1,19 @@ +SELECT + "x", + "y" +FROM ( + SELECT + "t1"."x", + "t1"."y", + AVG("t1"."x") OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS _w + FROM ( + SELECT + "t0"."x", + SUM("t0"."x") OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "y" + FROM "t" "t0" + ) "t1" + WHERE + "t1"."y" <= 37 +) _t +WHERE + _w IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/postgres/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/postgres/out.sql new file mode 100644 index 000000000000..704c730d726a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/postgres/out.sql @@ -0,0 +1,19 @@ +SELECT + "x", + "y" +FROM ( + SELECT + "t1"."x", + "t1"."y", + AVG("t1"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS _w + FROM ( + SELECT + "t0"."x", + SUM("t0"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "y" + FROM "t" AS "t0" + ) AS "t1" + WHERE + "t1"."y" <= 37 +) AS _t +WHERE + _w IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/pyspark/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/pyspark/out.sql new file mode 100644 index 000000000000..f255a3c6d9f2 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/pyspark/out.sql @@ -0,0 +1,20 @@ +SELECT + `x`, + `y` +FROM ( + SELECT + `t1`.`x`, + `t1`.`y`, + AVG(`t1`.`x`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS _w, + AVG(`t1`.`x`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS _w_2 + FROM ( + SELECT + `t0`.`x`, + SUM(`t0`.`x`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `y` + FROM `t` AS `t0` + ) AS `t1` + WHERE + `t1`.`y` <= 37 +) AS _t +WHERE + _w IS NOT NULL AND NOT ISNAN(_w_2) \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/risingwave/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/risingwave/out.sql new file mode 100644 index 000000000000..704c730d726a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/risingwave/out.sql @@ -0,0 +1,19 @@ +SELECT + "x", + "y" +FROM ( + SELECT + "t1"."x", + "t1"."y", + AVG("t1"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS _w + FROM ( + SELECT + "t0"."x", + SUM("t0"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "y" + FROM "t" AS "t0" + ) AS "t1" + WHERE + "t1"."y" <= 37 +) AS _t +WHERE + _w IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/snowflake/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/snowflake/out.sql new file mode 100644 index 000000000000..9149dca4d3a5 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/snowflake/out.sql @@ -0,0 +1,12 @@ +SELECT + * +FROM ( + SELECT + "t0"."x", + SUM("t0"."x") OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "y" + FROM "t" AS "t0" +) AS "t1" +WHERE + "t1"."y" <= 37 +QUALIFY + AVG("t1"."x") OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/sqlite/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/sqlite/out.sql new file mode 100644 index 000000000000..704c730d726a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/sqlite/out.sql @@ -0,0 +1,19 @@ +SELECT + "x", + "y" +FROM ( + SELECT + "t1"."x", + "t1"."y", + AVG("t1"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS _w + FROM ( + SELECT + "t0"."x", + SUM("t0"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "y" + FROM "t" AS "t0" + ) AS "t1" + WHERE + "t1"."y" <= 37 +) AS _t +WHERE + _w IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/trino/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/trino/out.sql new file mode 100644 index 000000000000..704c730d726a --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/trino/out.sql @@ -0,0 +1,19 @@ +SELECT + "x", + "y" +FROM ( + SELECT + "t1"."x", + "t1"."y", + AVG("t1"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS _w + FROM ( + SELECT + "t0"."x", + SUM("t0"."x") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "y" + FROM "t" AS "t0" + ) AS "t1" + WHERE + "t1"."y" <= 37 +) AS _t +WHERE + _w IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 09a8a705c60b..bd61852109a9 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -345,37 +345,29 @@ def test_filter(backend, alltypes, sorted_df, predicate_fn, expected_fn): backend.assert_frame_equal(result, expected) +@pytest.mark.notyet( + ["exasol"], + raises=ExaQueryError, + reason="sqlglot `eliminate_qualify` transform produces underscores in aliases, which is not allowed by exasol", +) @pytest.mark.notimpl( - [ - "bigquery", - "clickhouse", - "datafusion", - "duckdb", - "impala", - "mysql", - "postgres", - "risingwave", - "sqlite", - "snowflake", - "polars", - "mssql", - "trino", - "druid", - "oracle", - "exasol", - "pandas", - "pyspark", - "dask", - ] + ["druid"], + raises=PyDruidProgrammingError, + reason="requires enabling window functions", ) -@pytest.mark.never( +@pytest.mark.notimpl(["polars", "dask", "pandas"], raises=com.OperationNotDefinedError) +@pytest.mark.notyet( + ["oracle"], + raises=OracleDatabaseError, + reason="sqlglot `eliminate_qualify` transform produces underscores in aliases, which is not allowed by oracle", +) +@pytest.mark.notyet( ["flink"], reason="Flink engine does not support generic window clause with no order by", ) # TODO(kszucs): this is not supported at the expression level def test_filter_with_window_op(backend, alltypes, sorted_df): - sorted_alltypes = alltypes.order_by("id") - table = sorted_alltypes + table = alltypes window = ibis.window(group_by=table.id) table = table.filter(lambda t: t["id"].mean().over(window) > 3).order_by("id") result = table.execute() diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 4a299f8a83c1..1f2baedcd244 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -1,5 +1,7 @@ from __future__ import annotations +import re + import pytest from pytest import param @@ -205,3 +207,23 @@ def query(t, group_cols): t3 = query(t2, group_cols=["street"]) snapshot.assert_match(str(ibis.to_sql(t3, dialect=backend_name)), "out.sql") + + +@pytest.mark.parametrize("backend_name", _get_backends_to_test()) +@pytest.mark.notimpl( + ["dask", "pandas", "polars"], raises=ValueError, reason="not a SQL backend" +) +def test_mixed_qualified_and_unqualified_predicates(backend_name, snapshot): + t = ibis.table({"x": "int64"}, name="t") + expr = t.mutate(y=t.x.sum().over(ibis.window())).filter( + _.y <= 37, _.x.mean().over().notnull() + ) + result = ibis.to_sql(expr, dialect=backend_name) + + sc = ibis.backends.sql.compilers + compiler = getattr(sc, backend_name).compiler + + assert (not compiler.supports_qualify) or re.search( + r"\bQUALIFY\b", result, flags=re.MULTILINE | re.IGNORECASE + ) + snapshot.assert_match(result, "out.sql") diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index 104b5d752c34..be65a80e35d0 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -270,13 +270,16 @@ class Filter(Simple): predicates: VarTuple[Value[dt.Boolean]] def __init__(self, parent, predicates): - from ibis.expr.rewrites import ReductionLike + from ibis.expr.rewrites import ReductionLike, p for pred in predicates: - if pred.find(ReductionLike, filter=Value): + # bare reductions that are not window functions are not allowed + if pred.find(ReductionLike, filter=Value) and not pred.find( + p.WindowFunction, filter=Value + ): raise IntegrityError( f"Cannot add {pred!r} to filter, it is a reduction which " - "must be converted to a scalar subquery first" + "must be converted to a scalar subquery or window function first" ) if pred.relations and parent not in pred.relations: raise IntegrityError(