diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql index 1391a6c01c51f..14c575abc2d55 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/first/out.sql @@ -1,3 +1,3 @@ SELECT - FIRST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC) AS `First(double_col, ())` + FIRST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `First(double_col, ())` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql index 904f923e17e7a..cbfa788b87978 100644 --- a/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_analytic_functions/test_analytic_exprs/last/out.sql @@ -1,3 +1,3 @@ SELECT - LAST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC) AS `Last(double_col, ())` + LAST_VALUE(`t0`.`double_col`) OVER (ORDER BY `t0`.`id` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `Last(double_col, ())` FROM `functional_alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql b/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql index 4705e8524638b..5cdacbb050331 100644 --- a/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_exprs/test_filter_with_analytic/out.sql @@ -1 +1 @@ -SELECT * FROM (SELECT `t1`.`col`, COUNT(*) OVER (ORDER BY NULL ASC) AS `analytic` FROM (SELECT `t0`.`col`, NULL AS `filter` FROM `x` AS `t0` WHERE NULL IS NULL) AS `t1`) AS `t2` \ No newline at end of file +SELECT * FROM (SELECT `t1`.`col`, COUNT(*) OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `analytic` FROM (SELECT `t0`.`col`, NULL AS `filter` FROM `x` AS `t0` WHERE NULL IS NULL) AS `t1`) AS `t2` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql index 79ea467e969b4..63aacc7c6eb90 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_group_by_with_window_preserves_range/out.sql @@ -1 +1 @@ -SELECT `t0`.`one`, `t0`.`two`, `t0`.`three`, SUM(`t0`.`two`) OVER (PARTITION BY `t0`.`three` ORDER BY `t0`.`one` ASC) AS `four` FROM `my_data` AS `t0` \ No newline at end of file +SELECT `t0`.`one`, `t0`.`two`, `t0`.`three`, SUM(`t0`.`two`) OVER (PARTITION BY `t0`.`three` ORDER BY `t0`.`one` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `four` FROM `my_data` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql index 9a50bddd2c69e..dffd166928516 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_add_default_order_by/out.sql @@ -12,7 +12,7 @@ SELECT `t0`.`k`, LAG(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) AS `lag`, LEAD(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) - `t0`.`f` AS `fwd_diff`, - FIRST_VALUE(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) AS `first`, - LAST_VALUE(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) AS `last`, + FIRST_VALUE(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `first`, + LAST_VALUE(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `last`, LAG(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`d` ASC) AS `lag2` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql index 3004bcd8535f1..8a730a7c6f9ef 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_aggregate_in_projection/out.sql @@ -10,5 +10,5 @@ SELECT `t0`.`i`, `t0`.`j`, `t0`.`k`, - `t0`.`f` / SUM(`t0`.`f`) OVER (ORDER BY NULL ASC) AS `normed_f` + `t0`.`f` / SUM(`t0`.`f`) OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `normed_f` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql index 1c00fd99c792b..211f7becce07e 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out1.sql @@ -1,3 +1,3 @@ SELECT - MAX(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + MAX(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql index 1c00fd99c792b..211f7becce07e 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/max/out2.sql @@ -1,3 +1,3 @@ SELECT - MAX(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + MAX(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql index 8bfdfda64125c..a623a3cfe0661 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out1.sql @@ -1,3 +1,3 @@ SELECT - AVG(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + AVG(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql index 8bfdfda64125c..a623a3cfe0661 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/mean/out2.sql @@ -1,3 +1,3 @@ SELECT - AVG(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + AVG(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql index fa763e794811d..bc120a638006e 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out1.sql @@ -1,3 +1,3 @@ SELECT - MIN(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + MIN(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql index fa763e794811d..bc120a638006e 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/min/out2.sql @@ -1,3 +1,3 @@ SELECT - MIN(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + MIN(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql index 4a0222bfac7c8..bcd6a2ba703e7 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out1.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + SUM(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql index 4a0222bfac7c8..bcd6a2ba703e7 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_cumulative_functions/sum/out2.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC) AS `foo` + SUM(`t0`.`f`) OVER (ORDER BY `t0`.`d` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql index 7f161f6585290..69e25f2943844 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_multiple_windows/out.sql @@ -1,4 +1,4 @@ SELECT `t0`.`g`, - SUM(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC) - SUM(`t0`.`f`) OVER (ORDER BY NULL ASC) AS `result` + SUM(`t0`.`f`) OVER (PARTITION BY `t0`.`g` ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - SUM(`t0`.`f`) OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `result` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql b/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql index 148babbf968d9..a5c1c098080ea 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_order_by_desc/out2.sql @@ -1,4 +1,4 @@ SELECT LAG(`t0`.`d`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` DESC NULLS LAST) AS `foo`, - MAX(`t0`.`a`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` DESC NULLS LAST) AS `Max(a)` + MAX(`t0`.`a`) OVER (PARTITION BY `t0`.`g` ORDER BY `t0`.`f` DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `Max(a)` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql index 59c90d433f04a..560362ff0a455 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/cumulative/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql index 59c90d433f04a..560362ff0a455 100644 --- a/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_window/test_window_frame_specs/foll_0/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC) AS `foo` + SUM(`t0`.`d`) OVER (ORDER BY `t0`.`f` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `foo` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/sql/compilers/base.py b/ibis/backends/sql/compilers/base.py index a86e3013e3592..ad3db779d84f5 100644 --- a/ibis/backends/sql/compilers/base.py +++ b/ibis/backends/sql/compilers/base.py @@ -1185,12 +1185,12 @@ def visit_WindowFunction(self, op, *, how, func, start, end, group_by, order_by) ) order = sge.Order(expressions=order_by) if order_by else None - spec = self._minimize_spec(op.start, op.end, spec) + spec = self._minimize_spec(op, spec) return sge.Window(this=func, partition_by=group_by, order=order, spec=spec) @staticmethod - def _minimize_spec(start, end, spec): + def _minimize_spec(op, spec): return spec def visit_LagLead(self, op, *, arg, offset, default): diff --git a/ibis/backends/sql/compilers/bigquery/__init__.py b/ibis/backends/sql/compilers/bigquery/__init__.py index 2b62903d00098..804ba5c2c58b2 100644 --- a/ibis/backends/sql/compilers/bigquery/__init__.py +++ b/ibis/backends/sql/compilers/bigquery/__init__.py @@ -19,6 +19,8 @@ from ibis.backends.sql.compilers.bigquery.udf.core import PythonToJavaScriptTranslator from ibis.backends.sql.datatypes import BigQueryType, BigQueryUDFType from ibis.backends.sql.rewrites import ( + FirstValue, + LastValue, exclude_unsupported_window_frame_from_ops, exclude_unsupported_window_frame_from_rank, exclude_unsupported_window_frame_from_row_number, @@ -323,12 +325,10 @@ def _compile_python_udf(self, udf_node: ops.ScalarUDF) -> sge.Create: return func @staticmethod - def _minimize_spec(start, end, spec): - if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following + def _minimize_spec(op, spec): + # bigquery doesn't allow certain window functions to specify a window frame + if isinstance(func := op.func, ops.Analytic) and not isinstance( + func, (ops.First, ops.Last, FirstValue, LastValue, ops.NthValue) ): return None return spec diff --git a/ibis/backends/sql/compilers/clickhouse.py b/ibis/backends/sql/compilers/clickhouse.py index fe25f73bc943d..1793ebb3b94ed 100644 --- a/ibis/backends/sql/compilers/clickhouse.py +++ b/ibis/backends/sql/compilers/clickhouse.py @@ -120,13 +120,8 @@ class ClickHouseCompiler(SQLGlotCompiler): } @staticmethod - def _minimize_spec(start, end, spec): - if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following - ): + def _minimize_spec(op, spec): + if isinstance(op.func, ops.NTile): return None return spec diff --git a/ibis/backends/sql/compilers/exasol.py b/ibis/backends/sql/compilers/exasol.py index f1f7ee803c1fe..34f45919eaa17 100644 --- a/ibis/backends/sql/compilers/exasol.py +++ b/ibis/backends/sql/compilers/exasol.py @@ -10,6 +10,8 @@ from ibis.backends.sql.datatypes import ExasolType from ibis.backends.sql.dialects import Exasol from ibis.backends.sql.rewrites import ( + FirstValue, + LastValue, exclude_unsupported_window_frame_from_ops, exclude_unsupported_window_frame_from_rank, exclude_unsupported_window_frame_from_row_number, @@ -85,12 +87,9 @@ class ExasolCompiler(SQLGlotCompiler): } @staticmethod - def _minimize_spec(start, end, spec): - if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following + def _minimize_spec(op, spec): + if isinstance(func := op.func, ops.Analytic) and not isinstance( + func, (ops.First, ops.Last, FirstValue, LastValue, ops.NthValue) ): return None return spec diff --git a/ibis/backends/sql/compilers/flink.py b/ibis/backends/sql/compilers/flink.py index da8b27d94cd2d..0bd140ba761ed 100644 --- a/ibis/backends/sql/compilers/flink.py +++ b/ibis/backends/sql/compilers/flink.py @@ -128,16 +128,16 @@ def _generate_groups(groups): return groups @staticmethod - def _minimize_spec(start, end, spec): + def _minimize_spec(op, spec): if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) + op.start is None + and isinstance(getattr(end := op.end, "value", None), ops.Literal) and end.value.value == 0 and end.following ): return None elif ( - isinstance(getattr(end, "value", None), ops.Cast) + isinstance(getattr(end := op.end, "value", None), ops.Cast) and end.value.arg.value == 0 and end.following ): diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index 239e7a5f69404..8159e48ccf951 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -10,7 +10,12 @@ from ibis.backends.sql.compilers.base import NULL, STAR, SQLGlotCompiler from ibis.backends.sql.datatypes import ImpalaType from ibis.backends.sql.dialects import Impala -from ibis.backends.sql.rewrites import lower_sample, rewrite_empty_order_by_window +from ibis.backends.sql.rewrites import ( + FirstValue, + LastValue, + lower_sample, + rewrite_empty_order_by_window, +) class ImpalaCompiler(SQLGlotCompiler): @@ -73,28 +78,11 @@ class ImpalaCompiler(SQLGlotCompiler): } @staticmethod - def _minimize_spec(start, end, spec): - # start is None means unbounded preceding - if start is None: - # end is None: unbounded following - # end == 0 => current row - # these are treated the same because for the functions where these - # are not allowed they end up behaving the same - # - # I think we're not covering some cases here: - # These will be treated the same, even though they're not - # - window(order_by=x, rows=(None, None)) # should be equivalent to `over ()` - # - window(order_by=x, rows=(None, 0)) # equivalent to a cumulative aggregation - # - # TODO(cpcloud): we need to clean up the semantics of unbounded - # following vs current row at the API level. - # - if end is None or ( - isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following - ): - return None + def _minimize_spec(op, spec): + if isinstance(func := op.func, ops.Analytic) and not isinstance( + func, (ops.First, ops.Last, FirstValue, LastValue, ops.NthValue) + ): + return None return spec def visit_Log2(self, op, *, arg): diff --git a/ibis/backends/sql/compilers/mssql.py b/ibis/backends/sql/compilers/mssql.py index 429dd2503af09..786d37c9d385c 100644 --- a/ibis/backends/sql/compilers/mssql.py +++ b/ibis/backends/sql/compilers/mssql.py @@ -20,6 +20,8 @@ from ibis.backends.sql.datatypes import MSSQLType from ibis.backends.sql.dialects import MSSQL from ibis.backends.sql.rewrites import ( + FirstValue, + LastValue, exclude_unsupported_window_frame_from_ops, exclude_unsupported_window_frame_from_rank, exclude_unsupported_window_frame_from_row_number, @@ -147,12 +149,9 @@ def _generate_groups(groups): return groups @staticmethod - def _minimize_spec(start, end, spec): - if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following + def _minimize_spec(op, spec): + if isinstance(func := op.func, ops.Analytic) and not isinstance( + func, (ops.First, ops.Last, FirstValue, LastValue, ops.NthValue) ): return None return spec diff --git a/ibis/backends/sql/compilers/mysql.py b/ibis/backends/sql/compilers/mysql.py index 3971c89b4f610..d8f0c3262d137 100644 --- a/ibis/backends/sql/compilers/mysql.py +++ b/ibis/backends/sql/compilers/mysql.py @@ -101,12 +101,9 @@ def POS_INF(self): } @staticmethod - def _minimize_spec(start, end, spec): - if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following + def _minimize_spec(op, spec): + if isinstance( + op.func, (ops.RankBase, ops.CumeDist, ops.NTile, ops.PercentRank) ): return None return spec diff --git a/ibis/backends/sql/compilers/oracle.py b/ibis/backends/sql/compilers/oracle.py index 062e9074ac83d..7548e494b7c1b 100644 --- a/ibis/backends/sql/compilers/oracle.py +++ b/ibis/backends/sql/compilers/oracle.py @@ -454,7 +454,7 @@ def visit_WindowFunction(self, op, *, how, func, start, end, group_by, order_by) order = sge.Order(expressions=order_by) if order_by else None - spec = self._minimize_spec(op.start, op.end, spec) + spec = self._minimize_spec(op, spec) return sge.Window(this=func, partition_by=group_by, order=order, spec=spec) diff --git a/ibis/backends/sql/compilers/snowflake.py b/ibis/backends/sql/compilers/snowflake.py index 7cdc980752fe9..35e0397fc1e67 100644 --- a/ibis/backends/sql/compilers/snowflake.py +++ b/ibis/backends/sql/compilers/snowflake.py @@ -25,6 +25,8 @@ from ibis.backends.sql.datatypes import SnowflakeType from ibis.backends.sql.dialects import Snowflake from ibis.backends.sql.rewrites import ( + FirstValue, + LastValue, exclude_unsupported_window_frame_from_ops, exclude_unsupported_window_frame_from_row_number, lower_log2, @@ -238,10 +240,10 @@ def _compile_udf(self, udf_node: ops.ScalarUDF): _compile_python_udf = _compile_udf @staticmethod - def _minimize_spec(start, end, spec): + def _minimize_spec(op, spec): if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) + op.start is None + and isinstance(getattr(end := op.end, "value", None), ops.Literal) and end.value.value == 0 and end.following ): @@ -668,6 +670,14 @@ def visit_Xor(self, op, *, left, right): # boolxor accepts numerics ... and returns a boolean? wtf? return self.f.boolxor(self.cast(left, dt.int8), self.cast(right, dt.int8)) + @staticmethod + def _minimize_spec(op, spec): + if isinstance(func := op.func, ops.Analytic) and not isinstance( + func, (ops.First, ops.Last, FirstValue, LastValue, ops.NthValue) + ): + return None + return spec + def visit_WindowFunction(self, op, *, how, func, start, end, group_by, order_by): if start is None: start = {} @@ -698,7 +708,7 @@ def visit_WindowFunction(self, op, *, how, func, start, end, group_by, order_by) order = sge.Order(expressions=order_by) if order_by else None orig_spec = spec - spec = self._minimize_spec(op.start, op.end, orig_spec) + spec = self._minimize_spec(op, orig_spec) # due to https://docs.snowflake.com/en/sql-reference/functions-analytic#window-frame-usage-notes # we need to make the default window rows (since range isn't supported) diff --git a/ibis/backends/sql/compilers/trino.py b/ibis/backends/sql/compilers/trino.py index 2019e1d36cd47..8cdbd201ecee9 100644 --- a/ibis/backends/sql/compilers/trino.py +++ b/ibis/backends/sql/compilers/trino.py @@ -22,6 +22,8 @@ from ibis.backends.sql.datatypes import TrinoType from ibis.backends.sql.dialects import Trino from ibis.backends.sql.rewrites import ( + FirstValue, + LastValue, exclude_unsupported_window_frame_from_ops, exclude_unsupported_window_frame_from_rank, exclude_unsupported_window_frame_from_row_number, @@ -104,12 +106,9 @@ class TrinoCompiler(SQLGlotCompiler): } @staticmethod - def _minimize_spec(start, end, spec): - if ( - start is None - and isinstance(getattr(end, "value", None), ops.Literal) - and end.value.value == 0 - and end.following + def _minimize_spec(op, spec): + if isinstance(func := op.func, ops.Analytic) and not isinstance( + func, (ops.First, ops.Last, FirstValue, LastValue, ops.NthValue) ): return None return spec diff --git a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/impala/out.sql b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/impala/out.sql index 787083fbab6a0..642b68f972ff2 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/impala/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/impala/out.sql @@ -5,11 +5,11 @@ FROM ( SELECT `t1`.`x`, `t1`.`y`, - AVG(`t1`.`x`) OVER (ORDER BY NULL ASC) AS _w + AVG(`t1`.`x`) OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS _w FROM ( SELECT `t0`.`x`, - SUM(`t0`.`x`) OVER (ORDER BY NULL ASC) AS `y` + SUM(`t0`.`x`) OVER (ORDER BY NULL ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `y` FROM `t` AS `t0` ) AS `t1` WHERE diff --git a/ibis/backends/tests/snapshots/test_sql/test_rewrite_context/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_rewrite_context/clickhouse/out.sql index 1224d283f40e7..b70218337a595 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_rewrite_context/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_rewrite_context/clickhouse/out.sql @@ -1,4 +1,4 @@ SELECT - ntile(2) OVER (ORDER BY randCanonical() ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "new_col" + ntile(2) OVER (ORDER BY randCanonical() ASC) - 1 AS "new_col" FROM "test" AS "t0" LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index dff9331a1d269..ebdaf8f92d79b 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -664,11 +664,6 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): True, id="ordered-mean", marks=[ - pytest.mark.notimpl( - ["impala"], - reason="default window semantics are different", - raises=AssertionError, - ), pytest.mark.notimpl( ["risingwave"], raises=PsycoPg2InternalError, @@ -1242,3 +1237,49 @@ def test_windowed_order_by_sequence_is_preserved(con): result = con.execute(expr) value = result.bool_col.loc[result["rank"] == 4].item() assert pd.isna(value) + + +@pytest.mark.notimpl( + ["polars"], + raises=com.OperationNotDefinedError, + reason="window functions aren't yet implemented for the polars backend", +) +@pytest.mark.notimpl( + ["flink"], + raises=AssertionError, + reason="default behavior is the same as bigquery and hasn't been addressed", +) +@pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError) +@pytest.mark.notimpl( + ["risingwave"], + raises=PsycoPg2InternalError, + reason="Window function with empty PARTITION BY is not supported due to performance issues", +) +def test_duplicate_ordered_sum(con): + expr = ( + ibis.memtable( + {"id": range(4), "ranking": [1, 2, 3, 3], "rewards": [10, 20, 30, 40]} + ) + .mutate(csum=lambda t: t.rewards.cumsum(order_by="ranking")) + .order_by("id") + ) + arrow_table = con.to_pyarrow(expr) + + result = arrow_table["csum"].to_pylist() + + assert len(result) == 4 + + assert result[0] == 10 + assert result[1] == 30 + # why? because the order_by column is not unique, so both + # + # 10 -> 10 + 20 -> 10 + 20 + 30 => 10 -> 30 -> 60 + # + # *AND* + # + # 10 -> 10 + 20 -> 10 + 20 + 40 => 10 -> 30 -> 70 + # + # are valid, and it *may* depend on how the computation is distributed in + # the query engine + assert result[2] in (60, 70) + assert result[3] == 100