diff --git a/README.md b/README.md index dc57ad8..84dbb04 100644 --- a/README.md +++ b/README.md @@ -87,12 +87,12 @@ print(references) # ] ``` -### analyze_timespan +### analyze with time bounds -Extract time bounds from filters. +Extract time bounds from filters by enabling `with_timebounds`. ```python -from analytics_query_analyzer import analyze_timespan +from analytics_query_analyzer import analyze from sqlglot import dialects schema = { @@ -117,28 +117,51 @@ where and ordered_at < "2026-01-01" """ -timespans = analyze_timespan(dialects.BigQuery, sql, schema, "production") -print(timespans) +timebounds = analyze( + dialects.BigQuery, + sql, + schema, + "production", + with_timebounds=True, +) +print(timebounds) # [ # { # "database": "production", # "schema": "shop", # "table": "orders", +# "column": "id", +# "lower": None, +# "upper": None, +# }, +# { +# "database": "production", +# "schema": "shop", +# "table": "orders", # "column": "ordered_at", # "lower": "2025-01-01", # "upper": "2026-01-01", -# } +# }, +# { +# "database": "production", +# "schema": "shop", +# "table": "orders", +# "column": "user_id", +# "lower": None, +# "upper": None, +# }, # ] ``` To make `current_date()` deterministic, pass a provider: ```python -timespans = analyze_timespan( +timebounds = analyze( dialects.BigQuery, "select * from shop.orders where ordered_at >= current_date()", schema, "production", + with_timebounds=True, current_date_provider=lambda: "2026-01-01", ) ``` @@ -158,7 +181,7 @@ print(schema) - Authentication uses Application Default Credentials (ADC). - When `table` is omitted, it scans all tables in the dataset. - When both `dataset` and `table` are omitted, it scans all datasets in the project. -- The returned `schema` can be passed directly to `analyze` and `analyze_timespan`. +- The returned `schema` can be passed directly to `analyze`. Fetching from Redshift is also supported: diff --git a/src/analytics_query_analyzer/__init__.py b/src/analytics_query_analyzer/__init__.py index 7cbdd4b..e4e6a4f 100644 --- a/src/analytics_query_analyzer/__init__.py +++ b/src/analytics_query_analyzer/__init__.py @@ -1,4 +1,4 @@ -from .analyzer import analyze, analyze_timespan +from .analyzer import analyze from .schema_builder import build_schema -__all__ = ["analyze", "analyze_timespan", "build_schema"] +__all__ = ["analyze", "build_schema"] diff --git a/src/analytics_query_analyzer/analyzer.py b/src/analytics_query_analyzer/analyzer.py index 0d64823..d0438be 100644 --- a/src/analytics_query_analyzer/analyzer.py +++ b/src/analytics_query_analyzer/analyzer.py @@ -1,7 +1,10 @@ +from typing import Literal, overload + from sqlglot import dialects, optimizer, parse_one from .references_analyzer import ReferencesAnalyzer -from .timespan_analyzer import TimespanAnalyzer +from .timebounds_analyzer import TimeboundsAnalyzer +from .types import ReferenceRow, TimeboundsRow def _resolve_dialect( @@ -12,32 +15,36 @@ def _resolve_dialect( return dialect +@overload def analyze( dialect: str | type[dialects.Dialect], sql: str, schema: dict, default_catalog: str, -): - dialect = _resolve_dialect(dialect) - expression = parse_one(sql, read=dialect) + with_timebounds: Literal[False] = False, + current_date_provider=None, +) -> list[ReferenceRow]: ... - qualified = optimizer.qualify.qualify( - expression, - schema=schema, - catalog=default_catalog, - validate_qualify_columns=False, - ) - analyzer = ReferencesAnalyzer(schema, default_catalog) - return analyzer.analyze(qualified) + +@overload +def analyze( + dialect: str | type[dialects.Dialect], + sql: str, + schema: dict, + default_catalog: str, + with_timebounds: Literal[True], + current_date_provider=None, +) -> list[TimeboundsRow]: ... -def analyze_timespan( +def analyze( dialect: str | type[dialects.Dialect], sql: str, schema: dict, default_catalog: str, + with_timebounds: bool = False, current_date_provider=None, -) -> dict: +) -> list[ReferenceRow] | list[TimeboundsRow]: dialect = _resolve_dialect(dialect) expression = parse_one(sql, read=dialect) @@ -47,8 +54,37 @@ def analyze_timespan( catalog=default_catalog, validate_qualify_columns=False, ) - analyzer = TimespanAnalyzer(schema, default_catalog, current_date_provider) - return analyzer.analyze(qualified) + analyzer = ReferencesAnalyzer(schema, default_catalog) + references: list[ReferenceRow] = analyzer.analyze(qualified) + if not with_timebounds: + return references + return _merge_timebounds( + references, + TimeboundsAnalyzer(schema, default_catalog, current_date_provider).analyze( + qualified + ), + ) + + +def _merge_timebounds( + references: list[ReferenceRow], + timebounds: list[TimeboundsRow], +) -> list[TimeboundsRow]: + merged: dict[tuple[str, str, str, str], TimeboundsRow] = {} + for row in references: + key = (row["database"], row["schema"], row["table"], row["column"]) + merged[key] = {**row, "lower": None, "upper": None} + for row in timebounds: + key = (row["database"], row["schema"], row["table"], row["column"]) + if key in merged: + merged[key]["lower"] = row.get("lower") + merged[key]["upper"] = row.get("upper") + else: + merged[key] = row + return [ + merged[key] + for key in sorted(merged.keys(), key=lambda k: (k[0], k[1], k[2], k[3])) + ] -__all__ = ["analyze", "analyze_timespan"] +__all__ = ["analyze"] diff --git a/src/analytics_query_analyzer/references_analyzer.py b/src/analytics_query_analyzer/references_analyzer.py index 45c7c99..5116117 100644 --- a/src/analytics_query_analyzer/references_analyzer.py +++ b/src/analytics_query_analyzer/references_analyzer.py @@ -2,6 +2,7 @@ from sqlglot.optimizer.scope import traverse_scope from .column_resolver import resolve_column_path +from .types import ReferenceRow class ReferencesAnalyzer: @@ -9,7 +10,7 @@ def __init__(self, schema: dict, default_catalog: str): self.schema = schema self.default_catalog = default_catalog - def analyze(self, expression: exp.Expression) -> list[dict[str, str]]: + def analyze(self, expression: exp.Expression) -> list[ReferenceRow]: references: dict[str, set[str]] = {} for scope in traverse_scope(expression): @@ -36,20 +37,22 @@ def _add(references: dict[str, set[str]], table: str, column: str): references[table] = {column} -def _flatten_references(references: dict[str, set[str]]) -> list[dict[str, str]]: - rows: list[dict[str, str]] = [] +def _flatten_references(references: dict[str, set[str]]) -> list[ReferenceRow]: + rows: list[ReferenceRow] = [] for table_path, columns in references.items(): database, schema, table = table_path.split(".", 2) for column in columns: rows.append( - { - "database": database, - "schema": schema, - "table": table, - "column": column, - } + ReferenceRow( + database=database, + schema=schema, + table=table, + column=column, + ) ) - rows.sort(key=lambda row: (row["database"], row["schema"], row["table"], row["column"])) + rows.sort( + key=lambda row: (row["database"], row["schema"], row["table"], row["column"]) + ) return rows diff --git a/src/analytics_query_analyzer/timespan_analyzer.py b/src/analytics_query_analyzer/timebounds_analyzer.py similarity index 94% rename from src/analytics_query_analyzer/timespan_analyzer.py rename to src/analytics_query_analyzer/timebounds_analyzer.py index 007e671..10e188f 100644 --- a/src/analytics_query_analyzer/timespan_analyzer.py +++ b/src/analytics_query_analyzer/timebounds_analyzer.py @@ -6,17 +6,18 @@ from sqlglot.optimizer.scope import traverse_scope from .column_resolver import resolve_column_path +from .types import TimeboundsRow -class TimespanBounds(TypedDict): +class TimeboundsBounds(TypedDict): lower: date | None upper: date | None -TimespanResults: TypeAlias = dict[str, TimespanBounds] +TimeboundsResults: TypeAlias = dict[str, TimeboundsBounds] -class TimespanAnalyzer: +class TimeboundsAnalyzer: def __init__( self, schema: dict, @@ -27,10 +28,12 @@ def __init__( self.default_catalog = default_catalog self.current_date_provider = current_date_provider or date.today - def analyze(self, expression: exp.Expression) -> list[dict[str, str | None]]: - return _flatten_timespans(_stringify_results(self._analyze_internal(expression))) + def analyze(self, expression: exp.Expression) -> list[TimeboundsRow]: + return _flatten_timebounds( + _stringify_results(self._analyze_internal(expression)) + ) - def _analyze_internal(self, expression: exp.Expression) -> TimespanResults: + def _analyze_internal(self, expression: exp.Expression) -> TimeboundsResults: if isinstance(expression, exp.Union): with_expr = expression.args.get("with") left = expression.this.copy() @@ -43,8 +46,8 @@ def _analyze_internal(self, expression: exp.Expression) -> TimespanResults: return self.merge_bounds_union(left_bounds, right_bounds) return self.analyze_scopes(expression) - def analyze_scopes(self, expression: exp.Expression) -> TimespanResults: - scoped_results: TimespanResults = {} + def analyze_scopes(self, expression: exp.Expression) -> TimeboundsResults: + scoped_results: TimeboundsResults = {} for scope in traverse_scope(expression): conditions = [] where = scope.expression.args.get("where") @@ -81,7 +84,9 @@ def analyze_scopes(self, expression: exp.Expression) -> TimespanResults: return scoped_results - def bounds_for_condition(self, condition: exp.Expression, scope) -> TimespanResults: + def bounds_for_condition( + self, condition: exp.Expression, scope + ) -> TimeboundsResults: if isinstance(condition, exp.Paren): return self.bounds_for_condition(condition.this, scope) if isinstance(condition, exp.Not): @@ -133,7 +138,7 @@ def bounds_for_condition(self, condition: exp.Expression, scope) -> TimespanResu def bounds_for_comparison( self, comparison: exp.Expression, scope - ) -> TimespanResults: + ) -> TimeboundsResults: if isinstance(comparison, exp.Between): column = self.extract_column(comparison.this) low = self.extract_literal(comparison.args.get("low")) @@ -270,7 +275,9 @@ def extract_window_column(self, expr: exp.Expression) -> exp.Column | None: return columns[0] return None - def extract_literal(self, expr: exp.Expression) -> date | None: + def extract_literal(self, expr: exp.Expression | None) -> date | None: + if expr is None: + return None if isinstance(expr, exp.Literal): if expr.is_string: return _normalize_iso_date(expr.this) @@ -532,9 +539,9 @@ def is_time_column( @staticmethod def merge_bounds_and( - left: TimespanResults, right: TimespanResults - ) -> TimespanResults: - merged: TimespanResults = {} + left: TimeboundsResults, right: TimeboundsResults + ) -> TimeboundsResults: + merged: TimeboundsResults = {} for key in set(left) | set(right): if key in left and key in right: lower_left = left[key]["lower"] @@ -562,9 +569,9 @@ def merge_bounds_and( @staticmethod def merge_bounds_or( - left: TimespanResults, right: TimespanResults - ) -> TimespanResults: - merged: TimespanResults = {} + left: TimeboundsResults, right: TimeboundsResults + ) -> TimeboundsResults: + merged: TimeboundsResults = {} for key in set(left) & set(right): lower_left = left[key]["lower"] lower_right = right[key]["lower"] @@ -585,13 +592,13 @@ def merge_bounds_or( @staticmethod def merge_bounds_union( - left: TimespanResults, right: TimespanResults - ) -> TimespanResults: + left: TimeboundsResults, right: TimeboundsResults + ) -> TimeboundsResults: if not left: return right if not right: return left - merged: TimespanResults = {} + merged: TimeboundsResults = {} for key in set(left) | set(right): if key not in left: merged[key] = right[key] @@ -624,7 +631,9 @@ def _normalize_iso_date(value: str) -> date | None: return None -def _stringify_results(results: TimespanResults) -> dict[str, dict[str, str | None]]: +def _stringify_results( + results: TimeboundsResults, +) -> dict[str, dict[str, str | None]]: output: dict[str, dict[str, str | None]] = {} for key, bounds in results.items(): output[key] = { @@ -634,21 +643,21 @@ def _stringify_results(results: TimespanResults) -> dict[str, dict[str, str | No return output -def _flatten_timespans( +def _flatten_timebounds( results: dict[str, dict[str, str | None]], -) -> list[dict[str, str | None]]: - rows: list[dict[str, str | None]] = [] +) -> list[TimeboundsRow]: + rows: list[TimeboundsRow] = [] for full_path, bounds in results.items(): database, schema, table, column = full_path.split(".", 3) rows.append( - { - "database": database, - "schema": schema, - "table": table, - "column": column, - "lower": bounds.get("lower"), - "upper": bounds.get("upper"), - } + TimeboundsRow( + database=database, + schema=schema, + table=table, + column=column, + lower=bounds.get("lower"), + upper=bounds.get("upper"), + ) ) rows.sort( key=lambda row: ( diff --git a/src/analytics_query_analyzer/types.py b/src/analytics_query_analyzer/types.py new file mode 100644 index 0000000..a59eff4 --- /dev/null +++ b/src/analytics_query_analyzer/types.py @@ -0,0 +1,13 @@ +from typing import TypedDict + + +class ReferenceRow(TypedDict): + database: str + schema: str + table: str + column: str + + +class TimeboundsRow(ReferenceRow): + lower: str | None + upper: str | None diff --git a/tests/test_analyze_timespan_redshift.py b/tests/test_analyze_timespan_redshift.py deleted file mode 100644 index f116846..0000000 --- a/tests/test_analyze_timespan_redshift.py +++ /dev/null @@ -1,576 +0,0 @@ -import pytest -from sqlglot import dialects - -from analytics_query_analyzer.analyzer import analyze_timespan - -schema = {'production': {'shop': {'orders': {'id': 'int64', - 'ordered_at': 'timestamp', - 'user_id': 'int64'}, - 'events': {'id': 'int64', - 'event_at': 'timestamp', - 'user_id': 'int64'}, - 'users': {'id': 'int64', 'name': 'string'}}}} - -test_cases = [{'name': 'without condition', - 'sql': 'select ordered_at from shop.orders', - 'expected': []}, - {'name': 'lower bound (>)', - 'sql': "select * from shop.orders where ordered_at > '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'lower bound (>=)', - 'sql': "select * from shop.orders where ordered_at >= '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'upper bound (<)', - 'sql': "select * from shop.orders where ordered_at < '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': None, - 'upper': '2026-01-01'}]}, - {'name': 'upper bound (<=)', - 'sql': "select * from shop.orders where ordered_at <= '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': None, - 'upper': '2026-01-01'}]}, - {'name': 'equality', - 'sql': "select * from shop.orders where ordered_at = '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': '2026-01-01'}]}, - {'name': 'in clause', - 'sql': "select * from shop.orders where ordered_at in ('2026-01-01')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': '2026-01-01'}]}, - {'name': 'between', - 'sql': "select * from shop.orders where ordered_at between '2026-01-01' and " - "'2026-01-02'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': '2026-01-02'}]}, - {'name': 'trunc column', - 'sql': "select * from shop.orders where date_trunc('month', ordered_at) >= " - "'2026-02-10'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-02-01', - 'upper': None}]}, - {'name': 'date function string', - 'sql': "select * from shop.orders where ordered_at >= to_date('2026-01-01', " - "'YYYY-MM-DD')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'date from parts', - 'sql': 'select * from shop.orders where ordered_at >= date_from_parts(2026, 1, 1)', - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'date literal', - 'sql': "select * from shop.orders where ordered_at >= date '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'datetime function string', - 'sql': "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 " - "09:00:00', 'YYYY-MM-DD HH24:MI:SS')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'datetime from parts', - 'sql': 'select * from shop.orders where ordered_at >= make_timestamp(2026, 1, 1, 9, ' - '0, 0)', - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'datetime literal', - 'sql': "select * from shop.orders where ordered_at >= cast('2026-01-01 09:00:00' as " - 'timestamp)', - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'timestamp function string', - 'sql': "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 " - "09:00:00', 'YYYY-MM-DD HH24:MI:SS')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'timestamp function string with offset', - 'sql': "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 " - "00:00:00+09:00', 'YYYY-MM-DD HH24:MI:SSOF')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'timestamp function zone', - 'sql': "select * from shop.orders where ordered_at >= timestamp '2026-01-01 " - "00:00:00+09:00'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'timestamp literal', - 'sql': "select * from shop.orders where ordered_at >= timestamp '2026-01-01 " - "09:00:00'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'timestamp from date', - 'sql': "select * from shop.orders where ordered_at >= timestamp(date '2026-01-01')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'date sub day', - 'sql': 'select * from shop.orders where ordered_at > dateadd(day, -3, date ' - "'2026-01-04')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'interval sub day', - 'sql': "select * from shop.orders where ordered_at > (date '2026-01-04' - interval " - "'3 day')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'date add day', - 'sql': 'select * from shop.orders where ordered_at >= dateadd(day, 2, date ' - "'2026-01-04')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-06', - 'upper': None}]}, - {'name': 'interval add day', - 'sql': "select * from shop.orders where ordered_at >= (date '2026-01-04' + interval " - "'2 day')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-06', - 'upper': None}]}, - {'name': 'date add month', - 'sql': 'select * from shop.orders where ordered_at >= dateadd(month, 1, date ' - "'2026-01-31')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-02-28', - 'upper': None}]}, - {'name': 'trunc literal', - 'sql': "select * from shop.orders where ordered_at >= date_trunc('month', " - "'2026-02-10')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-02-01', - 'upper': None}]}, - {'name': 'cast date', - 'sql': "select * from shop.orders where ordered_at >= cast('2026-01-01' as date)", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'cast datetime', - 'sql': "select * from shop.orders where ordered_at >= cast('2026-01-01 09:00:00' as " - 'timestamp)', - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'cast timestamp', - 'sql': "select * from shop.orders where ordered_at >= cast('2026-01-01 09:00:00' as " - 'timestamptz)', - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'timestamp seconds', - 'sql': 'select * from shop.orders where ordered_at >= to_timestamp(1767229200)', - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'timestamp millis', - 'sql': 'select * from shop.orders where ordered_at >= to_timestamp(1767229200.0)', - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'parse date', - 'sql': "select * from shop.orders where ordered_at >= to_date('2026-01-01', " - "'YYYY-MM-DD')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'parse datetime', - 'sql': "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 " - "09:00:00', 'YYYY-MM-DD HH24:MI:SS')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'parse timestamp', - 'sql': "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 " - "09:00:00+09:00', 'YYYY-MM-DD HH24:MI:SSOF')", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'differently formatted literals', - 'sql': "select * from shop.orders where ordered_at >= '2026-01-01' and ordered_at > " - "'2026-01-02 09:00:00'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-02', - 'upper': None}]}, - {'name': 'non-literal', - 'sql': 'select * from shop.orders where ordered_at > dateadd(month, -1, ' - "to_date('2026-01-01', 'YYYY-MM-DD'))", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2025-12-01', - 'upper': None}]}, - {'name': 'not equal', - 'sql': "select * from shop.orders where ordered_at != '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': None, - 'upper': None}]}, - {'name': 'not equal (alt)', - 'sql': "select * from shop.orders where ordered_at <> '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': None, - 'upper': None}]}, - {'name': 'negation', - 'sql': "select * from shop.orders where not ordered_at > '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': None, - 'upper': '2026-01-01'}]}, - {'name': 'multiple conditions (and)', - 'sql': "select * from orders where ordered_at > '2026-01-01' and ordered_at > " - "'2026-01-02'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-02', - 'upper': None}]}, - {'name': 'multiple conditions (or)', - 'sql': "select * from shop.orders where ordered_at > '2026-01-01' or ordered_at > " - "'2026-01-02'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'lower > upper', - 'sql': "select * from shop.orders where ordered_at > '2026-01-02' and ordered_at < " - "'2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': None, - 'upper': None}]}, - {'name': 'lower > upper between', - 'sql': "select * from shop.orders where ordered_at between '2026-01-02' and " - "'2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': None, - 'upper': None}]}, - {'name': 'function wrapped column', - 'sql': "select * from shop.orders where date(ordered_at) >= '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'function wrapped mixed conditions', - 'sql': "select * from shop.orders where date(ordered_at) >= '2026-01-01' and " - "ordered_at < '2026-01-02'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': '2026-01-02'}]}, - {'name': 'join condition', - 'sql': 'select * from shop.users join shop.orders on users.id = orders.user_id and ' - "orders.ordered_at > '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'multiple tables', - 'sql': 'select * from shop.orders join shop.events on orders.user_id = ' - "events.user_id where orders.ordered_at >= '2026-01-01' and events.event_at > " - "'2026-01-02'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'events', - 'column': 'event_at', - 'lower': '2026-01-02', - 'upper': None}, - {'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'cte', - 'sql': 'with base as (select ordered_at from shop.orders) select * from base where ' - "ordered_at > '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'cte inner and outer', - 'sql': 'with base as (select ordered_at from shop.orders where ordered_at >= ' - "'2026-01-01') select * from base where ordered_at < '2026-01-02'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': '2026-01-02'}]}, - {'name': 'multi cte', - 'sql': 'with base as (select ordered_at from shop.orders), filtered as (select ' - "ordered_at from base where ordered_at >= '2026-01-01') select * from " - "filtered where ordered_at < '2026-01-02'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': '2026-01-02'}]}, - {'name': 'subquery from', - 'sql': 'select * from (select ordered_at from shop.orders) t where t.ordered_at >= ' - "'2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'join subquery', - 'sql': 'select * from shop.users u join (select user_id, ordered_at from ' - "shop.orders) o on u.id = o.user_id and o.ordered_at < '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': None, - 'upper': '2026-01-01'}]}, - {'name': 'qualify clause', - 'sql': 'select min(ordered_at) over (partition by user_id) as first_ordered_at from ' - "shop.orders qualify first_ordered_at > '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'having clause', - 'sql': 'select min(ordered_at) as first_ordered_at from shop.orders having ' - "first_ordered_at > '2026-01-01'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'union both filtered', - 'sql': "select ordered_at from shop.orders where ordered_at >= '2026-01-01' union " - "all select ordered_at from shop.orders where ordered_at >= '2026-01-02'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'union one filtered', - 'sql': "select ordered_at from shop.orders where ordered_at >= '2026-01-01' union " - 'all select ordered_at from shop.orders', - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}, - {'name': 'union different tables', - 'sql': "select ordered_at from shop.orders where ordered_at >= '2026-01-01' union " - "all select event_at from shop.events where event_at < '2026-01-02'", - 'expected': [{'database': 'production', - 'schema': 'shop', - 'table': 'events', - 'column': 'event_at', - 'lower': None, - 'upper': '2026-01-02'}, - {'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}]}] - -@pytest.mark.parametrize( - ("sql", "expected"), - [(case["sql"], case["expected"]) for case in test_cases], - ids=[case["name"] for case in test_cases], -) -def test_analyze_case(sql, expected): - result = analyze_timespan(dialects.Redshift, sql, schema, "production") - assert result == expected - -def test_current_date_provider(): - sql = "select * from shop.orders where ordered_at >= current_date()" - result = analyze_timespan( - dialects.Redshift, - sql, - schema, - "production", - current_date_provider=lambda: "2026-01-01", - ) - assert result == [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}] - -def test_current_datetime_provider(): - sql = "select * from shop.orders where ordered_at >= current_timestamp()" - result = analyze_timespan( - dialects.Redshift, - sql, - schema, - "production", - current_date_provider=lambda: "2026-01-01", - ) - assert result == [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}] - -def test_current_timestamp_provider(): - sql = "select * from shop.orders where ordered_at >= current_timestamp()" - result = analyze_timespan( - dialects.Redshift, - sql, - schema, - "production", - current_date_provider=lambda: "2026-01-01", - ) - assert result == [{'database': 'production', - 'schema': 'shop', - 'table': 'orders', - 'column': 'ordered_at', - 'lower': '2026-01-01', - 'upper': None}] - diff --git a/tests/test_analyze_timespan_bigquery.py b/tests/test_analyze_with_timebounds_bigquery.py similarity index 50% rename from tests/test_analyze_timespan_bigquery.py rename to tests/test_analyze_with_timebounds_bigquery.py index 53ed120..acba3f5 100644 --- a/tests/test_analyze_timespan_bigquery.py +++ b/tests/test_analyze_with_timebounds_bigquery.py @@ -1,7 +1,7 @@ import pytest from sqlglot import dialects -from analytics_query_analyzer.analyzer import analyze_timespan +from analytics_query_analyzer.analyzer import analyze schema = { "production": { @@ -17,12 +17,29 @@ { "name": "without condition", "sql": "select ordered_at from shop.orders", - "expected": [], + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": None, + "upper": None, + } + ], }, { "name": "lower bound (>)", "sql": "select * from shop.orders where ordered_at > '2026-01-01'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -30,13 +47,29 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { "name": "lower bound (>=)", "sql": "select * from shop.orders where ordered_at >= '2026-01-01'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -44,13 +77,29 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { "name": "upper bound (<)", "sql": "select * from shop.orders where ordered_at < '2026-01-01'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -58,13 +107,29 @@ "column": "ordered_at", "lower": None, "upper": "2026-01-01", - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { "name": "upper bound (<=)", "sql": "select * from shop.orders where ordered_at <= '2026-01-01'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -72,13 +137,29 @@ "column": "ordered_at", "lower": None, "upper": "2026-01-01", - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { "name": "equality", "sql": "select * from shop.orders where ordered_at = '2026-01-01'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -86,13 +167,29 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": "2026-01-01", - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { "name": "in clause", "sql": "select * from shop.orders where ordered_at in ('2026-01-01')", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -100,7 +197,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": "2026-01-01", - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -108,6 +213,14 @@ "sql": "select * from shop.orders where ordered_at between '2026-01-01' and " "'2026-01-02'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -115,7 +228,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": "2026-01-02", - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -123,6 +244,14 @@ "sql": "select * from shop.orders where date_trunc(ordered_at, month) >= " "'2026-02-10'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -130,13 +259,29 @@ "column": "ordered_at", "lower": "2026-02-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { "name": "date function string", "sql": "select * from shop.orders where ordered_at >= date('2026-01-01')", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -144,13 +289,29 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { "name": "date from parts", "sql": "select * from shop.orders where ordered_at >= date(2026, 1, 1)", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -158,13 +319,29 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { "name": "date literal", "sql": "select * from shop.orders where ordered_at >= date '2026-01-01'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -172,7 +349,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -180,6 +365,14 @@ "sql": "select * from shop.orders where ordered_at >= datetime('2026-01-01 " "09:00:00')", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -187,13 +380,29 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { "name": "datetime from parts", "sql": "select * from shop.orders where ordered_at >= datetime(2026, 1, 1, 9, 0, 0)", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -201,13 +410,29 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { "name": "datetime literal", "sql": "select * from shop.orders where ordered_at >= datetime '2026-01-01 09:00:00'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -215,7 +440,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -223,6 +456,14 @@ "sql": "select * from shop.orders where ordered_at >= timestamp('2026-01-01 " "09:00:00')", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -230,7 +471,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -238,6 +487,14 @@ "sql": "select * from shop.orders where ordered_at >= timestamp('2026-01-01 " "00:00:00+09:00')", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -245,7 +502,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -253,6 +518,14 @@ "sql": "select * from shop.orders where ordered_at >= timestamp('2026-01-01 " "00:00:00', 'Asia/Tokyo')", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -260,7 +533,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -272,16 +553,40 @@ "database": "production", "schema": "shop", "table": "orders", - "column": "ordered_at", - "lower": "2026-01-01", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, "upper": None, - } + }, ], }, { "name": "timestamp from date", "sql": "select * from shop.orders where ordered_at >= timestamp(date '2026-01-01')", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -289,7 +594,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -297,6 +610,14 @@ "sql": "select * from shop.orders where ordered_at > date_sub(date('2026-01-04'), " "interval 3 day)", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -304,7 +625,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -312,6 +641,14 @@ "sql": "select * from shop.orders where ordered_at >= date_add(date('2026-01-04'), " "interval 2 day)", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -319,7 +656,15 @@ "column": "ordered_at", "lower": "2026-01-06", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -327,6 +672,14 @@ "sql": "select * from shop.orders where ordered_at >= date_add(date('2026-01-31'), " "interval 1 month)", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -334,7 +687,15 @@ "column": "ordered_at", "lower": "2026-02-28", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -342,6 +703,14 @@ "sql": "select * from shop.orders where ordered_at >= date_trunc('2026-02-10', " "month)", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -349,13 +718,29 @@ "column": "ordered_at", "lower": "2026-02-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { "name": "cast date", "sql": "select * from shop.orders where ordered_at >= cast('2026-01-01' as date)", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -363,7 +748,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -371,6 +764,14 @@ "sql": "select * from shop.orders where ordered_at >= cast('2026-01-01 09:00:00' as " "datetime)", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -378,7 +779,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -386,6 +795,14 @@ "sql": "select * from shop.orders where ordered_at >= cast('2026-01-01 09:00:00' as " "timestamp)", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -393,13 +810,29 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { "name": "timestamp seconds", "sql": "select * from shop.orders where ordered_at >= timestamp_seconds(1767229200)", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -407,7 +840,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -415,6 +856,14 @@ "sql": "select * from shop.orders where ordered_at >= " "timestamp_millis(1767229200000)", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -422,7 +871,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -430,6 +887,14 @@ "sql": "select * from shop.orders where ordered_at >= parse_date('%Y-%m-%d', " "'2026-01-01')", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -437,7 +902,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -445,6 +918,14 @@ "sql": "select * from shop.orders where ordered_at >= parse_datetime('%Y-%m-%d " "%H:%M:%S', '2026-01-01 09:00:00')", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -452,7 +933,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -460,6 +949,14 @@ "sql": "select * from shop.orders where ordered_at >= parse_timestamp('%Y-%m-%d " "%H:%M:%S', '2026-01-01 09:00:00')", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -467,7 +964,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -475,6 +980,14 @@ "sql": "select * from shop.orders where ordered_at >= '2026-01-01' and ordered_at > " "'2026-01-02 09:00:00'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -482,7 +995,15 @@ "column": "ordered_at", "lower": "2026-01-02", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -490,6 +1011,14 @@ "sql": "select * from shop.orders where ordered_at > date_sub('2026-01-01', interval " "1 month)", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -497,7 +1026,15 @@ "column": "ordered_at", "lower": "2025-12-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -508,16 +1045,40 @@ "database": "production", "schema": "shop", "table": "orders", - "column": "ordered_at", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", "lower": None, "upper": None, - } + }, ], }, { "name": "not equal (alt)", "sql": "select * from shop.orders where ordered_at <> '2026-01-01'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -525,13 +1086,29 @@ "column": "ordered_at", "lower": None, "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { "name": "negation", "sql": "select * from shop.orders where not ordered_at > '2026-01-01'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -539,7 +1116,15 @@ "column": "ordered_at", "lower": None, "upper": "2026-01-01", - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -547,6 +1132,14 @@ "sql": "select * from orders where ordered_at > '2026-01-01' and ordered_at > " "'2026-01-02'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -554,7 +1147,15 @@ "column": "ordered_at", "lower": "2026-01-02", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -562,6 +1163,14 @@ "sql": "select * from shop.orders where ordered_at > '2026-01-01' or ordered_at > " "'2026-01-02'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -569,7 +1178,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -577,6 +1194,14 @@ "sql": "select * from shop.orders where ordered_at > '2026-01-02' and ordered_at < " "'2026-01-01'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -584,7 +1209,15 @@ "column": "ordered_at", "lower": None, "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -592,6 +1225,14 @@ "sql": "select * from shop.orders where ordered_at between '2026-01-02' and " "'2026-01-01'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -599,13 +1240,29 @@ "column": "ordered_at", "lower": None, "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { "name": "function wrapped column", "sql": "select * from shop.orders where date(ordered_at) >= '2026-01-01'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -613,7 +1270,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -621,6 +1286,14 @@ "sql": "select * from shop.orders where date(ordered_at) >= '2026-01-01' and " "ordered_at < '2026-01-02'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -628,7 +1301,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": "2026-01-02", - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -636,6 +1317,14 @@ "sql": "select * from shop.users join shop.orders on users.id = orders.user_id and " "orders.ordered_at > '2026-01-01'", "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -643,7 +1332,31 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "users", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "users", + "column": "name", + "lower": None, + "upper": None, + }, ], }, { @@ -660,6 +1373,30 @@ "lower": "2026-01-02", "upper": None, }, + { + "database": "production", + "schema": "shop", + "table": "events", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "events", + "column": "user_id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -668,6 +1405,14 @@ "lower": "2026-01-01", "upper": None, }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -743,7 +1488,31 @@ "column": "ordered_at", "lower": None, "upper": "2026-01-01", - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "users", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "users", + "column": "name", + "lower": None, + "upper": None, + }, ], }, { @@ -758,7 +1527,15 @@ "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ], }, { @@ -838,20 +1615,31 @@ ids=[case["name"] for case in test_cases], ) def test_analyze_case(sql, expected): - result = analyze_timespan(dialects.BigQuery, sql, schema, "production") + result = analyze( + dialects.BigQuery, sql, schema, "production", with_timebounds=True + ) assert result == expected def test_current_date_provider(): sql = "select * from shop.orders where ordered_at >= current_date()" - result = analyze_timespan( + result = analyze( dialects.BigQuery, sql, schema, "production", + with_timebounds=True, current_date_provider=lambda: "2026-01-01", ) assert result == [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -859,20 +1647,37 @@ def test_current_date_provider(): "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ] def test_current_datetime_provider(): sql = "select * from shop.orders where ordered_at >= current_datetime()" - result = analyze_timespan( + result = analyze( dialects.BigQuery, sql, schema, "production", + with_timebounds=True, current_date_provider=lambda: "2026-01-01", ) assert result == [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -880,20 +1685,37 @@ def test_current_datetime_provider(): "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ] def test_current_timestamp_provider(): sql = "select * from shop.orders where ordered_at >= current_timestamp()" - result = analyze_timespan( + result = analyze( dialects.BigQuery, sql, schema, "production", + with_timebounds=True, current_date_provider=lambda: "2026-01-01", ) assert result == [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, { "database": "production", "schema": "shop", @@ -901,5 +1723,13 @@ def test_current_timestamp_provider(): "column": "ordered_at", "lower": "2026-01-01", "upper": None, - } + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, ] diff --git a/tests/test_analyze_with_timebounds_redshift.py b/tests/test_analyze_with_timebounds_redshift.py new file mode 100644 index 0000000..144e38a --- /dev/null +++ b/tests/test_analyze_with_timebounds_redshift.py @@ -0,0 +1,1799 @@ +import pytest +from sqlglot import dialects + +from analytics_query_analyzer.analyzer import analyze + +schema = { + "production": { + "shop": { + "orders": {"id": "int64", "ordered_at": "timestamp", "user_id": "int64"}, + "events": {"id": "int64", "event_at": "timestamp", "user_id": "int64"}, + "users": {"id": "int64", "name": "string"}, + } + } +} + +test_cases = [ + { + "name": "without condition", + "sql": "select ordered_at from shop.orders", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": None, + "upper": None, + } + ], + }, + { + "name": "lower bound (>)", + "sql": "select * from shop.orders where ordered_at > '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "lower bound (>=)", + "sql": "select * from shop.orders where ordered_at >= '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "upper bound (<)", + "sql": "select * from shop.orders where ordered_at < '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": None, + "upper": "2026-01-01", + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "upper bound (<=)", + "sql": "select * from shop.orders where ordered_at <= '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": None, + "upper": "2026-01-01", + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "equality", + "sql": "select * from shop.orders where ordered_at = '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": "2026-01-01", + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "in clause", + "sql": "select * from shop.orders where ordered_at in ('2026-01-01')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": "2026-01-01", + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "between", + "sql": "select * from shop.orders where ordered_at between '2026-01-01' and " + "'2026-01-02'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": "2026-01-02", + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "trunc column", + "sql": "select * from shop.orders where date_trunc('month', ordered_at) >= " + "'2026-02-10'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-02-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "date function string", + "sql": "select * from shop.orders where ordered_at >= to_date('2026-01-01', " + "'YYYY-MM-DD')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "date from parts", + "sql": "select * from shop.orders where ordered_at >= date_from_parts(2026, 1, 1)", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "date literal", + "sql": "select * from shop.orders where ordered_at >= date '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "datetime function string", + "sql": "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 " + "09:00:00', 'YYYY-MM-DD HH24:MI:SS')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "datetime from parts", + "sql": "select * from shop.orders where ordered_at >= make_timestamp(2026, 1, 1, 9, " + "0, 0)", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "datetime literal", + "sql": "select * from shop.orders where ordered_at >= cast('2026-01-01 09:00:00' as " + "timestamp)", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "timestamp function string", + "sql": "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 " + "09:00:00', 'YYYY-MM-DD HH24:MI:SS')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "timestamp function string with offset", + "sql": "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 " + "00:00:00+09:00', 'YYYY-MM-DD HH24:MI:SSOF')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "timestamp function zone", + "sql": "select * from shop.orders where ordered_at >= timestamp '2026-01-01 " + "00:00:00+09:00'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "timestamp literal", + "sql": "select * from shop.orders where ordered_at >= timestamp '2026-01-01 " + "09:00:00'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "timestamp from date", + "sql": "select * from shop.orders where ordered_at >= timestamp(date '2026-01-01')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "date sub day", + "sql": "select * from shop.orders where ordered_at > dateadd(day, -3, date " + "'2026-01-04')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "interval sub day", + "sql": "select * from shop.orders where ordered_at > (date '2026-01-04' - interval " + "'3 day')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "date add day", + "sql": "select * from shop.orders where ordered_at >= dateadd(day, 2, date " + "'2026-01-04')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-06", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "interval add day", + "sql": "select * from shop.orders where ordered_at >= (date '2026-01-04' + interval " + "'2 day')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-06", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "date add month", + "sql": "select * from shop.orders where ordered_at >= dateadd(month, 1, date " + "'2026-01-31')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-02-28", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "trunc literal", + "sql": "select * from shop.orders where ordered_at >= date_trunc('month', " + "'2026-02-10')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-02-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "cast date", + "sql": "select * from shop.orders where ordered_at >= cast('2026-01-01' as date)", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "cast datetime", + "sql": "select * from shop.orders where ordered_at >= cast('2026-01-01 09:00:00' as " + "timestamp)", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "cast timestamp", + "sql": "select * from shop.orders where ordered_at >= cast('2026-01-01 09:00:00' as " + "timestamptz)", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "timestamp seconds", + "sql": "select * from shop.orders where ordered_at >= to_timestamp(1767229200)", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "timestamp millis", + "sql": "select * from shop.orders where ordered_at >= to_timestamp(1767229200.0)", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "parse date", + "sql": "select * from shop.orders where ordered_at >= to_date('2026-01-01', " + "'YYYY-MM-DD')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "parse datetime", + "sql": "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 " + "09:00:00', 'YYYY-MM-DD HH24:MI:SS')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "parse timestamp", + "sql": "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 " + "09:00:00+09:00', 'YYYY-MM-DD HH24:MI:SSOF')", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "differently formatted literals", + "sql": "select * from shop.orders where ordered_at >= '2026-01-01' and ordered_at > " + "'2026-01-02 09:00:00'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-02", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "non-literal", + "sql": "select * from shop.orders where ordered_at > dateadd(month, -1, " + "to_date('2026-01-01', 'YYYY-MM-DD'))", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2025-12-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "not equal", + "sql": "select * from shop.orders where ordered_at != '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "not equal (alt)", + "sql": "select * from shop.orders where ordered_at <> '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "negation", + "sql": "select * from shop.orders where not ordered_at > '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": None, + "upper": "2026-01-01", + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "multiple conditions (and)", + "sql": "select * from orders where ordered_at > '2026-01-01' and ordered_at > " + "'2026-01-02'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-02", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "multiple conditions (or)", + "sql": "select * from shop.orders where ordered_at > '2026-01-01' or ordered_at > " + "'2026-01-02'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "lower > upper", + "sql": "select * from shop.orders where ordered_at > '2026-01-02' and ordered_at < " + "'2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "lower > upper between", + "sql": "select * from shop.orders where ordered_at between '2026-01-02' and " + "'2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "function wrapped column", + "sql": "select * from shop.orders where date(ordered_at) >= '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "function wrapped mixed conditions", + "sql": "select * from shop.orders where date(ordered_at) >= '2026-01-01' and " + "ordered_at < '2026-01-02'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": "2026-01-02", + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "join condition", + "sql": "select * from shop.users join shop.orders on users.id = orders.user_id and " + "orders.ordered_at > '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "users", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "users", + "column": "name", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "multiple tables", + "sql": "select * from shop.orders join shop.events on orders.user_id = " + "events.user_id where orders.ordered_at >= '2026-01-01' and events.event_at > " + "'2026-01-02'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "events", + "column": "event_at", + "lower": "2026-01-02", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "events", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "events", + "column": "user_id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "cte", + "sql": "with base as (select ordered_at from shop.orders) select * from base where " + "ordered_at > '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + } + ], + }, + { + "name": "cte inner and outer", + "sql": "with base as (select ordered_at from shop.orders where ordered_at >= " + "'2026-01-01') select * from base where ordered_at < '2026-01-02'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": "2026-01-02", + } + ], + }, + { + "name": "multi cte", + "sql": "with base as (select ordered_at from shop.orders), filtered as (select " + "ordered_at from base where ordered_at >= '2026-01-01') select * from " + "filtered where ordered_at < '2026-01-02'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": "2026-01-02", + } + ], + }, + { + "name": "subquery from", + "sql": "select * from (select ordered_at from shop.orders) t where t.ordered_at >= " + "'2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + } + ], + }, + { + "name": "join subquery", + "sql": "select * from shop.users u join (select user_id, ordered_at from " + "shop.orders) o on u.id = o.user_id and o.ordered_at < '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": None, + "upper": "2026-01-01", + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "users", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "users", + "column": "name", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "qualify clause", + "sql": "select min(ordered_at) over (partition by user_id) as first_ordered_at from " + "shop.orders qualify first_ordered_at > '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ], + }, + { + "name": "having clause", + "sql": "select min(ordered_at) as first_ordered_at from shop.orders having " + "first_ordered_at > '2026-01-01'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + } + ], + }, + { + "name": "union both filtered", + "sql": "select ordered_at from shop.orders where ordered_at >= '2026-01-01' union " + "all select ordered_at from shop.orders where ordered_at >= '2026-01-02'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + } + ], + }, + { + "name": "union one filtered", + "sql": "select ordered_at from shop.orders where ordered_at >= '2026-01-01' union " + "all select ordered_at from shop.orders", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + } + ], + }, + { + "name": "union different tables", + "sql": "select ordered_at from shop.orders where ordered_at >= '2026-01-01' union " + "all select event_at from shop.events where event_at < '2026-01-02'", + "expected": [ + { + "database": "production", + "schema": "shop", + "table": "events", + "column": "event_at", + "lower": None, + "upper": "2026-01-02", + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + ], + }, +] + + +@pytest.mark.parametrize( + ("sql", "expected"), + [(case["sql"], case["expected"]) for case in test_cases], + ids=[case["name"] for case in test_cases], +) +def test_analyze_case(sql, expected): + result = analyze( + dialects.Redshift, sql, schema, "production", with_timebounds=True + ) + assert result == expected + + +def test_current_date_provider(): + sql = "select * from shop.orders where ordered_at >= current_date()" + result = analyze( + dialects.Redshift, + sql, + schema, + "production", + with_timebounds=True, + current_date_provider=lambda: "2026-01-01", + ) + assert result == [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ] + + +def test_current_datetime_provider(): + sql = "select * from shop.orders where ordered_at >= current_timestamp()" + result = analyze( + dialects.Redshift, + sql, + schema, + "production", + with_timebounds=True, + current_date_provider=lambda: "2026-01-01", + ) + assert result == [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ] + + +def test_current_timestamp_provider(): + sql = "select * from shop.orders where ordered_at >= current_timestamp()" + result = analyze( + dialects.Redshift, + sql, + schema, + "production", + with_timebounds=True, + current_date_provider=lambda: "2026-01-01", + ) + assert result == [ + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "id", + "lower": None, + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "ordered_at", + "lower": "2026-01-01", + "upper": None, + }, + { + "database": "production", + "schema": "shop", + "table": "orders", + "column": "user_id", + "lower": None, + "upper": None, + }, + ]