diff --git a/src/analytics_query_analyzer/timespan_analyzer.py b/src/analytics_query_analyzer/timespan_analyzer.py index 95e9a3f..20a978e 100644 --- a/src/analytics_query_analyzer/timespan_analyzer.py +++ b/src/analytics_query_analyzer/timespan_analyzer.py @@ -315,8 +315,10 @@ def extract_literal(self, expr: exp.Expression) -> date | None: return self._extract_parts_date(expr) if isinstance(expr, exp.UnixToTime): return self._extract_epoch_date(expr) - if isinstance(expr, (exp.DateAdd, exp.DateSub)): + if isinstance(expr, (exp.DateAdd, exp.DateSub, exp.TsOrDsAdd)): return self._extract_date_arithmetic(expr) + if isinstance(expr, (exp.Add, exp.Sub)): + return self._extract_interval_arithmetic(expr) return None def _evaluate_current_date(self) -> date | None: @@ -388,6 +390,31 @@ def _extract_date_arithmetic(self, expr: exp.Expression) -> date | None: delta_value = -delta_value return self._apply_date_delta(base, delta_value, unit) + def _extract_interval_arithmetic(self, expr: exp.Expression) -> date | None: + """Resolve `<date> +/- INTERVAL` or `INTERVAL + <date>` to a date; return None when it cannot be resolved.""" + left = expr.args.get("this") + right = expr.args.get("expression") + if isinstance(right, exp.Interval): + base = self.extract_literal(left) + delta = self._extract_int_literal(right.args.get("this")) + unit = self._extract_unit_literal(right.args.get("unit")) + if base is None or delta is None or not unit: + return None + if isinstance(expr, exp.Sub): + delta = -delta + return self._apply_date_delta(base, delta, unit) + if isinstance(left, exp.Interval): + # Only addition commutes: `INTERVAL - <date>` is not a date, so do not treat it as `<date> + INTERVAL`. + if isinstance(expr, exp.Sub): + return None + base = self.extract_literal(right) + delta = self._extract_int_literal(left.args.get("this")) + unit = self._extract_unit_literal(left.args.get("unit")) + if base is None or delta is None or not unit: + return None + return self._apply_date_delta(base, delta, unit) + return None + def _extract_trunc_date(self, expr: exp.Expression) -> date | None: target = expr.args.get("this") or expr.this unit_expr = expr.args.get("unit") or expr.args.get("part") @@ -425,7 +452,13 @@ def _extract_int_literal(self, expr: 
exp.Expression | None) -> int | None: try: return int(expr.this) except (TypeError, ValueError): - return None + try: + return int(float(expr.this)) + except (TypeError, ValueError, OverflowError): + return None + if isinstance(expr, exp.Neg): + value = self._extract_int_literal(expr.this) + return None if value is None else -value if isinstance(expr, exp.Cast): return self._extract_int_literal(expr.this) if isinstance(expr, exp.Paren): diff --git a/tests/test_analyze.py b/tests/test_analyze_bigquery.py similarity index 100% rename from tests/test_analyze.py rename to tests/test_analyze_bigquery.py diff --git a/tests/test_analyze_redshift.py b/tests/test_analyze_redshift.py new file mode 100644 index 0000000..0889d1f --- /dev/null +++ b/tests/test_analyze_redshift.py @@ -0,0 +1,142 @@ +import pytest +from sqlglot import dialects + +from analytics_query_analyzer.analyzer import analyze + + +schema = { + "production": { + "shop": { + "orders": { + "id": "int64", + "ordered_at": "timestamp", + "user_id": "int64", + "payment_amount": "int64", + "payment_method": "string", + "items": "super", + }, + "users": {"id": "int64", "name": "varchar"}, + "items": { + "id": "int64", + "name": "varchar", + "brand": "super", + }, + } + }, + "development": { + "shop": { + "users": {"id": "int64", "name": "varchar"}, + } + }, +} + + +test_cases = [ + {"name": "not referencing a table", "sql": "select 1", "expected": {}}, + { + "name": "referencing a table but not columns", + "sql": "select count(1) from shop.orders", + "expected": {}, + }, + { + "name": "simple column reference", + "sql": "select user_id from shop.orders", + "expected": {"production.shop.orders": {"user_id"}}, + }, + { + "name": "qualifying a table with a project", + "sql": "select user_id from production.shop.orders", + "expected": {"production.shop.orders": {"user_id"}}, + }, + { + "name": "qualifying a table with a non-default project", + "sql": "select id from development.shop.users", + "expected": {"development.shop.users": 
{"id"}}, + }, + { + "name": "where clause reference", + "sql": "select count(1) from shop.orders where ordered_at >= '2026-01-01'", + "expected": {"production.shop.orders": {"ordered_at"}}, + }, + { + "name": "join reference", + "sql": "select count(1) from shop.orders join shop.users on orders.user_id = users.id", + "expected": { + "production.shop.orders": {"user_id"}, + "production.shop.users": {"id"}, + }, + }, + { + "name": "referencing a column in an ORDER BY clause", + "sql": "select user_id from shop.orders order by payment_amount desc", + "expected": {"production.shop.orders": {"user_id", "payment_amount"}}, + }, + { + "name": "wildcard pattern", + "sql": "select * from shop.users", + "expected": { + "production.shop.users": set(schema["production"]["shop"]["users"].keys()) + }, + }, + { + "name": "using a wildcard with COUNT", + "sql": "select count(*) from shop.users", + "expected": {}, + }, + { + "name": "selecting multiple columns", + "sql": "select id, name from shop.users", + "expected": {"production.shop.users": {"id", "name"}}, + }, + { + "name": "CTE pattern", + "sql": """ + with amount_by_method as ( + select + payment_method, + sum(payment_amount) as total_amount + from + shop.orders + group by + 1 + ) + select + * + from + amount_by_method + """, + "expected": {"production.shop.orders": {"payment_method", "payment_amount"}}, + }, + { + "name": "referencing a field of a super column", + "sql": "select brand['category'] from shop.items", + "expected": {"production.shop.items": {"brand"}}, + }, + { + "name": "referencing multiple fields of a super column", + "sql": "select brand['category'], brand['name'] from shop.items", + "expected": {"production.shop.items": {"brand"}}, + }, + { + "name": "referencing a field in a super column filter", + "sql": """ + select + count(1) + from + shop.orders + where + json_extract_path_text(items, 'amount') is not null + """, + "expected": {"production.shop.orders": {"items"}}, + }, +] + + 
+@pytest.mark.parametrize( + ("sql", "expected"), + [(case["sql"], case["expected"]) for case in test_cases], + ids=[case["name"] for case in test_cases], +) +def test_analyze_case(sql, expected): + result = analyze(dialects.Redshift, sql, schema, "production") + assert result == expected diff --git a/tests/test_analyze_timespan.py b/tests/test_analyze_timespan_bigquery.py similarity index 100% rename from tests/test_analyze_timespan.py rename to tests/test_analyze_timespan_bigquery.py diff --git a/tests/test_analyze_timespan_redshift.py b/tests/test_analyze_timespan_redshift.py new file mode 100644 index 0000000..e45a451 --- /dev/null +++ b/tests/test_analyze_timespan_redshift.py @@ -0,0 +1,608 @@ +import pytest +from sqlglot import dialects + +from analytics_query_analyzer.analyzer import analyze_timespan + +schema = { + "production": { + "shop": { + "orders": { + "id": "int64", + "ordered_at": "timestamp", + "user_id": "int64", + }, + "events": { + "id": "int64", + "event_at": "timestamp", + "user_id": "int64", + }, + "users": {"id": "int64", "name": "string"}, + } + }, +} + +test_cases = [ + { + "name": "without condition", + "sql": "select ordered_at from shop.orders", + "expected": {}, + }, + { + "name": "lower bound (>)", + "sql": "select * from shop.orders where ordered_at > '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": {"lower": "2026-01-01", "upper": None} + }, + }, + { + "name": "lower bound (>=)", + "sql": "select * from shop.orders where ordered_at >= '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": {"lower": "2026-01-01", "upper": None} + }, + }, + { + "name": "upper bound (<)", + "sql": "select * from shop.orders where ordered_at < '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": {"lower": None, "upper": "2026-01-01"} + }, + }, + { + "name": "upper bound (<=)", + "sql": "select * from shop.orders where ordered_at <= '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": 
{"lower": None, "upper": "2026-01-01"} + }, + }, + { + "name": "equality", + "sql": "select * from shop.orders where ordered_at = '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": "2026-01-01", + } + }, + }, + { + "name": "in clause", + "sql": "select * from shop.orders where ordered_at in ('2026-01-01')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": "2026-01-01", + } + }, + }, + { + "name": "between", + "sql": "select * from shop.orders where ordered_at between '2026-01-01' and '2026-01-02'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": "2026-01-02", + } + }, + }, + { + "name": "trunc column", + "sql": "select * from shop.orders where date_trunc('month', ordered_at) >= '2026-02-10'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-02-01", + "upper": None, + } + }, + }, + { + "name": "date function string", + "sql": "select * from shop.orders where ordered_at >= to_date('2026-01-01', 'YYYY-MM-DD')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "date from parts", + "sql": "select * from shop.orders where ordered_at >= date_from_parts(2026, 1, 1)", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "date literal", + "sql": "select * from shop.orders where ordered_at >= date '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "datetime function string", + "sql": "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 09:00:00', 'YYYY-MM-DD HH24:MI:SS')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "datetime from parts", + "sql": "select * from 
shop.orders where ordered_at >= make_timestamp(2026, 1, 1, 9, 0, 0)", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "datetime literal", + "sql": "select * from shop.orders where ordered_at >= cast('2026-01-01 09:00:00' as timestamp)", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "timestamp function string", + "sql": "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 09:00:00', 'YYYY-MM-DD HH24:MI:SS')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "timestamp function string with offset", + "sql": "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 00:00:00+09:00', 'YYYY-MM-DD HH24:MI:SSOF')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "timestamp function zone", + "sql": "select * from shop.orders where ordered_at >= timestamp '2026-01-01 00:00:00+09:00'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "timestamp literal", + "sql": "select * from shop.orders where ordered_at >= timestamp '2026-01-01 09:00:00'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "timestamp from date", + "sql": "select * from shop.orders where ordered_at >= timestamp(date '2026-01-01')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "date sub day", + "sql": "select * from shop.orders where ordered_at > dateadd(day, -3, date '2026-01-04')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "interval sub day", + 
"sql": "select * from shop.orders where ordered_at > (date '2026-01-04' - interval '3 day')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "date add day", + "sql": "select * from shop.orders where ordered_at >= dateadd(day, 2, date '2026-01-04')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-06", + "upper": None, + } + }, + }, + { + "name": "interval add day", + "sql": "select * from shop.orders where ordered_at >= (date '2026-01-04' + interval '2 day')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-06", + "upper": None, + } + }, + }, + { + "name": "date add month", + "sql": "select * from shop.orders where ordered_at >= dateadd(month, 1, date '2026-01-31')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-02-28", + "upper": None, + } + }, + }, + { + "name": "trunc literal", + "sql": "select * from shop.orders where ordered_at >= date_trunc('month', '2026-02-10')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-02-01", + "upper": None, + } + }, + }, + { + "name": "cast date", + "sql": "select * from shop.orders where ordered_at >= cast('2026-01-01' as date)", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "cast datetime", + "sql": "select * from shop.orders where ordered_at >= cast('2026-01-01 09:00:00' as timestamp)", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "cast timestamp", + "sql": "select * from shop.orders where ordered_at >= cast('2026-01-01 09:00:00' as timestamptz)", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "timestamp seconds", + "sql": "select * from shop.orders where ordered_at >= to_timestamp(1767229200)", + 
"expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "timestamp millis", + "sql": "select * from shop.orders where ordered_at >= to_timestamp(1767229200.0)", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "parse date", + "sql": "select * from shop.orders where ordered_at >= to_date('2026-01-01', 'YYYY-MM-DD')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "parse datetime", + "sql": "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 09:00:00', 'YYYY-MM-DD HH24:MI:SS')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "parse timestamp", + "sql": "select * from shop.orders where ordered_at >= to_timestamp('2026-01-01 09:00:00+09:00', 'YYYY-MM-DD HH24:MI:SSOF')", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "differently formatted literals", + "sql": "select * from shop.orders where ordered_at >= '2026-01-01' and ordered_at > '2026-01-02 09:00:00'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-02", + "upper": None, + } + }, + }, + { + "name": "non-literal", + "sql": "select * from shop.orders where ordered_at > dateadd(month, -1, to_date('2026-01-01', 'YYYY-MM-DD'))", + "expected": { + "production.shop.orders.ordered_at": {"lower": "2025-12-01", "upper": None} + }, + }, + { + "name": "not equal", + "sql": "select * from shop.orders where ordered_at != '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": {"lower": None, "upper": None} + }, + }, + { + "name": "not equal (alt)", + "sql": "select * from shop.orders where ordered_at <> '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": {"lower": 
None, "upper": None} + }, + }, + { + "name": "negation", + "sql": "select * from shop.orders where not ordered_at > '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": {"lower": None, "upper": "2026-01-01"} + }, + }, + { + "name": "multiple conditions (and)", + "sql": "select * from orders where ordered_at > '2026-01-01' and ordered_at > '2026-01-02'", + "expected": { + "production.shop.orders.ordered_at": {"lower": "2026-01-02", "upper": None} + }, + }, + { + "name": "multiple conditions (or)", + "sql": "select * from shop.orders where ordered_at > '2026-01-01' or ordered_at > '2026-01-02'", + "expected": { + "production.shop.orders.ordered_at": {"lower": "2026-01-01", "upper": None} + }, + }, + { + "name": "lower > upper", + "sql": "select * from shop.orders where ordered_at > '2026-01-02' and ordered_at < '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": {"lower": None, "upper": None} + }, + }, + { + "name": "lower > upper between", + "sql": "select * from shop.orders where ordered_at between '2026-01-02' and '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": {"lower": None, "upper": None} + }, + }, + { + "name": "function wrapped column", + "sql": "select * from shop.orders where date(ordered_at) >= '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "function wrapped mixed conditions", + "sql": "select * from shop.orders where date(ordered_at) >= '2026-01-01' and ordered_at < '2026-01-02'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": "2026-01-02", + } + }, + }, + { + "name": "join condition", + "sql": "select * from shop.users join shop.orders on users.id = orders.user_id and orders.ordered_at > '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": {"lower": "2026-01-01", "upper": None} + }, + }, + { + "name": "multiple tables", + "sql": "select * from 
shop.orders join shop.events on orders.user_id = events.user_id where orders.ordered_at >= '2026-01-01' and events.event_at > '2026-01-02'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + }, + "production.shop.events.event_at": { + "lower": "2026-01-02", + "upper": None, + }, + }, + }, + { + "name": "cte", + "sql": "with base as (select ordered_at from shop.orders) select * from base where ordered_at > '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": {"lower": "2026-01-01", "upper": None} + }, + }, + { + "name": "cte inner and outer", + "sql": "with base as (select ordered_at from shop.orders where ordered_at >= '2026-01-01') select * from base where ordered_at < '2026-01-02'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": "2026-01-02", + } + }, + }, + { + "name": "multi cte", + "sql": "with base as (select ordered_at from shop.orders), filtered as (select ordered_at from base where ordered_at >= '2026-01-01') select * from filtered where ordered_at < '2026-01-02'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": "2026-01-02", + } + }, + }, + { + "name": "subquery from", + "sql": "select * from (select ordered_at from shop.orders) t where t.ordered_at >= '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "join subquery", + "sql": "select * from shop.users u join (select user_id, ordered_at from shop.orders) o on u.id = o.user_id and o.ordered_at < '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": None, + "upper": "2026-01-01", + } + }, + }, + { + "name": "qualify clause", + "sql": "select min(ordered_at) over (partition by user_id) as first_ordered_at from shop.orders qualify first_ordered_at > '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": 
"2026-01-01", + "upper": None, + } + }, + }, + { + "name": "having clause", + "sql": "select min(ordered_at) as first_ordered_at from shop.orders having first_ordered_at > '2026-01-01'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "union both filtered", + "sql": "select ordered_at from shop.orders where ordered_at >= '2026-01-01' union all select ordered_at from shop.orders where ordered_at >= '2026-01-02'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "union one filtered", + "sql": "select ordered_at from shop.orders where ordered_at >= '2026-01-01' union all select ordered_at from shop.orders", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + } + }, + }, + { + "name": "union different tables", + "sql": "select ordered_at from shop.orders where ordered_at >= '2026-01-01' union all select event_at from shop.events where event_at < '2026-01-02'", + "expected": { + "production.shop.orders.ordered_at": { + "lower": "2026-01-01", + "upper": None, + }, + "production.shop.events.event_at": { + "lower": None, + "upper": "2026-01-02", + }, + }, + }, +] + + +@pytest.mark.parametrize( + ("sql", "expected"), + [(case["sql"], case["expected"]) for case in test_cases], + ids=[case["name"] for case in test_cases], +) +def test_analyze_case(sql, expected): + result = analyze_timespan(dialects.Redshift, sql, schema, "production") + assert result == expected + + +def test_current_date_provider(): + sql = "select * from shop.orders where ordered_at >= current_date()" + result = analyze_timespan( + dialects.Redshift, + sql, + schema, + "production", + current_date_provider=lambda: "2026-01-01", + ) + assert result == { + "production.shop.orders.ordered_at": {"lower": "2026-01-01", "upper": None} + } + + +def test_current_datetime_provider(): + sql = "select * from 
shop.orders where ordered_at >= current_timestamp()" + result = analyze_timespan( + dialects.Redshift, + sql, + schema, + "production", + current_date_provider=lambda: "2026-01-01", + ) + assert result == { + "production.shop.orders.ordered_at": {"lower": "2026-01-01", "upper": None} + } + + +def test_current_timestamp_provider(): + sql = "select * from shop.orders where ordered_at >= current_timestamp()" + result = analyze_timespan( + dialects.Redshift, + sql, + schema, + "production", + current_date_provider=lambda: "2026-01-01", + ) + assert result == { + "production.shop.orders.ordered_at": {"lower": "2026-01-01", "upper": None} + }