From 907d76a96257320c295c791f7caee55f323bebe4 Mon Sep 17 00:00:00 2001 From: Martin Braun Date: Sat, 30 Sep 2023 15:16:10 +0200 Subject: [PATCH 1/7] start prototyping more complex user defined filters --- .../queries/user_defined_filter.py | 60 ++++++++++++++++--- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py b/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py index 37ffafd..e2e2caa 100644 --- a/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py +++ b/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py @@ -20,13 +20,38 @@ class UserDefinedFilter(NamedTuple): used_data_series_children: DataSeriesQueryInfo +def complex_filter_to_sql_filter(filter_dict, handle_column, max_depth=10, depth=0): + if depth > max_depth: + raise ValueError("Maximum recursion depth exceeded") + + sql_filter = "" + + key: str + for key, value in filter_dict.items(): + if key == "$and": + and_clauses = [complex_filter_to_sql_filter(item, handle_column, max_depth, depth + 1) for item in value] + sql_filter += "(" + " AND ".join(and_clauses) + ")" + elif key == "$or": + or_clauses = [complex_filter_to_sql_filter(item, handle_column, max_depth, depth + 1) for item in value] + sql_filter += "(" + " OR ".join(or_clauses) + ")" + elif key.startswith("$"): + raise ValueError(f"Unsupported operator: {key}") + else: + column = key + sql_filter += handle_column(column, value) + + + sql_filter += " AND " + + return sql_filter[:-5] # Remove the trailing "AND" + + def compute_user_defined_filter_for_raw_query( data_series_query_info: DataSeriesQueryInfo, filter_params: Dict[str, Any], use_materialized_table: bool, ) -> UserDefinedFilter: query_params: Dict[str, Any] = {} - query_parts: List[str] = [] used_data_series_children = DataSeriesQueryInfo( data_series_id=data_series_query_info.data_series_id, 
backend=data_series_query_info.backend, @@ -47,12 +72,29 @@ def compute_user_defined_filter_for_raw_query( main_alive_filter='ds_dp.deleted_at IS NULL' ) + def handle_column(column, filter, operation="$eq"): + if isinstance(filter, dict): + and_clauses = [handle_column(column, item[1], operation=item[0]) for item in filter.items()] + sql_filter = " AND ".join(and_clauses) + return sql_filter + else: + return primitive_filter(column, filter, operation) + + query_param_idx = 0 + + handled_keys: Set[str] = set() + keys_overall: Set[str] = set() + + def primitive_filter(key, value, operation): + keys_overall.add(key) - for i, (key, value) in enumerate(filter_params.items()): + nonlocal query_param_idx + query_parts: List[str] = [] # JSON is not supported because of the added complexity # it would bring without a proper DSL - query_param_name = f'filter_{i}' + query_param_name = f'filter_{query_param_idx}' + query_param_idx += 1 # image and file intentionally not here _handle_if_float_fact(use_materialized_table, data_series_query_info, query_parts, query_params, used_data_series_children, @@ -72,17 +114,19 @@ def compute_user_defined_filter_for_raw_query( _handle_if_dimension(use_materialized_table, data_series_query_info, query_parts, query_params, used_data_series_children, query_param_name, key, value, handled_keys) + return '\n'.join(query_parts) - filter_keys = filter_params.keys() + filter_query_str = complex_filter_to_sql_filter( + filter_dict=filter_params, + handle_column=handle_column + ) - not_found = frozenset(filter_keys).difference(handled_keys) + not_found = frozenset(keys_overall).difference(handled_keys) if len(not_found) > 0: raise ValidationError(f'unrecognized fields in filter query parameter: {"{"}{",".join(not_found)}{"}"}') - handled_keys.intersection(filter_keys) - return UserDefinedFilter( - filter_query_str='\n'.join(query_parts), + filter_query_str=filter_query_str, query_params=query_params, 
used_data_series_children=used_data_series_children ) From 556669b45a5e473515752ca7ceb0e69d09acbb07 Mon Sep 17 00:00:00 2001 From: Martin Braun Date: Sun, 1 Oct 2023 00:32:44 +0200 Subject: [PATCH 2/7] implement first draft of user_defined_filter that works with equality and/or --- .../queries/user_defined_filter.py | 42 +++++++++++-------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py b/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py index e2e2caa..4ed9e49 100644 --- a/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py +++ b/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py @@ -29,17 +29,16 @@ def complex_filter_to_sql_filter(filter_dict, handle_column, max_depth=10, depth key: str for key, value in filter_dict.items(): if key == "$and": - and_clauses = [complex_filter_to_sql_filter(item, handle_column, max_depth, depth + 1) for item in value] + and_clauses = ["(" + complex_filter_to_sql_filter(item, handle_column, max_depth, depth + 1) + ")" for item in value] sql_filter += "(" + " AND ".join(and_clauses) + ")" elif key == "$or": - or_clauses = [complex_filter_to_sql_filter(item, handle_column, max_depth, depth + 1) for item in value] + or_clauses = ["(" + complex_filter_to_sql_filter(item, handle_column, max_depth, depth + 1) + ")" for item in value] sql_filter += "(" + " OR ".join(or_clauses) + ")" elif key.startswith("$"): raise ValueError(f"Unsupported operator: {key}") else: column = key sql_filter += handle_column(column, value) - sql_filter += " AND " @@ -74,7 +73,7 @@ def compute_user_defined_filter_for_raw_query( def handle_column(column, filter, operation="$eq"): if isinstance(filter, dict): - and_clauses = [handle_column(column, item[1], operation=item[0]) for item in filter.items()] + and_clauses = ["(" + handle_column(column, item[1], operation=item[0]) + ")" for item in 
filter.items()] sql_filter = " AND ".join(and_clauses) return sql_filter else: @@ -96,6 +95,10 @@ def primitive_filter(key, value, operation): query_param_name = f'filter_{query_param_idx}' query_param_idx += 1 # image and file intentionally not here + + # FIXME: query_parts is stupid, we should return + # a string here and not return a string array that is not used + # for more than one call _handle_if_float_fact(use_materialized_table, data_series_query_info, query_parts, query_params, used_data_series_children, query_param_name, key, value, handled_keys) @@ -114,7 +117,7 @@ def primitive_filter(key, value, operation): _handle_if_dimension(use_materialized_table, data_series_query_info, query_parts, query_params, used_data_series_children, query_param_name, key, value, handled_keys) - return '\n'.join(query_parts) + return ' AND '.join(query_parts) filter_query_str = complex_filter_to_sql_filter( filter_dict=filter_params, @@ -126,7 +129,10 @@ def primitive_filter(key, value, operation): raise ValidationError(f'unrecognized fields in filter query parameter: {"{"}{",".join(not_found)}{"}"}') return UserDefinedFilter( - filter_query_str=filter_query_str, + # we need this to start with an AND + # as all other code expects it, maybe we could improve this in the future + # but meh. 
+ filter_query_str=f'AND {filter_query_str}' if len(filter_query_str) > 0 else "", query_params=query_params, used_data_series_children=used_data_series_children ) @@ -152,11 +158,11 @@ def _handle_if_dimension( _tbl_name = escape(f'relation_{key}') _lhs = f'{_tbl_name}.value' if value is None: - query_parts.append(f"AND {_lhs} IS NULL") + query_parts.append(f"{_lhs} IS NULL") else: if not isinstance(value, str): raise ValidationError(f"expected string value for field {str(key)}") - query_parts.append(f"AND {_lhs} = %({query_param_name})s") + query_parts.append(f"{_lhs} = %({query_param_name})s") query_params[query_param_name] = value used_data_series_children.dimensions[key] = data_series_query_info.dimensions[key] @@ -183,7 +189,7 @@ def _handle_if_timestamp_fact( _tbl_name = escape(f'relation_{key}') _lhs = f'{_tbl_name}.value' if value is None: - query_parts.append(f"AND {_lhs} IS NULL") + query_parts.append(f"{_lhs} IS NULL") else: try: parsed_date_time = dateparse.parse_datetime(str(value)) @@ -191,7 +197,7 @@ def _handle_if_timestamp_fact( raise ValidationError(f'{value} is no valid datetime') except ValueError: raise ValidationError(f'{value} is no valid datetime') - query_parts.append(f"AND {_lhs} = %({query_param_name})s") + query_parts.append(f"{_lhs} = %({query_param_name})s") query_params[query_param_name] = parsed_date_time used_data_series_children.timestamp_facts[key] = data_series_query_info.timestamp_facts[key] @@ -218,11 +224,11 @@ def _handle_if_text_fact( _tbl_name = escape(f'relation_{key}') _lhs = f'{_tbl_name}.value' if value is None: - query_parts.append(f"AND {_lhs} IS NULL") + query_parts.append(f"{_lhs} IS NULL") else: if not isinstance(value, str): raise ValidationError(f"expected string value for field {str(key)}") - query_parts.append(f"AND {_lhs} = %({query_param_name})s::text") + query_parts.append(f"{_lhs} = %({query_param_name})s::text") query_params[query_param_name] = value used_data_series_children.text_facts[key] = 
data_series_query_info.text_facts[key] @@ -249,11 +255,11 @@ def _handle_if_string_fact( _tbl_name = escape(f'relation_{key}') _lhs = f'{_tbl_name}.value' if value is None: - query_parts.append(f"AND {_lhs} IS NULL") + query_parts.append(f"{_lhs} IS NULL") else: if not isinstance(value, str): raise ValidationError(f"expected string value for field {str(key)}") - query_parts.append(f"AND {_lhs} = %({query_param_name})s") + query_parts.append(f"{_lhs} = %({query_param_name})s") query_params[query_param_name] = value used_data_series_children.string_facts[key] = data_series_query_info.string_facts[key] @@ -280,11 +286,11 @@ def _handle_if_float_fact( _tbl_name = escape(f'relation_{key}') _lhs = f'{_tbl_name}.value' if value is None: - query_parts.append(f"AND {_lhs} IS NULL") + query_parts.append(f"{_lhs} IS NULL") else: if not isinstance(value, float) and not isinstance(value, int): raise ValidationError(f"expected numeric value for field {str(key)}") - query_parts.append(f"AND {_lhs} = %({query_param_name})s::double precision") + query_parts.append(f"{_lhs} = %({query_param_name})s::double precision") query_params[query_param_name] = value used_data_series_children.float_facts[key] = data_series_query_info.float_facts[key] @@ -311,11 +317,11 @@ def _handle_if_boolean_fact( _tbl_name = escape(f'relation_{key}') _lhs = f'{_tbl_name}.value' if value is None: - query_parts.append(f"AND {_lhs} IS NULL") + query_parts.append(f"{_lhs} IS NULL") else: if not isinstance(value, bool): raise ValidationError(f"expected boolean value for field {str(key)}") - query_parts.append(f"AND {_lhs} = %({query_param_name})s") + query_parts.append(f"{_lhs} = %({query_param_name})s") query_params[query_param_name] = value used_data_series_children.boolean_facts[key] = data_series_query_info.boolean_facts[key] From 85c57a6f6e34d91c06bac21ce81cfa7b1a72e577 Mon Sep 17 00:00:00 2001 From: Martin Braun Date: Sun, 1 Oct 2023 12:22:06 +0200 Subject: [PATCH 3/7] add basic support for standard 
operators #59 --- skipper/skipper/core/lint.py | 1 + .../queries/user_defined_filter.py | 204 +++++++++++++----- 2 files changed, 152 insertions(+), 53 deletions(-) diff --git a/skipper/skipper/core/lint.py b/skipper/skipper/core/lint.py index 9ba8ddb..255978e 100644 --- a/skipper/skipper/core/lint.py +++ b/skipper/skipper/core/lint.py @@ -88,6 +88,7 @@ def find_all_groups(token: Token) -> Generator[TokenList, None, None]: 'current_database', 'table_schema', 'ANY', + 'UNNEST', # SQL types 'varchar', diff --git a/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py b/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py index 4ed9e49..50c1c08 100644 --- a/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py +++ b/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py @@ -4,7 +4,7 @@ # This file is part of NF Compose # [2019] - [2023] © NeuroForge GmbH & Co. KG - +from typing import Callable from django.utils import dateparse from rest_framework.exceptions import ValidationError from typing import NamedTuple, Dict, Any, List, Set @@ -20,18 +20,22 @@ class UserDefinedFilter(NamedTuple): used_data_series_children: DataSeriesQueryInfo -def complex_filter_to_sql_filter(filter_dict, handle_column, max_depth=10, depth=0): +def complex_filter_to_sql_filter(filter_dict: Dict[str, Any], handle_column: Callable[[str, Any], str], max_depth: int = 10, depth: int = 0) -> str: if depth > max_depth: - raise ValueError("Maximum recursion depth exceeded") + raise ValidationError("Maximum nesting complexity of query reached") sql_filter = "" key: str for key, value in filter_dict.items(): if key == "$and": + if not isinstance(value, list): + raise ValidationError("$and operators require lists as arguments") and_clauses = ["(" + complex_filter_to_sql_filter(item, handle_column, max_depth, depth + 1) + ")" for item in value] sql_filter += "(" + " AND ".join(and_clauses) + ")" elif key == 
"$or": + if not isinstance(value, list): + raise ValidationError("$or operators require lists as arguments") or_clauses = ["(" + complex_filter_to_sql_filter(item, handle_column, max_depth, depth + 1) + ")" for item in value] sql_filter += "(" + " OR ".join(or_clauses) + ")" elif key.startswith("$"): @@ -71,13 +75,13 @@ def compute_user_defined_filter_for_raw_query( main_alive_filter='ds_dp.deleted_at IS NULL' ) - def handle_column(column, filter, operation="$eq"): + def handle_column(column: str, filter: Any, operator: str = "$eq") -> str: if isinstance(filter, dict): - and_clauses = ["(" + handle_column(column, item[1], operation=item[0]) + ")" for item in filter.items()] + and_clauses = ["(" + handle_column(column, item[1], operator=item[0]) + ")" for item in filter.items()] sql_filter = " AND ".join(and_clauses) return sql_filter else: - return primitive_filter(column, filter, operation) + return primitive_filter(column, filter, operator) query_param_idx = 0 @@ -85,7 +89,10 @@ def handle_column(column, filter, operation="$eq"): handled_keys: Set[str] = set() keys_overall: Set[str] = set() - def primitive_filter(key, value, operation): + def primitive_filter(key: str, value: Any, operator: str) -> str: + if operator not in single_valued_sql_operators and operator not in multi_valued_sql_operators: + raise ValidationError(f"unrecognized operator {operator}") + keys_overall.add(key) nonlocal query_param_idx @@ -101,22 +108,22 @@ def primitive_filter(key, value, operation): # for more than one call _handle_if_float_fact(use_materialized_table, data_series_query_info, query_parts, query_params, used_data_series_children, - query_param_name, key, value, handled_keys) + query_param_name, key, value, handled_keys, operator) _handle_if_string_fact(use_materialized_table, data_series_query_info, query_parts, query_params, used_data_series_children, - query_param_name, key, value, handled_keys) + query_param_name, key, value, handled_keys, operator) 
_handle_if_text_fact(use_materialized_table, data_series_query_info, query_parts, query_params, used_data_series_children, - query_param_name, key, value, handled_keys) + query_param_name, key, value, handled_keys, operator) _handle_if_timestamp_fact(use_materialized_table, data_series_query_info, query_parts, query_params, used_data_series_children, - query_param_name, key, value, handled_keys) + query_param_name, key, value, handled_keys, operator) _handle_if_boolean_fact(use_materialized_table, data_series_query_info, query_parts, query_params, used_data_series_children, - query_param_name, key, value, handled_keys) + query_param_name, key, value, handled_keys, operator) _handle_if_dimension(use_materialized_table, data_series_query_info, query_parts, query_params, used_data_series_children, - query_param_name, key, value, handled_keys) + query_param_name, key, value, handled_keys, operator) return ' AND '.join(query_parts) filter_query_str = complex_filter_to_sql_filter( @@ -138,6 +145,21 @@ def primitive_filter(key, value, operation): ) +single_valued_sql_operators = { + "$eq": "=", + "$lt": "<", + "$lte": "<=", + "$ne": "!=", + "$gte": ">=", + "$gt": ">" +} + +multi_valued_sql_operators = { + "$in": "IN", + "$nin": "NOT IN" +} + + def _handle_if_dimension( use_materialized_table: bool, data_series_query_info: DataSeriesQueryInfo, @@ -147,7 +169,8 @@ def _handle_if_dimension( query_param_name: str, key: str, value: Any, - handled_keys: Set[str] + handled_keys: Set[str], + operator: str ) -> None: if key in data_series_query_info.dimensions: _lhs: str @@ -157,18 +180,27 @@ def _handle_if_dimension( else: _tbl_name = escape(f'relation_{key}') _lhs = f'{_tbl_name}.value' - if value is None: + if operator == "$eq" and value is None: query_parts.append(f"{_lhs} IS NULL") + elif operator == "$ne" and value is None: + query_parts.append(f"{_lhs} IS NOT NULL") else: - if not isinstance(value, str): - raise ValidationError(f"expected string value for field {str(key)}") 
- query_parts.append(f"{_lhs} = %({query_param_name})s") - query_params[query_param_name] = value + if operator in single_valued_sql_operators: + if not isinstance(value, str): + raise ValidationError(f"expected string value for field {str(key)}") + query_parts.append(f"{_lhs} {single_valued_sql_operators[operator]} %({query_param_name})s") + query_params[query_param_name] = value + elif operator in multi_valued_sql_operators: + if not isinstance(value, list) or not all([isinstance(elem, str) for elem in value]): + raise ValidationError(f"expected list of strings for operator {operator} on field {str(key)}") + query_parts.append(f"{_lhs} {multi_valued_sql_operators[operator]} %({query_param_name})s") + query_params[query_param_name] = tuple(value) + else: + raise ValidationError(f"unsupported operator {operator} for field {str(key)}") used_data_series_children.dimensions[key] = data_series_query_info.dimensions[key] handled_keys.add(key) - def _handle_if_timestamp_fact( use_materialized_table: bool, data_series_query_info: DataSeriesQueryInfo, @@ -178,7 +210,8 @@ def _handle_if_timestamp_fact( query_param_name: str, key: str, value: Any, - handled_keys: Set[str] + handled_keys: Set[str], + operator: str ) -> None: if key in data_series_query_info.timestamp_facts: _lhs: str @@ -188,17 +221,38 @@ def _handle_if_timestamp_fact( else: _tbl_name = escape(f'relation_{key}') _lhs = f'{_tbl_name}.value' - if value is None: + if operator == "$eq" and value is None: query_parts.append(f"{_lhs} IS NULL") + elif operator == "$ne" and value is None: + query_parts.append(f"{_lhs} IS NOT NULL") else: - try: - parsed_date_time = dateparse.parse_datetime(str(value)) - if parsed_date_time is None: + if operator in single_valued_sql_operators: + try: + parsed_date_time = dateparse.parse_datetime(str(value)) + if parsed_date_time is None: + raise ValidationError(f'{value} is no valid datetime') + except ValueError: raise ValidationError(f'{value} is no valid datetime') - except 
ValueError: - raise ValidationError(f'{value} is no valid datetime') - query_parts.append(f"{_lhs} = %({query_param_name})s") - query_params[query_param_name] = parsed_date_time + query_parts.append(f"{_lhs} {single_valued_sql_operators[operator]} %({query_param_name})s") + query_params[query_param_name] = parsed_date_time + elif operator in multi_valued_sql_operators: + if not isinstance(value, list): + raise ValidationError(f"expected list of timestamps for operator {operator} on field {str(key)}") + + parsed_date_times = [] + for elem in value: + try: + cur_parsed = dateparse.parse_datetime(str(elem)) + if cur_parsed is None: + raise ValidationError(f"expected list of timestamps for operator {operator} on field {str(key)}") + parsed_date_times.append(cur_parsed) + except ValueError: + raise ValidationError(f"expected list of timestamps for operator {operator} on field {str(key)}") + + query_parts.append(f"{_lhs} {multi_valued_sql_operators[operator]} %({query_param_name})s") + query_params[query_param_name] = tuple(parsed_date_times) + else: + raise ValidationError(f"unsupported operator {operator} for field {str(key)}") used_data_series_children.timestamp_facts[key] = data_series_query_info.timestamp_facts[key] handled_keys.add(key) @@ -213,7 +267,8 @@ def _handle_if_text_fact( query_param_name: str, key: str, value: Any, - handled_keys: Set[str] + handled_keys: Set[str], + operator: str ) -> None: if key in data_series_query_info.text_facts: _lhs: str @@ -223,13 +278,23 @@ def _handle_if_text_fact( else: _tbl_name = escape(f'relation_{key}') _lhs = f'{_tbl_name}.value' - if value is None: + if operator == "$eq" and value is None: query_parts.append(f"{_lhs} IS NULL") + elif operator == "$ne" and value is None: + query_parts.append(f"{_lhs} IS NOT NULL") else: - if not isinstance(value, str): - raise ValidationError(f"expected string value for field {str(key)}") - query_parts.append(f"{_lhs} = %({query_param_name})s::text") - query_params[query_param_name] = 
value + if operator in single_valued_sql_operators: + if not isinstance(value, str): + raise ValidationError(f"expected string value for field {str(key)}") + query_parts.append(f"{_lhs} {single_valued_sql_operators[operator]} %({query_param_name})s::text") + query_params[query_param_name] = value + elif operator in multi_valued_sql_operators: + if not isinstance(value, list) or not all([isinstance(elem, str) for elem in value]): + raise ValidationError(f"expected list of strings for operator {operator} on field {str(key)}") + query_parts.append(f"{_lhs} {multi_valued_sql_operators[operator]} %({query_param_name})s") + query_params[query_param_name] = tuple(value) + else: + raise ValidationError(f"unsupported operator {operator} for field {str(key)}") used_data_series_children.text_facts[key] = data_series_query_info.text_facts[key] handled_keys.add(key) @@ -244,7 +309,8 @@ def _handle_if_string_fact( query_param_name: str, key: str, value: Any, - handled_keys: Set[str] + handled_keys: Set[str], + operator: str ) -> None: if key in data_series_query_info.string_facts: _lhs: str @@ -254,13 +320,23 @@ def _handle_if_string_fact( else: _tbl_name = escape(f'relation_{key}') _lhs = f'{_tbl_name}.value' - if value is None: + if operator == "$eq" and value is None: query_parts.append(f"{_lhs} IS NULL") + elif operator == "$ne" and value is None: + query_parts.append(f"{_lhs} IS NOT NULL") else: - if not isinstance(value, str): - raise ValidationError(f"expected string value for field {str(key)}") - query_parts.append(f"{_lhs} = %({query_param_name})s") - query_params[query_param_name] = value + if operator in single_valued_sql_operators: + if not isinstance(value, str): + raise ValidationError(f"expected string value for field {str(key)}") + query_parts.append(f"{_lhs} {single_valued_sql_operators[operator]} %({query_param_name})s") + query_params[query_param_name] = value + elif operator in multi_valued_sql_operators: + if not isinstance(value, list) or not 
all([isinstance(elem, str) for elem in value]): + raise ValidationError(f"expected list of strings for operator {operator} on field {str(key)}") + query_parts.append(f"{_lhs} {multi_valued_sql_operators[operator]} %({query_param_name})s") + query_params[query_param_name] = tuple(value) + else: + raise ValidationError(f"unsupported operator {operator} for field {str(key)}") used_data_series_children.string_facts[key] = data_series_query_info.string_facts[key] handled_keys.add(key) @@ -275,7 +351,8 @@ def _handle_if_float_fact( query_param_name: str, key: str, value: Any, - handled_keys: Set[str] + handled_keys: Set[str], + operator: str ) -> None: if key in data_series_query_info.float_facts: _lhs: str @@ -285,13 +362,23 @@ def _handle_if_float_fact( else: _tbl_name = escape(f'relation_{key}') _lhs = f'{_tbl_name}.value' - if value is None: + if operator == "$eq" and value is None: query_parts.append(f"{_lhs} IS NULL") + elif operator == "$ne" and value is None: + query_parts.append(f"{_lhs} IS NOT NULL") else: - if not isinstance(value, float) and not isinstance(value, int): - raise ValidationError(f"expected numeric value for field {str(key)}") - query_parts.append(f"{_lhs} = %({query_param_name})s::double precision") - query_params[query_param_name] = value + if operator in single_valued_sql_operators: + if not isinstance(value, float) and not isinstance(value, int): + raise ValidationError(f"expected numeric value for field {str(key)}") + query_parts.append(f"{_lhs} {single_valued_sql_operators[operator]} %({query_param_name})s::double precision") + query_params[query_param_name] = value + elif operator in multi_valued_sql_operators: + if not isinstance(value, list) or not all([isinstance(elem, int) or isinstance(elem, float) for elem in value]): + raise ValidationError(f"expected list of numeric values for operator {operator} on field {str(key)}") + query_parts.append(f"{_lhs} {multi_valued_sql_operators[operator]} %({query_param_name})s") + 
query_params[query_param_name] = tuple(value) + else: + raise ValidationError(f"unsupported operator {operator} for field {str(key)}") used_data_series_children.float_facts[key] = data_series_query_info.float_facts[key] handled_keys.add(key) @@ -306,7 +393,8 @@ def _handle_if_boolean_fact( query_param_name: str, key: str, value: Any, - handled_keys: Set[str] + handled_keys: Set[str], + operator: str ) -> None: if key in data_series_query_info.boolean_facts: _lhs: str @@ -316,13 +404,23 @@ def _handle_if_boolean_fact( else: _tbl_name = escape(f'relation_{key}') _lhs = f'{_tbl_name}.value' - if value is None: + if operator == "$eq" and value is None: query_parts.append(f"{_lhs} IS NULL") + elif operator == "$ne" and value is None: + query_parts.append(f"{_lhs} IS NOT NULL") else: - if not isinstance(value, bool): - raise ValidationError(f"expected boolean value for field {str(key)}") - query_parts.append(f"{_lhs} = %({query_param_name})s") - query_params[query_param_name] = value + if operator in single_valued_sql_operators: + if not isinstance(value, bool): + raise ValidationError(f"expected boolean value for field {str(key)}") + query_parts.append(f"{_lhs} {single_valued_sql_operators[operator]} %({query_param_name})s") + query_params[query_param_name] = value + elif operator in multi_valued_sql_operators: + if not isinstance(value, list) or not all([isinstance(elem, bool) for elem in value]): + raise ValidationError(f"expected list of boolean values for operator {operator} on field {str(key)}") + query_parts.append(f"{_lhs} {multi_valued_sql_operators[operator]} %({query_param_name})s") + query_params[query_param_name] = tuple(value) + else: + raise ValidationError(f"unsupported operator {operator} for field {str(key)}") used_data_series_children.boolean_facts[key] = data_series_query_info.boolean_facts[key] handled_keys.add(key) From d61098504b647fd8967083d5dd16b5944b2f907d Mon Sep 17 00:00:00 2001 From: Martin Braun Date: Sun, 1 Oct 2023 12:30:19 +0200 Subject: 
[PATCH 4/7] remove unnecessary UNNEST from lint.py --- skipper/skipper/core/lint.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skipper/skipper/core/lint.py b/skipper/skipper/core/lint.py index 255978e..9ba8ddb 100644 --- a/skipper/skipper/core/lint.py +++ b/skipper/skipper/core/lint.py @@ -88,7 +88,6 @@ def find_all_groups(token: Token) -> Generator[TokenList, None, None]: 'current_database', 'table_schema', 'ANY', - 'UNNEST', # SQL types 'varchar', From 77b76ac9431cf4bff8ab6976a6edf5b75ebcf951 Mon Sep 17 00:00:00 2001 From: Martin Braun Date: Sun, 1 Oct 2023 12:37:10 +0200 Subject: [PATCH 5/7] add prefix filter --- .../dynamic_sql/queries/user_defined_filter.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py b/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py index 50c1c08..0d3c814 100644 --- a/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py +++ b/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py @@ -90,9 +90,6 @@ def handle_column(column: str, filter: Any, operator: str = "$eq") -> str: keys_overall: Set[str] = set() def primitive_filter(key: str, value: Any, operator: str) -> str: - if operator not in single_valued_sql_operators and operator not in multi_valued_sql_operators: - raise ValidationError(f"unrecognized operator {operator}") - keys_overall.add(key) nonlocal query_param_idx @@ -288,6 +285,11 @@ def _handle_if_text_fact( raise ValidationError(f"expected string value for field {str(key)}") query_parts.append(f"{_lhs} {single_valued_sql_operators[operator]} %({query_param_name})s::text") query_params[query_param_name] = value + elif operator == "$prefix": + if not isinstance(value, str): + raise ValidationError(f"expected string value for field {str(key)}") + query_parts.append(f"{_lhs} LIKE (%({query_param_name})s::text) || '%%'") + 
query_params[query_param_name] = value elif operator in multi_valued_sql_operators: if not isinstance(value, list) or not all([isinstance(elem, str) for elem in value]): raise ValidationError(f"expected list of strings for operator {operator} on field {str(key)}") query_parts.append(f"{_lhs} {multi_valued_sql_operators[operator]} %({query_param_name})s") query_params[query_param_name] = tuple(value) @@ -330,6 +332,11 @@ def _handle_if_string_fact( raise ValidationError(f"expected string value for field {str(key)}") query_parts.append(f"{_lhs} {single_valued_sql_operators[operator]} %({query_param_name})s") query_params[query_param_name] = value + elif operator == "$prefix": + if not isinstance(value, str): + raise ValidationError(f"expected string value for field {str(key)}") + query_parts.append(f"{_lhs} LIKE %({query_param_name})s || '%%'") + query_params[query_param_name] = value elif operator in multi_valued_sql_operators: if not isinstance(value, list) or not all([isinstance(elem, str) for elem in value]): raise ValidationError(f"expected list of strings for operator {operator} on field {str(key)}") query_parts.append(f"{_lhs} {multi_valued_sql_operators[operator]} %({query_param_name})s") From ebb71cebb218ae35bfefe31692306470528745fa Mon Sep 17 00:00:00 2001 From: Martin Braun Date: Sun, 1 Oct 2023 12:41:30 +0200 Subject: [PATCH 6/7] add support for $not operator --- .../storage/dynamic_sql/queries/user_defined_filter.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py b/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py index 0d3c814..b5aa201 100644 --- a/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py +++ b/skipper/skipper/dataseries/storage/dynamic_sql/queries/user_defined_filter.py @@ -30,14 +30,19 @@ def complex_filter_to_sql_filter(filter_dict: Dict[str, Any], handle_column: Cal for key, value in filter_dict.items(): if key == "$and": if not isinstance(value, list): - raise ValidationError("$and operators require lists as arguments") + raise ValidationError("$and operator requires a list as argument") and_clauses = ["(" +
complex_filter_to_sql_filter(item, handle_column, max_depth, depth + 1) + ")" for item in value] sql_filter += "(" + " AND ".join(and_clauses) + ")" elif key == "$or": if not isinstance(value, list): - raise ValidationError("$or operators require lists as arguments") + raise ValidationError("$or operator requires a list as argument") or_clauses = ["(" + complex_filter_to_sql_filter(item, handle_column, max_depth, depth + 1) + ")" for item in value] sql_filter += "(" + " OR ".join(or_clauses) + ")" + elif key == "$not": + if not isinstance(value, dict): + raise ValidationError("$not operator requires a dictionary as argument") + child_clause = "(" + complex_filter_to_sql_filter(value, handle_column, max_depth, depth + 1) + ")" + sql_filter += "(NOT " + child_clause + ")" elif key.startswith("$"): raise ValueError(f"Unsupported operator: {key}") else: From 057162d40b31ebda1fd6791da15cdaa4cd8defb5 Mon Sep 17 00:00:00 2001 From: Martin Braun Date: Sun, 1 Oct 2023 12:47:38 +0200 Subject: [PATCH 7/7] add documentation for new filter features to crud.py --- skipper/skipper/dataseries/views/datapoint/crud.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skipper/skipper/dataseries/views/datapoint/crud.py b/skipper/skipper/dataseries/views/datapoint/crud.py index 4a19d13..fec9b54 100644 --- a/skipper/skipper/dataseries/views/datapoint/crud.py +++ b/skipper/skipper/dataseries/views/datapoint/crud.py @@ -271,7 +271,8 @@ def get_description_string(self) -> str:

- changes_since=<timestamp>
- - filter={{"<dimension/fact external id>": , ...}}
+ - filter={{"$or": [{{"<dimension/fact external id>": "<some-value>", ...}}, {{"<dimension/fact external id>": "<some-other-value>", ...}}]}} + (supports logical operators $or, $and, $not and primitive operators $eq, $lt, $lte, $ne, $gte, $gt, $in, $nin, $prefix)
- count[=true]
- external_id= (repeatable)
- identify_dimensions_by_external_id[=true]