Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add timeout to SQL parsing #1063

Merged
merged 1 commit into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions metaphor/common/sql/table_level_lineage/table_level_lineage.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import re
import signal
from collections import defaultdict
from typing import Dict, Optional, Set

Expand Down Expand Up @@ -128,6 +129,10 @@
return sources


def _handle_parser_timeout(signum, frame):
    """Signal handler that aborts SQL parsing by raising ``TimeoutError``.

    ``extract_table_level_lineage`` registers this function for
    ``signal.SIGALRM`` before parsing. The ``signum`` and ``frame``
    arguments are required by the signal-handler calling convention and
    are not used here.

    Raises:
        TimeoutError: always, with the message "Parser timeout".
    """
    raise TimeoutError("Parser timeout")

Check warning on line 133 in metaphor/common/sql/table_level_lineage/table_level_lineage.py

View check run for this annotation

Codecov / codecov/patch

metaphor/common/sql/table_level_lineage/table_level_lineage.py#L133

Added line #L133 was not covered by tests


def extract_table_level_lineage(
sql: str,
platform: DataPlatform,
Expand All @@ -137,25 +142,32 @@
default_database: Optional[str] = None,
default_schema: Optional[str] = None,
) -> Result:

if statement_type and statement_type.upper() not in _VALID_STATEMENT_TYPES:
# No target, no TLL possible
return Result()

signal.signal(signal.SIGALRM, _handle_parser_timeout)
signal.alarm(10) # set timeout to 10 seconds
try:
expression: Expression = maybe_parse(
sql, dialect=PLATFORM_TO_DIALECT.get(platform)
)
except (sqlglot.errors.ParseError, sqlglot.errors.TokenError):
if not _is_truncated_insert_into_with_values(sql) and query_id:
logger.warning(f"Cannot parse sql with SQLGlot, query_id = {query_id}")
logger.warning(f"Cannot parse sql with query_id = {query_id}")

Check warning on line 157 in metaphor/common/sql/table_level_lineage/table_level_lineage.py

View check run for this annotation

Codecov / codecov/patch

metaphor/common/sql/table_level_lineage/table_level_lineage.py#L157

Added line #L157 was not covered by tests
return Result()
except RecursionError:
if query_id:
logger.warning(
f"Cannot parse sql with SQLGlot (max recursion level exceeded), query_id = {query_id}"
)
return Result()
except TimeoutError:
if query_id:
logger.warning(f"Parser timeout, query_id = {query_id}")
return Result()

Check warning on line 168 in metaphor/common/sql/table_level_lineage/table_level_lineage.py

View check run for this annotation

Codecov / codecov/patch

metaphor/common/sql/table_level_lineage/table_level_lineage.py#L165-L168

Added lines #L165 - L168 were not covered by tests
finally:
signal.alarm(0)

try:
return Result(
Expand Down
5 changes: 3 additions & 2 deletions metaphor/quick_sight/lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class Column:

def _get_source_from_custom_sql(
resources: Dict[str, ResourceType],
table_id: str,
custom_sql: TypeCustomSql,
) -> Tuple[Optional[List[str]], Optional[VirtualViewQuery]]:
data_source = resources.get(custom_sql.DataSourceArn)
Expand All @@ -80,7 +81,7 @@ def _get_source_from_custom_sql(
query,
platform=data_platform,
account=account,
query_id="",
query_id=table_id,
default_database=database,
)

Expand Down Expand Up @@ -198,7 +199,7 @@ def _process_physical_table_map(

if physical_table.CustomSql:
source_entities, query = _get_source_from_custom_sql(
self._resources, physical_table.CustomSql
self._resources, table_id, physical_table.CustomSql
)

# CLL of custom sql is not supported
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.14.173"
version = "0.14.174"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <dev@metaphor.io>"]
Expand Down
Loading