Skip to content

Commit

Permalink
Merge pull request #9 from carte-data/ConnectionBasedTables-Glue
Browse files Browse the repository at this point in the history
Fix extraction of connection-based tables
  • Loading branch information
IstvanM authored May 24, 2022
2 parents 86dc1c7 + 06012cc commit df2f4c7
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 14 deletions.
37 changes: 24 additions & 13 deletions carte_cli/extractor/glue_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def _get_column_type(self, column: Dict) -> str:
else:
return col_type

def _get_glue_table_columns(
def _get_schema_columns(
self, row: Dict[str, Any], table_name: str
) -> List[ColumnMetadata]:
columns = []
Expand Down Expand Up @@ -81,33 +81,44 @@ def _get_glue_table_columns(
)
return columns

def _get_descriptor_columns(self, row: Dict) -> List[ColumnMetadata]:
    """Build column metadata from the row's storage descriptor.

    Each entry under ``row["StorageDescriptor"]["Columns"]`` becomes one
    ``ColumnMetadata`` carrying the entry's ``Name`` and ``Type``. The
    description is always set to ``None``; any other keys on the entry
    are not read.

    Raises:
        KeyError: if ``StorageDescriptor``, ``Columns``, ``Name`` or
            ``Type`` is missing from the row.
    """
    descriptor_columns = row["StorageDescriptor"]["Columns"]
    return [
        ColumnMetadata(
            name=entry["Name"],
            column_type=entry["Type"],
            description=None,
        )
        for entry in descriptor_columns
    ]

def _get_extract_iter(self) -> Iterator[TableMetadata]:
for row in self._get_raw_extract_iter():
columns = []
table_name = row["Name"]
db_name = row["DatabaseName"]
table_type_raw_value = row.get("TableType")
connection_name = row.get("Parameters", {}).get("connectionName", None)

full_table_name = f"{db_name}.{table_name}"
if (
self._table_name_re is not None
and self._table_name_re.search(full_table_name) is not None
):
continue

is_view = row.get("TableType") == "VIRTUAL_VIEW"

if is_view:
if table_type_raw_value == "VIRTUAL_VIEW":
table_type = TableType.VIEW
for column in row["StorageDescriptor"]["Columns"]:
columns.append(
ColumnMetadata(
name=column["Name"],
column_type=column["Type"],
description=None,
)
)
columns = self._get_descriptor_columns(row)
elif (
table_type_raw_value == "EXTERNAL_TABLE" and connection_name is not None
):
table_type = TableType.TABLE
columns = self._get_descriptor_columns(row)
else:
columns = self._get_glue_table_columns(row, full_table_name)
table_type = TableType.TABLE
columns = self._get_schema_columns(row, full_table_name)

yield TableMetadata(
name=table_name,
Expand Down
1 change: 1 addition & 0 deletions carte_cli/model/carte_table_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class ColumnType(Enum):

class TableType(Enum):
    """Kinds of table a ``TableMetadata`` record can describe."""

    # Regular table.
    TABLE = "table"
    # NOTE(review): presumably intended for tables reached through a
    # connection; confirm where (or whether) this member is assigned.
    CONNECTION_BASED_TABLE = "connection_based_table"
    # View.
    VIEW = "view"


Expand Down
38 changes: 37 additions & 1 deletion tests/extractor/test_glue_extractor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import boto3
import unittest
from unittest.mock import patch
from pyhocon import ConfigFactory
Expand Down Expand Up @@ -308,6 +307,29 @@ def test_extraction_with_multiple_result(self) -> None:
},
"TableType": "VIRTUAL_VIEW",
},
{
"Name": "ConnectionTable",
"DatabaseName": "test_schema1",
"Description": "test connection 1",
"Parameters": {
"connectionName": "test_connection_1",
},
"StorageDescriptor": {
"Columns": [
{
"Name": "col_id3",
"Type": "varchar",
"Comment": "description of col_id3",
},
{
"Name": "col_name3",
"Type": "varchar",
"Comment": "description of col_name3",
},
]
},
"TableType": "EXTERNAL_TABLE",
},
{
"Name": "shouldnt",
"DatabaseName": "sandbox",
Expand Down Expand Up @@ -395,6 +417,20 @@ def test_extraction_with_multiple_result(self) -> None:
ColumnMetadata("col_name3", "varchar", None),
],
)
self.assertEqual(expected.__repr__(), extractor.extract().__repr__())

expected = TableMetadata(
name="ConnectionTable",
connection="test-connection",
database="test_schema1",
description=None,
location=None,
table_type=TableType.TABLE,
columns=[
ColumnMetadata("col_id3", "varchar", None),
ColumnMetadata("col_name3", "varchar", None),
],
)

self.assertEqual(expected.__repr__(), extractor.extract().__repr__())

Expand Down

0 comments on commit df2f4c7

Please sign in to comment.