Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

quicksight crawler add filter [sc-30103] #1068

Merged
merged 1 commit into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions metaphor/quick_sight/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,22 @@ aws_account_id: <quick_aws_account_id>

### Optional Configurations

#### Dashboard Filter

You can optionally specify a list of dashboard IDs to include or exclude in the output.

```yaml
filter:
  include_dashboard_ids:
    - <dashboard_id_1>
    - <dashboard_id_2>
  exclude_dashboard_ids:
    - <dashboard_id_3>
    - <dashboard_id_4>
```

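If `include_dashboard_ids` is set, only the listed dashboards are included; any dashboard listed in `exclude_dashboard_ids` is always left out. When either list is set, only the data sets referenced by the included dashboards (directly, or indirectly through upstream data sets) will be included in the output. Otherwise, all dashboards and data sets will be included.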

#### Output Destination

See [Output Config](../common/docs/output.md) for more information.
Expand Down
12 changes: 12 additions & 0 deletions metaphor/quick_sight/config.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
from dataclasses import field
from typing import List

from pydantic.dataclasses import dataclass

from metaphor.common.aws import AwsCredentials
from metaphor.common.base_config import BaseConfig
from metaphor.common.dataclass import ConnectorConfig


@dataclass(config=ConnectorConfig)
class QuickSightFilter:
    # Dashboard-level filter for the QuickSight crawler. Both lists default to
    # empty, in which case the extractor applies no filtering.

    # Dashboard IDs to keep. When non-empty, the extractor skips any dashboard
    # whose ID is not in this list.
    include_dashboard_ids: List[str] = field(default_factory=list)
    # Dashboard IDs to drop. The extractor skips any dashboard whose ID is in
    # this list.
    exclude_dashboard_ids: List[str] = field(default_factory=list)


@dataclass(config=ConnectorConfig)
class QuickSightRunConfig(BaseConfig):
    # Run configuration for the QuickSight crawler, extending BaseConfig.

    # AWS credentials used by the crawler.
    aws: AwsCredentials

    # presumably the AWS account that owns the QuickSight resources — confirm
    aws_account_id: str

    # Include or exclude specific dashboards and the related data sets
    filter: QuickSightFilter = field(default_factory=QuickSightFilter)
96 changes: 84 additions & 12 deletions metaphor/quick_sight/extractor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Collection, Dict, List, Optional
from typing import Collection, Dict, List, Optional, Set

from func_timeout import FunctionTimedOut, func_set_timeout

Expand Down Expand Up @@ -52,6 +52,7 @@
super().__init__(config)
self._aws_config = config.aws
self._aws_account_id = config.aws_account_id
self._filter = config.filter

# Arn -> Resource
self._resources: Dict[str, ResourceType] = {}
Expand All @@ -62,6 +63,12 @@
# DashboardId -> Dashboard
self._dashboards: Dict[str, MetaphorDashboard] = {}

# VirtualViewId -> VirtualView
self._virtual_view_reference: Dict[str, VirtualView] = {}

# Set of virtual view IDs that are referenced by dashboards
self._referenced_virtual_views: Set[str] = set()

async def extract(self) -> Collection[ENTITY_TYPES]:
logger.info("Fetching metadata from QuickSight")

Expand All @@ -87,7 +94,7 @@
view.entity_upstream = output.entity_upstream
self._virtual_views.pop(output_logical_table_id)

def _extract_virtual_views(self):
def _extract_virtual_views(self) -> None:
count = 0
for data_set in self._resources.values():
if not isinstance(data_set, DataSet):
Expand All @@ -107,12 +114,40 @@

logger.info(f"Parsed {count} virtual views")

# Create a reference map of metaphor virtual view ID -> virtual views
for virtual_view in self._virtual_views.values():
assert virtual_view.virtual_view_id
self._virtual_view_reference[virtual_view.virtual_view_id] = virtual_view

def _include_dashboard(self, dashboard_id: str) -> bool:
    """
    Check if the dashboard should be included based on the filter.

    A dashboard is included when it passes both checks:

    * the include list is empty, or the dashboard ID is in it;
    * the dashboard ID is not in the exclude list.

    Args:
        dashboard_id: ID of the dashboard being considered.

    Returns:
        True if the dashboard should be extracted, False otherwise.
    """
    included_ids = self._filter.include_dashboard_ids
    excluded_ids = self._filter.exclude_dashboard_ids

    # An empty include list means "no restriction", so only a non-empty list
    # can reject a dashboard here.
    if included_ids and dashboard_id not in included_ids:
        return False

    # Membership in an empty exclude list is always False, so no separate
    # emptiness check is needed.
    return dashboard_id not in excluded_ids

def _extract_dashboards(self) -> None:
count = 0
for dashboard in self._resources.values():
if not isinstance(dashboard, Dashboard) or dashboard.Version is None:
continue

if not dashboard.DashboardId or not self._include_dashboard(
dashboard.DashboardId
):
continue

metaphor_dashboard = self._init_dashboard(dashboard)
metaphor_dashboard.entity_upstream = self._get_dashboard_upstream(
dataset_arns=dashboard.Version.DataSetArns or []
Expand All @@ -124,9 +159,27 @@

logger.info(f"Parsed {count} dashboards")

def _include_virtual_view(self, virtual_view_id: str) -> bool:
    """
    Decide whether a virtual view belongs in the final output.

    With no dashboard filter configured, every virtual view is kept. As soon
    as either the include or the exclude list is non-empty, a virtual view is
    kept only if its ID was recorded as referenced by a dashboard.
    """
    dashboard_filter = self._filter
    filter_is_active = bool(dashboard_filter.include_dashboard_ids) or bool(
        dashboard_filter.exclude_dashboard_ids
    )

    if not filter_is_active:
        return True

    return virtual_view_id in self._referenced_virtual_views

def _make_entities_list(self) -> Collection[ENTITY_TYPES]:
    """
    Assemble the entities to emit: virtual views, dashboards, then the
    top-level folders.

    Virtual views are emitted exactly once and only when they pass
    `_include_virtual_view`, so a configured dashboard filter also limits
    the data sets in the output.
    """
    entities: List[ENTITY_TYPES] = []

    # Only include virtual views that are referenced by dashboards. A view
    # without an ID cannot be matched against the referenced set and is
    # left out.
    entities.extend(
        view
        for view in self._virtual_views.values()
        if view.virtual_view_id
        and self._include_virtual_view(view.virtual_view_id)
    )
    entities.extend(self._dashboards.values())
    entities.extend(create_top_level_folders())
    return entities
Expand All @@ -135,11 +188,14 @@
data_set_id = data_set.DataSetId
assert data_set_id

logical_id = VirtualViewLogicalID(
name=data_set_id,
type=VirtualViewType.QUICK_SIGHT,
)

view = VirtualView(
logical_id=VirtualViewLogicalID(
name=data_set_id,
type=VirtualViewType.QUICK_SIGHT,
),
logical_id=logical_id,
virtual_view_id=str(to_entity_id_from_virtual_view_logical_id(logical_id)),
structure=AssetStructure(
name=data_set.Name, directories=DATA_SET_DIRECTORIES
),
Expand Down Expand Up @@ -193,20 +249,36 @@
def _get_dashboard_upstream(
    self, dataset_arns: List[str]
) -> Optional[EntityUpstream]:
    """
    Build the upstream lineage of a dashboard from its data set ARNs.

    Each ARN is resolved to a data set ID and looked up in the known virtual
    views. Every view that is found (and has an ID) is recorded as a source
    entity and marked as referenced, together with its own upstream views.

    Args:
        dataset_arns: ARNs of the data sets used by the dashboard.

    Returns:
        An `EntityUpstream` with the de-duplicated source entity IDs, or
        None when no data set could be resolved.
    """
    source_entities: List[str] = []

    for arn in dataset_arns:
        dataset_id = get_id_from_arn(arn)
        virtual_view = self._virtual_views.get(dataset_id)
        if not virtual_view or not virtual_view.virtual_view_id:
            logger.warning(f"Virtual view not found for dataset {dataset_id}")
            continue

        # The view already carries its entity ID, so it is not recomputed
        # from the logical ID here.
        source_entities.append(virtual_view.virtual_view_id)
        self._mark_virtual_view_as_referenced(virtual_view)

    if not source_entities:
        return None

    return EntityUpstream(source_entities=unique_list(source_entities))

def _mark_virtual_view_as_referenced(self, virtual_view: VirtualView) -> None:
    """
    Mark a virtual view, and every virtual view upstream of it, as
    referenced by dashboards.

    The upstream chain is walked with an explicit worklist instead of
    recursion, so a long chain cannot exceed the interpreter's recursion
    limit. Views that are already marked are not expanded again, which also
    stops the walk on cyclic lineage.

    Args:
        virtual_view: The view to start from; it must have a
            `virtual_view_id`.
    """
    pending = [virtual_view]

    while pending:
        view = pending.pop()
        assert view.virtual_view_id
        if view.virtual_view_id in self._referenced_virtual_views:
            continue

        self._referenced_virtual_views.add(view.virtual_view_id)

        if not view.entity_upstream:
            continue

        for upstream in view.entity_upstream.source_entities or []:
            # Upstream IDs that are not known virtual views are ignored.
            upstream_view = self._virtual_view_reference.get(upstream)
            if upstream_view is not None:
                pending.append(upstream_view)
12 changes: 8 additions & 4 deletions metaphor/quick_sight/lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,15 @@ def _init_virtual_view(self, table_id: str) -> VirtualView:
if table_id in self._virtual_views:
return self._virtual_views[table_id]

logical_id = VirtualViewLogicalID(
name=table_id,
type=VirtualViewType.QUICK_SIGHT,
)
virtual_view_id = str(to_entity_id_from_virtual_view_logical_id(logical_id))
view = VirtualView(
logical_id=VirtualViewLogicalID(
name=table_id,
type=VirtualViewType.QUICK_SIGHT,
),
virtual_view_id=virtual_view_id,
logical_id=logical_id,
is_non_prod=True, # set as non-prod to indicate incomplete metadata
)

self._virtual_views[table_id] = view
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.14.178"
version = "0.14.179"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <dev@metaphor.io>"]
Expand Down
14 changes: 14 additions & 0 deletions tests/quick_sight/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
aws:
region_name: us-east-1
access_key_id: AKIAIOSFODNN7EXAMPLE
secret_access_key: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
session_token: EXAMPLE-TOKEN
aws_account_id: 123456789012
filter:
include_dashboard_ids:
- 123456789012
- 123456789013
exclude_dashboard_ids:
- 123456789014
output: {}
28 changes: 28 additions & 0 deletions tests/quick_sight/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
"VIRTUAL_VIEW~0D6860D187D8087B34F5A91C7B46E3BA"
]
},
"id": "VIRTUAL_VIEW~13EE9EB48B4EA51A49B8FBA8E27AF570",
"logicalId": {
"name": "6f516e19-84f8-4d17-9bd9-feecf1bdc346",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -193,6 +194,8 @@
"DATASET~D35648E120DEEA5750A87336C6AA2D25"
]
},
"id": "VIRTUAL_VIEW~3534AE0FAF6548F9160E55BD6863F608",
"isNonProd": true,
"logicalId": {
"name": "f3b260dc-4638-4620-91fe-36f006936052",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -329,6 +332,8 @@
"VIRTUAL_VIEW~543D1D7F1F0469635E8A454303BA2913"
]
},
"id": "VIRTUAL_VIEW~0D6860D187D8087B34F5A91C7B46E3BA",
"isNonProd": true,
"logicalId": {
"name": "2c824bdb-87ae-43ff-bb28-b31b089be133",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -437,6 +442,8 @@
"VIRTUAL_VIEW~3534AE0FAF6548F9160E55BD6863F608"
]
},
"id": "VIRTUAL_VIEW~D5A7DA553237C2E663B6037E5AA406A2",
"isNonProd": true,
"logicalId": {
"name": "ff3db8ef-966c-4631-b33f-867fdbe0c008",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -600,6 +607,7 @@
"VIRTUAL_VIEW~BE474923610C8381CA5A341A1A719DC3"
]
},
"id": "VIRTUAL_VIEW~856DB99CD9EB660911DA12D6F14499CA",
"logicalId": {
"name": "7bcddd7f-ed98-4e91-8064-9ae885f0376a",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -823,6 +831,8 @@
"DATASET~B639ACB48EEE0795206A61803BC37DDF"
]
},
"id": "VIRTUAL_VIEW~BE474923610C8381CA5A341A1A719DC3",
"isNonProd": true,
"logicalId": {
"name": "501bd127-d6a3-45ac-843c-6fee1ccb1aad",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -956,6 +966,7 @@
"VIRTUAL_VIEW~9427B43C5DB15C9C4FAD1EA70FEAE41B"
]
},
"id": "VIRTUAL_VIEW~D288BB7726CE69F66EB18C3AF1DD9FA7",
"logicalId": {
"name": "7c6a5c47-fbc7-4307-afd3-57f79864593e",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -1016,6 +1027,8 @@
"DATASET~83E8304683CD6C30CA41557A39C4DF25"
]
},
"id": "VIRTUAL_VIEW~F908650C2FC967EE56C1FFBBF061089A",
"isNonProd": true,
"logicalId": {
"name": "1b016641-23c2-4b17-ab94-c773333bc76d",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -1065,6 +1078,8 @@
"DATASET~E2EB9491F5BDD97D1591D2454917F450"
]
},
"id": "VIRTUAL_VIEW~3DC76D1D06345542B062CFB396C26AE4",
"isNonProd": true,
"logicalId": {
"name": "2a463fad-08c9-4a63-9aab-a786f1b41752",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -1126,6 +1141,8 @@
"VIRTUAL_VIEW~F908650C2FC967EE56C1FFBBF061089A"
]
},
"id": "VIRTUAL_VIEW~9427B43C5DB15C9C4FAD1EA70FEAE41B",
"isNonProd": true,
"logicalId": {
"name": "12f0dcb4-ff96-4123-8568-00781596eb37",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -1189,6 +1206,8 @@
"VIRTUAL_VIEW~3DC76D1D06345542B062CFB396C26AE4"
]
},
"id": "VIRTUAL_VIEW~8664CF3991BE407B385522A9C0AA05B3",
"isNonProd": true,
"logicalId": {
"name": "82e644be-26ce-44a0-bbc9-95cc88e16a5c",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -1326,6 +1345,7 @@
"VIRTUAL_VIEW~438A916533E05D3F1912ECA364B8945A"
]
},
"id": "VIRTUAL_VIEW~FA71319E309500C82F721FA821F6B24B",
"logicalId": {
"name": "fb1b23e7-ff1f-47b7-a04e-33b30847e9a7",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -1480,6 +1500,8 @@
"DATASET~F6D22C1B6C06D037407B74D2D708058E"
]
},
"id": "VIRTUAL_VIEW~8C883993FF150DA6E24D4B31C751CAF4",
"isNonProd": true,
"logicalId": {
"name": "48cf151b-0e89-4707-a901-871f69b22017",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -1618,6 +1640,8 @@
"DATASET~1D56ACA83CDEEDC47163BA86B7ADF5C5"
]
},
"id": "VIRTUAL_VIEW~0A83EA95986D9EE6A8BF88909EFE8975",
"isNonProd": true,
"logicalId": {
"name": "4e4695f3-6178-447b-947e-289363740a82",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -1761,6 +1785,8 @@
"VIRTUAL_VIEW~0A83EA95986D9EE6A8BF88909EFE8975"
]
},
"id": "VIRTUAL_VIEW~77FD4E77864FA104E47F9C4DF85B75FC",
"isNonProd": true,
"logicalId": {
"name": "a7b9ddbf-9fdf-48df-8952-b75b972fcc5d",
"type": "QUICK_SIGHT"
Expand Down Expand Up @@ -1895,6 +1921,8 @@
"VIRTUAL_VIEW~8C883993FF150DA6E24D4B31C751CAF4"
]
},
"id": "VIRTUAL_VIEW~438A916533E05D3F1912ECA364B8945A",
"isNonProd": true,
"logicalId": {
"name": "fb24929a-5175-4485-b934-5210fbd09dae",
"type": "QUICK_SIGHT"
Expand Down
Loading
Loading