Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bugfix] - Changes in logic to delete share db #1706

Merged
merged 11 commits into from
Feb 4, 2025
12 changes: 12 additions & 0 deletions backend/dataall/modules/s3_datasets/db/dataset_repositories.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,18 @@ def list_all_datasets(session) -> [S3Dataset]:
def list_all_active_datasets(session) -> [S3Dataset]:
    """Return every S3Dataset that has not been soft-deleted (``deleted`` is NULL)."""
    not_deleted = S3Dataset.deleted.is_(None)
    return session.query(S3Dataset).filter(not_deleted).all()

@staticmethod
def list_all_active_datasets_with_glue_db(session, glue_db_name: str) -> [S3Dataset]:
    """
    List all non-deleted S3 datasets that use the given Glue database name,
    irrespective of the environment they belong to.

    Datasets in different environments can point at the same Glue database:
    the ``_shared`` database created in a consumer's account is a common
    resource which can be modified concurrently, causing contention and
    potential overrides.
    See https://github.com/data-dot-all/dataall/issues/1633 for details.

    :param session: SQLAlchemy session
    :param glue_db_name: Glue database name to match against S3Dataset.GlueDatabaseName
    :return: list of matching, non-deleted S3Dataset records
    """
    return (
        session.query(S3Dataset)
        .filter(and_(S3Dataset.deleted.is_(None), S3Dataset.GlueDatabaseName == glue_db_name))
        .all()
    )

@staticmethod
def get_dataset_by_bucket_name(session, bucket) -> [S3Dataset]:
    # NOTE(review): despite the list-style annotation, .first() returns a single
    # S3Dataset or None — callers should treat the result as optional.
    """Return the first S3Dataset whose S3BucketName equals *bucket*, or None if there is no match."""
    return session.query(S3Dataset).filter(S3Dataset.S3BucketName == bucket).first()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,24 +149,35 @@ def check_other_approved_share_item_table_exists(session, environment_uri, item_
return query.first()

@staticmethod
def check_existing_shares_on_items_for_principal(session, item_type, principal, database):
    """
    Return True if any share object granting *principal* access to the Glue
    database *database* still contains an item of *item_type* in a shared state.

    Used to decide whether the consumer-side shared database can be safely
    deleted: as long as one share for this principal/database pair has an
    active shared item, the database must be kept.

    :param session: SQLAlchemy session
    :param item_type: ShareObjectItem.itemType value to look for
    :param principal: principal role name the shares were granted to
    :param database: Glue database name of the shared datasets
    :return: True if at least one shared item of the given type exists, else False
    """
    shares: List[ShareObject] = S3ShareObjectRepository.get_shares_for_principal_and_database(
        session=session, principal=principal, database=database
    )
    share_item_shared_states = ShareStatusRepository.get_share_item_shared_states()
    for share in shares:
        # Existence check only: .first() short-circuits instead of loading
        # every matching row with .all().
        existing_item = (
            session.query(ShareObjectItem)
            .filter(
                and_(
                    ShareObjectItem.shareUri == share.shareUri,
                    ShareObjectItem.itemType == item_type,
                    ShareObjectItem.status.in_(share_item_shared_states),
                )
            )
            .first()
        )
        if existing_item is not None:
            return True
    return False

@staticmethod
def get_shares_for_principal_and_database(session, principal, database):
    """
    Build a query for all share objects whose dataset uses the Glue database
    *database* and whose principal role name matches *principal*.

    Note: returns the lazy SQLAlchemy query object, not a materialized list —
    callers may iterate it or refine it further.
    """
    matches_principal_and_db = and_(
        S3Dataset.GlueDatabaseName == database,
        ShareObject.principalRoleName == principal,
    )
    shares_with_dataset = session.query(ShareObject).join(
        S3Dataset, S3Dataset.datasetUri == ShareObject.datasetUri
    )
    return shares_with_dataset.filter(matches_principal_and_db)

@staticmethod
def query_dataset_tables_shared_with_env(
session, environment_uri: str, dataset_uri: str, username: str, groups: [str]
Expand Down Expand Up @@ -298,3 +309,23 @@ def get_approved_share_object(session, item):
.first()
)
return share_object

@staticmethod
def list_dataset_shares_on_database(
    session, dataset_uri, share_item_shared_states, item_type, database
) -> [ShareObject]:
    """
    List the non-deleted share objects of dataset *dataset_uri* that still have
    an item of *item_type* in a shared state on the Glue database *database*.

    :param session: SQLAlchemy session
    :param dataset_uri: uri of the dataset whose shares are inspected
    :param share_item_shared_states: collection of ShareObjectItem statuses considered "shared"
    :param item_type: ShareObjectItem.itemType value to match
    :param database: Glue database name the dataset must use
    :return: list of matching ShareObject records
    """
    query = (
        session.query(ShareObject)
        .join(ShareObjectItem, ShareObjectItem.shareUri == ShareObject.shareUri)
        # NOTE(review): this join condition compares S3Dataset.datasetUri to the
        # *parameter* dataset_uri rather than to ShareObject.datasetUri, i.e. it
        # pins the join to the single dataset identified by dataset_uri. Confirm
        # this is intentional and not a missing column-to-column relation.
        .join(S3Dataset, S3Dataset.datasetUri == dataset_uri)
        .filter(
            and_(
                S3Dataset.GlueDatabaseName == database,
                ShareObject.deleted.is_(None),
                ShareObjectItem.status.in_(share_item_shared_states),
                ShareObjectItem.itemType == item_type,
            )
        )
    )

    return query.all()
Loading
Loading