Skip to content
12 changes: 12 additions & 0 deletions backend/dataall/modules/s3_datasets/db/dataset_repositories.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,18 @@ def list_all_datasets(session) -> [S3Dataset]:
def list_all_active_datasets(session) -> [S3Dataset]:
    """Return every S3 dataset that has not been soft-deleted (deleted is NULL)."""
    active = session.query(S3Dataset).filter(S3Dataset.deleted.is_(None))
    return active.all()

@staticmethod
def list_all_active_datasets_with_glue_db(session, glue_db_name: str) -> [S3Dataset]:
    """List all active (non-deleted) S3 datasets that use the given Glue database name.

    Matches datasets across every environment: the _shared database created in a
    consumer's account is a common resource that can be modified concurrently,
    causing contention and potential overrides.
    See https://github.com/data-dot-all/dataall/issues/1633 for more details.
    """
    same_glue_db = and_(
        S3Dataset.deleted.is_(None),
        S3Dataset.GlueDatabaseName == glue_db_name,
    )
    return session.query(S3Dataset).filter(same_glue_db).all()

@staticmethod
def get_dataset_by_bucket_name(session, bucket) -> S3Dataset:
    """Return the first S3 dataset whose S3BucketName equals *bucket*, or None.

    Fix: the previous annotation `-> [S3Dataset]` claimed a list, but `.first()`
    returns a single S3Dataset instance (or None when no row matches).
    """
    return session.query(S3Dataset).filter(S3Dataset.S3BucketName == bucket).first()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,24 +149,35 @@ def check_other_approved_share_item_table_exists(session, environment_uri, item_
return query.first()

@staticmethod
def check_existing_shares_on_items_for_principal(session, item_type, principal, database):
    """Return True if any share for *principal* on Glue database *database* has at
    least one item of *item_type* in a shared state, else False.

    Fixes applied:
    - Reconstructed the method cleanly (the source span interleaved removed and
      added diff lines and was not valid Python as pasted).
    - Existence check now uses `.first()` instead of `.all()`: we only need to
      know whether at least one matching row exists, not materialize every row.
    """
    shares: List[ShareObject] = S3ShareObjectRepository.get_shares_for_principal_and_database(
        session=session, principal=principal, database=database
    )
    share_item_shared_states = ShareStatusRepository.get_share_item_shared_states()
    for share in shares:
        # Probe for a single item of the requested type in a shared state.
        shared_item = (
            session.query(ShareObjectItem)
            .filter(
                and_(
                    ShareObjectItem.shareUri == share.shareUri,
                    ShareObjectItem.itemType == item_type,
                    ShareObjectItem.status.in_(share_item_shared_states),
                )
            )
            .first()
        )
        if shared_item:
            return True
    return False

@staticmethod
def get_shares_for_principal_and_database(session, principal, database):
    """Build (without executing) a query for share objects whose dataset uses the
    given Glue database name and whose principal role name matches *principal*.
    """
    query = session.query(ShareObject).join(
        S3Dataset, S3Dataset.datasetUri == ShareObject.datasetUri
    )
    matches = and_(
        S3Dataset.GlueDatabaseName == database,
        ShareObject.principalRoleName == principal,
    )
    return query.filter(matches)

@staticmethod
def query_dataset_tables_shared_with_env(
session, environment_uri: str, dataset_uri: str, username: str, groups: [str]
Expand Down Expand Up @@ -298,3 +309,23 @@ def get_approved_share_object(session, item):
.first()
)
return share_object

@staticmethod
def list_dataset_shares_on_database(
    session, dataset_uri, share_item_shared_states, item_type, database
) -> [ShareObject]:
    # Lists non-deleted share objects that have at least one item of `item_type`
    # in one of `share_item_shared_states`, joined against datasets on the given
    # Glue database name.
    #
    # NOTE(review): the join condition `S3Dataset.datasetUri == dataset_uri`
    # compares the dataset table to the literal parameter instead of relating
    # S3Dataset to ShareObject (e.g. `S3Dataset.datasetUri == ShareObject.datasetUri`).
    # As written this effectively cross-joins every matching ShareObject with the
    # single dataset row identified by `dataset_uri` — confirm this is intended.
    query = (
        session.query(ShareObject)
        .join(ShareObjectItem, ShareObjectItem.shareUri == ShareObject.shareUri)
        .join(S3Dataset, S3Dataset.datasetUri == dataset_uri)
        .filter(
            and_(
                S3Dataset.GlueDatabaseName == database,
                ShareObject.deleted.is_(None),
                ShareObjectItem.status.in_(share_item_shared_states),
                ShareObjectItem.itemType == item_type,
            )
        )
    )

    return query.all()
Loading
Loading