Skip to content

Commit

Permalink
feat(GH-1083) share expiration (#1489)
Browse files Browse the repository at this point in the history
### Feature or Bugfix
- Feature
- 
### Detail

- Details mentioned in the GH issue -
#1083

### Relates
- #1083

### Security
Please answer the questions below briefly where applicable, or write
`N/A`. Based on
[OWASP 10](https://owasp.org/Top10/en/).

- Does this PR introduce or modify any input fields or queries - this
includes
fetching data from storage outside the application (e.g. a database, an
S3 bucket)? No
  - Is the input sanitized?
- What precautions are you taking before deserializing the data you
consume?
  - Is injection prevented by parametrizing queries?
  - Have you ensured no `eval` or similar functions are used?
- Does this PR introduce any functionality or component that requires
authorization? No
- How have you ensured it respects the existing AuthN/AuthZ mechanisms?
  - Are you logging failed auth attempts?
- Are you using or adding any cryptographic features? No
  - Do you use standard, proven implementations?
  - Are the used keys controlled by the customer? Where are they stored?
- Are you introducing any new policies/roles/users? Yes
  - Have you used the least-privilege principle? How? Yes


By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.

---------

Co-authored-by: trajopadhye <tejas.rajopadhye@yahooinc.com>
  • Loading branch information
TejasRGitHub and trajopadhye authored Sep 3, 2024
1 parent eb89983 commit 76a7a3e
Show file tree
Hide file tree
Showing 47 changed files with 3,823 additions and 130 deletions.
6 changes: 6 additions & 0 deletions .checkov.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,12 @@
"check_ids": [
"CKV_AWS_158"
]
},
{
"resource": "AWS::Logs::LogGroup.ECSLogGroupshareexpirationtaskdev40CB15AF",
"check_ids": [
"CKV_AWS_158"
]
}
]
},
Expand Down
30 changes: 30 additions & 0 deletions backend/dataall/base/utils/expiration_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import calendar
from datetime import date, datetime
from dateutil.relativedelta import relativedelta
from dataall.base.api import GraphQLEnumMapper


class ExpirationUtils:
    """Helpers for computing expiration dates."""

    @staticmethod
    def calculate_expiry_date(expirationPeriod, expirySetting):
        """Return the expiry datetime for the given period and setting.

        The result is midnight on the last day of the month that lies
        ``expirationPeriod`` periods ahead of today, counting the current
        month as the first one. A period is one month for the monthly
        setting and three months for the quarterly setting.

        :param expirationPeriod: number of periods (months or quarters).
        :param expirySetting: one of the ``Expiration`` enum values.
        :return: a ``datetime`` on the last day of the target month, or
            ``None`` when ``expirySetting`` is not a recognised value.
        """
        if expirySetting == Expiration.Quartely.value:
            months_per_period = 3
        elif expirySetting == Expiration.Monthly.value:
            months_per_period = 1
        else:
            return None

        # The current month counts as the first month, hence the "- 1".
        target = date.today() + relativedelta(months=expirationPeriod * months_per_period - 1)
        _, last_day = calendar.monthrange(target.year, target.month)
        return datetime(target.year, target.month, last_day)


# Enum of the expiration settings used for dataset expiration.
# Could be repurposed for environment, worksheet, etc. if need be.
# It is defined here instead of in the dataset_enums file because it is used in expiration_util.py.
class Expiration(GraphQLEnumMapper):
    Monthly = 'Monthly'
    # NOTE: the member name is misspelled ("Quartely"), but calculate_expiry_date
    # refers to Expiration.Quartely, so the name is kept. The value itself is
    # spelled correctly.
    Quartely = 'Quarterly'
10 changes: 5 additions & 5 deletions backend/dataall/modules/datasets_base/db/dataset_models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from sqlalchemy import Boolean, Column, String, Enum, ForeignKey
from sqlalchemy import Boolean, Column, String, Enum, ForeignKey, Integer
from sqlalchemy.dialects.postgresql import JSON, ARRAY
from sqlalchemy.orm import query_expression
from dataall.base.db import Base, Resource, utils
Expand All @@ -21,17 +21,17 @@ class DatasetBase(Resource, Base):
confidentiality = Column(String, nullable=False, default=ConfidentialityClassification.Unclassified.value)
tags = Column(ARRAY(String))
inProject = query_expression()

businessOwnerEmail = Column(String, nullable=True)
businessOwnerDelegationEmails = Column(ARRAY(String), nullable=True)
stewards = Column(String, nullable=True)

SamlAdminGroupName = Column(String, nullable=True)
autoApprovalEnabled = Column(Boolean, default=False)

datasetType = Column(Enum(DatasetTypes), nullable=False, default=DatasetTypes.S3)
imported = Column(Boolean, default=False)

enableExpiration = Column(Boolean, default=False, nullable=False)
expirySetting = Column(String, nullable=True)
expiryMinDuration = Column(Integer, nullable=True)
expiryMaxDuration = Column(Integer, nullable=True)
__mapper_args__ = {'polymorphic_identity': 'dataset', 'polymorphic_on': datasetType}

@classmethod
Expand Down
12 changes: 12 additions & 0 deletions backend/dataall/modules/s3_datasets/api/dataset/input_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
gql.Argument('confidentiality', gql.String),
gql.Argument(name='stewards', type=gql.String),
gql.Argument(name='autoApprovalEnabled', type=gql.Boolean),
gql.Argument(name='enableExpiration', type=gql.Boolean),
gql.Argument(name='expirySetting', type=gql.String),
gql.Argument(name='expiryMinDuration', type=gql.Integer),
gql.Argument(name='expiryMaxDuration', type=gql.Integer),
],
)

Expand All @@ -36,6 +40,10 @@
gql.Argument(name='stewards', type=gql.String),
gql.Argument('KmsAlias', gql.NonNullableType(gql.String)),
gql.Argument(name='autoApprovalEnabled', type=gql.Boolean),
gql.Argument(name='enableExpiration', type=gql.Boolean),
gql.Argument(name='expirySetting', type=gql.String),
gql.Argument(name='expiryMinDuration', type=gql.Integer),
gql.Argument(name='expiryMaxDuration', type=gql.Integer),
],
)

Expand Down Expand Up @@ -71,6 +79,10 @@
gql.Argument('confidentiality', gql.String),
gql.Argument(name='stewards', type=gql.String),
gql.Argument(name='autoApprovalEnabled', type=gql.Boolean),
gql.Argument(name='enableExpiration', type=gql.Boolean),
gql.Argument(name='expirySetting', type=gql.String),
gql.Argument(name='expiryMinDuration', type=gql.Integer),
gql.Argument(name='expiryMaxDuration', type=gql.Integer),
],
)

Expand Down
32 changes: 32 additions & 0 deletions backend/dataall/modules/s3_datasets/api/dataset/resolvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from dataall.base.api.context import Context
from dataall.base.feature_toggle_checker import is_feature_enabled
from dataall.base.utils.expiration_util import Expiration
from dataall.core.stacks.services.stack_service import StackService
from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository
from dataall.core.environment.services.environment_service import EnvironmentService
Expand Down Expand Up @@ -101,6 +102,8 @@ def get_dataset_stewards_group(context, source: S3Dataset, **kwargs):


def update_dataset(context, source, datasetUri: str = None, input: dict = None):
    """Resolve a dataset update.

    When the input enables expiration, its expiration fields are validated
    first; the update itself is delegated to ``DatasetService.update_dataset``.
    """
    wants_expiration = input.get('enableExpiration', False)
    if wants_expiration:
        RequestValidator.validate_share_expiration_request(input)
    updated = DatasetService.update_dataset(uri=datasetUri, data=input)
    return updated


Expand Down Expand Up @@ -167,6 +170,35 @@ def validate_creation_request(data):
ConfidentialityClassification.validate_confidentiality_level(data.get('confidentiality', ''))
if len(data['label']) > 52:
raise InvalidInput('Dataset name', data['label'], 'less than 52 characters')
if data.get('enableExpiration', False):
RequestValidator.validate_share_expiration_request(data)

@staticmethod
def validate_share_expiration_request(data):
    """Validate the share-expiration fields of a dataset request.

    Checks, in order, that:
      * ``expiryMinDuration`` and ``expiryMaxDuration`` are both integers,
      * both durations are strictly greater than zero,
      * the minimum duration does not exceed the maximum duration,
      * ``expirySetting`` is one of the ``Expiration`` enum values.

    :param data: request input dict carrying the expiration fields.
    :raises InvalidInput: on the first check that fails.
    """
    min_duration = data.get('expiryMinDuration')
    max_duration = data.get('expiryMaxDuration')

    if not isinstance(min_duration, int) or not isinstance(max_duration, int):
        raise InvalidInput(
            'Expiration durations (Minimum and Maximum)',
            '',
            'must be valid integers',
        )
    # Zero is rejected as well as negatives: the error message already says
    # "greater than zero", and ExpirationUtils.calculate_expiry_date uses
    # (period - 1) months, so a zero period resolves to the end of the
    # previous month, i.e. a date in the past.
    if min_duration <= 0 or max_duration <= 0:
        raise InvalidInput(
            'expiration duration ',
            '',
            'must be greater than zero',
        )
    if min_duration > max_duration:
        raise InvalidInput(
            'Minimum expiration duration ',
            min_duration,
            f'cannot be greater than max expiration {max_duration}',
        )

    expiry_setting = data.get('expirySetting')
    if expiry_setting not in [item.value for item in Expiration]:
        raise InvalidInput(
            'Expiration Setting',
            expiry_setting,
            'is of invalid type',
        )

@staticmethod
def validate_import_request(data):
Expand Down
4 changes: 4 additions & 0 deletions backend/dataall/modules/s3_datasets/api/dataset/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@
gql.Field(name='language', type=gql.Ref('Language')),
gql.Field(name='stack', type=gql.Ref('Stack'), resolver=resolve_dataset_stack),
gql.Field(name='autoApprovalEnabled', type=gql.Boolean),
gql.Field(name='enableExpiration', type=gql.Boolean),
gql.Field(name='expirySetting', type=gql.String),
gql.Field(name='expiryMinDuration', type=gql.Integer),
gql.Field(name='expiryMaxDuration', type=gql.Integer),
],
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
NamingConventionService,
NamingConventionPattern,
)
from dataall.modules.shares_base.db.share_object_models import ShareObject
from dataall.modules.shares_base.services.share_object_service import ShareObjectService

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -46,6 +48,10 @@ def build_dataset(cls, username: str, env: Environment, data: dict) -> S3Dataset
businessOwnerDelegationEmails=data.get('businessOwnerDelegationEmails', []),
stewards=data.get('stewards') if data.get('stewards') else data['SamlAdminGroupName'],
autoApprovalEnabled=data.get('autoApprovalEnabled', False),
enableExpiration=data.get('enableExpiration', False),
expirySetting=data.get('expirySetting'),
expiryMinDuration=data.get('expiryMinDuration'),
expiryMaxDuration=data.get('expiryMaxDuration'),
)

cls._set_import_data(dataset, data)
Expand Down
13 changes: 12 additions & 1 deletion backend/dataall/modules/s3_datasets/services/dataset_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
import json
import logging
from typing import List
from dataall.core.resource_lock.db.resource_lock_repositories import ResourceLockRepository
from dataall.base.aws.quicksight import QuicksightClient
from dataall.base.db import exceptions
from dataall.base.utils.naming_convention import NamingConventionPattern, NamingConventionService
from dataall.base.utils.expiration_util import ExpirationUtils
from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService
from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService
from dataall.core.stacks.services.stack_service import StackService
Expand All @@ -21,6 +21,8 @@
from dataall.core.tasks.db.task_models import Task
from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository
from dataall.modules.s3_datasets.db.dataset_bucket_repositories import DatasetBucketRepository
from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository
from dataall.modules.shares_base.services.share_object_service import ShareObjectService
from dataall.modules.vote.db.vote_repositories import VoteRepository
from dataall.modules.s3_datasets.aws.glue_dataset_client import DatasetCrawler
from dataall.modules.s3_datasets.aws.s3_dataset_client import S3DatasetClient
Expand Down Expand Up @@ -285,6 +287,15 @@ def update_dataset(uri: str, data: dict):
if k not in ['stewards', 'KmsAlias']:
setattr(dataset, k, data.get(k))

ShareObjectRepository.update_dataset_shares_expiration(
session=session,
enabledExpiration=dataset.enableExpiration,
datasetUri=dataset.datasetUri,
expirationDate=ExpirationUtils.calculate_expiry_date(
expirationPeriod=dataset.expiryMinDuration, expirySetting=dataset.expirySetting
),
)

if data.get('KmsAlias') not in ['Undefined'] and data.get('KmsAlias') != dataset.KmsAlias:
dataset.KmsAlias = 'SSE-S3' if data.get('KmsAlias') == '' else data.get('KmsAlias')
dataset.importedKmsKey = False if data.get('KmsAlias') == '' else True
Expand Down
2 changes: 2 additions & 0 deletions backend/dataall/modules/shares_base/api/input_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
gql.Argument(name='requestPurpose', type=gql.String),
gql.Argument(name='attachMissingPolicies', type=gql.Boolean),
gql.Argument(name='permissions', type=gql.ArrayType(ShareObjectDataPermission.toGraphQLEnum())),
gql.Argument(name='shareExpirationPeriod', type=gql.Integer),
gql.Argument(name='nonExpirable', type=gql.Boolean),
],
)

Expand Down
52 changes: 52 additions & 0 deletions backend/dataall/modules/shares_base/api/mutations.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
verify_items_share_object,
update_filters_table_share_item,
remove_filters_table_share_item,
update_share_extension_purpose,
update_share_expiration_period,
submit_share_extension,
approve_share_object_extension,
cancel_share_object_extension,
)

createShareObject = gql.MutationField(
Expand All @@ -35,6 +40,13 @@
type=gql.Boolean,
)

# GraphQL mutation `cancelShareExtension`.
# Takes a non-nullable `shareUri` string, is resolved by
# `cancel_share_object_extension`, and returns a Boolean.
cancelShareExtension = gql.MutationField(
    name='cancelShareExtension',
    args=[gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String))],
    resolver=cancel_share_object_extension,
    type=gql.Boolean,
)

addSharedItem = gql.MutationField(
name='addSharedItem',
args=[
Expand All @@ -60,13 +72,32 @@
resolver=submit_share_object,
)

# GraphQL mutation `submitShareExtension`.
# Resolved by `submit_share_extension`; returns a `ShareObject`.
submitShareExtension = gql.MutationField(
    name='submitShareExtension',
    args=[
        # Only `shareUri` is non-nullable; the remaining arguments are optional.
        gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String)),
        gql.Argument(name='expiration', type=gql.Integer),
        gql.Argument(name='extensionReason', type=gql.String),
        gql.Argument(name='nonExpirable', type=gql.Boolean),
    ],
    type=gql.Ref('ShareObject'),
    resolver=submit_share_extension,
)

# GraphQL mutation `approveShareObject`.
# Takes a non-nullable `shareUri` string, is resolved by
# `approve_share_object`, and returns a `ShareObject`.
approveShareObject = gql.MutationField(
    name='approveShareObject',
    args=[gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String))],
    type=gql.Ref('ShareObject'),
    resolver=approve_share_object,
)

# GraphQL mutation `approveShareExtension`.
# Takes a non-nullable `shareUri` string, is resolved by
# `approve_share_object_extension`, and returns a `ShareObject`.
approveShareExtension = gql.MutationField(
    name='approveShareExtension',
    args=[gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String))],
    type=gql.Ref('ShareObject'),
    resolver=approve_share_object_extension,
)


rejectShareObject = gql.MutationField(
name='rejectShareObject',
Expand Down Expand Up @@ -109,6 +140,27 @@
resolver=update_share_reject_purpose,
)

# GraphQL mutation `updateShareExpirationPeriod`.
# Resolved by `update_share_expiration_period`; returns a Boolean.
updateShareExpirationPeriod = gql.MutationField(
    name='updateShareExpirationPeriod',
    args=[
        # Only `shareUri` is non-nullable; `expiration` and `nonExpirable` are optional.
        gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String)),
        gql.Argument(name='expiration', type=gql.Integer),
        gql.Argument(name='nonExpirable', type=gql.Boolean),
    ],
    type=gql.Boolean,
    resolver=update_share_expiration_period,
)

# GraphQL mutation `updateShareExtensionReason`.
# Resolved by `update_share_extension_purpose`; returns a Boolean.
updateShareExtensionReason = gql.MutationField(
    name='updateShareExtensionReason',
    args=[
        gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String)),
        # NOTE(review): the argument is called `extensionPurpose` here, whereas
        # the mutation name and `submitShareExtension` use "Reason"
        # (`extensionReason`) - confirm the naming difference is intentional.
        gql.Argument(name='extensionPurpose', type=gql.String),
    ],
    type=gql.Boolean,
    resolver=update_share_extension_purpose,
)

updateShareRequestReason = gql.MutationField(
name='updateShareRequestReason',
args=[
Expand Down
Loading

0 comments on commit 76a7a3e

Please sign in to comment.