Add Dataset integration tests - S3 Share requests (#1389)
### Feature or Bugfix
- Tests

### Detail
- module `share_base`
- bugfix: `delete_env` requires `env_object`, not `envUri`
- TEMPORARY: hardcoded dataset_uri --> waiting for the dataset module

### Relates
- #1376 

### Security
Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/).

- Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)?
  - Is the input sanitized?
- What precautions are you taking before deserializing the data you consume?
  - Is injection prevented by parametrizing queries?
  - Have you ensured no `eval` or similar functions are used?
- Does this PR introduce any functionality or component that requires authorization?
  - How have you ensured it respects the existing AuthN/AuthZ mechanisms?
  - Are you logging failed auth attempts?
- Are you using or adding any cryptographic features?
  - Do you use standard, proven implementations?
  - Are the used keys controlled by the customer? Where are they stored?
- Are you introducing any new policies/roles/users?
  - Have you used the least-privilege principle? How?


By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.

---------

Co-authored-by: dlpzx <dlpzx@amazon.com>
Co-authored-by: Noah Paige <noahpaig@amazon.com>
Co-authored-by: Sofia Sazonova <sazonova@amazon.co.uk>
4 people authored Sep 25, 2024
1 parent 075b43c commit 2005863
Showing 24 changed files with 1,232 additions and 76 deletions.
8 changes: 8 additions & 0 deletions backend/dataall/core/environment/cdk/environment_stack.py
@@ -654,3 +654,11 @@ def create_integration_tests_role(self):
resources=[f'arn:aws:cloudformation:*:{self.account}:stack/*/*'],
),
)

self.test_role.add_to_policy(
iam.PolicyStatement(
actions=['iam:GetRole', 'iam:CreateRole', 'iam:PutRolePolicy'],
effect=iam.Effect.ALLOW,
resources=[f'arn:aws:iam::{self.account}:role/dataall-test-*'],
),
)
@@ -453,10 +453,14 @@ def manage_access_point_and_policy(self):
not s3_client.get_bucket_access_point_arn(self.access_point_name)
and retries < ACCESS_POINT_CREATION_RETRIES
):
logger.info(
f'Attempt {retries}. Waiting {ACCESS_POINT_CREATION_TIME * sleep_coeff}s for access point creation to complete..'
)
time.sleep(ACCESS_POINT_CREATION_TIME * sleep_coeff)
sleep_coeff = sleep_coeff * ACCESS_POINT_BACKOFF_COEFFICIENT
retries += 1
if not s3_client.get_bucket_access_point_arn(self.access_point_name):
raise Exception(f'Failed to create access point {self.access_point_name}')
existing_policy = s3_client.get_access_point_policy(self.access_point_name)
# requester will use this role to access resources
target_requester_id = SessionHelper.get_role_id(
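
The hunk above replaces a fixed 30-second wait with an exponential backoff while polling for the access point. As a minimal standalone sketch of the pattern (the constant names mirror the diff, but their values and the `get_arn` callable are assumptions, not the project's actual code):

```
import time

ACCESS_POINT_CREATION_TIME = 30        # base wait in seconds (assumed value)
ACCESS_POINT_CREATION_RETRIES = 5      # assumed retry budget
ACCESS_POINT_BACKOFF_COEFFICIENT = 2   # assumed backoff multiplier


def wait_for_access_point(get_arn, access_point_name):
    """Poll get_arn(name) with exponential backoff until it returns a value or the retries run out."""
    retries, sleep_coeff = 0, 1
    while not get_arn(access_point_name) and retries < ACCESS_POINT_CREATION_RETRIES:
        wait_seconds = ACCESS_POINT_CREATION_TIME * sleep_coeff
        print(f'Attempt {retries}. Waiting {wait_seconds}s for access point creation to complete..')
        time.sleep(wait_seconds)
        sleep_coeff *= ACCESS_POINT_BACKOFF_COEFFICIENT
        retries += 1
    if not get_arn(access_point_name):
        raise Exception(f'Failed to create access point {access_point_name}')
```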
4 changes: 2 additions & 2 deletions tests/modules/s3_datasets_shares/test_share.py
@@ -427,7 +427,7 @@ def share3_processed(
def share3_item_shared(
share_item: typing.Callable, share3_processed: ShareObject, table1: DatasetTable
) -> ShareObjectItem:
# Cleaned up with share3_happy_path
yield share_item(share=share3_processed, table=table1, status=ShareItemStatus.Share_Succeeded.value)


@@ -608,7 +608,7 @@ def share4_draft(
def share3_item_shared_unhealthy(
share_item: typing.Callable, share3_processed: ShareObject, table1_1: DatasetTable
) -> ShareObjectItem:
# Cleaned up with share3_happy_path
yield share_item(
share=share3_processed,
table=table1_1,
13 changes: 13 additions & 0 deletions tests_new/clean_up_s3.sh
@@ -0,0 +1,13 @@
# split the bucket names (one per line) into a bash array; the original `${(@f)...}` expansion is zsh-only
mapfile -t array < <(aws s3api list-buckets --query 'Buckets[?contains(Name, `session`) == `true`].[Name]' --output text)
for YOUR_BUCKET in "${array[@]}"
do

aws s3api delete-objects --bucket ${YOUR_BUCKET} \
--delete "$(aws s3api list-object-versions --bucket ${YOUR_BUCKET} --query='{Objects: Versions[].{Key:Key,VersionId:VersionId}}')"

aws s3api delete-objects --bucket ${YOUR_BUCKET} \
--delete "$(aws s3api list-object-versions --bucket ${YOUR_BUCKET} --query='{Objects: DeleteMarkers[].{Key:Key,VersionId:VersionId}}')"

aws s3api delete-bucket --bucket ${YOUR_BUCKET}
done
131 changes: 69 additions & 62 deletions tests_new/integration_tests/README.md
@@ -12,68 +12,75 @@ Currently **we support only Cognito-based deployments**, but support for any IdP is under way

- A real deployment of data.all in AWS.
- For this deployment the `cdk.json` flag `enable_pivot_role_auto_create` must be set to `true`.
- For this deployment the `config.json` flag `cdk_pivot_role_multiple_environments_same_account` must be set to `true` if an AWS account is going to be reused for multiple environments.
- A second test account is bootstrapped, and the first account is added to its trust policy in the target regions:
```cdk bootstrap --trust <first-account-id> -c @aws-cdk/core:newStyleStackSynthesis=true --cloudformation-execution-policies arn:aws:iam::aws:policy/AdministratorAccess aws://<second-account-id>/region```
- An SSM parameter (`/dataall/{env_name}/testdata`) in the DEPLOYMENT ACCOUNT with the following contents
```
{
"users": {
"testUserTenant": {
"username": "testUserTenant",
"password": "...",
"groups": [
"DAAdministrators"
]
},
"testUser1": {
"username": "testUser1",
"password": "...",
"groups": [
"testGroup1"
]
},
"testUser2": {
"username": "testUser2",
"password": "...",
"groups": [
"testGroup2"
]
},
"testUser3": {
"username": "testUser3",
"password": "...",
"groups": [
"testGroup3"
]
},
"testUser4": {
"username": "testUser4",
"password": "...",
"groups": [
"testGroup4"
]
}
},
"envs": {
"session_env1": {
"accountId": "...",
"region": "eu-central-1"
},
"session_env2": {
"accountId": "...",
"region": "eu-west-1"
},
"persistent_env1": {
"accountId": "...",
"region": "us-east-1"
},
"session_cross_acc_env_1": {
"accountId": "another acc",
"region": "same as session_env1"
},
},
"dashboards": {
"session_env1": {
"dashboardId": "..."
},
}
}
```

- The pipeline will create the users/groups
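
For orientation, a minimal sketch of how a test session could read the `/dataall/{env_name}/testdata` parameter above with boto3 (the environment-variable handling here is an assumption for illustration, not the project's actual fixture code):

```
import json
import os

import boto3

env_name = os.environ.get('ENVNAME', 'dev')
ssm = boto3.client('ssm', region_name=os.environ.get('AWS_REGION', 'us-east-1'))
testdata = json.loads(
    ssm.get_parameter(Name=f'/dataall/{env_name}/testdata', WithDecryption=True)['Parameter']['Value']
)

user1 = testdata['users']['testUser1']            # {'username': ..., 'password': ..., 'groups': [...]}
session_env1 = testdata['envs']['session_env1']   # {'accountId': ..., 'region': ...}
```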

## Run tests
37 changes: 37 additions & 0 deletions tests_new/integration_tests/aws_clients/athena.py
@@ -0,0 +1,37 @@
import time
from tests_new.integration_tests.utils import poller


class AthenaClient:
def __init__(self, session, region):
self._client = session.client('athena', region_name=region)
self._region = region

def _run_query(self, query, workgroup='primary', output_location=None):
if output_location:
result = self._client.start_query_execution(
QueryString=query, ResultConfiguration={'OutputLocation': output_location}
)
else:
result = self._client.start_query_execution(QueryString=query, WorkGroup=workgroup)
return result['QueryExecutionId']

@poller(check_success=lambda state: state not in ['QUEUED', 'RUNNING'], timeout=600, sleep_time=5)
def _wait_for_query(self, query_id):
result = self._client.get_query_execution(QueryExecutionId=query_id)
return result['QueryExecution']['Status']['State']

def execute_query(self, query, workgroup='primary', output_location=None):
q_id = self._run_query(query, workgroup, output_location)
return self._wait_for_query(q_id)

def list_work_groups(self):
result = self._client.list_work_groups()
return [x['Name'] for x in result['WorkGroups']]

def get_env_work_group(self, env_name):
workgroups = self.list_work_groups()
for workgroup in workgroups:
if env_name in workgroup:
return workgroup
return workgroups[0] if workgroups else None
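
As an illustration only (not code from this PR), a test could drive the client like this, assuming the `poller` decorator returns the last polled state and the workgroup has a query result location configured:

```
import boto3

session = boto3.Session()  # or a session obtained via StsClient.get_role_session(...)
athena = AthenaClient(session, 'eu-central-1')

workgroup = athena.get_env_work_group('session_env1')          # picks the env-specific workgroup if one exists
state = athena.execute_query('SELECT 1', workgroup=workgroup)  # final state: SUCCEEDED, FAILED or CANCELLED
assert state == 'SUCCEEDED'
```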
96 changes: 96 additions & 0 deletions tests_new/integration_tests/aws_clients/iam.py
@@ -0,0 +1,96 @@
import json
import logging
import os

import boto3

from dataall.base.aws.parameter_store import ParameterStoreManager

log = logging.getLogger(__name__)


class IAMClient:
def __init__(self, session=boto3.Session(), region=os.environ.get('AWS_REGION', 'us-east-1')):
self._client = session.client('iam', region_name=region)
self._resource = session.resource('iam', region_name=region)
self._region = region

def get_role(self, role_name):
try:
role = self._client.get_role(RoleName=role_name)
return role
except Exception as e:
log.info(f'Error occurred: {e}')
return None

@staticmethod
def get_tooling_account_id():
session = boto3.Session()
param_client = session.client('ssm', os.environ.get('AWS_REGION', 'us-east-1'))
parameter_path = f"/dataall/{os.environ.get('ENVNAME', 'dev')}/toolingAccount"
toolingAccount = param_client.get_parameter(Name=parameter_path)['Parameter']['Value']
return toolingAccount

def create_role(self, account_id, role_name, test_role_name):
policy_doc = {
'Version': '2012-10-17',
'Statement': [
{
'Effect': 'Allow',
'Principal': {
'AWS': [
f'arn:aws:iam::{account_id}:root',
f'arn:aws:iam::{IAMClient.get_tooling_account_id()}:root',
f'arn:aws:sts::{account_id}:assumed-role/{test_role_name}/{test_role_name}',
]
},
'Action': 'sts:AssumeRole',
'Condition': {},
}
],
}
try:
role = self._client.create_role(
RoleName=role_name,
AssumeRolePolicyDocument=json.dumps(policy_doc),
Description='Role for Lambda function',
)
return role
except Exception as e:
log.error(e)
raise e

def create_role_if_not_exists(self, account_id, role_name, test_role_name):
role = self.get_role(role_name)
if role is None:
role = self.create_role(account_id, role_name, test_role_name)
return role

def get_consumption_role(self, account_id, role_name, test_role_name):
role = self.get_role(role_name)
if role is None:
role = self.create_role(account_id, role_name, test_role_name)
self.put_consumption_role_policy(role_name)
return role

def put_consumption_role_policy(self, role_name):
self._client.put_role_policy(
RoleName=role_name,
PolicyName='ConsumptionPolicy',
PolicyDocument="""{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "VisualEditor0",
"Effect": "Allow",
"Action": [
"s3:*",
"athena:*",
"glue:*",
"lakeformation:GetDataAccess"
],
"Resource": "*"
}
]
}""",
)
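
A hypothetical sketch of provisioning a consumption role for the share tests with this client (the account id and role names are placeholders; the `dataall-test-*` prefix matches the IAM permissions added to the integration-test role above):

```
import boto3

iam_client = IAMClient(session=boto3.Session(), region='eu-central-1')

# Create (or reuse) a role trusted by the environment account, the tooling account and the test role,
# then attach the broad S3/Athena/Glue/Lake Formation consumption policy defined above.
role = iam_client.get_consumption_role(
    account_id='111111111111',                        # placeholder environment account id
    role_name='dataall-test-consumption-role',        # placeholder, within the dataall-test-* pattern
    test_role_name='dataall-integration-tests-role',  # placeholder integration-test role name
)
print(role['Role']['Arn'])
```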
19 changes: 19 additions & 0 deletions tests_new/integration_tests/aws_clients/sts.py
@@ -0,0 +1,19 @@
import os

import boto3


class StsClient:
def __init__(self, session=boto3.Session(), region=os.environ.get('AWS_REGION', 'us-east-1')):
self._client = session.client('sts', region_name=region)
self._region = region

def get_role_session(self, role_arn):
assumed_role_object = self._client.assume_role(RoleArn=role_arn, RoleSessionName='AssumeRole')
credentials = assumed_role_object['Credentials']

return boto3.Session(
aws_access_key_id=credentials['AccessKeyId'],
aws_secret_access_key=credentials['SecretAccessKey'],
aws_session_token=credentials['SessionToken'],
)
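
And a minimal sketch of chaining it with the other helpers (the role ARN is a placeholder):

```
sts = StsClient(region='eu-west-1')

# assume the remote role, then build clients in that account from the returned session
assumed_session = sts.get_role_session('arn:aws:iam::111111111111:role/dataall-test-consumption-role')
s3 = assumed_session.client('s3', region_name='eu-west-1')
print([bucket['Name'] for bucket in s3.list_buckets()['Buckets']])
```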
18 changes: 18 additions & 0 deletions tests_new/integration_tests/conftest.py
@@ -91,6 +91,12 @@ def user4(userdata):
yield userdata['testUser4']


@pytest.fixture(scope='session', autouse=True)
def user5(userdata):
# Existing user with name and password
yield userdata['testUser5']


@pytest.fixture(scope='session', autouse=True)
def group1():
# Existing Cognito group with name testGroup1
@@ -119,6 +125,13 @@ def group4():
yield 'testGroup4'


@pytest.fixture(scope='session', autouse=True)
def group5():
# Existing Cognito group with name testGroup5
# Add user5
yield 'testGroup5'


@pytest.fixture(scope='session')
def client1(user1) -> Client:
yield Client(user1.username, user1.password)
@@ -139,6 +152,11 @@ def client4(user4) -> Client:
yield Client(user4.username, user4.password)


@pytest.fixture(scope='session')
def client5(user5) -> Client:
yield Client(user5.username, user5.password)


@pytest.fixture(scope='session')
def clientTenant(userTenant) -> Client:
yield Client(userTenant.username, userTenant.password)
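
Purely as an illustration (not a test from this PR), the new session-scoped fixtures are consumed like the existing ones:

```
# hypothetical test showing injection of the new client5/group5 pair
def test_client5_is_authenticated(client5, group5):
    assert group5 == 'testGroup5'
    assert client5 is not None  # Client(user5.username, user5.password) built in conftest.py
```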