Add Dataset integration tests - S3 Share requests (#1389)
### Feature or Bugfix
- Tests

### Detail
- module `share_base`
- bugfix: `delete_env` requires `env_object`, not `envUri`
- TEMPORARY: hardcoded dataset_uri --> waiting for the dataset module

### Relates
- #1376 

### Security
Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/).

- Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)?
  - Is the input sanitized?
- What precautions are you taking before deserializing the data you consume?
  - Is injection prevented by parametrizing queries?
  - Have you ensured no `eval` or similar functions are used?
- Does this PR introduce any functionality or component that requires authorization?
  - How have you ensured it respects the existing AuthN/AuthZ mechanisms?
  - Are you logging failed auth attempts?
- Are you using or adding any cryptographic features?
  - Do you use standard, proven implementations?
  - Are the used keys controlled by the customer? Where are they stored?
- Are you introducing any new policies/roles/users?
  - Have you used the least-privilege principle? How?


By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.

---------

Co-authored-by: dlpzx <dlpzx@amazon.com>
Co-authored-by: Noah Paige <noahpaig@amazon.com>
Co-authored-by: Sofia Sazonova <sazonova@amazon.co.uk>
4 people authored Sep 25, 2024
1 parent 075b43c commit 2005863
Showing 24 changed files with 1,232 additions and 76 deletions.
8 changes: 8 additions & 0 deletions backend/dataall/core/environment/cdk/environment_stack.py
@@ -654,3 +654,11 @@ def create_integration_tests_role(self):
resources=[f'arn:aws:cloudformation:*:{self.account}:stack/*/*'],
),
)

self.test_role.add_to_policy(
iam.PolicyStatement(
actions=['iam:GetRole', 'iam:CreateRole', 'iam:PutRolePolicy'],
effect=iam.Effect.ALLOW,
resources=[f'arn:aws:iam::{self.account}:role/dataall-test-*'],
),
)
@@ -453,10 +453,14 @@ def manage_access_point_and_policy(self):
not s3_client.get_bucket_access_point_arn(self.access_point_name)
and retries < ACCESS_POINT_CREATION_RETRIES
):
logger.info(
f'Attempt {retries}. Waiting {ACCESS_POINT_CREATION_TIME * sleep_coeff}s for access point creation to complete..'
)
time.sleep(ACCESS_POINT_CREATION_TIME * sleep_coeff)
sleep_coeff = sleep_coeff * ACCESS_POINT_BACKOFF_COEFFICIENT
retries += 1
if not s3_client.get_bucket_access_point_arn(self.access_point_name):
raise Exception(f'Failed to create access point {self.access_point_name}')
existing_policy = s3_client.get_access_point_policy(self.access_point_name)
# requester will use this role to access resources
target_requester_id = SessionHelper.get_role_id(
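
The hunk above replaces a fixed 30-second wait with an exponential backoff while polling for the access point. As a minimal standalone sketch of the pattern (the constant names mirror the diff, but their values and the `get_arn` callable are assumptions, not the project's actual code):

```
import time

ACCESS_POINT_CREATION_TIME = 30        # base wait in seconds (assumed value)
ACCESS_POINT_CREATION_RETRIES = 5      # assumed retry budget
ACCESS_POINT_BACKOFF_COEFFICIENT = 2   # assumed backoff multiplier


def wait_for_access_point(get_arn, access_point_name):
    """Poll get_arn(name) with exponential backoff until it returns a value or the retries run out."""
    retries, sleep_coeff = 0, 1
    while not get_arn(access_point_name) and retries < ACCESS_POINT_CREATION_RETRIES:
        wait_seconds = ACCESS_POINT_CREATION_TIME * sleep_coeff
        print(f'Attempt {retries}. Waiting {wait_seconds}s for access point creation to complete..')
        time.sleep(wait_seconds)
        sleep_coeff *= ACCESS_POINT_BACKOFF_COEFFICIENT
        retries += 1
    if not get_arn(access_point_name):
        raise Exception(f'Failed to create access point {access_point_name}')
```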
4 changes: 2 additions & 2 deletions tests/modules/s3_datasets_shares/test_share.py
@@ -427,7 +427,7 @@ def share3_processed(
def share3_item_shared(
share_item: typing.Callable, share3_processed: ShareObject, table1: DatasetTable
) -> ShareObjectItem:
# Cleaned up with share3_happy_path
yield share_item(share=share3_processed, table=table1, status=ShareItemStatus.Share_Succeeded.value)


@@ -608,7 +608,7 @@ def share4_draft(
def share3_item_shared_unhealthy(
share_item: typing.Callable, share3_processed: ShareObject, table1_1: DatasetTable
) -> ShareObjectItem:
# Cleaned up with share3_happy_path
yield share_item(
share=share3_processed,
table=table1_1,
13 changes: 13 additions & 0 deletions tests_new/clean_up_s3.sh
@@ -0,0 +1,13 @@
# split the bucket names (one per line) into a bash array; the original `${(@f)...}` expansion is zsh-only
mapfile -t array < <(aws s3api list-buckets --query 'Buckets[?contains(Name, `session`) == `true`].[Name]' --output text)
for YOUR_BUCKET in "${array[@]}"
do

aws s3api delete-objects --bucket ${YOUR_BUCKET} \
--delete "$(aws s3api list-object-versions --bucket ${YOUR_BUCKET} --query='{Objects: Versions[].{Key:Key,VersionId:VersionId}}')"

aws s3api delete-objects --bucket ${YOUR_BUCKET} \
--delete "$(aws s3api list-object-versions --bucket ${YOUR_BUCKET} --query='{Objects: DeleteMarkers[].{Key:Key,VersionId:VersionId}}')"

aws s3api delete-bucket --bucket ${YOUR_BUCKET}
done
131 changes: 69 additions & 62 deletions tests_new/integration_tests/README.md
@@ -12,68 +12,75 @@ Currently **we support only Cognito-based deployments**, but support for any IdP is under way

- A real deployment of data.all in AWS.
- For this deployment the `cdk.json` flag `enable_pivot_role_auto_create` must be set to `true`.
- For this deployment the `config.json` flag `cdk_pivot_role_multiple_environments_same_account` must be set to `true` if an AWS account is going to be reused for multiple environments.
- A second test account is bootstrapped, and the first account is added to its trust policy in the target regions:
```cdk bootstrap --trust <first-account-id> -c @aws-cdk/core:newStyleStackSynthesis=true --cloudformation-execution-policies arn:aws:iam::aws:policy/AdministratorAccess aws://<second-account-id>/region```
- An SSM parameter (`/dataall/{env_name}/testdata`) in the DEPLOYMENT ACCOUNT with the following contents
```
{
"users": {
"testUserTenant": {
"username": "testUserTenant",
"password": "...",
"groups": [
"DAAdministrators"
]
},
"testUser1": {
"username": "testUser1",
"password": "...",
"groups": [
"testGroup1"
]
},
"testUser2": {
"username": "testUser2",
"password": "...",
"groups": [
"testGroup2"
]
},
"testUser3": {
"username": "testUser3",
"password": "...",
"groups": [
"testGroup3"
]
},
"testUser4": {
"username": "testUser4",
"password": "...",
"groups": [
"testGroup4"
]
}
},
"envs": {
"session_env1": {
"accountId": "...",
"region": "eu-central-1"
},
"session_env2": {
"accountId": "...",
"region": "eu-west-1"
},
"persistent_env1": {
"accountId": "...",
"region": "us-east-1"
},
"session_cross_acc_env_1": {
"accountId": "another acc",
"region": "same as session_env1"
},
},
"dashboards": {
"session_env1": {
"dashboardId": "..."
},
}
}
```

- The pipeline will create the users/groups
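
For orientation, a minimal sketch of how a test session could read the `/dataall/{env_name}/testdata` parameter above with boto3 (the environment-variable handling here is an assumption for illustration, not the project's actual fixture code):

```
import json
import os

import boto3

env_name = os.environ.get('ENVNAME', 'dev')
ssm = boto3.client('ssm', region_name=os.environ.get('AWS_REGION', 'us-east-1'))
testdata = json.loads(
    ssm.get_parameter(Name=f'/dataall/{env_name}/testdata', WithDecryption=True)['Parameter']['Value']
)

user1 = testdata['users']['testUser1']            # {'username': ..., 'password': ..., 'groups': [...]}
session_env1 = testdata['envs']['session_env1']   # {'accountId': ..., 'region': ...}
```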

## Run tests
37 changes: 37 additions & 0 deletions tests_new/integration_tests/aws_clients/athena.py
@@ -0,0 +1,37 @@
import time
from tests_new.integration_tests.utils import poller


class AthenaClient:
def __init__(self, session, region):
self._client = session.client('athena', region_name=region)
self._region = region

def _run_query(self, query, workgroup='primary', output_location=None):
if output_location:
result = self._client.start_query_execution(
QueryString=query, ResultConfiguration={'OutputLocation': output_location}
)
else:
result = self._client.start_query_execution(QueryString=query, WorkGroup=workgroup)
return result['QueryExecutionId']

@poller(check_success=lambda state: state not in ['QUEUED', 'RUNNING'], timeout=600, sleep_time=5)
def _wait_for_query(self, query_id):
result = self._client.get_query_execution(QueryExecutionId=query_id)
return result['QueryExecution']['Status']['State']

def execute_query(self, query, workgroup='primary', output_location=None):
q_id = self._run_query(query, workgroup, output_location)
return self._wait_for_query(q_id)

def list_work_groups(self):
result = self._client.list_work_groups()
return [x['Name'] for x in result['WorkGroups']]

def get_env_work_group(self, env_name):
workgroups = self.list_work_groups()
for workgroup in workgroups:
if env_name in workgroup:
return workgroup
return workgroups[0] if workgroups else None
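
As an illustration only (not code from this PR), a test could drive the client like this, assuming the `poller` decorator returns the last polled state and the workgroup has a query result location configured:

```
import boto3

session = boto3.Session()  # or a session obtained via StsClient.get_role_session(...)
athena = AthenaClient(session, 'eu-central-1')

workgroup = athena.get_env_work_group('session_env1')          # picks the env-specific workgroup if one exists
state = athena.execute_query('SELECT 1', workgroup=workgroup)  # final state: SUCCEEDED, FAILED or CANCELLED
assert state == 'SUCCEEDED'
```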
96 changes: 96 additions & 0 deletions tests_new/integration_tests/aws_clients/iam.py
@@ -0,0 +1,96 @@
import json
import logging
import os

import boto3

from dataall.base.aws.parameter_store import ParameterStoreManager

log = logging.getLogger(__name__)


class IAMClient:
def __init__(self, session=boto3.Session(), region=os.environ.get('AWS_REGION', 'us-east-1')):
self._client = session.client('iam', region_name=region)
self._resource = session.resource('iam', region_name=region)
self._region = region

def get_role(self, role_name):
try:
role = self._client.get_role(RoleName=role_name)
return role
except Exception as e:
log.info(f'Error occurred: {e}')
return None

@staticmethod
def get_tooling_account_id():
session = boto3.Session()
param_client = session.client('ssm', os.environ.get('AWS_REGION', 'us-east-1'))
parameter_path = f"/dataall/{os.environ.get('ENVNAME', 'dev')}/toolingAccount"
toolingAccount = param_client.get_parameter(Name=parameter_path)['Parameter']['Value']
return toolingAccount

def create_role(self, account_id, role_name, test_role_name):
policy_doc = {
'Version': '2012-10-17',
'Statement': [
{
'Effect': 'Allow',
'Principal': {
'AWS': [
f'arn:aws:iam::{account_id}:root',
f'arn:aws:iam::{IAMClient.get_tooling_account_id()}:root',
f'arn:aws:sts::{account_id}:assumed-role/{test_role_name}/{test_role_name}',
]
},
'Action': 'sts:AssumeRole',
'Condition': {},
}
],
}
try:
role = self._client.create_role(
RoleName=role_name,
AssumeRolePolicyDocument=json.dumps(policy_doc),
Description='Role for Lambda function',
)
return role
except Exception as e:
log.error(e)
raise e

def create_role_if_not_exists(self, account_id, role_name, test_role_name):
role = self.get_role(role_name)
if role is None:
role = self.create_role(account_id, role_name, test_role_name)
return role

def get_consumption_role(self, account_id, role_name, test_role_name):
role = self.get_role(role_name)
if role is None:
role = self.create_role(account_id, role_name, test_role_name)
self.put_consumption_role_policy(role_name)
return role

def put_consumption_role_policy(self, role_name):
self._client.put_role_policy(
RoleName=role_name,
PolicyName='ConsumptionPolicy',
PolicyDocument="""{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "VisualEditor0",
"Effect": "Allow",
"Action": [
"s3:*",
"athena:*",
"glue:*",
"lakeformation:GetDataAccess"
],
"Resource": "*"
}
]
}""",
)
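
A hypothetical sketch of provisioning a consumption role for the share tests with this client (the account id and role names are placeholders; the `dataall-test-*` prefix matches the IAM permissions added to the integration-test role above):

```
import boto3

iam_client = IAMClient(session=boto3.Session(), region='eu-central-1')

# Create (or reuse) a role trusted by the environment account, the tooling account and the test role,
# then attach the broad S3/Athena/Glue/Lake Formation consumption policy defined above.
role = iam_client.get_consumption_role(
    account_id='111111111111',                        # placeholder environment account id
    role_name='dataall-test-consumption-role',        # placeholder, within the dataall-test-* pattern
    test_role_name='dataall-integration-tests-role',  # placeholder integration-test role name
)
print(role['Role']['Arn'])
```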
19 changes: 19 additions & 0 deletions tests_new/integration_tests/aws_clients/sts.py
@@ -0,0 +1,19 @@
import os

import boto3


class StsClient:
def __init__(self, session=boto3.Session(), region=os.environ.get('AWS_REGION', 'us-east-1')):
self._client = session.client('sts', region_name=region)
self._region = region

def get_role_session(self, role_arn):
assumed_role_object = self._client.assume_role(RoleArn=role_arn, RoleSessionName='AssumeRole')
credentials = assumed_role_object['Credentials']

return boto3.Session(
aws_access_key_id=credentials['AccessKeyId'],
aws_secret_access_key=credentials['SecretAccessKey'],
aws_session_token=credentials['SessionToken'],
)
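
And a minimal sketch of chaining it with the other helpers (the role ARN is a placeholder):

```
sts = StsClient(region='eu-west-1')

# assume the remote role, then build clients in that account from the returned session
assumed_session = sts.get_role_session('arn:aws:iam::111111111111:role/dataall-test-consumption-role')
s3 = assumed_session.client('s3', region_name='eu-west-1')
print([bucket['Name'] for bucket in s3.list_buckets()['Buckets']])
```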
18 changes: 18 additions & 0 deletions tests_new/integration_tests/conftest.py
@@ -91,6 +91,12 @@ def user4(userdata):
yield userdata['testUser4']


@pytest.fixture(scope='session', autouse=True)
def user5(userdata):
# Existing user with name and password
yield userdata['testUser5']


@pytest.fixture(scope='session', autouse=True)
def group1():
# Existing Cognito group with name testGroup1
@@ -119,6 +125,13 @@ def group4():
yield 'testGroup4'


@pytest.fixture(scope='session', autouse=True)
def group5():
# Existing Cognito group with name testGroup5
# Add user5
yield 'testGroup5'


@pytest.fixture(scope='session')
def client1(user1) -> Client:
yield Client(user1.username, user1.password)
@@ -139,6 +152,11 @@ def client4(user4) -> Client:
yield Client(user4.username, user4.password)


@pytest.fixture(scope='session')
def client5(user5) -> Client:
yield Client(user5.username, user5.password)


@pytest.fixture(scope='session')
def clientTenant(userTenant) -> Client:
yield Client(userTenant.username, userTenant.password)
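
Purely as an illustration (not a test from this PR), the new session-scoped fixtures are consumed like the existing ones:

```
# hypothetical test showing injection of the new client5/group5 pair
def test_client5_is_authenticated(client5, group5):
    assert group5 == 'testGroup5'
    assert client5 is not None  # Client(user5.username, user5.password) built in conftest.py
```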