From 9d991afc37ae37f03a0c279f0c3f3d56879bde9d Mon Sep 17 00:00:00 2001 From: lukasz-staniszewski Date: Sun, 3 Dec 2023 15:53:31 +0100 Subject: [PATCH 1/3] fix: disable sudo access --- modules/vertex-ai-workbench/main.tf | 9 +++++---- .../resources/notebook_post_startup_script.sh | 3 --- tasks-phase1.md | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/modules/vertex-ai-workbench/main.tf b/modules/vertex-ai-workbench/main.tf index 656f222..a0aa64f 100644 --- a/modules/vertex-ai-workbench/main.tf +++ b/modules/vertex-ai-workbench/main.tf @@ -61,6 +61,10 @@ resource "google_notebooks_instance" "tbd_notebook" { shielded_instance_config { enable_secure_boot = true } + metadata = { + vmDnsSetting : "GlobalDefault" + notebook-disable-root = true + } network = var.network subnet = var.subnet @@ -69,10 +73,7 @@ resource "google_notebooks_instance" "tbd_notebook" { no_public_ip = true no_proxy_access = true # end - instance_owners = [var.ai_notebook_instance_owner] - metadata = { - vmDnsSetting : "GlobalDefault" - } + instance_owners = [var.ai_notebook_instance_owner] post_startup_script = "gs://${google_storage_bucket_object.post-startup.bucket}/${google_storage_bucket_object.post-startup.name}" } diff --git a/modules/vertex-ai-workbench/resources/notebook_post_startup_script.sh b/modules/vertex-ai-workbench/resources/notebook_post_startup_script.sh index 9543470..5b143d1 100644 --- a/modules/vertex-ai-workbench/resources/notebook_post_startup_script.sh +++ b/modules/vertex-ai-workbench/resources/notebook_post_startup_script.sh @@ -19,6 +19,3 @@ sudo docker run -d \ sleep 30s sudo docker exec -it $VERTEX_CONTAINER_NAME python3.8 -m ipykernel install --user --name pyspark - -# remove sudo privileges from user -sudo deluser $USER sudo \ No newline at end of file diff --git a/tasks-phase1.md b/tasks-phase1.md index 48547d0..d061466 100644 --- a/tasks-phase1.md +++ b/tasks-phase1.md @@ -29,7 +29,7 @@ ``` - resource "google_storage_bucket" "tbd-data-bucket" -> 
the bucket to store data. Set the following properties: - - project // look for variable in variables.tf + - project // look for variable in variables.tfł - name // look for variable in variables.tf - location // look for variable in variables.tf - uniform_bucket_level_access = false #tfsec:ignore:google-storage-enable-ubla From e9879001ac50ebbbc3afdccde50738a50f5f2f3e Mon Sep 17 00:00:00 2001 From: lukasz-staniszewski Date: Sun, 3 Dec 2023 22:13:25 +0100 Subject: [PATCH 2/3] feat: remove typo --- tasks-phase1.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks-phase1.md b/tasks-phase1.md index d061466..48547d0 100644 --- a/tasks-phase1.md +++ b/tasks-phase1.md @@ -29,7 +29,7 @@ ``` - resource "google_storage_bucket" "tbd-data-bucket" -> the bucket to store data. Set the following properties: - - project // look for variable in variables.tfł + - project // look for variable in variables.tf - name // look for variable in variables.tf - location // look for variable in variables.tf - uniform_bucket_level_access = false #tfsec:ignore:google-storage-enable-ubla From f8b66b7fb4548c3c666bca9e123671798319c6c9 Mon Sep 17 00:00:00 2001 From: lukasz-staniszewski Date: Sun, 3 Dec 2023 23:16:56 +0100 Subject: [PATCH 3/3] feat: service accounts description --- tasks-phase1.md | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/tasks-phase1.md b/tasks-phase1.md index 48547d0..ef031b4 100644 --- a/tasks-phase1.md +++ b/tasks-phase1.md @@ -187,12 +187,23 @@ SSH tunnel is created using local port 1080 and in Chrome we can connect through - Description of network communication (ports, why it is necessary to specify the host for the driver) of Apache Spark running from Vertex AI Workbech ``` +Components of service accounts: + +A Service Account is a special type of account used by an application or a service (to act on its behalf) to make authorized API calls. 
This account is not intended for direct user access but is associated with the application or service. Our project uses the following service accounts: + +1. **Terraform Service Account (Terraform SA)** - used to authenticate and authorize Terraform to manage resources on a cloud provider platform. It allows Terraform to create, update, and delete resources. +2. **Google Cloud Composer Service Account (Composer SA)** - used by Google Cloud Composer to access various Google Cloud resources such as Google Cloud Storage, BigQuery, etc. It is necessary for executing and managing workflows that interact with these services. +3. **Infrastructure as Code Service Account (IaC SA)** - used by IaC tools to automate the provisioning and configuration of infrastructure. In GitHub Actions, we use IaC SA to authenticate and execute actions such as deploying infrastructure when a pull request is merged. + +Network communication: + Specifying the host for the driver is essential for a few key reasons: 1. Resource Allocation: The driver needs to communicate with the master to allocate resources across worker nodes. Specifying the host ensures the driver is reachable for this coordination. 2. Task Distribution and Management: The driver divides the application into tasks and schedules them on workers. Knowing the driver's host helps manage this distribution effectively. 3. Fault Tolerance: In case of failures, the system needs to know the driver's location to restart or move tasks, ensuring resilience and continuity of operations. 4. Data Flow Optimization: For efficient data transfer between the nodes, the network topology must be known, which includes the driver's location. +Diagram: ![img.png](doc/figures/TBD_task_8_diagram.png) @@ -706,7 +717,7 @@ yarnApplications: >> 3. Perform additional hardening of Jupyterlab environment, i.e. 
disable sudo access and enable secure boot ``` -- We've changed the [`modules/vertex-ai-workbench/main.tf`](modules/vertex-ai-workbench/main.tf) file, where we put: +- To enable secure boot, we've changed the [`modules/vertex-ai-workbench/main.tf`](modules/vertex-ai-workbench/main.tf) file, where we put: ```tf # Enable Secure Boot TASK 14.3 @@ -717,11 +728,13 @@ yarnApplications: ... ``` -- To disable sudo access we've added to the end of the [`modules/vertex-ai-workbench/resources/notebook_post_startup_script.sh`](modules/vertex-ai-workbench/resources/notebook_post_startup_script.sh) file: +- To disable sudo access to an instance, we've changed the metadata in the [`modules/vertex-ai-workbench/main.tf`](modules/vertex-ai-workbench/main.tf) file: ```sh - # remove sudo privileges from user - sudo deluser $USER sudo + metadata = { + vmDnsSetting : "GlobalDefault" + notebook-disable-root = true + } ``` ```txt