From 064c7bb7c56ec0d7fd952f226f21ee87aa741209 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Mon, 6 Nov 2023 23:44:09 +0530 Subject: [PATCH] Allow public access to buckets via IAM, not ACL Previously, we were mixing ACL and IAM, which led to basically the bucket *not* being accessible publicly - only to authenticated users. This switches everything to using IAM, which *does* make the bucket properly publicly accessible. In addition, there's now a policy that we only enable this when 2i2c is *not* handling billing, as there can be disastrous cost consequences. We fix this for the LEAP bucket. Config is moved to `user_buckets` rather than `hub_permissions`, as the config is purely set on the bucket and not related to which hub we are configuring. Ref https://2i2c.freshdesk.com/a/tickets/954 --- docs/howto/features/buckets.md | 28 ++++++++++++++++++++++++--- docs/howto/features/cloud-access.md | 3 --- terraform/gcp/buckets.tf | 18 +++++------------ terraform/gcp/projects/leap.tfvars | 7 ++++--- terraform/gcp/projects/m2lines.tfvars | 4 ++-- terraform/gcp/variables.tf | 6 ++++-- 6 files changed, 40 insertions(+), 26 deletions(-) diff --git a/docs/howto/features/buckets.md b/docs/howto/features/buckets.md index 6a8dbe6c86..b10a0082bf 100644 --- a/docs/howto/features/buckets.md +++ b/docs/howto/features/buckets.md @@ -73,9 +73,31 @@ on why users want this! 4. Get this change deployed, and users should now be able to use the buckets! Currently running users might have to restart their pods for the change to take effect. - - -## Allowing access to buckets from outside the JupyterHub + + +## Allowing public, readonly to buckets from outside the JupyterHub + +### GCP + +Some hubs want to expose a particular bucket to the broad internet. +This can have catastrophic cost consequences, so we only allow this +on clusters where 2i2c is not paying the bill for. + +This can be enabled by setting the `public_access` parameter in +`user_buckets` for the appropriate bucket, and running `terraform apply`. + +Example: + +```terraform +user_buckets = { + "persistent": { + "delete_after": null, + "public_access": true + } +} +``` + +## Allowing authenticated access to buckets from outside the JupyterHub ### GCP diff --git a/docs/howto/features/cloud-access.md b/docs/howto/features/cloud-access.md index ea56301464..f6d8b8c7ff 100644 --- a/docs/howto/features/cloud-access.md +++ b/docs/howto/features/cloud-access.md @@ -44,7 +44,6 @@ This AWS IAM Role is managed via terraform. "": { requestor_pays : true, bucket_admin_access : ["bucket-1", "bucket-2"] - bucket_public_access : ["bucket-1"] hub_namespace : "" } } @@ -64,8 +63,6 @@ This AWS IAM Role is managed via terraform. access to. Used along with the [user_buckets](howto:features:storage-buckets) terraform variable to enable the [scratch buckets](topic:features:cloud:scratch-buckets) feature. - 4. `bucket_public_access` lists bucket names (as specified in `user_buckets` - terraform variable) that should be publicly accessible. 5. (GCP only) `hub_namespace` is the full name of the hub, as hubs are put in Kubernetes Namespaces that are the same as their names. This is explicitly specified here because `` could possibly be truncated on GCP. diff --git a/terraform/gcp/buckets.tf b/terraform/gcp/buckets.tf index cd541761a8..2cce5024c9 100644 --- a/terraform/gcp/buckets.tf +++ b/terraform/gcp/buckets.tf @@ -55,14 +55,6 @@ locals { ] ])) - bucket_public_permissions = distinct(flatten([ - for hub_name, permissions in var.hub_cloud_permissions : [ - for bucket_name in permissions.bucket_public_access : { - hub_name = hub_name - bucket_name = bucket_name - } - ] - ])) } resource "google_storage_bucket_iam_member" "member" { @@ -86,11 +78,11 @@ resource "google_storage_bucket_iam_member" "extra_admin_members" { member = each.value.member } -resource "google_storage_bucket_access_control" "public_rule" { - for_each = { for bp in local.bucket_public_permissions : "${bp.hub_name}.${bp.bucket_name}" => bp } - bucket = google_storage_bucket.user_buckets[each.value.bucket_name].name - role = "READER" - entity = "allUsers" +resource "google_storage_bucket_iam_member" "public_access" { + for_each = { for k, v in var.user_buckets : k => v if v.public_access } + bucket = google_storage_bucket.user_buckets[each.key].name + role = "roles/storage.objectViewer" + member = "allUsers" } output "buckets" { diff --git a/terraform/gcp/projects/leap.tfvars b/terraform/gcp/projects/leap.tfvars index 8387514a1a..dc26f4b45d 100644 --- a/terraform/gcp/projects/leap.tfvars +++ b/terraform/gcp/projects/leap.tfvars @@ -44,11 +44,13 @@ user_buckets = { # For https://github.com/2i2c-org/infrastructure/issues/1230#issuecomment-1278183441 "persistent-ro" : { "delete_after" : null, - "extra_admin_members" : ["group:leap-persistent-bucket-writers@googlegroups.com"] + "extra_admin_members" : ["group:leap-persistent-bucket-writers@googlegroups.com"], + "public_access" : true }, "persistent-ro-staging" : { "delete_after" : null, - "extra_admin_members" : ["group:leap-persistent-bucket-writers@googlegroups.com"] + "extra_admin_members" : ["group:leap-persistent-bucket-writers@googlegroups.com"], + "public_access" : true } } @@ -63,7 +65,6 @@ hub_cloud_permissions = { requestor_pays : true, bucket_admin_access : ["scratch", "persistent"], bucket_readonly_access : ["persistent-ro"], - bucket_public_access : ["persistent-ro"], hub_namespace : "prod" } } diff --git a/terraform/gcp/projects/m2lines.tfvars b/terraform/gcp/projects/m2lines.tfvars index 505fcdf1ce..10357ef41c 100644 --- a/terraform/gcp/projects/m2lines.tfvars +++ b/terraform/gcp/projects/m2lines.tfvars @@ -39,7 +39,8 @@ user_buckets = { }, "public-persistent" : { "delete_after" : null, - "extra_admin_members" : ["group:m2lines-persistent-bucket-writers@googlegroups.com"] + "extra_admin_members" : ["group:m2lines-persistent-bucket-writers@googlegroups.com"], + "public_access" : true }, } @@ -115,7 +116,6 @@ hub_cloud_permissions = { "prod" : { requestor_pays : true, bucket_admin_access : ["scratch", "persistent", "public-persistent"], - bucket_public_access : ["public-persistent"], hub_namespace : "prod" }, } diff --git a/terraform/gcp/variables.tf b/terraform/gcp/variables.tf index 662a7afbda..03eae707bd 100644 --- a/terraform/gcp/variables.tf +++ b/terraform/gcp/variables.tf @@ -240,7 +240,7 @@ variable "enable_network_policy" { } variable "user_buckets" { - type = map(object({ delete_after : number, extra_admin_members : optional(list(string), []) })) + type = map(object({ delete_after : number, extra_admin_members : optional(list(string), []), public_access : optional(bool, false) })) default = {} description = <<-EOT GCS Buckets to be created. @@ -258,6 +258,9 @@ variable "user_buckets" { is primarily useful for moving data into and out of buckets from outside the cloud. See https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/storage_bucket_iam#member/members for the format this would be specified in. + + 'public_access', if set to true, makes the bucket fully accessible to + the public internet, without any authentication. EOT } @@ -379,7 +382,6 @@ variable "hub_cloud_permissions" { requestor_pays : bool, bucket_admin_access : set(string), bucket_readonly_access : optional(set(string), []), - bucket_public_access : optional(set(string), []), hub_namespace : string }) )