Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,29 @@ venv
node_modules
.ruff_cache
.coverage
htmlcov/
htmlcov/

# Terraform
.terraform/
*.tfstate
*.tfstate.*
crash.log
crash.log.*
*.tfvars
*.tfvars.json
override.tf
override.tf.json
_override.tf
_override.tf.json
.terraformrc
terraform.rc
tfplan
infra/dcp/backend.tf
.terraform.lock.hcl

# Configuration
infra/dcp/.env

*.pyc
.venv/
uv.lock
64 changes: 64 additions & 0 deletions infra/dcp/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Data Commons Platform (DCP) Infrastructure

This directory contains the Terraform configuration to deploy the Data Commons Platform on Google Cloud Platform (GCP).

## Prerequisites
* **GCP Project**: A GCP project with billing enabled.
* **Terraform**: Terraform installed locally (>= 1.5.0, required by the `check` block in `main.tf`).
* **gcloud CLI**: GCP CLI installed and authenticated.

## Setup

1. **Configure Local Variables**:
Copy the example variable file and fill in your project details.
```bash
cp terraform.tfvars.example terraform.tfvars
```
Edit `terraform.tfvars` with your `project_id` and other preferred settings.

2. **Run Setup Script**:
The `setup.sh` script creates a GCS bucket for your Terraform state and initializes the backend.
```bash
./setup.sh
```

## Deployment

1. **Initialize**:
Initialize Terraform (if not already done by setup.sh).
```bash
terraform init
```

2. **Plan**:
Review the changes Terraform will make.
```bash
terraform plan
```

3. **Apply**:
Provision the infrastructure.
```bash
terraform apply
```

4. **Teardown**:
Destroy all resources.
```bash
terraform destroy
```

## Architecture

This setup uses an **Orchestrator Pattern**:
- `infra/dcp/main.tf`: The root entrypoint that calls modules.
- `infra/dcp/modules/dcp/`: The new Data Commons Platform stack (Cloud Run + Spanner).
- `infra/dcp/modules/cdc/`: The legacy Custom Data Commons stack (Cloud Run + MySQL + Redis).

Each module is independent and can be toggled via the root variables in `terraform.tfvars`.

## Troubleshooting
* **Deletion Errors**: If you get a "cannot destroy... deletion_protection" error, ensure `deletion_protection = false` in your `terraform.tfvars`, run `terraform apply`, and then try `terraform destroy` again. Alternatively, use the helper command:
```bash
make force-destroy
```
123 changes: 123 additions & 0 deletions infra/dcp/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
terraform {
  # This configuration uses a `check` block (see the Spanner instance-id
  # validation below), which was introduced in Terraform v1.5 — pin the
  # minimum version so older CLIs fail fast with a clear message instead of
  # a parse error.
  required_version = ">= 1.5.0"

  required_providers {
    google = {
      source  = "hashicorp/google"
      version = ">= 5.0"
    }
    null = {
      source  = "hashicorp/null"
      version = ">= 3.0"
    }
  }
}

# Default Google Cloud provider configuration. All resources in this root
# module and its child modules inherit this project and region unless they
# set their own.
provider "google" {
project = var.project_id
region = var.region
}

# Enable the GCP service APIs needed by the DCP and CDC stacks.
resource "google_project_service" "apis" {
  # The base set is always enabled; the Spanner API is appended only when
  # the DCP stack is turned on. `toset` makes list order irrelevant and
  # gives each API its own resource instance keyed by name.
  for_each = toset(concat(
    [
      "run.googleapis.com",
      "iam.googleapis.com",
      "sqladmin.googleapis.com",
      "redis.googleapis.com",
      "secretmanager.googleapis.com",
      "vpcaccess.googleapis.com",
      "artifactregistry.googleapis.com",
      "compute.googleapis.com",
    ],
    var.enable_dcp ? ["spanner.googleapis.com"] : [],
  ))

  service = each.key

  # Keep the APIs enabled on `terraform destroy` so other workloads in the
  # same project are not disrupted.
  disable_on_destroy = false
}

# --- Data Commons Platform (DCP) Stack ---
# Deployed only when var.enable_dcp is true (count = 0 otherwise), so any
# reference to this module elsewhere must use the module.dcp[0] index form.
module "dcp" {
source = "./modules/dcp"
count = var.enable_dcp ? 1 : 0

# Core project / placement settings.
project_id = var.project_id
namespace = var.namespace
region = var.region

# Cloud Run service image and identity.
image_url = var.dcp_image_url
service_name = var.dcp_service_name
service_account_name = var.dcp_service_account_name

# Spanner: either create a new instance/database or reuse existing ones
# identified by the *_id variables (see the `check` block at the bottom of
# this file, which enforces that an ID is supplied when reusing).
create_spanner_instance = var.dcp_create_spanner_instance
create_spanner_db = var.dcp_create_spanner_db
spanner_instance_id = var.dcp_spanner_instance_id
spanner_database_id = var.dcp_spanner_database_id
spanner_processing_units = var.dcp_spanner_processing_units

# Cloud Run service sizing and scaling knobs.
service_cpu = var.dcp_service_cpu
service_memory = var.dcp_service_memory
service_min_instances = var.dcp_service_min_instances
service_max_instances = var.dcp_service_max_instances
service_concurrency = var.dcp_service_concurrency
service_timeout_seconds = var.dcp_service_timeout_seconds

# Shared root-level flag; must be set to false before `terraform destroy`
# can remove protected resources (see README "Troubleshooting").
deletion_protection = var.deletion_protection

# Make sure the required APIs are enabled before provisioning anything.
depends_on = [google_project_service.apis]
}

# --- Custom Data Commons (CDC) Legacy Stack ---
# Deployed only when var.enable_cdc is true (count = 0 otherwise). Per the
# README this is the legacy Cloud Run + MySQL + Redis stack; it can also be
# pointed at the DCP stack's Spanner database (see use_spanner below).
module "cdc" {
source = "./modules/cdc"
count = var.enable_cdc ? 1 : 0

# Core project / placement settings and API keys.
project_id = var.project_id
namespace = var.namespace
dc_api_key = var.cdc_dc_api_key
maps_api_key = var.cdc_maps_api_key
disable_google_maps = var.cdc_disable_google_maps
region = var.region
google_analytics_tag_id = var.cdc_google_analytics_tag_id

# GCS data bucket used for data loading input/output.
gcs_data_bucket_name = var.cdc_gcs_data_bucket_name
gcs_data_bucket_input_folder = var.cdc_gcs_data_bucket_input_folder
gcs_data_bucket_output_folder = var.cdc_gcs_data_bucket_output_folder
gcs_data_bucket_location = var.cdc_gcs_data_bucket_location

# Cloud SQL (MySQL) settings. Deletion protection reuses the root-level
# flag so one toggle governs the whole deployment.
mysql_instance_name = var.cdc_mysql_instance_name
mysql_database_name = var.cdc_mysql_database_name
mysql_database_version = var.cdc_mysql_database_version
mysql_cpu_count = var.cdc_mysql_cpu_count
mysql_memory_size_mb = var.cdc_mysql_memory_size_mb
mysql_storage_size_gb = var.cdc_mysql_storage_size_gb
mysql_user = var.cdc_mysql_user
mysql_deletion_protection = var.deletion_protection

# Web service (Cloud Run) image, scaling, and exposure.
dc_web_service_image = var.cdc_web_service_image
dc_web_service_min_instance_count = var.cdc_web_service_min_instance_count
dc_web_service_max_instance_count = var.cdc_web_service_max_instance_count
dc_web_service_cpu = var.cdc_web_service_cpu
dc_web_service_memory = var.cdc_web_service_memory
make_dc_web_service_public = var.cdc_make_dc_web_service_public

# Data loading job (Cloud Run job) settings.
dc_data_job_image = var.cdc_data_job_image
dc_data_job_cpu = var.cdc_data_job_cpu
dc_data_job_memory = var.cdc_data_job_memory
dc_data_job_timeout = var.cdc_data_job_timeout
dc_search_scope = var.cdc_search_scope
enable_mcp = var.cdc_enable_mcp

# Networking: VPC plus the serverless VPC-access connector CIDR.
vpc_network_name = var.cdc_vpc_network_name
vpc_network_subnet_name = var.cdc_vpc_network_subnet_name

# Optional Redis (Memorystore) cache.
enable_redis = var.cdc_enable_redis
redis_instance_name = var.cdc_redis_instance_name
redis_memory_size_gb = var.cdc_redis_memory_size_gb
redis_tier = var.cdc_redis_tier
redis_location_id = var.cdc_redis_location_id
redis_alternative_location_id = var.cdc_redis_alternative_location_id
redis_replica_count = var.cdc_redis_replica_count
vpc_connector_cidr = var.cdc_vpc_connector_cidr

# Cross-stack wiring: when the DCP stack is enabled, CDC is switched to
# Spanner and pointed at DCP's instance/database outputs; module.dcp has
# count = 1 in that case, hence the [0] index. Otherwise these are empty
# and CDC falls back to Cloud SQL.
use_spanner = var.enable_dcp
spanner_instance_id = var.enable_dcp ? module.dcp[0].spanner_instance_id : ""
spanner_database_id = var.enable_dcp ? module.dcp[0].spanner_database_id : ""
deletion_protection = var.deletion_protection

# Make sure the required APIs are enabled before provisioning anything.
depends_on = [google_project_service.apis]
}

# Validate that an existing Spanner instance ID was supplied whenever the
# DCP stack is enabled but asked to reuse an instance instead of creating
# one. (`check` blocks emit a warning-level failure at plan/apply time.)
check "spanner_instance_id_provided" {
  assert {
    # Only meaningful when DCP is on and no new instance will be created;
    # in every other configuration the assertion passes trivially.
    condition = (var.enable_dcp && !var.dcp_create_spanner_instance) ? var.dcp_spanner_instance_id != "" : true
    error_message = "dcp_spanner_instance_id must be provided when reusing an existing instance (dcp_create_spanner_instance = false)."
  }
}
109 changes: 109 additions & 0 deletions infra/dcp/modules/cdc/locals.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Local variable definitions

locals {
  # Resource-name prefix derived from the namespace (empty namespace means
  # no prefix), and the data bucket name with a project-scoped default.
  name_prefix          = var.namespace != "" ? "${var.namespace}-" : ""
  gcs_data_bucket_name = var.gcs_data_bucket_name != "" ? var.gcs_data_bucket_name : "${local.name_prefix}datacommons-data-${var.project_id}"

  # Use var.maps_api_key if set, otherwise use the generated Maps API key.
  # NOTE(review): if maps_api_key is null while disable_google_maps is true,
  # google_apikeys_key.maps_api_key[0] presumably has count = 0 and this
  # reference would error — assumed this local is only read when Maps is
  # enabled; verify against the resources that consume it.
  maps_api_key = var.maps_api_key != null ? var.maps_api_key : google_apikeys_key.maps_api_key[0].key_string

  # Data Commons API endpoint pieces and the assembled root URL.
  dc_api_hostname = "api.datacommons.org"
  dc_api_protocol = "https"
  dc_api_root     = "${local.dc_api_protocol}://${local.dc_api_hostname}"

  # Optionally-configured Redis instance (null when Redis is disabled; the
  # try() calls below then fall back to empty strings).
  redis_instance = var.enable_redis ? google_redis_instance.redis_instance[0] : null

  # Shared environment variables used by the Data Commons web service and
  # the Data Commons data loading job. When use_spanner is set the Cloud SQL
  # settings are blanked out rather than omitted, so downstream templates
  # can consume a fixed list.
  cloud_run_shared_env_variables = [
    {
      name  = "USE_CLOUDSQL"
      value = var.use_spanner ? "false" : "true"
    },
    {
      name  = "CLOUDSQL_INSTANCE"
      value = var.use_spanner ? "" : google_sql_database_instance.mysql_instance[0].connection_name
    },
    {
      name  = "DB_NAME"
      value = var.mysql_database_name
    },
    {
      name  = "DB_USER"
      value = var.mysql_user
    },
    {
      name  = "DB_HOST"
      value = ""
    },
    {
      name  = "DB_PORT"
      value = "3306"
    },
    {
      name  = "OUTPUT_DIR"
      value = "gs://${local.gcs_data_bucket_name}/${var.gcs_data_bucket_output_folder}"
    },
    {
      # timestamp() changes on every apply, deliberately forcing a new
      # Cloud Run revision each run (this also means a perpetual diff).
      name  = "FORCE_RESTART"
      value = "${timestamp()}"
    },
    {
      name  = "REDIS_HOST"
      value = try(local.redis_instance.host, "")
    },
    {
      # The Redis port attribute is a number; convert it explicitly so the
      # `value` field is a string in every element of this list.
      name  = "REDIS_PORT"
      value = try(tostring(local.redis_instance.port), "")
    },
    {
      name  = "GCP_SPANNER_INSTANCE_ID"
      value = var.spanner_instance_id
    },
    {
      name  = "GCP_SPANNER_DATABASE_NAME"
      value = var.spanner_database_id
    }
  ]

  # Shared environment variables containing secret refs used by the Data
  # Commons web service and the Data Commons data loading job.
  #
  # BUG FIX: MAPS_API_KEY was previously emitted unconditionally with
  # secret = "" when disable_google_maps is true; an empty secret name is
  # invalid in a Cloud Run secret reference and fails at plan/apply time.
  # The entry is now omitted entirely in that case (same pattern as the
  # Spanner-vs-MySQL DB_PASS entry below).
  cloud_run_shared_env_variable_secrets = concat(
    [
      {
        name = "DC_API_KEY"
        value_source = {
          secret_key_ref = {
            secret  = google_secret_manager_secret.dc_api_key.secret_id
            version = "latest"
          }
        }
      }
    ],
    var.disable_google_maps ? [] : [
      {
        name = "MAPS_API_KEY"
        value_source = {
          secret_key_ref = {
            secret  = google_secret_manager_secret.maps_api_key[0].secret_id
            version = "latest"
          }
        }
      }
    ],
    var.use_spanner ? [] : [
      {
        name = "DB_PASS"
        value_source = {
          secret_key_ref = {
            # NOTE(review): this uses .id (the full projects/.../secrets/...
            # path) while the entries above use .secret_id (short name) —
            # both are accepted by Cloud Run v2, but consider normalizing.
            secret  = google_secret_manager_secret.mysql_password_secret[0].id
            version = "latest"
          }
        }
      }
    ]
  )
}
Loading
Loading