Skip to content

Commit

Permalink
feat(phase 2a): readmes + main.tf (#17)
Browse files Browse the repository at this point in the history
* feat(phase 2a): readmes + main.tf

* feat: add checkov
  • Loading branch information
luk-st authored Jan 3, 2024
1 parent 4c641f9 commit f141ff8
Showing 7 changed files with 290 additions and 53 deletions.
20 changes: 20 additions & 0 deletions .terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 19 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
@@ -113,38 +113,44 @@ terraform destroy -no-color -var-file env/project.tfvars
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | ~> 1.5.0 |
| <a name="requirement_docker"></a> [docker](#requirement\_docker) | 3.0.2 |
| <a name="requirement_google"></a> [google](#requirement\_google) | ~> 4.84.0 |
| <a name="requirement_kubernetes"></a> [kubernetes](#requirement\_kubernetes) | 2.24.0 |

## Providers

No providers.
| Name | Version |
|------|---------|
| <a name="provider_google"></a> [google](#provider\_google) | 4.84.0 |
| <a name="provider_kubernetes"></a> [kubernetes](#provider\_kubernetes) | 2.24.0 |

## Modules

| Name | Source | Version |
|------|--------|---------|
| <a name="module_composer"></a> [composer](#module\_composer) | ./modules/composer | n/a |
| <a name="module_data-pipelines"></a> [data-pipelines](#module\_data-pipelines) | ./modules/data-pipeline | n/a |
| <a name="module_dataproc"></a> [dataproc](#module\_dataproc) | ./modules/dataproc | n/a |
| <a name="module_gcr"></a> [gcr](#module\_gcr) | ./modules/gcr | n/a |
| <a name="module_jupyter_docker_image"></a> [jupyter\_docker\_image](#module\_jupyter\_docker\_image) | ./modules/docker_image | n/a |
| <a name="module_vertex_ai_workbench"></a> [vertex\_ai\_workbench](#module\_vertex\_ai\_workbench) | ./modules/vertex-ai-workbench | n/a |
| <a name="module_vpc"></a> [vpc](#module\_vpc) | ./modules/vpc | n/a |
| <a name="module_composer"></a> [composer](#module\_composer) | github.com/bdg-tbd/tbd-workshop-1.git | v1.0.36/modules/composer |
| <a name="module_data-pipelines"></a> [data-pipelines](#module\_data-pipelines) | github.com/bdg-tbd/tbd-workshop-1.git | v1.0.36/modules/data-pipeline |
| <a name="module_dataproc"></a> [dataproc](#module\_dataproc) | github.com/bdg-tbd/tbd-workshop-1.git | v1.0.36/modules/dataproc |
| <a name="module_dbt_docker_image"></a> [dbt\_docker\_image](#module\_dbt\_docker\_image) | github.com/bdg-tbd/tbd-workshop-1.git | v1.0.36/modules/dbt_docker_image |
| <a name="module_gcr"></a> [gcr](#module\_gcr) | github.com/bdg-tbd/tbd-workshop-1.git | v1.0.36/modules/gcr |
| <a name="module_jupyter_docker_image"></a> [jupyter\_docker\_image](#module\_jupyter\_docker\_image) | github.com/bdg-tbd/tbd-workshop-1.git | v1.0.36/modules/jupyter_docker_image |
| <a name="module_vertex_ai_workbench"></a> [vertex\_ai\_workbench](#module\_vertex\_ai\_workbench) | github.com/bdg-tbd/tbd-workshop-1.git | v1.0.36/modules/vertex-ai-workbench |
| <a name="module_vpc"></a> [vpc](#module\_vpc) | github.com/bdg-tbd/tbd-workshop-1.git | v1.0.36/modules/vpc |

## Resources

No resources.
| Name | Type |
|------|------|
| [google_compute_firewall.allow-all-internal](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_firewall) | resource |
| [kubernetes_service.dbt-task-service](https://registry.terraform.io/providers/hashicorp/kubernetes/2.24.0/docs/resources/service) | resource |
| [google_client_config.provider](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/client_config) | data source |
| [google_container_cluster.composer-gke-cluster](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/container_cluster) | data source |

## Inputs

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_ai_notebook_instance_owner"></a> [ai\_notebook\_instance\_owner](#input\_ai\_notebook\_instance\_owner) | Vertex AI workbench owner | `string` | n/a | yes |
| <a name="input_dataproc_num_workers"></a> [dataproc\_num\_workers](#input\_dataproc\_num\_workers) | Number of dataproc workers | `number` | `2` | no |
| <a name="input_dataproc_worker_machine_type"></a> [dataproc\_worker\_machine\_type](#input\_dataproc\_worker\_machine\_type) | Dataproc worker machine type | `string` | `"e2-standard-2"` | no |
| <a name="input_preemptible_num_workers"></a> [preemptible\_num\_workers](#input\_preemptible\_num\_workers) | Number of preemptible dataproc workers | `number` | `0` | no |
| <a name="input_project_name"></a> [project\_name](#input\_project\_name) | Project name | `string` | n/a | yes |
| <a name="input_region"></a> [region](#input\_region) | GCP region | `string` | `"europe-west1"` | no |
| <a name="input_vertex_machine_type"></a> [vertex\_machine\_type](#input\_vertex\_machine\_type) | Vertex AI machine type | `string` | `"e2-standard-2"` | no |

## Outputs

100 changes: 84 additions & 16 deletions main.tf
Original file line number Diff line number Diff line change
@@ -4,12 +4,20 @@ locals {
notebook_subnet_name = "subnet-01"
notebook_subnet_id = "${var.region}/${local.notebook_subnet_name}"
composer_subnet_address = "10.11.0.0/16"
composer_work_namespace = "composer-user-workloads"
code_bucket_name = "${var.project_name}-code"
data_bucket_name = "${var.project_name}-data"
spark_version = "3.3.2"
spark_driver_port = 30000
spark_blockmgr_port = 30001
dbt_version = "1.7.3"
dbt_spark_version = "1.7.1"
dbt_git_repo = "https://github.com/mwiewior/tbd-tpc-di.git"
dbt_git_repo_branch = "main"
}

module "vpc" {
source = "./modules/vpc"
source = "github.com/bdg-tbd/tbd-workshop-1.git?ref=v1.0.36/modules/vpc"
project_name = var.project_name
region = var.region
network_name = local.notebook_vpc_name
@@ -19,26 +27,28 @@ module "vpc" {


# Instantiates the gcr module with the project name; its registry_hostname output is
# consumed by the Docker image modules in this file.
# NOTE(review): this text is a rendered diff without +/- markers, so two `source` lines
# appear — presumably the local path is the removed line and the GitHub ref the added one; confirm.
module "gcr" {
source = "./modules/gcr"
source = "github.com/bdg-tbd/tbd-workshop-1.git?ref=v1.0.36/modules/gcr"
project_name = var.project_name
}

# Instantiates the Jupyter Docker image module after module.gcr, passing the registry
# hostname exported by module.gcr plus the Spark/dbt versions defined in locals.
# NOTE(review): this text is a rendered diff without +/- markers, so two `source` lines
# appear — presumably "./modules/docker_image" is the removed line and the GitHub ref the added one; confirm.
module "jupyter_docker_image" {
depends_on = [module.gcr]
source = "./modules/docker_image"
source = "github.com/bdg-tbd/tbd-workshop-1.git?ref=v1.0.36/modules/jupyter_docker_image"
registry_hostname = module.gcr.registry_hostname
# coalesce() with a single argument returns it unchanged, but fails if it is null or empty.
registry_repo_name = coalesce(var.project_name)
project_name = var.project_name
spark_version = local.spark_version
dbt_version = local.dbt_version
dbt_spark_version = local.dbt_spark_version
}

module "vertex_ai_workbench" {
depends_on = [module.jupyter_docker_image, module.vpc]
source = "./modules/vertex-ai-workbench"
source = "github.com/bdg-tbd/tbd-workshop-1.git?ref=v1.0.36/modules/vertex-ai-workbench"
project_name = var.project_name
region = var.region
network = module.vpc.network.network_id
subnet = module.vpc.subnets[local.notebook_subnet_id].id
machine_type = var.vertex_machine_type

ai_notebook_instance_owner = var.ai_notebook_instance_owner
## To remove before workshop
@@ -50,27 +60,25 @@ module "vertex_ai_workbench" {

#
# Instantiates the dataproc module after module.vpc and hands it the id of the
# notebook subnet looked up in module.vpc.subnets.
# NOTE(review): this text is a rendered diff without +/- markers, so two argument lists
# appear back to back — presumably the first (local source, variable-driven worker settings)
# is the removed side and the second (GitHub ref, literal machine type) the added side; confirm.
module "dataproc" {
depends_on = [module.vpc]
source = "./modules/dataproc"
project_name = var.project_name
region = var.region
subnet = module.vpc.subnets[local.notebook_subnet_id].id
machine_type = var.dataproc_worker_machine_type
num_workers = var.dataproc_num_workers
preemptible_num_workers = var.preemptible_num_workers
depends_on = [module.vpc]
source = "github.com/bdg-tbd/tbd-workshop-1.git?ref=v1.0.36/modules/dataproc"
project_name = var.project_name
region = var.region
subnet = module.vpc.subnets[local.notebook_subnet_id].id
# Literal machine type here, where the first argument list reads var.dataproc_worker_machine_type.
machine_type = "e2-standard-2"
}

## Uncomment for Dataproc batches (serverless)
#module "metastore" {
# source = "./modules/metastore"
# source = "github.com/bdg-tbd/tbd-workshop-1.git?ref=v1.0.36/modules/metastore"
# project_name = var.project_name
# region = var.region
# network = module.vpc.network.network_id
#}

module "composer" {
depends_on = [module.vpc]
source = "./modules/composer"
source = "github.com/bdg-tbd/tbd-workshop-1.git?ref=v1.0.36/modules/composer"
project_name = var.project_name
network = module.vpc.network.network_name
subnet_address = local.composer_subnet_address
@@ -79,11 +87,25 @@ module "composer" {
"AIRFLOW_VAR_REGION_NAME" : var.region,
"AIRFLOW_VAR_BUCKET_NAME" : local.code_bucket_name
"AIRFLOW_VAR_PHS_CLUSTER" : module.dataproc.dataproc_cluster_name,
"AIRFLOW_VAR_WRK_NAMESPACE" : local.composer_work_namespace,
"AIRFLOW_VAR_DBT_GIT_REPO" : local.dbt_git_repo,
"AIRFLOW_VAR_DBT_GIT_REPO_BRANCH" : local.dbt_git_repo_branch
}
}

# dbt Docker image module, sourced from the workshop repository at the v1.0.36 ref.
# It is explicitly ordered after module.composer and receives the registry hostname
# exported by module.gcr.
module "dbt_docker_image" {
  source     = "github.com/bdg-tbd/tbd-workshop-1.git?ref=v1.0.36/modules/dbt_docker_image"
  depends_on = [module.composer]

  # Registry coordinates: the repository name is the project name.
  # coalesce() with one argument returns it unchanged but fails on a null or empty value.
  project_name       = var.project_name
  registry_hostname  = module.gcr.registry_hostname
  registry_repo_name = coalesce(var.project_name)

  # Tool versions come from the shared locals block.
  dbt_version       = local.dbt_version
  dbt_spark_version = local.dbt_spark_version
  spark_version     = local.spark_version
}

module "data-pipelines" {
source = "./modules/data-pipeline"
source = "github.com/bdg-tbd/tbd-workshop-1.git?ref=v1.0.36/modules/data-pipeline"
project_name = var.project_name
region = var.region
bucket_name = local.code_bucket_name
@@ -92,3 +114,49 @@ module "data-pipelines" {
data_bucket_name = local.data_bucket_name
}




# NodePort Service in the namespace named by local.composer_work_namespace.
# It selects pods labelled app = "dbt-app" and exposes two TCP ports: the Spark driver
# port and the Spark block-manager port, both taken from locals.
resource "kubernetes_service" "dbt-task-service" {
  metadata {
    namespace = local.composer_work_namespace
    name      = "dbt-task-service"
    labels    = { app = "dbt-app" }
  }

  spec {
    selector = { app = "dbt-app" }
    type     = "NodePort"

    # For each port, the service port, target port and node port share one local value.
    port {
      name        = "spark-driver"
      node_port   = local.spark_driver_port
      port        = local.spark_driver_port
      target_port = local.spark_driver_port
      protocol    = "TCP"
    }

    port {
      name        = "spark-block-mgr"
      node_port   = local.spark_blockmgr_port
      port        = local.spark_blockmgr_port
      target_port = local.spark_blockmgr_port
      protocol    = "TCP"
    }
  }
}

# Firewall rule on the network created by module.vpc: allows every protocol for
# traffic whose source address falls in 10.0.0.0/8. The checkov finding for
# unrestricted ingress ports is suppressed explicitly by the skip comment below.
resource "google_compute_firewall" "allow-all-internal" {
  #checkov:skip=CKV2_GCP_12: "Ensure GCP compute firewall ingress does not allow unrestricted access to all ports"
  project = var.project_name
  network = module.vpc.network.network_name
  name    = "allow-all-internal"

  source_ranges = ["10.0.0.0/8"]

  allow {
    protocol = "all"
  }
}
19 changes: 19 additions & 0 deletions provider.tf
Original file line number Diff line number Diff line change
@@ -8,6 +8,21 @@ provider "docker" {
config_file = pathexpand("~/.docker/config.json")
}
}
# Client configuration of the google provider; its access_token is used as the
# bearer token of the kubernetes provider configured in this file.
data "google_client_config" "provider" {}


# Looks up the GKE cluster reported by module.composer so that its endpoint and CA
# certificate can be read by the kubernetes provider.
data "google_container_cluster" "composer-gke-cluster" {
  location = var.region

  # module.composer.gke_cluster is split on "/" and only its last segment is used as the name.
  name = reverse(split("/", module.composer.gke_cluster))[0]
}

# Kubernetes provider wired to the Composer GKE cluster: the host is the cluster endpoint
# over HTTPS, authentication uses the google client access token, and the cluster CA
# certificate is base64-decoded from the cluster's master_auth data.
provider "kubernetes" {
  host  = "https://${data.google_container_cluster.composer-gke-cluster.endpoint}"
  token = data.google_client_config.provider.access_token

  cluster_ca_certificate = base64decode(data.google_container_cluster.composer-gke-cluster.master_auth[0].cluster_ca_certificate)
}

terraform {
required_version = "~> 1.5.0"
@@ -20,5 +35,9 @@ terraform {
source = "kreuzwerker/docker"
version = "3.0.2"
}
kubernetes = {
source = "hashicorp/kubernetes"
version = "2.24.0"
}
}
}
Loading

0 comments on commit f141ff8

Please sign in to comment.