Skip to content

Commit

Permalink
use base otel with config via env (#285)
Browse files Browse the repository at this point in the history
This allows us to avoid building a new image just to include the otel config
file.


Seeing metrics in my dev env:
![Screenshot 2024-04-18 at 9 36
09 PM](https://github.com/chainguard-dev/terraform-infra-common/assets/16194785/088eaa54-3b60-4281-9d49-9a0599d0196e)

Signed-off-by: Kenny Leung <kleung@chainguard.dev>
  • Loading branch information
k4leung4 authored Apr 19, 2024
1 parent e845334 commit 6cb5f71
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 10 deletions.
4 changes: 3 additions & 1 deletion modules/regional-go-service/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ No requirements.
| Name | Source | Version |
|------|--------|---------|
| <a name="module_audit-serviceaccount"></a> [audit-serviceaccount](#module\_audit-serviceaccount) | ../audit-serviceaccount | n/a |
| <a name="module_otel-collector"></a> [otel-collector](#module\_otel-collector) | ../otel-collector | n/a |

## Resources

Expand All @@ -86,6 +85,8 @@ No requirements.
| [google_cloud_run_v2_service_iam_member.public-services-are-unauthenticated](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/cloud_run_v2_service_iam_member) | resource |
| [google_monitoring_alert_policy.anomalous-service-access](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/monitoring_alert_policy) | resource |
| [google_monitoring_alert_policy.bad-rollout](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/monitoring_alert_policy) | resource |
| [google_project_iam_member.metrics-writer](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource |
| [google_project_iam_member.trace-writer](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource |
| [ko_build.this](https://registry.terraform.io/providers/ko-build/ko/latest/docs/resources/build) | resource |
| [google_client_openid_userinfo.me](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/client_openid_userinfo) | data source |
| [google_project.project](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/project) | data source |
Expand All @@ -101,6 +102,7 @@ No requirements.
| <a name="input_labels"></a> [labels](#input\_labels) | Labels to apply to the service. | `map(string)` | `{}` | no |
| <a name="input_name"></a> [name](#input\_name) | n/a | `string` | n/a | yes |
| <a name="input_notification_channels"></a> [notification\_channels](#input\_notification\_channels) | List of notification channels to alert. | `list(string)` | n/a | yes |
| <a name="input_otel_collector_image"></a> [otel\_collector\_image](#input\_otel\_collector\_image) | The otel collector image to use as a base. Must be on gcr.io or dockerhub. | `string` | `"chainguard/opentelemetry-collector-contrib:latest"` | no |
| <a name="input_project_id"></a> [project\_id](#input\_project\_id) | n/a | `string` | n/a | yes |
| <a name="input_regional-volumes"></a> [regional-volumes](#input\_regional-volumes) | The volumes to make available to the containers in the service for mounting. | <pre>list(object({<br> name = string<br> gcs = optional(map(object({<br> bucket = string<br> read_only = optional(bool, true)<br> })), {})<br> nfs = optional(map(object({<br> server = string<br> path = string<br> read_only = optional(bool, true)<br> })), {})<br> }))</pre> | `[]` | no |
| <a name="input_regions"></a> [regions](#input\_regions) | A map from region names to a network and subnetwork. A pub/sub topic and ingress service (publishing to the respective topic) will be created in each region, with the ingress service configured to egress all traffic via the specified subnetwork. | <pre>map(object({<br> network = string<br> subnet = string<br> }))</pre> | n/a | yes |
Expand Down
30 changes: 21 additions & 9 deletions modules/regional-go-service/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,18 @@ module "audit-serviceaccount" {
notification_channels = var.notification_channels
}

// Grant the service's service account the monitoring.metricWriter role on
// the project.
resource "google_project_iam_member" "metrics-writer" {
  project = var.project_id
  member  = "serviceAccount:${var.service_account}"
  role    = "roles/monitoring.metricWriter"
}

// Grant the service's service account the cloudtrace.agent role on the
// project.
resource "google_project_iam_member" "trace-writer" {
  project = var.project_id
  member  = "serviceAccount:${var.service_account}"
  role    = "roles/cloudtrace.agent"
}

// Build each of the application images from source.
resource "ko_build" "this" {
for_each = var.containers
Expand All @@ -33,14 +45,6 @@ resource "cosign_sign" "this" {
conflict = "REPLACE"
}

// Build our otel-collector sidecar image.
module "otel-collector" {
source = "../otel-collector"

project_id = var.project_id
service_account = var.service_account
}

// Deploy the service into each of our regions.
resource "google_cloud_run_v2_service" "this" {
for_each = var.regions
Expand Down Expand Up @@ -132,7 +136,15 @@ resource "google_cloud_run_v2_service" "this" {
}
}
}
containers { image = module.otel-collector.image }
containers {
  // Collector sidecar: runs the image named by var.otel_collector_image as-is.
  image = var.otel_collector_image

  // The collector config file that lives next to this module is passed in
  // through an environment variable. Config via env is a supported option; see
  // https://pkg.go.dev/go.opentelemetry.io/collector/service#section-readme
  env {
    name  = "OTEL_CONFIG"
    value = file("${path.module}/otel-config/config.yaml")
  }
  args = ["--config=env:OTEL_CONFIG"]
}

dynamic "volumes" {
for_each = var.volumes
Expand Down
83 changes: 83 additions & 0 deletions modules/regional-go-service/otel-config/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
receivers:
  prometheus:
    config:
      scrape_configs:
        - job_name: 'localhost'
          scrape_interval: 10s
          # Keep the job and instance labels if the target already sets them.
          honor_labels: true
          static_configs:
            # TODO: make this configurable
            - targets:
                - 'localhost:2112'
          # Drop every metric whose name starts with prometheus_, process_
          # or go_.
          metric_relabel_configs:
            - source_labels: [__name__]
              regex: '^prometheus_.*'
              action: drop
            - source_labels: [__name__]
              regex: '^process_.*'
              action: drop
            - source_labels: [__name__]
              regex: '^go_.*'
              action: drop

processors:
  # Batch metrics before sending to reduce API usage.
  batch:
    send_batch_size: 200
    send_batch_max_size: 200
    timeout: 5s

  # Drop metrics if memory usage gets too high.
  memory_limiter:
    check_interval: 1s
    limit_percentage: 65
    spike_limit_percentage: 20

  # Automatically detect Cloud Run resource metadata.
  resourcedetection:
    detectors:
      - env
      - gcp

  resource:
    attributes:
      # Expose the instance ID as a resource attribute so that multiple otel
      # sidecar instances do not race each other by uploading overlapping
      # time series to the same buckets.
      # NOTE(review): some gcp detector versions may emit `faas.instance`
      # rather than `faas.id` - confirm against the collector version in use.
      - key: service.instance.id
        from_attribute: faas.id
        action: upsert
      # The `gcp` resourcedetection processor sets `faas.name` to the name of
      # the Cloud Run service or the Cloud Run job. The googlemanagedprometheus
      # exporter consumes the `service.name` attribute and sets the `job`
      # resource label to this value. (See
      # https://github.com/GoogleCloudPlatform/opentelemetry-operations-go/pull/764)
      - key: 'service.name'
        from_attribute: faas.name
        action: upsert

exporters:
  googlemanagedprometheus:
    sending_queue:
      enabled: true
      # This collector only handles metrics for a single pod, so one sender
      # is enough; a single consumer also avoids out-of-order data.
      num_consumers: 1

extensions:
  # No settings are overridden for the health check extension.
  health_check: {}

service:
  telemetry:
    logs:
      # Log errors only, so scraper startup logging does not show up on
      # every cold start.
      level: "error"
      # Stack traces are of little use here and break log lines apart.
      disable_stacktrace: true
      encoding: json

  extensions: [health_check]
  pipelines:
    metrics:
      receivers: [prometheus]
      # Order matters: memory_limiter goes first so it can refuse data before
      # any other processor buffers it, and batch goes last so batches are
      # built from data that already carries its resource attributes.
      processors: [memory_limiter, resourcedetection, resource, batch]
      exporters: [googlemanagedprometheus]
6 changes: 6 additions & 0 deletions modules/regional-go-service/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,9 @@ variable "labels" {
type = map(string)
default = {}
}

// Image used for the otel collector sidecar container.
variable "otel_collector_image" {
  description = "The otel collector image to use as a base. Must be on gcr.io or dockerhub."
  type        = string
  default     = "chainguard/opentelemetry-collector-contrib:latest"
}

0 comments on commit 6cb5f71

Please sign in to comment.