Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open-telemetry deployment via Terraform #19

Merged
merged 3 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions deploy/staging/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,19 @@ deploy-flash-deps:
$(TF) apply --target module.flash-deps -var-file=$(ENV)-secrets.tfvars

deploy-flash:
$(TF) apply --target helm_release.flash -var-file="$(ENV)-secrets.tfvars" -auto-approve
$(TF) apply --target helm_release.flash -var-file="$(ENV)-secrets.tfvars"

deploy-pay:
$(TF) apply --target helm_release.flash-pay -var-file="$(ENV)-secrets.tfvars"

deploy-otel:
$(TF) apply --target module.otel -var-file="$(ENV)-secrets.tfvars"

# Install CRDs prior to cert-manager
# Ref: https://cert-manager.io/docs/installation/helm/
# deploy-cert-manager:
# kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.3/cert-manager.crds.yaml
# $(TF) apply

deploy:
deploy-all:
$(TF) apply -var-file="$(ENV)-secrets.tfvars"
2 changes: 1 addition & 1 deletion deploy/staging/flash-deps/staging-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ kubemonkey:
enabled: false

opentelemetry-collector:
enabled: false
enabled: true

# cert-manager:
# enabled: false
Expand Down
17 changes: 9 additions & 8 deletions deploy/staging/kratos-pg/staging-values.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@

persistence:
enabled: true
storageClass: do-block-storage-xfs-retain # xfs is file system format
size: 8Gi # default
# annotations: consider backups
primary:
persistence:
enabled: true
storageClass: do-block-storage-xfs-retain # xfs is file system format
size: 8Gi # default
# annotations: consider backups

# This becomes relevant when scaling the persistent volume
persistentVolumeClaimRetentionPolicy:
enabled: false
# This becomes relevant when scaling the persistent volume
persistentVolumeClaimRetentionPolicy:
enabled: false

auth:
enablePostgresUser: false
Expand Down
6 changes: 6 additions & 0 deletions deploy/staging/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ resource "helm_release" "flash-pay" {
]
}

# Instantiates the local ./otel module (OpenTelemetry deployment for staging).
module "otel" {
source = "./otel"

# Pass the root-level HONEYCOMB_API_KEY variable to the module input of the same name.
HONEYCOMB_API_KEY = var.HONEYCOMB_API_KEY
}

# Instantiates the local ./flash-deps module; no inputs are passed here.
module "flash-deps" {
source = "./flash-deps"
}
Expand Down
45 changes: 45 additions & 0 deletions deploy/staging/otel/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@

# Honeycomb credential; substituted into the collector values template by the
# helm_release in this module. Marked sensitive so Terraform redacts it in output.
variable "HONEYCOMB_API_KEY" {
  type        = string
  sensitive   = true
  description = "The api key to write open-telemetry data to Honeycomb"
}

# Namespace named "otel"; the collector helm_release in this module reads its
# name from this resource for its `namespace` argument.
resource "kubernetes_namespace" "otel" {
metadata {
name = "otel"
}
}

# OpenTelemetry collector, installed from the upstream chart repository into
# the namespace managed by kubernetes_namespace.otel.
resource "helm_release" "collector" {
  name      = "opentelemetry-collector"
  namespace = kubernetes_namespace.otel.metadata[0].name

  repository = "https://open-telemetry.github.io/opentelemetry-helm-charts"
  chart      = "opentelemetry-collector"
  version    = "0.68.1"

  # The values file is rendered as a Terraform template so the Honeycomb key
  # can be substituted into it. Because templatefile() interpolates every
  # dollar-brace sequence it finds, collector-side placeholders such as
  # MY_POD_IP are treated as template parameters too and must be escaped
  # inside staging-values.yaml.
  values = [
    templatefile("${path.module}/staging-values.yaml", {
      HONEYCOMB_API_KEY = var.HONEYCOMB_API_KEY
    })
  ]
}
147 changes: 147 additions & 0 deletions deploy/staging/otel/staging-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@

# Helm values for the staging OpenTelemetry collector release.
#
# This file is rendered by Terraform's templatefile() before Helm reads it.
# Terraform interpolates every dollar-brace sequence in the file, comments
# included, so any placeholder meant for the collector itself must be written
# with a doubled dollar sign. HONEYCOMB_API_KEY is the only template variable
# supplied by the caller.
enabled: true
resources: {}
mode: deployment
config:
  exporters:
    logging: {}
    otlp:
      endpoint: "api.honeycomb.io:443"
      headers:
        # Quoted so the rendered key is always a YAML string, whatever
        # characters it contains.
        x-honeycomb-team: "${HONEYCOMB_API_KEY}"
        x-honeycomb-dataset: "flash-staging"
    otlp/metrics:
      endpoint: api.honeycomb.io:443
      headers:
        x-honeycomb-team: "${HONEYCOMB_API_KEY}"
        x-honeycomb-dataset: flash-staging-metrics
    otlp/logging:
      endpoint: api.honeycomb.io:443
      headers:
        x-honeycomb-team: "${HONEYCOMB_API_KEY}"
        x-honeycomb-dataset: flash-staging-logs
  extensions:
    health_check: {}
  processors:
    batch: {}
    tail_sampling:
      policies:
        - name: status_code
          type: status_code
          status_code:
            status_codes: [ERROR]
        - name: probabilistic
          type: probabilistic
          probabilistic: { sampling_percentage: 100 }
    # Overwrite attributes that may carry credentials or request bodies
    # before spans leave the cluster.
    attributes:
      actions:
        - key: graphql.variables.input.code
          action: update
          value: "<redacted>"
        - key: code.function.params.code
          action: update
          value: "<redacted>"
        - key: code.function.params.token
          action: update
          value: "<redacted>"
        - key: code.function.params.cookie
          action: update
          value: "<redacted>"
        - key: code.function.params.authToken
          action: update
          value: "<redacted>"
        - key: code.function.params.totpCode
          action: update
          value: "<redacted>"
        - key: code.function.params.body
          action: update
          value: "<redacted>"
        # - key: code.function.params.macaroon
        #   action: update
    # If set to null, will be overridden with values based on k8s resource limits
    memory_limiter: null
    resourcedetection:
      detectors: [env, gcp]
      timeout: 5s
      override: false
    k8sattributes:
      passthrough: true
  receivers:
    jaeger:
      protocols:
        grpc:
          endpoint: 0.0.0.0:14250
        thrift_http:
          endpoint: 0.0.0.0:14268
        thrift_compact:
          endpoint: 0.0.0.0:6831
        thrift_binary:
          endpoint: 0.0.0.0:6832
    otlp:
      protocols:
        grpc:
          endpoint: 0.0.0.0:4317
        http:
          endpoint: 0.0.0.0:4318
    prometheus:
      config:
        scrape_configs:
          - job_name: opentelemetry-collector
            scrape_interval: 10s
            # Disabled. If re-enabled, keep the doubled dollar sign so
            # Terraform passes the placeholder through to the collector.
            # static_configs:
            #   - targets:
            #       - $${MY_POD_IP}:8888
  service:
    extensions:
      - health_check
    # NOTE(review): the otlp/metrics and otlp/logging exporters defined above
    # are not referenced by any pipeline below; logs and metrics currently go
    # to the logging exporter only. Confirm whether that is intended.
    pipelines:
      logs:
        exporters:
          - logging
        processors:
          - memory_limiter
          - batch
        receivers:
          - otlp
      metrics:
        exporters:
          - logging
        processors:
          - memory_limiter
          - batch
        receivers:
          - otlp
          - prometheus
      traces:
        exporters:
          - logging
          - otlp
        processors:
          - memory_limiter
          - resourcedetection
          - attributes
          - k8sattributes
          - tail_sampling
          - batch
        receivers:
          - jaeger
          - otlp
ports:
  otlp:
    enabled: true
    containerPort: 4317
    servicePort: 4317
    hostPort: 4317
    protocol: TCP
  jaeger-thrift-b:
    enabled: true
    containerPort: 6832
    servicePort: 6832
    hostPort: 6832
    protocol: UDP
  jaeger-thrift:
    enabled: true
    containerPort: 14268
    servicePort: 14268
    hostPort: 14268
    protocol: TCP
8 changes: 7 additions & 1 deletion deploy/staging/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,10 @@ variable "IBEX_PASSWORD" {
# description = "Email associated with the Cloudflare account"
# type = string
# sensitive = true
# }
# }

# Root-level Honeycomb credential; forwarded to the otel module as its
# HONEYCOMB_API_KEY input. Marked sensitive so Terraform redacts it in output.
variable "HONEYCOMB_API_KEY" {
  type        = string
  sensitive   = true
  description = "The api key to write open-telemetry data to Honeycomb"
}
2 changes: 1 addition & 1 deletion helm/flash-deps/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.0.2-staging
version: 0.0.3-staging
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
Expand Down
4 changes: 2 additions & 2 deletions helm/flash-pay/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.0.1-dev
version: 0.0.3-dev
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
appVersion: 0.0.1
appVersion: 0.0.2
dependencies:
- name: galoy-nostr
condition: galoy-nostr.enabled
Expand Down
4 changes: 2 additions & 2 deletions helm/flash-pay/values.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
image:
repository: lnflash/flash-pay:latest
digest: "sha256:5171bdc1827be0726f348b2331492a1a50af6ae78954bd733a7fbe8cecab4438"
repository: brh28/flash-pay:latest
digest: "sha256:d5620fe35398afd0e8079b1ae71ee0632ea4327b0129aa2258ac8735509842f2"
git_ref: "cf1ff28" # Not used by helm
ingress:
enabled: false
Expand Down
6 changes: 3 additions & 3 deletions helm/flash/Chart.lock
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ dependencies:
version: 1.25.0
- name: price
repository: file://../price
version: 0.3.0
digest: sha256:5f3e1505176098f1c3d4b2cf18f1454c2400fb8e1cf854a828298ade00fca111
generated: "2024-03-06T12:59:57.028415467-06:00"
version: 0.3.4
digest: sha256:cd159c706e304842659d06214237aed6f36e322996a8f2c11b377050b9bac033
generated: "2024-04-05T11:38:36.694730381-06:00"
6 changes: 3 additions & 3 deletions helm/flash/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ apiVersion: v2 # https://helm.sh/docs/topics/charts/#the-apiversion-field
name: flash
description: A Helm chart for the Flash application backend
type: application
version: 0.0.6-dev
# appVersion: perhaps git commit set by CI/CD? ""
version: 0.0.15-staging
appVersion: 0.0.3-dev # perhaps git commit set by CI/CD? ""
dependencies:
- name: redis
repository: https://charts.bitnami.com/bitnami
Expand All @@ -21,5 +21,5 @@ dependencies:
repository: oci://ghcr.io/apollographql/helm-charts
version: 1.25.0
- name: price
version: 0.3.0
version: 0.3.4
repository: "file://../price"
2 changes: 1 addition & 1 deletion helm/flash/templates/api-ingress.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ metadata:
nginx.ingress.kubernetes.io/limit-connections: "80"
nginx.ingress.kubernetes.io/cors-allow-methods: POST
nginx.ingress.kubernetes.io/cors-allow-origin: https://pay.staging.flashapp.me
nginx.ingress.kubernetes.io/enable-cors: true
nginx.ingress.kubernetes.io/enable-cors: "true"
nginx.ingress.kubernetes.io/auth-url: "http://flash-oathkeeper-api.{{ .Release.Namespace }}.svc.cluster.local:4456/decisions"
nginx.ingress.kubernetes.io/auth-method: GET
nginx.ingress.kubernetes.io/auth-response-headers: "Authorization, Set-Cookie"
Expand Down
10 changes: 5 additions & 5 deletions helm/flash/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ galoy:
hotToColdRebalanceQueueName: dev
images:
app:
repository: lnflash/flash-dev:latest
repository: brh28/flash-dev:latest
imagePullPolicy: Always
## TODO: remove digest and commit hash
digest: sha256:0f4c5aa2f011eb657e05d2a4cf5f01ae02c6bc202305a7146d1c2023cd0b912b
digest: sha256:e252557744f18e660b9874e428baa14fc162c54cb6be5f0b4a05774c1b5fe981
git_ref: "d2a438"
websocket:
repository: us.gcr.io/galoy-org/galoy-app-websocket
Expand Down Expand Up @@ -172,7 +172,7 @@ galoy:
## Exporter records metrics, primarily for Bitcoin & Lightning services.
## Turning off for now as most (if not all) of these metrics are irrelevant to Flash
exporter:
enabled: false
enabled: true
## Override the components name (defaults to admin).
##
nameOverride:
Expand Down Expand Up @@ -448,7 +448,7 @@ secrets:
tracing:
## OTEL Exporter OTLP Endpoint
##
otelExporterOtlpEndpoint: http://localhost:4318
otelExporterOtlpEndpoint: http://opentelemetry-collector.otel.svc.cluster.local:4318
## Prefix for tracing
##
prefix: galoy-dev
Expand Down Expand Up @@ -1021,7 +1021,7 @@ router:
telemetry:
tracing:
otlp:
endpoint: http://localhost:4318
endpoint: http://opentelemetry-collector.otel.svc.cluster.local:4318
protocol: http

ingress-nginx:
Expand Down
Loading
Loading