From 2462c6d9d0bc8416a1a17fb9ffeab6bf2cc5b7a9 Mon Sep 17 00:00:00 2001 From: Bruno De Assis Marques Date: Sat, 21 Feb 2026 11:20:22 +1100 Subject: [PATCH] refactor: simplify alloy installation by only requiring k8s secrets Signed-off-by: Bruno De Assis Marques --- Taskfile.yaml | 83 +++++++-- cmd/weaver/commands/alloy/cluster/cluster.go | 11 +- cmd/weaver/commands/alloy/cluster/install.go | 23 ++- internal/alloy/config.go | 37 ++-- internal/alloy/manifest.go | 51 ------ internal/alloy/render.go | 45 +---- internal/alloy/render_test.go | 104 ++++++++++- internal/config/config.go | 3 + internal/workflows/steps/step_alloy.go | 160 +++-------------- test/alloy/README.md | 180 ++++++++++++------- 10 files changed, 368 insertions(+), 329 deletions(-) diff --git a/Taskfile.yaml b/Taskfile.yaml index fb3fe41b..60e6d698 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -1375,19 +1375,35 @@ tasks: echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" alloy:start: - desc: "Start Alloy stack (used by VM, CI/CD, and Kind/k3d)" + desc: "Start Alloy stack (Prometheus, Loki, Grafana) for local development" dir: test/alloy - deps: [vault:start] cmds: - docker compose up -d prometheus loki grafana - | echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo "✅ Alloy stack started!" 
echo "" - echo "📦 This task is used by:" - echo " • VM development (via task vm:alloy:start)" - echo " • CI/CD workflows (GitHub Actions, GitLab CI)" - echo " • Local Kind/k3d clusters" + echo "🌐 Services:" + echo " • Prometheus: http://localhost:9090" + echo " • Loki: http://localhost:3100" + echo " • Grafana: http://localhost:3000" + echo "" + echo "📝 Next step: Create K8s secret for Alloy passwords" + echo " task alloy:create-secret" + echo "" + echo "💡 Optionally start Vault for ESO-based secret sync:" + echo " task vault:start" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + + alloy:start-with-vault: + desc: "Start full Alloy stack with Vault (Prometheus, Loki, Grafana, Vault)" + dir: test/alloy + deps: [vault:start] + cmds: + - docker compose up -d prometheus loki grafana + - | + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "✅ Alloy stack started with Vault!" echo "" echo "🌐 Services:" echo " • Vault: http://localhost:8200 (token: devtoken)" @@ -1397,8 +1413,8 @@ tasks: echo "" echo "🔐 Vault has been initialized with development secrets" echo "" - echo "📝 Next step: Apply ClusterSecretStore to connect ESO to Vault" - echo " kubectl apply -f test/alloy/cluster-secret-store-local.yaml" + echo "📝 Next step: Configure ESO ClusterSecretStore" + echo " task vault:setup-secret-store" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" alloy:stop: @@ -1441,6 +1457,51 @@ tasks: echo "💡 For VM-based development, use: task vm:alloy:start" fi + alloy:create-secret: + desc: "Create K8s secret 'grafana-alloy-secrets' with dev passwords for local testing" + vars: + CLUSTER_NAME: '{{.CLUSTER_NAME | default "vm-cluster"}}' + cmds: + - | + echo "🔐 Creating K8s secret for Alloy (local dev)..." 
+ + # Create the namespace if it doesn't exist + kubectl create namespace grafana-alloy --dry-run=client -o yaml | kubectl apply -f - + + # Create/update the secret with dev passwords + # Key naming convention: PROMETHEUS_PASSWORD_, LOKI_PASSWORD_ + # The remote name must match the name= value in --add-prometheus-remote / --add-loki-remote + kubectl create secret generic grafana-alloy-secrets \ + --namespace=grafana-alloy \ + --from-literal=PROMETHEUS_PASSWORD_LOCAL=dev-password \ + --from-literal=LOKI_PASSWORD_LOCAL=dev-password \ + --dry-run=client -o yaml | kubectl apply -f - + + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "✅ Secret 'grafana-alloy-secrets' created in namespace 'grafana-alloy'" + echo "" + echo "📦 Keys (convention: {PROMETHEUS|LOKI}_PASSWORD_{REMOTE_NAME}):" + echo " • PROMETHEUS_PASSWORD_LOCAL" + echo " • LOKI_PASSWORD_LOCAL" + echo "" + echo "📋 Next step: install Alloy with remotes:" + echo " NODE_IP=\$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type==\"InternalIP\")].address}')" + echo " sudo solo-provisioner alloy cluster install \\" + echo " --cluster-name={{.CLUSTER_NAME}} \\" + echo " --add-prometheus-remote=name=local,url=http://\$NODE_IP:9090/api/v1/write,username=admin \\" + echo " --add-loki-remote=name=local,url=http://\$NODE_IP:3100/loki/api/v1/push,username=admin \\" + echo " --monitor-block-node" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + + alloy:delete-secret: + desc: "Delete K8s secret 'grafana-alloy-secrets'" + cmds: + - | + echo "🧹 Deleting K8s secret..." + kubectl delete secret grafana-alloy-secrets -n grafana-alloy --ignore-not-found + echo "✅ Secret deleted" + # =========================== # VM Docker & Alloy Setup # =========================== @@ -1535,12 +1596,10 @@ tasks: echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo "✅ Alloy stack setup complete!" 
echo "" - echo "🔐 Vault has been initialized with development secrets" - echo "" echo "📋 Next steps:" echo " 1. SSH into VM: task vm:ssh" - echo " 2. Apply ClusterSecretStore: task vault:setup-secret-store" - echo " 3. Deploy with Alloy: sudo weaver kube cluster install --alloy-enabled ..." + echo " 2. Create K8s secret: task alloy:create-secret" + echo " 3. Install Alloy with remotes (see test/alloy/README.md)" echo "" echo "💡 To access from your Mac, use SSH port forwarding:" echo " task vm:alloy-forward" diff --git a/cmd/weaver/commands/alloy/cluster/cluster.go b/cmd/weaver/commands/alloy/cluster/cluster.go index 976b2393..f3877bec 100644 --- a/cmd/weaver/commands/alloy/cluster/cluster.go +++ b/cmd/weaver/commands/alloy/cluster/cluster.go @@ -35,13 +35,18 @@ func init() { // Core configuration flags clusterCmd.PersistentFlags().StringVar(&flagClusterName, "cluster-name", "", "Cluster name for Alloy metrics/logs labels") clusterCmd.PersistentFlags().BoolVar(&flagMonitorBlockNode, "monitor-block-node", false, "Enable Block Node monitoring in Alloy") - clusterCmd.PersistentFlags().StringVar(&flagClusterSecretStore, "cluster-secret-store", "vault-secret-store", "Name of the ClusterSecretStore resource for External Secrets Operator") + + // Deprecated: kept for backward compatibility but hidden + clusterCmd.PersistentFlags().StringVar(&flagClusterSecretStore, "cluster-secret-store", "vault-secret-store", "Name of the ClusterSecretStore resource") + _ = clusterCmd.PersistentFlags().MarkHidden("cluster-secret-store") // Multi-remote flags (repeatable) clusterCmd.PersistentFlags().StringArrayVar(&flagPrometheusRemotes, "add-prometheus-remote", nil, - "Add a Prometheus remote (format: name=,url=,username=). Can be specified multiple times") + "Add a Prometheus remote (format: name=,url=,username=). Can be specified multiple times. 
"+ + "Password is expected in K8s Secret 'grafana-alloy-secrets' under key 'PROMETHEUS_PASSWORD_'") clusterCmd.PersistentFlags().StringArrayVar(&flagLokiRemotes, "add-loki-remote", nil, - "Add a Loki remote (format: name=,url=,username=). Can be specified multiple times") + "Add a Loki remote (format: name=,url=,username=). Can be specified multiple times. "+ + "Password is expected in K8s Secret 'grafana-alloy-secrets' under key 'LOKI_PASSWORD_'") // Legacy single-remote flags (kept for backward compatibility) clusterCmd.PersistentFlags().StringVar(&flagPrometheusURL, "prometheus-url", "", "Prometheus remote write URL (deprecated: use --add-prometheus-remote)") diff --git a/cmd/weaver/commands/alloy/cluster/install.go b/cmd/weaver/commands/alloy/cluster/install.go index 433a5ac6..dfe4f09a 100644 --- a/cmd/weaver/commands/alloy/cluster/install.go +++ b/cmd/weaver/commands/alloy/cluster/install.go @@ -19,8 +19,21 @@ var installCmd = &cobra.Command{ Long: `Install the Grafana Alloy observability stack including Prometheus CRDs, Node Exporter, and Alloy for metrics and logs collection. +Passwords for remote endpoints must exist in K8s Secret "grafana-alloy-secrets" +in the "grafana-alloy" namespace before running this command. Use +"solo-provisioner eso secret create" to create the secret from an external store, +or create it manually. 
+ Examples: - # Multiple remotes (recommended) + # Step 1: Create the K8s secret with passwords (via ESO) + solo-provisioner eso secret create \ + --store=vault-store \ + --name=grafana-alloy-secrets \ + --namespace=grafana-alloy \ + --set PROMETHEUS_PASSWORD_PRIMARY=secret/data/grafana/alloy/prod/prometheus/primary#password \ + --set LOKI_PASSWORD_PRIMARY=secret/data/grafana/alloy/prod/loki/primary#password + + # Step 2: Install Alloy with remotes solo-provisioner alloy cluster install \ --cluster-name=my-cluster \ --add-prometheus-remote=name=primary,url=https://prom1.example.com/api/v1/write,username=user1 \ @@ -28,13 +41,9 @@ Examples: --add-loki-remote=name=primary,url=https://loki1.example.com/loki/api/v1/push,username=user1 \ --monitor-block-node - # Single remote (legacy mode - deprecated) + # Local-only mode (no remotes, no secret needed) solo-provisioner alloy cluster install \ - --cluster-name=my-cluster \ - --prometheus-url=https://prometheus.example.com/api/v1/write \ - --prometheus-username=user \ - --loki-url=https://loki.example.com/loki/api/v1/push \ - --loki-username=user`, + --cluster-name=my-cluster`, RunE: func(cmd *cobra.Command, args []string) error { // Parse multi-remote flags prometheusRemotes, err := parseRemoteFlags(flagPrometheusRemotes) diff --git a/internal/alloy/config.go b/internal/alloy/config.go index b3f8eac6..bb4356a5 100644 --- a/internal/alloy/config.go +++ b/internal/alloy/config.go @@ -14,15 +14,13 @@ import ( const ( // Kubernetes resource names - Namespace = "grafana-alloy" - Release = "grafana-alloy" - Chart = "grafana/alloy" - Version = "1.4.0" - Repo = "https://grafana.github.io/helm-charts" - ConfigMapName = "grafana-alloy-cm" - SecretsName = "grafana-alloy-secrets" - ExternalSecretName = "grafana-alloy-external-secret" - ClusterSecretStoreName = "vault-secret-store" + Namespace = "grafana-alloy" + Release = "grafana-alloy" + Chart = "grafana/alloy" + Version = "1.4.0" + Repo = "https://grafana.github.io/helm-charts" + 
ConfigMapName = "grafana-alloy-cm" + SecretsName = "grafana-alloy-secrets" // Node exporter settings NodeExporterNamespace = "node-exporter" @@ -40,9 +38,6 @@ const ( BlockNodeTemplatePath = "files/alloy/block-node.alloy" BlockNodeServiceMonitorPath = "files/alloy/block-node-servicemonitor.yaml" BlockNodePodLogsPath = "files/alloy/block-node-podlogs.yaml" - - // Vault path prefix for secrets - VaultPathPrefix = "grafana/alloy/" ) // Remote represents a single remote endpoint for Prometheus or Loki. @@ -228,3 +223,21 @@ func toEnvVarName(name string) string { func isLocalhostURL(url string) bool { return strings.Contains(url, "localhost") || strings.Contains(url, "127.0.0.1") } + +// RequiredSecrets returns the K8s secret name and expected keys that must exist +// for the configured remotes. All passwords are expected in the conventional +// secret "grafana-alloy-secrets" under keys derived from remote names. +// Returns nil if no remotes are configured. +func (cb *ConfigBuilder) RequiredSecrets() map[string][]string { + var keys []string + for _, r := range cb.prometheusRemotes { + keys = append(keys, r.PasswordEnvVar) + } + for _, r := range cb.lokiRemotes { + keys = append(keys, r.PasswordEnvVar) + } + if len(keys) == 0 { + return nil + } + return map[string][]string{SecretsName: keys} +} diff --git a/internal/alloy/manifest.go b/internal/alloy/manifest.go index e076bf85..31d2b83c 100644 --- a/internal/alloy/manifest.go +++ b/internal/alloy/manifest.go @@ -5,7 +5,6 @@ package alloy import ( "sort" - "github.com/hashgraph/solo-weaver/internal/config" "github.com/hashgraph/solo-weaver/internal/templates" ) @@ -52,39 +51,6 @@ func ConfigMapManifest(modules []ModuleConfig) (string, error) { return templates.Render("files/alloy/configmap.yaml", data) } -// ExternalSecretTemplateData holds data for the ExternalSecret template. 
-type ExternalSecretTemplateData struct { - ExternalSecretName string - Namespace string - ClusterSecretStoreName string - SecretsName string - ClusterName string - SecretDataEntries string -} - -// ExternalSecretManifest generates the Alloy ExternalSecret manifest. -// Uses the external-secret.yaml template file. -func ExternalSecretManifest(cfg config.AlloyConfig, clusterName string) (string, error) { - secretDataEntries := BuildExternalSecretDataEntries(cfg, clusterName) - - // Use configurable ClusterSecretStoreName, fallback to default constant - clusterSecretStoreName := cfg.ClusterSecretStoreName - if clusterSecretStoreName == "" { - clusterSecretStoreName = ClusterSecretStoreName - } - - data := ExternalSecretTemplateData{ - ExternalSecretName: ExternalSecretName, - Namespace: Namespace, - ClusterSecretStoreName: clusterSecretStoreName, - SecretsName: SecretsName, - ClusterName: clusterName, - SecretDataEntries: secretDataEntries, - } - - return templates.Render("files/alloy/external-secret.yaml", data) -} - // BaseHelmValues returns the base Helm values for Alloy installation. // Configures Alloy to load config from a single config.alloy key in the ConfigMap. func BaseHelmValues() []string { @@ -161,20 +127,3 @@ func NamespaceManifest() (string, error) { return templates.Render("files/alloy/namespace.yaml", data) } - -// EmptySecretTemplateData holds data for the empty secret template. -type EmptySecretTemplateData struct { - SecretsName string - Namespace string -} - -// EmptySecretManifest generates an empty secret manifest. -// Used when no remotes are configured so the pod doesn't fail looking for the secret. 
-func EmptySecretManifest() (string, error) { - data := EmptySecretTemplateData{ - SecretsName: SecretsName, - Namespace: Namespace, - } - - return templates.Render("files/alloy/empty-secret.yaml", data) -} diff --git a/internal/alloy/render.go b/internal/alloy/render.go index f8b2b0b1..a30b1139 100644 --- a/internal/alloy/render.go +++ b/internal/alloy/render.go @@ -145,47 +145,9 @@ func GetModuleNames(modules []ModuleConfig) []string { return names } -// BuildExternalSecretDataEntries builds the data entries for the ExternalSecret manifest. -func BuildExternalSecretDataEntries(cfg config.AlloyConfig, clusterName string) string { - var entries []string - - // Handle Prometheus remotes - if len(cfg.PrometheusRemotes) > 0 { - for _, r := range cfg.PrometheusRemotes { - envVarName := "PROMETHEUS_PASSWORD_" + toEnvVarName(r.Name) - vaultKey := VaultPathPrefix + clusterName + "/prometheus/" + r.Name - entries = append(entries, buildSecretDataEntry(envVarName, vaultKey, "password")) - } - } else if cfg.PrometheusURL != "" { - // Backward compatibility: legacy single remote - entries = append(entries, buildSecretDataEntry("PROMETHEUS_PASSWORD", VaultPathPrefix+clusterName+"/prometheus", "password")) - } - - // Handle Loki remotes - if len(cfg.LokiRemotes) > 0 { - for _, r := range cfg.LokiRemotes { - envVarName := "LOKI_PASSWORD_" + toEnvVarName(r.Name) - vaultKey := VaultPathPrefix + clusterName + "/loki/" + r.Name - entries = append(entries, buildSecretDataEntry(envVarName, vaultKey, "password")) - } - } else if cfg.LokiURL != "" { - // Backward compatibility: legacy single remote - entries = append(entries, buildSecretDataEntry("LOKI_PASSWORD", VaultPathPrefix+clusterName+"/loki", "password")) - } - - return strings.Join(entries, "") -} - -// buildSecretDataEntry builds a single ExternalSecret data entry. 
-func buildSecretDataEntry(secretKey, vaultKey, property string) string { - return ` - secretKey: ` + secretKey + ` - remoteRef: - key: "` + vaultKey + `" - property: ` + property + ` -` -} - // BuildHelmEnvVars builds the Helm values for environment variables from secrets. +// All passwords are sourced from the conventional K8s Secret "grafana-alloy-secrets" +// using keys derived from remote names (e.g., PROMETHEUS_PASSWORD_PRIMARY). func BuildHelmEnvVars(cfg config.AlloyConfig) []string { var envVars []string idx := 0 @@ -219,7 +181,8 @@ func BuildHelmEnvVars(cfg config.AlloyConfig) []string { return envVars } -// buildEnvVarHelmValues builds the Helm value entries for a single environment variable. +// buildEnvVarHelmValues builds the Helm value entries for a single environment variable +// referencing the conventional K8s Secret "grafana-alloy-secrets". func buildEnvVarHelmValues(idx int, envVarName string) []string { idxStr := strconv.Itoa(idx) return []string{ diff --git a/internal/alloy/render_test.go b/internal/alloy/render_test.go index 7a5bbef2..50952820 100644 --- a/internal/alloy/render_test.go +++ b/internal/alloy/render_test.go @@ -217,15 +217,101 @@ func TestNamespaceManifest(t *testing.T) { assert.Contains(t, manifest, "name: "+Namespace) } -func TestEmptySecretManifest(t *testing.T) { - manifest, err := EmptySecretManifest() +func TestBuildHelmEnvVars_DefaultSecret(t *testing.T) { + cfg := config.AlloyConfig{ + PrometheusRemotes: []config.AlloyRemoteConfig{ + {Name: "primary", URL: "http://prom:9090/api/v1/write", Username: "user1"}, + }, + LokiRemotes: []config.AlloyRemoteConfig{ + {Name: "primary", URL: "http://loki:3100/loki/api/v1/push", Username: "user1"}, + }, + } + + envVars := BuildHelmEnvVars(cfg) + + // All env vars should reference the convention-based grafana-alloy-secrets + assert.Contains(t, envVars, "alloy.extraEnv[0].name=PROMETHEUS_PASSWORD_PRIMARY") + assert.Contains(t, envVars, 
"alloy.extraEnv[0].valueFrom.secretKeyRef.name="+SecretsName) + assert.Contains(t, envVars, "alloy.extraEnv[0].valueFrom.secretKeyRef.key=PROMETHEUS_PASSWORD_PRIMARY") + assert.Contains(t, envVars, "alloy.extraEnv[1].name=LOKI_PASSWORD_PRIMARY") + assert.Contains(t, envVars, "alloy.extraEnv[1].valueFrom.secretKeyRef.name="+SecretsName) + assert.Contains(t, envVars, "alloy.extraEnv[1].valueFrom.secretKeyRef.key=LOKI_PASSWORD_PRIMARY") +} + +func TestBuildHelmEnvVars_MultipleRemotes(t *testing.T) { + cfg := config.AlloyConfig{ + PrometheusRemotes: []config.AlloyRemoteConfig{ + {Name: "primary", URL: "http://prom1:9090/api/v1/write", Username: "user1"}, + {Name: "backup", URL: "http://prom2:9090/api/v1/write", Username: "user2"}, + }, + } + + envVars := BuildHelmEnvVars(cfg) + + // Both remotes reference the same conventional secret with derived keys + assert.Contains(t, envVars, "alloy.extraEnv[0].name=PROMETHEUS_PASSWORD_PRIMARY") + assert.Contains(t, envVars, "alloy.extraEnv[0].valueFrom.secretKeyRef.name="+SecretsName) + assert.Contains(t, envVars, "alloy.extraEnv[0].valueFrom.secretKeyRef.key=PROMETHEUS_PASSWORD_PRIMARY") + assert.Contains(t, envVars, "alloy.extraEnv[1].name=PROMETHEUS_PASSWORD_BACKUP") + assert.Contains(t, envVars, "alloy.extraEnv[1].valueFrom.secretKeyRef.name="+SecretsName) + assert.Contains(t, envVars, "alloy.extraEnv[1].valueFrom.secretKeyRef.key=PROMETHEUS_PASSWORD_BACKUP") +} + +func TestRequiredSecrets(t *testing.T) { + cfg := config.AlloyConfig{ + ClusterName: "test-cluster", + PrometheusRemotes: []config.AlloyRemoteConfig{ + {Name: "primary", URL: "http://prom:9090/api/v1/write", Username: "user1"}, + }, + LokiRemotes: []config.AlloyRemoteConfig{ + {Name: "primary", URL: "http://loki:3100/loki/api/v1/push", Username: "user1"}, + }, + } + + cb, err := NewConfigBuilder(cfg) require.NoError(t, err) - // Verify it's a valid secret manifest - assert.Contains(t, manifest, "apiVersion: v1") - assert.Contains(t, manifest, "kind: Secret") - 
assert.Contains(t, manifest, "name: "+SecretsName) - assert.Contains(t, manifest, "namespace: "+Namespace) - assert.Contains(t, manifest, "type: Opaque") - assert.Contains(t, manifest, "data: {}") + secrets := cb.RequiredSecrets() + + // All keys should be under the single conventional secret + require.Contains(t, secrets, SecretsName) + assert.Contains(t, secrets[SecretsName], "PROMETHEUS_PASSWORD_PRIMARY") + assert.Contains(t, secrets[SecretsName], "LOKI_PASSWORD_PRIMARY") + assert.Len(t, secrets, 1, "should only reference one secret") +} + +func TestRequiredSecrets_NoRemotes(t *testing.T) { + cfg := config.AlloyConfig{ + ClusterName: "test-cluster", + } + + cb, err := NewConfigBuilder(cfg) + require.NoError(t, err) + + secrets := cb.RequiredSecrets() + assert.Nil(t, secrets, "should return nil when no remotes configured") +} + +func TestRequiredSecrets_MultipleRemotes(t *testing.T) { + cfg := config.AlloyConfig{ + ClusterName: "test-cluster", + PrometheusRemotes: []config.AlloyRemoteConfig{ + {Name: "primary", URL: "http://prom1:9090/api/v1/write", Username: "user1"}, + {Name: "backup", URL: "http://prom2:9090/api/v1/write", Username: "user2"}, + }, + LokiRemotes: []config.AlloyRemoteConfig{ + {Name: "primary", URL: "http://loki:3100/loki/api/v1/push", Username: "user1"}, + }, + } + + cb, err := NewConfigBuilder(cfg) + require.NoError(t, err) + + secrets := cb.RequiredSecrets() + + require.Contains(t, secrets, SecretsName) + assert.Len(t, secrets[SecretsName], 3) + assert.Contains(t, secrets[SecretsName], "PROMETHEUS_PASSWORD_PRIMARY") + assert.Contains(t, secrets[SecretsName], "PROMETHEUS_PASSWORD_BACKUP") + assert.Contains(t, secrets[SecretsName], "LOKI_PASSWORD_PRIMARY") } diff --git a/internal/config/config.go b/internal/config/config.go index d02bdb6a..723aef4a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -126,6 +126,9 @@ type BlockNodeConfig struct { } // AlloyRemoteConfig represents a single remote endpoint for metrics or 
logs. +// Passwords are expected in K8s Secret "grafana-alloy-secrets" under conventional keys: +// - Prometheus: PROMETHEUS_PASSWORD_ +// - Loki: LOKI_PASSWORD_ type AlloyRemoteConfig struct { Name string `yaml:"name" json:"name"` // Unique identifier for this remote URL string `yaml:"url" json:"url"` // Remote write URL diff --git a/internal/workflows/steps/step_alloy.go b/internal/workflows/steps/step_alloy.go index 7f0914ef..32bb95d3 100644 --- a/internal/workflows/steps/step_alloy.go +++ b/internal/workflows/steps/step_alloy.go @@ -30,17 +30,17 @@ const ( InstallNodeExporterStepId = "install-node-exporter" DeployAlloyConfigStepId = "deploy-alloy-config" CreateAlloyNamespaceStepId = "create-alloy-namespace" - CreateAlloySecretsStepId = "create-alloy-secrets" IsAlloyReadyStepId = "is-alloy-ready" IsNodeExporterReadyStepId = "is-node-exporter-ready" ) // SetupAlloyStack returns a workflow builder that sets up the complete Alloy observability stack. // This includes Prometheus Operator CRDs and Grafana Alloy. +// K8s secrets containing passwords for remote endpoints must be pre-created before running this. +// Secrets can be created manually, via ESO/Vault, Terraform, or any other mechanism. func SetupAlloyStack() *automa.WorkflowBuilder { return automa.NewWorkflowBuilder().WithId("setup-alloy-stack").Steps( - preCheckAlloy(), // Verify prerequisites (ClusterSecretStore, remote endpoints) - SetupExternalSecrets(), // External Secrets Operator (general-purpose secret management for the cluster) + preCheckAlloy(), // Verify prerequisites (K8s secrets, remote endpoints) SetupPrometheusOperatorCRDs(), // Install CRDs for ServiceMonitor/PodMonitor SetupAlloy(), // Install Alloy with Node Exporter ). @@ -77,8 +77,7 @@ func TeardownAlloyStack() *automa.WorkflowBuilder { } // preCheckAlloy verifies that all prerequisites are in place before installing Alloy. -// This includes checking for ClusterSecretStore existence and verifying -// remote endpoint reachability. 
+// This includes verifying that required K8s secrets exist and that remote endpoints are reachable. func preCheckAlloy() automa.Builder { return automa.NewStepBuilder().WithId(PreCheckAlloyStepId). WithExecute(func(ctx context.Context, stp automa.Step) *automa.Report { @@ -87,48 +86,39 @@ func preCheckAlloy() automa.Builder { meta := map[string]string{} - // Get the ClusterSecretStore name (from config or default) - clusterSecretStoreName := cfg.ClusterSecretStoreName - if clusterSecretStoreName == "" { - clusterSecretStoreName = alloy.ClusterSecretStoreName - } - - // Check if any remotes are configured (require ClusterSecretStore only if remotes exist) + // Check if any remotes are configured hasRemotes := len(cfg.PrometheusRemotes) > 0 || len(cfg.LokiRemotes) > 0 || cfg.PrometheusURL != "" || cfg.LokiURL != "" - k, err := kube.NewClient() - if err != nil { - return automa.StepFailureReport(stp.Id(), automa.WithError( - fmt.Errorf("failed to create kubernetes client: %w", err))) - } - if hasRemotes { - // Verify ClusterSecretStore exists which is required to fetch credentials for remotes - exists, err := k.ResourceExists(ctx, "external-secrets.io/v1beta1", "ClusterSecretStore", "", clusterSecretStoreName) + k, err := kube.NewClient() if err != nil { return automa.StepFailureReport(stp.Id(), automa.WithError( - fmt.Errorf("failed to check ClusterSecretStore existence: %w", err))) - } else if !exists { - return automa.StepFailureReport(stp.Id(), automa.WithError( - fmt.Errorf("ClusterSecretStore %q not found; please create it first (e.g., run 'task vault:setup-secret-store') or specify a different name with --cluster-secret-store", clusterSecretStoreName))) - } else { - l.Info().Str("name", clusterSecretStoreName).Msg("ClusterSecretStore found") - meta["clusterSecretStore"] = clusterSecretStoreName + fmt.Errorf("failed to create kubernetes client: %w", err))) + } + + // Build config to determine required secrets + cb, err := alloy.NewConfigBuilder(cfg) + if err != 
nil { + return automa.StepFailureReport(stp.Id(), automa.WithError(err)) + } - // Check if Vault URL from ClusterSecretStore is reachable - vaultURL, err := k.GetResourceNestedString(ctx, "external-secrets.io/v1beta1", "ClusterSecretStore", "", clusterSecretStoreName, - "spec", "provider", "vault", "server") + // Verify that required K8s secrets exist + requiredSecrets := cb.RequiredSecrets() + for secretName, keys := range requiredSecrets { + exists, err := k.ResourceExists(ctx, "v1", "Secret", alloy.Namespace, secretName) if err != nil { - l.Warn().Err(err).Msg("Failed to get Vault URL from ClusterSecretStore") - } else if vaultURL != "" { - if err := network.CheckEndpointReachable(ctx, vaultURL, 10*time.Second); err != nil { - return automa.StepFailureReport(stp.Id(), automa.WithError( - fmt.Errorf("Vault at %s is not reachable: %w", vaultURL, err))) - } - l.Info().Str("url", vaultURL).Msg("Vault endpoint is reachable") - meta["vaultURL"] = vaultURL + return automa.StepFailureReport(stp.Id(), automa.WithError( + fmt.Errorf("failed to check K8s Secret %q in namespace %q: %w", secretName, alloy.Namespace, err))) + } + if !exists { + return automa.StepFailureReport(stp.Id(), automa.WithError( + fmt.Errorf("K8s Secret %q not found in namespace %q; expected keys: %v. 
"+ + "Create the secret before installing Alloy, e.g.: "+ + "solo-provisioner eso secret create --store= --name=%s --namespace=%s --set =", + secretName, alloy.Namespace, keys, secretName, alloy.Namespace))) } + l.Info().Str("name", secretName).Str("namespace", alloy.Namespace).Strs("expectedKeys", keys).Msg("Required K8s Secret found") } // Check Prometheus remote endpoints reachability @@ -165,7 +155,7 @@ func preCheckAlloy() automa.Builder { l.Info().Str("url", cfg.LokiURL).Msg("Loki remote is reachable") } } else { - l.Info().Msg("No remotes configured; skipping ClusterSecretStore and endpoint checks") + l.Info().Msg("No remotes configured; skipping secret and endpoint checks") } // Log summary of what will be installed @@ -195,7 +185,6 @@ func SetupAlloy() *automa.WorkflowBuilder { createAlloyNamespace(), installNodeExporter(), isNodeExporterPodsReady(), - createAlloyExternalSecret(), deployAlloyConfig(), installAlloy(), isAlloyPodsReady(), @@ -373,97 +362,6 @@ func createAlloyNamespace() automa.Builder { }) } -func createAlloyExternalSecret() automa.Builder { - return automa.NewStepBuilder().WithId(CreateAlloySecretsStepId). 
- WithExecute(func(ctx context.Context, stp automa.Step) *automa.Report { - cfg := config.Get().Alloy - - k, err := kube.NewClient() - if err != nil { - return automa.StepFailureReport(stp.Id(), automa.WithError(err)) - } - - // Check if remotes are configured - hasRemotes := len(cfg.PrometheusRemotes) > 0 || len(cfg.LokiRemotes) > 0 || - cfg.PrometheusURL != "" || cfg.LokiURL != "" - - meta := map[string]string{} - var manifestPath string - var manifest string - - if hasRemotes { - // Create ExternalSecret to fetch passwords from Vault - manifestPath = path.Join(core.Paths().TempDir, "alloy-external-secret.yaml") - - // Build config to get cluster name - cb, err := alloy.NewConfigBuilder(cfg) - if err != nil { - return automa.StepFailureReport(stp.Id(), automa.WithError(err)) - } - clusterName := cb.ClusterName() - - // Generate the ExternalSecret manifest using the alloy package - manifest, err = alloy.ExternalSecretManifest(cfg, clusterName) - if err != nil { - return automa.StepFailureReport(stp.Id(), automa.WithError(err)) - } - } else { - // Create an empty secret so the pod doesn't fail looking for it - manifestPath = path.Join(core.Paths().TempDir, "alloy-empty-secret.yaml") - manifest, err = alloy.EmptySecretManifest() - if err != nil { - return automa.StepFailureReport(stp.Id(), automa.WithError(err)) - } - } - - err = os.WriteFile(manifestPath, []byte(manifest), 0600) - if err != nil { - return automa.StepFailureReport(stp.Id(), automa.WithError(err)) - } - - err = k.ApplyManifest(ctx, manifestPath) - if err != nil { - return automa.StepFailureReport(stp.Id(), automa.WithError(err)) - } - - meta[InstalledByThisStep] = "true" - stp.State().Set(InstalledByThisStep, true) - stp.State().Set("secretManifestPath", manifestPath) - - return automa.StepSuccessReport(stp.Id(), automa.WithMetadata(meta)) - }). 
- WithRollback(func(ctx context.Context, stp automa.Step) *automa.Report { - if stp.State().Bool(InstalledByThisStep) == false { - return automa.StepSkippedReport(stp.Id()) - } - - k, err := kube.NewClient() - if err != nil { - return automa.StepFailureReport(stp.Id(), automa.WithError(err)) - } - - manifestPath := stp.State().String("secretManifestPath") - if manifestPath != "" { - err = k.DeleteManifest(ctx, manifestPath) - if err != nil { - return automa.StepFailureReport(stp.Id(), automa.WithError(err)) - } - } - - return automa.StepSuccessReport(stp.Id()) - }). - WithPrepare(func(ctx context.Context, stp automa.Step) (context.Context, error) { - notify.As().StepStart(ctx, stp, "Creating Alloy secrets") - return ctx, nil - }). - WithOnFailure(func(ctx context.Context, stp automa.Step, rpt *automa.Report) { - notify.As().StepFailure(ctx, stp, rpt, "Failed to create Alloy secrets") - }). - WithOnCompletion(func(ctx context.Context, stp automa.Step, rpt *automa.Report) { - notify.As().StepCompletion(ctx, stp, rpt, "Alloy secrets created successfully") - }) -} - func installAlloy() automa.Builder { return automa.NewStepBuilder().WithId(InstallAlloyStepId). WithExecute(func(ctx context.Context, stp automa.Step) *automa.Report { diff --git a/test/alloy/README.md b/test/alloy/README.md index 4c64d8ff..92754a12 100644 --- a/test/alloy/README.md +++ b/test/alloy/README.md @@ -1,30 +1,38 @@ -# Alloy Stack - Grafana Alloy with Vault +# Alloy Stack - Grafana Alloy Observability -Complete Alloy observability stack for Solo Provisioner with Vault-managed secrets. +Complete Alloy observability stack for Solo Provisioner. 
**Components:** - Grafana Alloy - Metrics and log collection -- HashiCorp Vault - Secret management -- External Secrets Operator - Secret synchronization - Prometheus - Metrics storage - Loki - Log aggregation - Grafana - Visualization +**Optional (for production secret management):** +- HashiCorp Vault - Secret management +- External Secrets Operator - Secret synchronization from Vault/AWS/GCP + ## 🎯 Architecture Overview -**All secrets are managed by Vault via External Secrets Operator** - no plain Kubernetes secrets! +Alloy expects passwords in a K8s Secret named `grafana-alloy-secrets` in the `grafana-alloy` namespace. +How that secret gets created is up to you — manually, via ESO/Vault, Terraform, or any other mechanism. ``` -Local Dev: Production: -Docker Vault (dev mode) Enterprise Vault cluster - ↓ ↓ -External Secrets Operator ←→ External Secrets Operator +Manual (local dev): ESO + Vault (production): +kubectl create secret Vault/AWS/GCP ↓ ↓ -K8s Secret (auto-synced) K8s Secret (auto-synced) - ↓ ↓ -Alloy Pod (metrics/logs) Alloy Pod (metrics/logs) +K8s Secret External Secrets Operator + "grafana-alloy-secrets" ↓ + ↓ K8s Secret (auto-synced) +Alloy Pod (metrics/logs) "grafana-alloy-secrets" + ↓ + Alloy Pod (metrics/logs) ``` +**Key naming convention:** +- `PROMETHEUS_PASSWORD_` — password for each `--add-prometheus-remote name=` +- `LOKI_PASSWORD_` — password for each `--add-loki-remote name=` + --- ## 🚀 Quick Start - Local Development @@ -36,14 +44,14 @@ From your Mac, ensure you have the latest build: task build ``` -### Step 1: Start Alloy Stack +### Step 1: Start Observability Stack Start and SSH into a fresh VM: ```bash task vm:ssh ``` -Then, from within the VM, start the Alloy stack: +Then, from within the VM, start the observability stack (Prometheus, Loki, Grafana): ```bash task alloy:start ``` @@ -64,32 +72,35 @@ sudo solo-provisioner block node install \ --config=/mnt/solo-weaver/test/config/config.yaml ``` -### Step 3: Install Alloy (Minimal) +### 
Step 3: Create K8s Secret -Install Alloy without remote endpoints. This installs External Secrets Operator and the basic Alloy stack without requiring secrets from Vault: +Create the K8s secret containing passwords for the remote endpoints. +**Option A: Using the task (recommended for local dev):** ```bash -sudo solo-provisioner alloy cluster install \ - --cluster-name=vm-cluster +task alloy:create-secret ``` -> **Note:** Without `--add-prometheus-remote` or `--add-loki-remote` flags, Alloy installs in "local-only" mode. No secrets are required. - -### Step 4: Configure Vault Connection - -Now that ESO is installed, configure the ClusterSecretStore to connect to Vault: +This creates secret `grafana-alloy-secrets` in namespace `grafana-alloy` with keys +`PROMETHEUS_PASSWORD_LOCAL` and `LOKI_PASSWORD_LOCAL` set to `dev-password`. +**Option B: Using kubectl directly:** ```bash -task vault:setup-secret-store +kubectl create namespace grafana-alloy --dry-run=client -o yaml | kubectl apply -f - + +kubectl create secret generic grafana-alloy-secrets \ + --namespace=grafana-alloy \ + --from-literal=PROMETHEUS_PASSWORD_LOCAL=dev-password \ + --from-literal=LOKI_PASSWORD_LOCAL=dev-password \ + --dry-run=client -o yaml | kubectl apply -f - ``` -This will auto-detect the node IP and configure the ClusterSecretStore. +> **Convention:** The key names follow the pattern `{PROMETHEUS|LOKI}_PASSWORD_{REMOTE_NAME}`, +> where `REMOTE_NAME` matches the `name=` value in the `--add-*-remote` flags (uppercased, dashes replaced with underscores). 
-### Step 5: Upgrade Alloy with Remotes +### Step 4: Install Alloy with Remotes -Now that secrets can sync, upgrade Alloy with remote endpoints: ```bash -# Get the node IP for remote endpoints NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}') echo "Node IP: $NODE_IP" @@ -100,7 +111,7 @@ sudo solo-provisioner alloy cluster install \ --monitor-block-node ``` -> **Note:** The `--add-*-remote` flags use the format `name=<name>,url=<url>,username=<username>`. The password is fetched from Vault at path `grafana/alloy/{clusterName}/{prometheus|loki}/{remoteName}`. +> **Note:** The remote name `local` maps to secret keys `PROMETHEUS_PASSWORD_LOCAL` and `LOKI_PASSWORD_LOCAL`. Wait for pods to be ready: ```bash @@ -108,7 +119,12 @@ kubectl get pods -n grafana-alloy # Should show Running ``` -### Step 6: Access Grafana and Vault UI +> **Tip:** To install Alloy without remote endpoints (local-only mode, no secrets needed): +> ```bash +> sudo solo-provisioner alloy cluster install --cluster-name=vm-cluster +> ``` + +### Step 5: Access Grafana From your Mac, forward the ports: ```bash @@ -117,9 +133,10 @@ task vm:alloy-forward This forwards: - **Grafana:** http://localhost:3000 (anonymous auth enabled - no login required) -- **Vault UI:** http://localhost:8200 (token: `devtoken`) +- **Prometheus:** http://localhost:9090 +- **Loki:** http://localhost:3100 -### Step 7: Verify in Grafana +### Step 6: Verify in Grafana Navigate to **Explore** and test these queries organized by module: @@ -282,9 +299,25 @@ sudo solo-provisioner alloy cluster install \ --monitor-block-node ``` -Each remote requires a corresponding Vault secret at: -- `grafana/alloy/{clusterName}/prometheus/{remoteName}` → property: `password` -- `grafana/alloy/{clusterName}/loki/{remoteName}` → property: `password` +Each remote requires a corresponding key in K8s Secret `grafana-alloy-secrets`: + +| Remote flag | Expected secret key | +|---|---| +| 
`--add-prometheus-remote=name=primary,...` | `PROMETHEUS_PASSWORD_PRIMARY` | +| `--add-prometheus-remote=name=backup,...` | `PROMETHEUS_PASSWORD_BACKUP` | +| `--add-loki-remote=name=primary,...` | `LOKI_PASSWORD_PRIMARY` | +| `--add-loki-remote=name=grafana-cloud,...` | `LOKI_PASSWORD_GRAFANA_CLOUD` | + +Create the secret with all required keys: +```bash +kubectl create secret generic grafana-alloy-secrets \ + --namespace=grafana-alloy \ + --from-literal=PROMETHEUS_PASSWORD_PRIMARY=pass1 \ + --from-literal=PROMETHEUS_PASSWORD_BACKUP=pass2 \ + --from-literal=LOKI_PASSWORD_PRIMARY=pass3 \ + --from-literal=LOKI_PASSWORD_GRAFANA_CLOUD=pass4 \ + --dry-run=client -o yaml | kubectl apply -f - +``` ### Managing Remote Endpoints @@ -343,7 +376,7 @@ internal/templates/files/alloy/ ├── block-node-servicemonitor.yaml # ServiceMonitor for Block Node metrics ├── block-node-podlogs.yaml # PodLogs for Block Node logs ├── configmap.yaml # ConfigMap manifest template -└── external-secret.yaml # ExternalSecret manifest template +└── namespace.yaml # Namespace manifest template grafana-alloy-cm ConfigMap: └── config.alloy # Concatenated modules (used by Alloy) @@ -366,8 +399,19 @@ To view the current ConfigMap contents: kubectl get configmap grafana-alloy-cm -n grafana-alloy -o yaml ``` -### Manage Vault Separately +### Vault + ESO (Optional) + +For production or when you want secrets synced automatically from Vault: + +```bash +# Start the full stack including Vault +task alloy:start-with-vault + +# Configure ClusterSecretStore (from within the VM) +task vault:setup-secret-store +``` +Manage Vault separately: ```bash task vault:start # Start Vault only task vault:stop # Stop Vault @@ -403,28 +447,21 @@ You can also access the Vault UI at http://localhost:8200 (token: `devtoken`) af ### Production Setup -For production, configure ClusterSecretStore to point to your enterprise Vault: - -```yaml -apiVersion: external-secrets.io/v1 -kind: ClusterSecretStore -metadata: - name: 
vault-secret-store -spec: - provider: - vault: - server: "https://vault.example.com" - path: "secret" - version: v2 - auth: - userPass: - path: userpass - username: "production-eso-user" - secretRef: - name: vault-credentials - namespace: kube-system - key: password -``` +In production, the K8s Secret `grafana-alloy-secrets` can be created by any mechanism: + +**Option 1: ESO + Vault** — Use `solo-provisioner eso` commands to install ESO and create +ExternalSecret resources that sync passwords from Vault into the K8s Secret automatically. + +**Option 2: ESO + Cloud Provider** — Use ESO with AWS Secrets Manager, GCP Secret Manager, +or Azure Key Vault as the backend. + +**Option 3: Terraform / CI pipeline** — Create the K8s Secret as part of your infrastructure +provisioning. + +**Option 4: Manual** — Create the secret with `kubectl` as shown in the Quick Start. + +The only requirement is that the K8s Secret named `grafana-alloy-secrets` exists in namespace +`grafana-alloy` with the expected keys before running `alloy cluster install`. 
--- @@ -432,10 +469,27 @@ spec: | File | Purpose | |------|---------| -| `docker-compose.yml` | Container definitions | -| `init-vault.sh` | Initialize Vault with dev secrets | -| `cluster-secret-store-local.yaml` | ESO → Vault connection template | -| `config_with_alloy.yaml` | Solo Provisioner config with Alloy enabled | +| `docker-compose.yml` | Container definitions (Prometheus, Loki, Grafana, Vault) | +| `init-vault.sh` | Initialize Vault with dev secrets (used by `task vault:start`) | +| `cluster-secret-store-local.yaml` | ESO → Vault connection template (advanced, optional) | | `prometheus.yml` | Prometheus configuration | | `loki-config.yml` | Loki configuration | | `grafana-datasources.yml` | Grafana datasources | + +## 🛠️ Task Reference + +| Task | Description | +|------|-------------| +| `task alloy:start` | Start Prometheus, Loki, Grafana | +| `task alloy:start-with-vault` | Start full stack including Vault | +| `task alloy:stop` | Stop all containers | +| `task alloy:clean` | Stop and remove all data | +| `task alloy:create-secret` | Create `grafana-alloy-secrets` K8s Secret with dev passwords | +| `task alloy:delete-secret` | Delete the K8s Secret | +| `task alloy:status` | Show container status | +| `task alloy:logs` | Tail container logs | +| `task vault:start` | Start Vault only | +| `task vault:stop` | Stop Vault | +| `task vault:clean` | Remove Vault data | +| `task vault:setup-secret-store` | Configure ESO ClusterSecretStore → Vault | +