Add envoy metrics discovery bundle #5780

Open: wants to merge 8 commits into main
2 changes: 1 addition & 1 deletion .github/workflows/integration-test.yml
@@ -258,7 +258,7 @@ jobs:
strategy:
matrix:
RUNNER: [ "ubuntu-20.04", "ubuntu-22.04", "ubuntu-24.04" ]
PROFILE: [ "integration", "smartagent" ]
PROFILE: [ "integration", "smartagent", "envoy_discovery" ]
fail-fast: false
env:
TEST_OUTPUT: ${{ github.job }}-${{ matrix.PROFILE }}-${{ matrix.RUNNER }}.out
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -6,6 +6,10 @@

- (Splunk) Add `metricsgeneration` processor ([#5769](https://github.com/signalfx/splunk-otel-collector/pull/5769))

### 💡 Enhancements 💡

- (Splunk) Add a new discovery bundle for Envoy proxy metrics ([#5780](https://github.com/signalfx/splunk-otel-collector/pull/5780))

## v0.116.0

This Splunk OpenTelemetry Collector release includes changes from the [opentelemetry-collector v0.116.0](https://github.com/open-telemetry/opentelemetry-collector/releases/tag/v0.116.0) and the [opentelemetry-collector-contrib v0.116.0](https://github.com/open-telemetry/opentelemetry-collector-contrib/releases/tag/v0.116.0) releases where appropriate.
@@ -0,0 +1,39 @@
#####################################################################################
# This file is generated by the Splunk Distribution of the OpenTelemetry Collector. #
# #
# It reflects the default configuration bundled in the Collector executable for use #
# in discovery mode (--discovery) and is provided for reference or customization. #
# Please note that any changes made to this file will need to be reconciled during #
# upgrades of the Collector. #
#####################################################################################
# prometheus:
# enabled: true
# rule:
# docker_observer: type == "container" and any([name, image, command], {# matches "(?i)envoy"}) and not (command matches "splunk.discovery")
# host_observer: type == "hostport" and command matches "(?i)envoy" and not (command matches "splunk.discovery")
# k8s_observer: type == "port" and pod.name matches "(?i)envoy"
Contributor:
Is the k8s rule intentionally matching just the pod name instead of the broader rule used by docker_observer? Istio sidecars, for example, have a pod name like istio-proxy, and the image name might have envoy in it. But maybe we should only match standalone Envoy to start with.
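[Editor's illustration] To make the question concrete, the rule as written consults only the pod name, so a sketch of a matching pod (hypothetical names) would be:

# Sketch: a pod the k8s_observer rule would discover, on name alone.
apiVersion: v1
kind: Pod
metadata:
  name: envoy-gateway-0        # pod.name matches "(?i)envoy"
spec:
  containers:
    - name: proxy
      image: envoyproxy/envoy:v1.32-latest
      ports:
        - containerPort: 9901  # the admin port the bundled scrape config targets

An Istio data-plane pod, by contrast, is typically named after its workload rather than "envoy", so it would not match this rule even though it runs Envoy.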

Contributor (author):
Right, you had a note about that. I think we can update the rule to add istio-proxy, but then I really need a test showing this works for an Istio mesh.

Contributor (author):
I am tempted to make a separate integration test with a kind cluster, where we deploy everything and make sure it works. I might get into that next and tweak the rule based on the results.

Contributor:
I think we should merge this one as-is and open a separate item to officially support Istio. It is not just the pod name; the default Prometheus port used by Istio is also not the same as the one here. I don't know if it is supported in discovery mode, but it would be better to get the port/path from the prometheus.io annotations for Istio (like we have in our Helm chart).
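[Editor's illustration] For reference, with Istio metrics merging enabled the sidecar injector publishes scrape hints via annotations along these lines (a sketch, not part of this PR; port 15020 is Istio's merged-metrics default, not the 9901 admin port this bundle assumes):

# Sketch: Istio-injected scrape hints on a workload pod.
metadata:
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "15020"
    prometheus.io/path: /stats/prometheus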

# config:
# default:
# config:
# scrape_configs:
# - job_name: 'envoy'
# metrics_path: /stats/prometheus
# scrape_interval: 10s
# static_configs:
# - targets: ['`endpoint`']
# metric_relabel_configs:
# - source_labels: [__name__]
# action: keep
# regex: '(envoy_cluster_upstream_cx_active|envoy_cluster_upstream_cx_total|envoy_cluster_upstream_cx_connect_fail|envoy_cluster_upstream_cx_connect_ms|envoy_cluster_upstream_rq_active|envoy_cluster_upstream_rq_total|envoy_cluster_upstream_rq_timeout|envoy_cluster_upstream_rq_pending_active|envoy_cluster_upstream_rq_pending_overflow|envoy_cluster_upstream_rq_time|envoy_cluster_membership_total|envoy_cluster_membership_degraded|envoy_cluster_membership_excluded|envoy_listener_downstream_cx_active|envoy_listener_downstream_cx_total|envoy_listener_downstream_cx_transport_socket_connect_timeout|envoy_listener_downstream_cx_overflow|envoy_listener_downstream_cx_overload_reject|envoy_listener_downstream_global_cx_overflow)'
# status:
# metrics:
# - status: successful
# strict: envoy_cluster_upstream_cx_active
# message: envoy prometheus receiver is working!
# statements:
# - status: failed
# regexp: "connection refused"
# message: The container is not serving http connections.
# - status: failed
# regexp: "dial tcp: lookup"
# message: Unable to resolve envoy prometheus tcp endpoint
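For context on what the rules above discover: the bundled scrape config targets Envoy's admin interface, which exposes Prometheus-formatted stats at /stats/prometheus. A minimal bootstrap serving that endpoint might look like the following sketch (not a file in this PR; 9901 matches the docker-compose port mapping added below):

# Sketch: minimal Envoy bootstrap with the admin interface on 9901.
admin:
  address:
    socket_address:
      address: 0.0.0.0
      port_value: 9901
static_resources:
  listeners: []   # a real deployment would define listeners/clusters here
  clusters: []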
13 changes: 13 additions & 0 deletions docker/docker-compose.yml
@@ -80,6 +80,15 @@ services:
- integration
environment:
- ELASTIC_PASSWORD=$ELASTIC_PASSWORD
envoy:
image: quay.io/splunko11ytest/envoy:latest
profiles:
- envoy_discovery
build: ./envoy
ports:
- "9901:9901"
networks:
- envoy
# Haproxy image for haproxy test:
haproxy:
image: quay.io/splunko11ytest/haproxy:latest
@@ -309,3 +318,7 @@ services:
interval: 10s
timeout: 5s
retries: 5
networks:
envoy:
driver: bridge
name: envoy
1 change: 1 addition & 0 deletions docker/envoy/Dockerfile
@@ -0,0 +1 @@
FROM envoyproxy/envoy:v1.32-latest
@@ -0,0 +1,35 @@
#####################################################################################
# Do not edit manually! #
# All changes must be made to associated .tmpl file before running 'make bundle.d'. #
#####################################################################################
prometheus:
enabled: true
rule:
docker_observer: type == "container" and any([name, image, command], {# matches "(?i)envoy"}) and not (command matches "splunk.discovery")
host_observer: type == "hostport" and command matches "(?i)envoy" and not (command matches "splunk.discovery")
k8s_observer: type == "port" and pod.name matches "(?i)envoy"
config:
default:
config:
scrape_configs:
- job_name: 'envoy'
metrics_path: /stats/prometheus
scrape_interval: 10s
static_configs:
- targets: ['`endpoint`']
metric_relabel_configs:
- source_labels: [__name__]
action: keep
regex: '(envoy_cluster_upstream_cx_active|envoy_cluster_upstream_cx_total|envoy_cluster_upstream_cx_connect_fail|envoy_cluster_upstream_cx_connect_ms|envoy_cluster_upstream_rq_active|envoy_cluster_upstream_rq_total|envoy_cluster_upstream_rq_timeout|envoy_cluster_upstream_rq_pending_active|envoy_cluster_upstream_rq_pending_overflow|envoy_cluster_upstream_rq_time|envoy_cluster_membership_total|envoy_cluster_membership_degraded|envoy_cluster_membership_excluded|envoy_listener_downstream_cx_active|envoy_listener_downstream_cx_total|envoy_listener_downstream_cx_transport_socket_connect_timeout|envoy_listener_downstream_cx_overflow|envoy_listener_downstream_cx_overload_reject|envoy_listener_downstream_global_cx_overflow)'
status:
metrics:
- status: successful
strict: envoy_cluster_upstream_cx_active
message: envoy prometheus receiver is working!
statements:
- status: failed
regexp: "connection refused"
message: The container is not serving http connections.
- status: failed
regexp: "dial tcp: lookup"
message: Unable to resolve envoy prometheus tcp endpoint
@@ -0,0 +1,31 @@
{{ receiver "prometheus" }}:
enabled: true
rule:
docker_observer: type == "container" and any([name, image, command], {# matches "(?i)envoy"}) and not (command matches "splunk.discovery")
host_observer: type == "hostport" and command matches "(?i)envoy" and not (command matches "splunk.discovery")
k8s_observer: type == "port" and pod.name matches "(?i)envoy"
config:
default:
config:
scrape_configs:
- job_name: 'envoy'
metrics_path: /stats/prometheus
scrape_interval: 10s
static_configs:
- targets: ['`endpoint`']
metric_relabel_configs:
- source_labels: [__name__]
action: keep
regex: '(envoy_cluster_upstream_cx_active|envoy_cluster_upstream_cx_total|envoy_cluster_upstream_cx_connect_fail|envoy_cluster_upstream_cx_connect_ms|envoy_cluster_upstream_rq_active|envoy_cluster_upstream_rq_total|envoy_cluster_upstream_rq_timeout|envoy_cluster_upstream_rq_pending_active|envoy_cluster_upstream_rq_pending_overflow|envoy_cluster_upstream_rq_time|envoy_cluster_membership_total|envoy_cluster_membership_degraded|envoy_cluster_membership_excluded|envoy_listener_downstream_cx_active|envoy_listener_downstream_cx_total|envoy_listener_downstream_cx_transport_socket_connect_timeout|envoy_listener_downstream_cx_overflow|envoy_listener_downstream_cx_overload_reject|envoy_listener_downstream_global_cx_overflow)'
status:
metrics:
- status: successful
strict: envoy_cluster_upstream_cx_active
message: envoy prometheus receiver is working!
statements:
- status: failed
regexp: "connection refused"
message: The container is not serving http connections.
- status: failed
regexp: "dial tcp: lookup"
message: Unable to resolve envoy prometheus tcp endpoint
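Per the generated header on the reference copy above, customization happens in config.d rather than in this bundled file. For example, disabling Envoy discovery without rebuilding the bundle could look like the following sketch, assuming config.d entries take precedence over the bundled defaults:

# Sketch: config.d/receivers/envoy.discovery.yaml override.
prometheus:
  enabled: false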
2 changes: 2 additions & 0 deletions internal/confmapprovider/discovery/bundle/bundle_gen.go
@@ -25,6 +25,8 @@

//go:generate discoverybundler --render --template bundle.d/receivers/apache.discovery.yaml.tmpl
//go:generate discoverybundler --render --commented --dir ../../../../cmd/otelcol/config/collector/config.d.linux/receivers -t bundle.d/receivers/apache.discovery.yaml.tmpl
//go:generate discoverybundler --render --template bundle.d/receivers/envoy.discovery.yaml.tmpl
//go:generate discoverybundler --render --commented --dir ../../../../cmd/otelcol/config/collector/config.d.linux/receivers -t bundle.d/receivers/envoy.discovery.yaml.tmpl
//go:generate discoverybundler --render --template bundle.d/receivers/jmx-cassandra.discovery.yaml.tmpl
//go:generate discoverybundler --render --commented --dir ../../../../cmd/otelcol/config/collector/config.d.linux/receivers -t bundle.d/receivers/jmx-cassandra.discovery.yaml.tmpl
//go:generate discoverybundler --render --template bundle.d/receivers/kafkametrics.discovery.yaml.tmpl
@@ -28,6 +28,7 @@ func TestBundleDir(t *testing.T) {
require.NoError(t, err)
require.Equal(t, []string{
"bundle.d/receivers/apache.discovery.yaml",
"bundle.d/receivers/envoy.discovery.yaml",
"bundle.d/receivers/jmx-cassandra.discovery.yaml",
"bundle.d/receivers/kafkametrics.discovery.yaml",
"bundle.d/receivers/mongodb.discovery.yaml",
@@ -26,6 +26,7 @@ import (
//go:embed bundle.d/extensions/host-observer.discovery.yaml
//go:embed bundle.d/extensions/k8s-observer.discovery.yaml
//go:embed bundle.d/receivers/apache.discovery.yaml
//go:embed bundle.d/receivers/envoy.discovery.yaml
//go:embed bundle.d/receivers/jmx-cassandra.discovery.yaml
//go:embed bundle.d/receivers/kafkametrics.discovery.yaml
//go:embed bundle.d/receivers/mongodb.discovery.yaml
@@ -26,6 +26,7 @@ import (
//go:embed bundle.d/extensions/host-observer.discovery.yaml
//go:embed bundle.d/extensions/k8s-observer.discovery.yaml
//go:embed bundle.d/receivers/apache.discovery.yaml
//go:embed bundle.d/receivers/envoy.discovery.yaml
//go:embed bundle.d/receivers/jmx-cassandra.discovery.yaml
//go:embed bundle.d/receivers/kafkametrics.discovery.yaml
//go:embed bundle.d/receivers/mongodb.discovery.yaml
@@ -28,6 +28,7 @@ func TestBundleDir(t *testing.T) {
require.NoError(t, err)
require.Equal(t, []string{
"bundle.d/receivers/apache.discovery.yaml",
"bundle.d/receivers/envoy.discovery.yaml",
"bundle.d/receivers/jmx-cassandra.discovery.yaml",
"bundle.d/receivers/kafkametrics.discovery.yaml",
"bundle.d/receivers/mongodb.discovery.yaml",
2 changes: 2 additions & 0 deletions internal/confmapprovider/discovery/bundle/components.go
@@ -32,6 +32,7 @@ var (
// in Components.Linux. If desired in windows BundledFS, ensure they are included in Components.Windows.
receivers = []string{
"apache",
"envoy",
"jmx-cassandra",
"kafkametrics",
"mongodb",
@@ -66,6 +67,7 @@ var (
Windows: func() map[string]struct{} {
windows := map[string]struct{}{
"apache": {},
"envoy": {},
"jmx-cassandra": {},
"kafkametrics": {},
"mongodb": {},
79 changes: 75 additions & 4 deletions tests/general/discoverymode/docker_observer_discovery_test.go
@@ -74,10 +74,10 @@ func TestDockerObserver(t *testing.T) {
// runner seems to be slow
"SPLUNK_DISCOVERY_DURATION": "20s",
// confirm that debug logging doesn't affect runtime
"SPLUNK_DISCOVERY_LOG_LEVEL": "debug",
"DOCKER_DOMAIN_SOCKET": fmt.Sprintf("tcp://%s", dockerSocketProxy.ContainerEndpoint),
"LABEL_ONE_VALUE": "actual.label.one.value",
"LABEL_TWO_VALUE": "actual.label.two.value",
//"SPLUNK_DISCOVERY_LOG_LEVEL": "debug",
"DOCKER_DOMAIN_SOCKET": fmt.Sprintf("tcp://%s", dockerSocketProxy.ContainerEndpoint),
"LABEL_ONE_VALUE": "actual.label.one.value",
"LABEL_TWO_VALUE": "actual.label.two.value",
"SPLUNK_DISCOVERY_RECEIVERS_prometheus_x5f_simple_CONFIG_labels_x3a__x3a_label_x5f_three": "overwritten by --set property",
"SPLUNK_DISCOVERY_RECEIVERS_prometheus_x5f_simple_CONFIG_labels_x3a__x3a_label_x5f_four": "actual.label.four.value",
}).WithArgs(
@@ -188,6 +188,33 @@
"resource_attributes": map[string]any{},
"rule": "type == \"container\" and any([name, image, command], {# matches \"(?i)redis\"}) and not (command matches \"splunk.discovery\")",
},
"prometheus": map[string]any{
"config": map[string]any{
"config": map[string]any{
"scrape_configs": []any{
map[string]any{
"job_name": "envoy",
"metric_relabel_configs": []any{
map[string]any{
"action": "keep",
"regex": "(envoy_cluster_upstream_cx_active|envoy_cluster_upstream_cx_total|envoy_cluster_upstream_cx_connect_fail|envoy_cluster_upstream_cx_connect_ms|envoy_cluster_upstream_rq_active|envoy_cluster_upstream_rq_total|envoy_cluster_upstream_rq_timeout|envoy_cluster_upstream_rq_pending_active|envoy_cluster_upstream_rq_pending_overflow|envoy_cluster_upstream_rq_time|envoy_cluster_membership_total|envoy_cluster_membership_degraded|envoy_cluster_membership_excluded|envoy_listener_downstream_cx_active|envoy_listener_downstream_cx_total|envoy_listener_downstream_cx_transport_socket_connect_timeout|envoy_listener_downstream_cx_overflow|envoy_listener_downstream_cx_overload_reject|envoy_listener_downstream_global_cx_overflow)",
"source_labels": []any{"__name__"},
},
},
"metrics_path": "/stats/prometheus",
"scrape_interval": "10s",
"static_configs": []any{
map[string]any{
"targets": []any{"`endpoint`"},
},
},
},
},
},
},
"resource_attributes": map[string]any{},
"rule": "type == \"container\" and any([name, image, command], {# matches \"(?i)envoy\"}) and not (command matches \"splunk.discovery\")",
},
},
"watch_observers": []any{"docker_observer"},
},
@@ -248,6 +275,32 @@
"receivers": map[string]any{
"receiver_creator/discovery": map[string]any{
"receivers": map[string]any{
"prometheus": map[string]any{
"config": map[string]any{
"config": map[string]any{
"scrape_configs": []any{map[string]any{
"job_name": "envoy",
"metric_relabel_configs": []any{
map[string]any{
"action": "keep",
"regex": "(envoy_cluster_upstream_cx_active|envoy_cluster_upstream_cx_total|envoy_cluster_upstream_cx_connect_fail|envoy_cluster_upstream_cx_connect_ms|envoy_cluster_upstream_rq_active|envoy_cluster_upstream_rq_total|envoy_cluster_upstream_rq_timeout|envoy_cluster_upstream_rq_pending_active|envoy_cluster_upstream_rq_pending_overflow|envoy_cluster_upstream_rq_time|envoy_cluster_membership_total|envoy_cluster_membership_degraded|envoy_cluster_membership_excluded|envoy_listener_downstream_cx_active|envoy_listener_downstream_cx_total|envoy_listener_downstream_cx_transport_socket_connect_timeout|envoy_listener_downstream_cx_overflow|envoy_listener_downstream_cx_overload_reject|envoy_listener_downstream_global_cx_overflow)",
"source_labels": []any{"__name__"},
},
},
"metrics_path": "/stats/prometheus",
"scrape_interval": "10s",
"static_configs": []any{
map[string]any{
"targets": []any{"`endpoint`"},
},
},
},
},
},
},
"resource_attributes": map[string]any{},
"rule": "type == \"container\" and any([name, image, command], {# matches \"(?i)envoy\"}) and not (command matches \"splunk.discovery\")",
},
"prometheus_simple": map[string]any{
"config": map[string]any{
"collection_interval": "1s",
@@ -304,6 +357,24 @@ processors:
receivers:
receiver_creator/discovery:
receivers:
prometheus:
config:
config:
scrape_configs:
- job_name: envoy
metric_relabel_configs:
- action: keep
regex: (envoy_cluster_upstream_cx_active|envoy_cluster_upstream_cx_total|envoy_cluster_upstream_cx_connect_fail|envoy_cluster_upstream_cx_connect_ms|envoy_cluster_upstream_rq_active|envoy_cluster_upstream_rq_total|envoy_cluster_upstream_rq_timeout|envoy_cluster_upstream_rq_pending_active|envoy_cluster_upstream_rq_pending_overflow|envoy_cluster_upstream_rq_time|envoy_cluster_membership_total|envoy_cluster_membership_degraded|envoy_cluster_membership_excluded|envoy_listener_downstream_cx_active|envoy_listener_downstream_cx_total|envoy_listener_downstream_cx_transport_socket_connect_timeout|envoy_listener_downstream_cx_overflow|envoy_listener_downstream_cx_overload_reject|envoy_listener_downstream_global_cx_overflow)
source_labels:
- __name__
metrics_path: /stats/prometheus
scrape_interval: 10s
static_configs:
- targets:
- '`+"`endpoint`"+`'
resource_attributes: {}
rule: type == "container" and any([name, image, command], {# matches "(?i)envoy"})
and not (command matches "splunk.discovery")
prometheus_simple:
config:
collection_interval: 1s