From b8b55460815ca7a5c90d9c41dee0065b3bd8ff72 Mon Sep 17 00:00:00 2001 From: Paulin Todev Date: Fri, 20 Sep 2024 11:44:38 +0100 Subject: [PATCH] Fix metric renames for otelcol, update RC version (#1719) * Update RC version, fix OTEL metric renames (#1713) * Reword --------- Co-authored-by: Piotr <17101802+thampiotr@users.noreply.github.com> --- CHANGELOG.md | 5 ++++- .../components/otelcol/otelcol.exporter.awss3.md | 8 ++++---- .../components/otelcol/otelcol.exporter.otlp.md | 8 ++++---- .../components/otelcol/otelcol.processor.batch.md | 10 +++++----- .../otelcol/otelcol.processor.deltatocumulative.md | 14 +++++++------- .../components/otelcol/otelcol.receiver.otlp.md | 4 ++-- docs/sources/set-up/deploy.md | 6 +++--- .../alloy-mixin/alerts/opentelemetry.libsonnet | 8 ++++---- .../alloy-mixin/dashboards/opentelemetry.libsonnet | 14 +++++++------- 9 files changed, 40 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b38abe2466..69cd7b8426 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,10 +7,13 @@ This document contains a historical list of changes between releases. Only changes that impact end-user behavior are listed; changes to documentation or internal API changes are not present. -v1.4.0-rc.2 +v1.4.0-rc.3 ----------------- ### Breaking changes +- Some debug metrics for `otelcol` components have changed. (@thampiotr) + For example, `otelcol.exporter.otlp`'s `exporter_sent_spans_ratio_total` metric is now `otelcol_exporter_sent_spans_total`. + - [otelcol.processor.transform] The functions `convert_sum_to_gauge` and `convert_gauge_to_sum` must now be used in the `metric` `context` rather than in the `datapoint` context. 
https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/34567 (@wildum) diff --git a/docs/sources/reference/components/otelcol/otelcol.exporter.awss3.md b/docs/sources/reference/components/otelcol/otelcol.exporter.awss3.md index 5730c03a95..f405f65cfb 100644 --- a/docs/sources/reference/components/otelcol/otelcol.exporter.awss3.md +++ b/docs/sources/reference/components/otelcol/otelcol.exporter.awss3.md @@ -142,10 +142,10 @@ information. ## Debug metrics -* `exporter_sent_spans_ratio_total` (counter): Number of spans successfully sent to destination. -* `exporter_send_failed_spans_ratio_total` (counter): Number of spans in failed attempts to send to destination. -* `exporter_queue_capacity_ratio` (gauge): Fixed capacity of the retry queue (in batches). -* `exporter_queue_size_ratio` (gauge): Current size of the retry queue (in batches). +* `otelcol_exporter_sent_spans_total` (counter): Number of spans successfully sent to destination. +* `otelcol_exporter_send_failed_spans_total` (counter): Number of spans in failed attempts to send to destination. +* `otelcol_exporter_queue_capacity` (gauge): Fixed capacity of the retry queue (in batches). +* `otelcol_exporter_queue_size` (gauge): Current size of the retry queue (in batches). * `rpc_client_duration_milliseconds` (histogram): Measures the duration of inbound RPC. * `rpc_client_request_size_bytes` (histogram): Measures size of RPC request messages (uncompressed). * `rpc_client_requests_per_rpc` (histogram): Measures the number of messages received per RPC. Should be 1 for all non-streaming RPCs. diff --git a/docs/sources/reference/components/otelcol/otelcol.exporter.otlp.md b/docs/sources/reference/components/otelcol/otelcol.exporter.otlp.md index 529893e9fb..d956b453a4 100644 --- a/docs/sources/reference/components/otelcol/otelcol.exporter.otlp.md +++ b/docs/sources/reference/components/otelcol/otelcol.exporter.otlp.md @@ -172,10 +172,10 @@ information. 
## Debug metrics -* `exporter_sent_spans_ratio_total` (counter): Number of spans successfully sent to destination. -* `exporter_send_failed_spans_ratio_total` (counter): Number of spans in failed attempts to send to destination. -* `exporter_queue_capacity_ratio` (gauge): Fixed capacity of the retry queue (in batches) -* `exporter_queue_size_ratio` (gauge): Current size of the retry queue (in batches) +* `otelcol_exporter_sent_spans_total` (counter): Number of spans successfully sent to destination. +* `otelcol_exporter_send_failed_spans_total` (counter): Number of spans in failed attempts to send to destination. +* `otelcol_exporter_queue_capacity` (gauge): Fixed capacity of the retry queue (in batches). +* `otelcol_exporter_queue_size` (gauge): Current size of the retry queue (in batches). * `rpc_client_duration_milliseconds` (histogram): Measures the duration of inbound RPC. * `rpc_client_request_size_bytes` (histogram): Measures size of RPC request messages (uncompressed). * `rpc_client_requests_per_rpc` (histogram): Measures the number of messages received per RPC. Should be 1 for all non-streaming RPCs. diff --git a/docs/sources/reference/components/otelcol/otelcol.processor.batch.md b/docs/sources/reference/components/otelcol/otelcol.processor.batch.md index 7e3215ae59..f6a25dee02 100644 --- a/docs/sources/reference/components/otelcol/otelcol.processor.batch.md +++ b/docs/sources/reference/components/otelcol/otelcol.processor.batch.md @@ -136,11 +136,11 @@ information. ## Debug metrics -* `processor_batch_batch_send_size_bytes` (histogram): Number of bytes in batch that was sent. -* `processor_batch_batch_send_size_ratio` (histogram): Number of units in the batch. -* `processor_batch_metadata_cardinality_ratio` (gauge): Number of distinct metadata value combinations being processed. -* `processor_batch_timeout_trigger_send_ratio_total` (counter): Number of times the batch was sent due to a timeout trigger. 
-* `processor_batch_batch_size_trigger_send_ratio_total` (counter): Number of times the batch was sent due to a size trigger. +* `otelcol_processor_batch_batch_send_size_bytes` (histogram): Number of bytes in batch that was sent. +* `otelcol_processor_batch_batch_send_size` (histogram): Number of units in the batch. +* `otelcol_processor_batch_metadata_cardinality` (gauge): Number of distinct metadata value combinations being processed. +* `otelcol_processor_batch_timeout_trigger_send_total` (counter): Number of times the batch was sent due to a timeout trigger. +* `otelcol_processor_batch_batch_size_trigger_send_total` (counter): Number of times the batch was sent due to a size trigger. ## Examples diff --git a/docs/sources/reference/components/otelcol/otelcol.processor.deltatocumulative.md b/docs/sources/reference/components/otelcol/otelcol.processor.deltatocumulative.md index cd2d80054b..ba43d2eff9 100644 --- a/docs/sources/reference/components/otelcol/otelcol.processor.deltatocumulative.md +++ b/docs/sources/reference/components/otelcol/otelcol.processor.deltatocumulative.md @@ -89,13 +89,13 @@ Name | Type | Description ## Debug metrics -* `processor_deltatocumulative_streams_tracked` (gauge): Number of streams currently tracked by the aggregation state. -* `processor_deltatocumulative_streams_limit` (gauge): Upper limit of tracked streams. -* `processor_deltatocumulative_streams_evicted` (counter): Total number of streams removed from tracking to ingest newer streams. -* `processor_deltatocumulative_streams_max_stale` (gauge): Duration without new samples after which streams are dropped. -* `processor_deltatocumulative_datapoints_processed` (counter): Total number of datapoints processed (successfully or unsuccessfully). -* `processor_deltatocumulative_datapoints_dropped` (counter): Faulty datapoints that were dropped due to the reason given in the `reason` label. 
-* `processor_deltatocumulative_gaps_length` (counter): Total length of all gaps in the streams, such as being due to lost in transit. +* `otelcol_deltatocumulative_streams_tracked` (gauge): Number of streams currently tracked by the aggregation state. +* `otelcol_deltatocumulative_streams_limit` (gauge): Upper limit of tracked streams. +* `otelcol_deltatocumulative_streams_evicted` (counter): Total number of streams removed from tracking to ingest newer streams. +* `otelcol_deltatocumulative_streams_max_stale_seconds` (gauge): Duration without new samples after which streams are dropped. +* `otelcol_deltatocumulative_datapoints_processed` (counter): Total number of datapoints processed (successfully or unsuccessfully). +* `otelcol_deltatocumulative_datapoints_dropped` (counter): Faulty datapoints that were dropped due to the reason given in the `reason` label. +* `otelcol_deltatocumulative_gaps_length` (counter): Total length of all gaps in the streams, such as those caused by datapoints lost in transit. ## Examples diff --git a/docs/sources/reference/components/otelcol/otelcol.receiver.otlp.md b/docs/sources/reference/components/otelcol/otelcol.receiver.otlp.md index f6246ca8b1..50fbb93dab 100644 --- a/docs/sources/reference/components/otelcol/otelcol.receiver.otlp.md +++ b/docs/sources/reference/components/otelcol/otelcol.receiver.otlp.md @@ -197,8 +197,8 @@ information. ## Debug metrics -* `receiver_accepted_spans_ratio_total` (counter): Number of spans successfully pushed into the pipeline. -* `receiver_refused_spans_ratio_total` (counter): Number of spans that could not be pushed into the pipeline. +* `otelcol_receiver_accepted_spans_total` (counter): Number of spans successfully pushed into the pipeline. +* `otelcol_receiver_refused_spans_total` (counter): Number of spans that could not be pushed into the pipeline. * `rpc_server_duration_milliseconds` (histogram): Duration of RPC requests from a gRPC server. 
* `rpc_server_request_size_bytes` (histogram): Measures size of RPC request messages (uncompressed). * `rpc_server_requests_per_rpc` (histogram): Measures the number of messages received per RPC. Should be 1 for all non-streaming RPCs. diff --git a/docs/sources/set-up/deploy.md b/docs/sources/set-up/deploy.md index 1aa55b2cd6..d3d6fb1f04 100644 --- a/docs/sources/set-up/deploy.md +++ b/docs/sources/set-up/deploy.md @@ -146,9 +146,9 @@ This similarity is because most {{< param "PRODUCT_NAME" >}} components used for #### When to scale To decide whether scaling is necessary, check metrics such as: -* `receiver_refused_spans_ratio_total` from receivers such as `otelcol.receiver.otlp`. -* `processor_refused_spans_ratio_total` from processors such as `otelcol.processor.batch`. -* `exporter_send_failed_spans_ratio_total` from exporters such as `otelcol.exporter.otlp` and `otelcol.exporter.loadbalancing`. +* `otelcol_receiver_refused_spans_total` from receivers such as `otelcol.receiver.otlp`. +* `otelcol_processor_refused_spans_total` from processors such as `otelcol.processor.batch`. +* `otelcol_exporter_send_failed_spans_total` from exporters such as `otelcol.exporter.otlp` and `otelcol.exporter.loadbalancing`. 
#### Stateful and stateless components diff --git a/operations/alloy-mixin/alerts/opentelemetry.libsonnet b/operations/alloy-mixin/alerts/opentelemetry.libsonnet index c126b365d9..611034c33f 100644 --- a/operations/alloy-mixin/alerts/opentelemetry.libsonnet +++ b/operations/alloy-mixin/alerts/opentelemetry.libsonnet @@ -11,9 +11,9 @@ local alert = import './utils/alert.jsonnet'; alert.newRule( 'OtelcolReceiverRefusedSpans', if enableK8sCluster then - 'sum by (cluster, namespace, job) (rate(receiver_refused_spans_ratio_total{}[1m])) > 0' + 'sum by (cluster, namespace, job) (rate(otelcol_receiver_refused_spans_total{}[1m])) > 0' else - 'sum by (job) (rate(receiver_refused_spans_ratio_total{}[1m])) > 0' + 'sum by (job) (rate(otelcol_receiver_refused_spans_total{}[1m])) > 0' , 'The receiver could not push some spans to the pipeline.', 'The receiver could not push some spans to the pipeline under job {{ $labels.job }}. This could be due to reaching a limit such as the ones imposed by otelcol.processor.memory_limiter.', @@ -25,9 +25,9 @@ local alert = import './utils/alert.jsonnet'; alert.newRule( 'OtelcolExporterFailedSpans', if enableK8sCluster then - 'sum by (cluster, namespace, job) (rate(exporter_send_failed_spans_ratio_total{}[1m])) > 0' + 'sum by (cluster, namespace, job) (rate(otelcol_exporter_send_failed_spans_total{}[1m])) > 0' else - 'sum by (job) (rate(exporter_send_failed_spans_ratio_total{}[1m])) > 0' + 'sum by (job) (rate(otelcol_exporter_send_failed_spans_total{}[1m])) > 0' , 'The exporter failed to send spans to their destination.', 'The exporter failed to send spans to their destination under job {{ $labels.job }}. 
There could be an issue with the payload or with the destination endpoint.', diff --git a/operations/alloy-mixin/dashboards/opentelemetry.libsonnet b/operations/alloy-mixin/dashboards/opentelemetry.libsonnet index 4046b307e0..3cf6eda2d6 100644 --- a/operations/alloy-mixin/dashboards/opentelemetry.libsonnet +++ b/operations/alloy-mixin/dashboards/opentelemetry.libsonnet @@ -44,7 +44,7 @@ local stackedPanelMixin = { panel.withQueries([ panel.newQuery( expr= ||| - rate(receiver_accepted_spans_ratio_total{%(instanceSelector)s}[$__rate_interval]) + rate(otelcol_receiver_accepted_spans_total{%(instanceSelector)s}[$__rate_interval]) ||| % $._config, //TODO: How will the dashboard look if there is more than one receiver component? The legend is not unique enough? legendFormat='{{ pod }} / {{ transport }}', @@ -62,7 +62,7 @@ local stackedPanelMixin = { panel.withQueries([ panel.newQuery( expr= ||| - rate(receiver_refused_spans_ratio_total{%(instanceSelector)s}[$__rate_interval]) + rate(otelcol_receiver_refused_spans_total{%(instanceSelector)s}[$__rate_interval]) ||| % $._config, legendFormat='{{ pod }} / {{ transport }}', ), @@ -100,7 +100,7 @@ local stackedPanelMixin = { panel.withQueries([ panel.newQuery( expr= ||| - sum by (le) (increase(processor_batch_batch_send_size_ratio_bucket{%(instanceSelector)s}[$__rate_interval])) + sum by (le) (increase(otelcol_processor_batch_batch_send_size_bucket{%(instanceSelector)s}[$__rate_interval])) ||| % $._config, format='heatmap', legendFormat='{{le}}', @@ -119,7 +119,7 @@ local stackedPanelMixin = { panel.withQueries([ panel.newQuery( expr= ||| - processor_batch_metadata_cardinality_ratio{%(instanceSelector)s} + otelcol_processor_batch_metadata_cardinality{%(instanceSelector)s} ||| % $._config, legendFormat='{{ pod }}', ), @@ -134,7 +134,7 @@ local stackedPanelMixin = { panel.withQueries([ panel.newQuery( expr= ||| - rate(processor_batch_timeout_trigger_send_ratio_total{%(instanceSelector)s}[$__rate_interval]) + 
rate(otelcol_processor_batch_timeout_trigger_send_total{%(instanceSelector)s}[$__rate_interval]) ||| % $._config, legendFormat='{{ pod }}', ), @@ -156,7 +156,7 @@ local stackedPanelMixin = { panel.withQueries([ panel.newQuery( expr= ||| - rate(exporter_sent_spans_ratio_total{%(instanceSelector)s}[$__rate_interval]) + rate(otelcol_exporter_sent_spans_total{%(instanceSelector)s}[$__rate_interval]) ||| % $._config, legendFormat='{{ pod }}', ), @@ -172,7 +172,7 @@ local stackedPanelMixin = { panel.withQueries([ panel.newQuery( expr= ||| - rate(exporter_send_failed_spans_ratio_total{%(instanceSelector)s}[$__rate_interval]) + rate(otelcol_exporter_send_failed_spans_total{%(instanceSelector)s}[$__rate_interval]) ||| % $._config, legendFormat='{{ pod }}', ),