diff --git a/.chloggen/starttime-fallback.yaml b/.chloggen/starttime-fallback.yaml new file mode 100644 index 000000000000..adb5f64d6df7 --- /dev/null +++ b/.chloggen/starttime-fallback.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: prometheusreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add `receiver.prometheusreceiver.UseCollectorStartTimeFallback` featuregate for the start time metric adjuster to use the collector start time as an approximation of process start time as a fallback. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [36364] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. 
+# Default: '[user]' +change_logs: [] diff --git a/receiver/prometheusreceiver/README.md b/receiver/prometheusreceiver/README.md index cfa7472eb824..da1c93551f0b 100644 --- a/receiver/prometheusreceiver/README.md +++ b/receiver/prometheusreceiver/README.md @@ -67,7 +67,17 @@ prometheus --config.file=prom.yaml ```shell "--feature-gates=receiver.prometheusreceiver.UseCreatedMetric" ``` +- `receiver.prometheusreceiver.UseCollectorStartTimeFallback`: enables using + the collector start time as the metric start time if the + process_start_time_seconds metric yields no result (for example if targets + expose no process_start_time_seconds metric). This is useful when the collector + start time is a good approximation of the process start time - for example in + serverless workloads when the collector is deployed as a sidecar. To enable it, + use the following feature gate option: +```shell +"--feature-gates=receiver.prometheusreceiver.UseCollectorStartTimeFallback" +``` - `receiver.prometheusreceiver.EnableNativeHistograms`: process and turn native histogram metrics into OpenTelemetry exponential histograms. For more details consult the [Prometheus native histograms](#prometheus-native-histograms) section. 
```shell diff --git a/receiver/prometheusreceiver/internal/starttimemetricadjuster.go b/receiver/prometheusreceiver/internal/starttimemetricadjuster.go index 1b3eb51529f5..e740c891a495 100644 --- a/receiver/prometheusreceiver/internal/starttimemetricadjuster.go +++ b/receiver/prometheusreceiver/internal/starttimemetricadjuster.go @@ -6,7 +6,9 @@ package internal // import "github.com/open-telemetry/opentelemetry-collector-co import ( "errors" "regexp" + "time" + "go.opentelemetry.io/collector/featuregate" "go.opentelemetry.io/collector/pdata/pmetric" "go.uber.org/zap" ) @@ -15,8 +17,27 @@ var ( errNoStartTimeMetrics = errors.New("start_time metric is missing") errNoDataPointsStartTimeMetric = errors.New("start time metric with no data points") errUnsupportedTypeStartTimeMetric = errors.New("unsupported data type for start time metric") + + // approximateCollectorStartTime is the approximate start time of the + // collector. Used as a fallback start time for metrics that don't have a + // start time set (when the + // receiver.prometheusreceiver.UseCollectorStartTimeFallback feature gate is + // enabled). Set once by init() when this package is loaded, not per receiver instance. 
+ approximateCollectorStartTime time.Time +) + +var useCollectorStartTimeFallbackGate = featuregate.GlobalRegistry().MustRegister( + "receiver.prometheusreceiver.UseCollectorStartTimeFallback", + featuregate.StageAlpha, + featuregate.WithRegisterDescription("When enabled, the Prometheus receiver's"+ + " start time metric adjuster will fallback to using the collector start time"+ + " when a start time is not available"), ) +func init() { + approximateCollectorStartTime = time.Now() +} + type startTimeMetricAdjuster struct { startTimeMetricRegex *regexp.Regexp logger *zap.Logger @@ -33,7 +54,11 @@ func NewStartTimeMetricAdjuster(logger *zap.Logger, startTimeMetricRegex *regexp func (stma *startTimeMetricAdjuster) AdjustMetrics(metrics pmetric.Metrics) error { startTime, err := stma.getStartTime(metrics) if err != nil { - return err + if !useCollectorStartTimeFallbackGate.IsEnabled() { + return err + } + stma.logger.Info("Couldn't get start time for metrics. Using fallback start time.", zap.Error(err), zap.Time("fallback_start_time", approximateCollectorStartTime)) + startTime = float64(approximateCollectorStartTime.Unix()) } startTimeTs := timestampFromFloat64(startTime) diff --git a/receiver/prometheusreceiver/internal/starttimemetricadjuster_test.go b/receiver/prometheusreceiver/internal/starttimemetricadjuster_test.go index 84bdc2756ed5..4bfc6abc2237 100644 --- a/receiver/prometheusreceiver/internal/starttimemetricadjuster_test.go +++ b/receiver/prometheusreceiver/internal/starttimemetricadjuster_test.go @@ -6,11 +6,14 @@ package internal import ( "regexp" "testing" + "time" "github.com/stretchr/testify/assert" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" "go.uber.org/zap" + + "github.com/open-telemetry/opentelemetry-collector-contrib/internal/common/testutil" ) func TestStartTimeMetricMatch(t *testing.T) { @@ -154,3 +157,101 @@ func TestStartTimeMetricMatch(t *testing.T) { }) } } + +// TestStartTimeMetricFallback checks AdjustMetrics with the collector start time fallback gate enabled: a matching +// start time metric still wins, and without one the data points get approximateCollectorStartTime. +func TestStartTimeMetricFallback(t 
*testing.T) { + const startTime = pcommon.Timestamp(123 * 1e9) + const currentTime = pcommon.Timestamp(126 * 1e9) + mockStartTime := time.Now().Add(-10 * time.Hour) + mockStartTimeSeconds := float64(mockStartTime.Unix()) + processStartTime := mockStartTime.Add(-10 * time.Hour) + processStartTimeSeconds := float64(processStartTime.Unix()) + + tests := []struct { + name string + inputs pmetric.Metrics + startTimeMetricRegex *regexp.Regexp + expectedStartTime pcommon.Timestamp + expectedErr error + }{ + { + name: "regexp_match_metric_no_fallback", + inputs: metrics( + sumMetric("test_sum_metric", doublePoint(nil, startTime, currentTime, 16)), + histogramMetric("test_histogram_metric", histogramPoint(nil, startTime, currentTime, []float64{1, 2}, []uint64{2, 3, 4})), + summaryMetric("test_summary_metric", summaryPoint(nil, startTime, currentTime, 10, 100, []float64{10, 50, 90}, []float64{9, 15, 48})), + sumMetric("example_process_start_time_seconds", doublePoint(nil, startTime, currentTime, processStartTimeSeconds)), + sumMetric("process_start_time_seconds", doublePoint(nil, startTime, currentTime, processStartTimeSeconds)), + exponentialHistogramMetric("test_exponential_histogram_metric", exponentialHistogramPointSimplified(nil, startTime, currentTime, 3, 1, -5, 3)), + ), + startTimeMetricRegex: regexp.MustCompile("^.*_process_start_time_seconds$"), + expectedStartTime: timestampFromFloat64(processStartTimeSeconds), + }, + { + name: "regexp_no_regex_match_metric_fallback", + inputs: metrics( + sumMetric("test_sum_metric", doublePoint(nil, startTime, currentTime, 16)), + histogramMetric("test_histogram_metric", histogramPoint(nil, startTime, currentTime, []float64{1, 2}, []uint64{2, 3, 4})), + summaryMetric("test_summary_metric", summaryPoint(nil, startTime, currentTime, 10, 100, []float64{10, 50, 90}, []float64{9, 15, 48})), + ), + startTimeMetricRegex: regexp.MustCompile("^.*_process_start_time_seconds$"), + expectedStartTime: 
timestampFromFloat64(mockStartTimeSeconds), + }, + { + name: "match_no_match_metric_fallback", + inputs: metrics( + sumMetric("test_sum_metric", doublePoint(nil, startTime, currentTime, 16)), + histogramMetric("test_histogram_metric", histogramPoint(nil, startTime, currentTime, []float64{1, 2}, []uint64{2, 3, 4})), + summaryMetric("test_summary_metric", summaryPoint(nil, startTime, currentTime, 10, 100, []float64{10, 50, 90}, []float64{9, 15, 48})), + ), + expectedStartTime: timestampFromFloat64(mockStartTimeSeconds), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // NOTE(review): confirm SetFeatureGateForTest resets the gate after the subtest; a returned reset func would need calling. + testutil.SetFeatureGateForTest(t, useCollectorStartTimeFallbackGate, true) + stma := NewStartTimeMetricAdjuster(zap.NewNop(), tt.startTimeMetricRegex) + if tt.expectedErr != nil { + assert.ErrorIs(t, stma.AdjustMetrics(tt.inputs), tt.expectedErr) + return + } + + // To test that the adjuster is using the fallback correctly, override the fallback time to use + // directly. Restore the package-level value afterwards so later tests see the real one. + originalStartTime := approximateCollectorStartTime + t.Cleanup(func() { approximateCollectorStartTime = originalStartTime }) + approximateCollectorStartTime = mockStartTime + + assert.NoError(t, stma.AdjustMetrics(tt.inputs)) + for i := 0; i < tt.inputs.ResourceMetrics().Len(); i++ { + rm := tt.inputs.ResourceMetrics().At(i) + for j := 0; j < rm.ScopeMetrics().Len(); j++ { + ilm := rm.ScopeMetrics().At(j) + for k := 0; k < ilm.Metrics().Len(); k++ { + metric := ilm.Metrics().At(k) + switch metric.Type() { + case pmetric.MetricTypeSum: + dps := metric.Sum().DataPoints() + for l := 0; l < dps.Len(); l++ { + assert.Equal(t, tt.expectedStartTime, dps.At(l).StartTimestamp()) + } + case pmetric.MetricTypeSummary: + dps := metric.Summary().DataPoints() + for l := 0; l < dps.Len(); l++ { + assert.Equal(t, tt.expectedStartTime, dps.At(l).StartTimestamp()) + } + case pmetric.MetricTypeHistogram: + dps := metric.Histogram().DataPoints() + for l := 0; l < dps.Len(); l++ { + assert.Equal(t, tt.expectedStartTime, dps.At(l).StartTimestamp()) + } + } + } + } + } + }) + } +}