Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

receiver/prometheusreceiver: add option to fallback to collector starttime #36365

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .chloggen/starttime-fallback.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: prometheusreceiver

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add the `receiver.prometheusreceiver.UseCollectorStartTimeFallback` feature gate, which lets the start time metric adjuster fall back to the collector start time as an approximation of the process start time.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [36364]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
10 changes: 10 additions & 0 deletions receiver/prometheusreceiver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,17 @@ prometheus --config.file=prom.yaml
```shell
"--feature-gates=receiver.prometheusreceiver.UseCreatedMetric"
```
- `receiver.prometheusreceiver.UseCollectorStartTimeFallback`: enables using
the collector start time as the metric start time if the
`process_start_time_seconds` metric yields no result (for example, if targets
expose no `process_start_time_seconds` metric). This is useful when the collector
start time is a good approximation of the process start time, for example in
serverless workloads where the collector is deployed as a sidecar. To enable it,
use the following feature gate option:

```shell
"--feature-gates=receiver.prometheusreceiver.UseCollectorStartTimeFallback"
```
- `receiver.prometheusreceiver.EnableNativeHistograms`: process and turn native histogram metrics into OpenTelemetry exponential histograms. For more details consult the [Prometheus native histograms](#prometheus-native-histograms) section.

```shell
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ package internal // import "github.com/open-telemetry/opentelemetry-collector-co
import (
"errors"
"regexp"
"time"

"go.opentelemetry.io/collector/featuregate"
"go.opentelemetry.io/collector/pdata/pmetric"
"go.uber.org/zap"
)
Expand All @@ -15,8 +17,27 @@ var (
errNoStartTimeMetrics = errors.New("start_time metric is missing")
errNoDataPointsStartTimeMetric = errors.New("start time metric with no data points")
errUnsupportedTypeStartTimeMetric = errors.New("unsupported data type for start time metric")

// approximateCollectorStartTime is the approximate start time of the
// collector. Used as a fallback start time for metrics that don't have a
// start time set (when the
// receiver.prometheusreceiver.UseCollectorStartTimeFallback feature gate is
// enabled). Set when the component is initialized.
approximateCollectorStartTime time.Time
)

// useCollectorStartTimeFallbackGate is the alpha-stage feature gate
// "receiver.prometheusreceiver.UseCollectorStartTimeFallback". It is registered
// with the global feature gate registry when this package is initialized.
var useCollectorStartTimeFallbackGate = featuregate.GlobalRegistry().MustRegister(
	"receiver.prometheusreceiver.UseCollectorStartTimeFallback",
	featuregate.StageAlpha,
	featuregate.WithRegisterDescription(
		"When enabled, the Prometheus receiver's start time metric adjuster"+
			" will fallback to using the collector start time when a start time"+
			" is not available",
	),
)

// init records the wall-clock time at which this package is initialized and
// stores it in approximateCollectorStartTime.
func init() {
	initializedAt := time.Now()
	approximateCollectorStartTime = initializedAt
}

type startTimeMetricAdjuster struct {
startTimeMetricRegex *regexp.Regexp
logger *zap.Logger
Expand All @@ -33,7 +54,11 @@ func NewStartTimeMetricAdjuster(logger *zap.Logger, startTimeMetricRegex *regexp
func (stma *startTimeMetricAdjuster) AdjustMetrics(metrics pmetric.Metrics) error {
startTime, err := stma.getStartTime(metrics)
if err != nil {
return err
if !useCollectorStartTimeFallbackGate.IsEnabled() {
return err
}
stma.logger.Info("Couldn't get start time for metrics. Using fallback start time.", zap.Error(err), zap.Time("fallback_start_time", approximateCollectorStartTime))
startTime = float64(approximateCollectorStartTime.Unix())
}

startTimeTs := timestampFromFloat64(startTime)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@ package internal
import (
"regexp"
"testing"
"time"

"github.com/stretchr/testify/assert"
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/pmetric"
"go.uber.org/zap"

"github.com/open-telemetry/opentelemetry-collector-contrib/internal/common/testutil"
)

func TestStartTimeMetricMatch(t *testing.T) {
Expand Down Expand Up @@ -154,3 +157,96 @@ func TestStartTimeMetricMatch(t *testing.T) {
})
}
}

// TestStartTimeMetricFallback verifies that, with the
// receiver.prometheusreceiver.UseCollectorStartTimeFallback feature gate
// enabled, AdjustMetrics stamps sum, summary and histogram data points with
// the value of a matching start time metric when one is present, and with
// approximateCollectorStartTime when no start time metric matches.
func TestStartTimeMetricFallback(t *testing.T) {
	const startTime = pcommon.Timestamp(123 * 1e9)
	const currentTime = pcommon.Timestamp(126 * 1e9)
	mockStartTime := time.Now().Add(-10 * time.Hour)
	mockStartTimeSeconds := float64(mockStartTime.Unix())
	processStartTime := mockStartTime.Add(-10 * time.Hour)
	processStartTimeSeconds := float64(processStartTime.Unix())

	// To test that the adjuster is using the fallback correctly, override the
	// package-level fallback time with a known value. Restore the previous
	// value once this test and all of its subtests have finished so the
	// override does not leak into other tests in the package.
	originalCollectorStartTime := approximateCollectorStartTime
	approximateCollectorStartTime = mockStartTime
	t.Cleanup(func() {
		approximateCollectorStartTime = originalCollectorStartTime
	})

	tests := []struct {
		name                 string
		inputs               pmetric.Metrics
		startTimeMetricRegex *regexp.Regexp
		expectedStartTime    pcommon.Timestamp
		expectedErr          error
	}{
		{
			// A start time metric matches the regex, so the fallback is not used.
			name: "regexp_match_metric_no_fallback",
			inputs: metrics(
				sumMetric("test_sum_metric", doublePoint(nil, startTime, currentTime, 16)),
				histogramMetric("test_histogram_metric", histogramPoint(nil, startTime, currentTime, []float64{1, 2}, []uint64{2, 3, 4})),
				summaryMetric("test_summary_metric", summaryPoint(nil, startTime, currentTime, 10, 100, []float64{10, 50, 90}, []float64{9, 15, 48})),
				sumMetric("example_process_start_time_seconds", doublePoint(nil, startTime, currentTime, processStartTimeSeconds)),
				sumMetric("process_start_time_seconds", doublePoint(nil, startTime, currentTime, processStartTimeSeconds)),
				exponentialHistogramMetric("test_exponential_histogram_metric", exponentialHistogramPointSimplified(nil, startTime, currentTime, 3, 1, -5, 3)),
			),
			startTimeMetricRegex: regexp.MustCompile("^.*_process_start_time_seconds$"),
			expectedStartTime:    timestampFromFloat64(processStartTimeSeconds),
		},
		{
			// No metric matches the regex, so the fallback start time is expected.
			name: "regexp_no_regex_match_metric_fallback",
			inputs: metrics(
				sumMetric("test_sum_metric", doublePoint(nil, startTime, currentTime, 16)),
				histogramMetric("test_histogram_metric", histogramPoint(nil, startTime, currentTime, []float64{1, 2}, []uint64{2, 3, 4})),
				summaryMetric("test_summary_metric", summaryPoint(nil, startTime, currentTime, 10, 100, []float64{10, 50, 90}, []float64{9, 15, 48})),
			),
			startTimeMetricRegex: regexp.MustCompile("^.*_process_start_time_seconds$"),
			expectedStartTime:    timestampFromFloat64(mockStartTimeSeconds),
		},
		{
			// No regex is configured and the inputs contain no start time
			// metric, so the fallback start time is expected.
			name: "match_no_match_metric_fallback",
			inputs: metrics(
				sumMetric("test_sum_metric", doublePoint(nil, startTime, currentTime, 16)),
				histogramMetric("test_histogram_metric", histogramPoint(nil, startTime, currentTime, []float64{1, 2}, []uint64{2, 3, 4})),
				summaryMetric("test_summary_metric", summaryPoint(nil, startTime, currentTime, 10, 100, []float64{10, 50, 90}, []float64{9, 15, 48})),
			),
			expectedStartTime: timestampFromFloat64(mockStartTimeSeconds),
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			testutil.SetFeatureGateForTest(t, useCollectorStartTimeFallbackGate, true)
			stma := NewStartTimeMetricAdjuster(zap.NewNop(), tt.startTimeMetricRegex)
			if tt.expectedErr != nil {
				assert.ErrorIs(t, stma.AdjustMetrics(tt.inputs), tt.expectedErr)
				return
			}

			assert.NoError(t, stma.AdjustMetrics(tt.inputs))
			for i := 0; i < tt.inputs.ResourceMetrics().Len(); i++ {
				rm := tt.inputs.ResourceMetrics().At(i)
				for j := 0; j < rm.ScopeMetrics().Len(); j++ {
					ilm := rm.ScopeMetrics().At(j)
					for k := 0; k < ilm.Metrics().Len(); k++ {
						metric := ilm.Metrics().At(k)
						// Only sum, summary and histogram points are checked;
						// other metric types are ignored by this test.
						switch metric.Type() {
						case pmetric.MetricTypeSum:
							dps := metric.Sum().DataPoints()
							for l := 0; l < dps.Len(); l++ {
								assert.Equal(t, tt.expectedStartTime, dps.At(l).StartTimestamp())
							}
						case pmetric.MetricTypeSummary:
							dps := metric.Summary().DataPoints()
							for l := 0; l < dps.Len(); l++ {
								assert.Equal(t, tt.expectedStartTime, dps.At(l).StartTimestamp())
							}
						case pmetric.MetricTypeHistogram:
							dps := metric.Histogram().DataPoints()
							for l := 0; l < dps.Len(); l++ {
								assert.Equal(t, tt.expectedStartTime, dps.At(l).StartTimestamp())
							}
						}
					}
				}
			}
		})
	}
}
Loading