forked from open-telemetry/opentelemetry-collector-contrib
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[chore] [receiver/datadog] Add support for Service Checks (open-telemetry#34474)
Description: This PR adds support for Datadog Service Checks. Follow-up to open-telemetry#33631, open-telemetry#33957 and open-telemetry#34180. The full version of the code can be found in the cedwards/datadog-metrics-receiver-full branch, or in Grafana Alloy: https://github.com/grafana/alloy/tree/main/internal/etc/datadogreceiver Link to tracking issue: open-telemetry#18278 Testing: Unit tests, as well as an end-to-end test, have been added. --------- Signed-off-by: alexgreenbank <alex.greenbank@grafana.com> Co-authored-by: Carrie Edwards <edwrdscarrie@gmail.com> Co-authored-by: Juraci Paixão Kröhling <juraci@kroehling.de>
- Loading branch information
Showing
4 changed files
with
461 additions
and
3 deletions.
There are no files selected for viewing
50 changes: 50 additions & 0 deletions
50
receiver/datadogreceiver/internal/translator/service_check_translator.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
// Copyright The OpenTelemetry Authors | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package translator // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/datadogreceiver/internal/translator" | ||
|
||
import ( | ||
"time" | ||
|
||
"github.com/DataDog/datadog-api-client-go/v2/api/datadogV1" | ||
"go.opentelemetry.io/collector/pdata/pcommon" | ||
"go.opentelemetry.io/collector/pdata/pmetric" | ||
|
||
"github.com/open-telemetry/opentelemetry-collector-contrib/internal/exp/metrics/identity" | ||
) | ||
|
||
type ServiceCheck struct { | ||
Check string `json:"check"` | ||
HostName string `json:"host_name"` | ||
Status datadogV1.ServiceCheckStatus `json:"status"` | ||
Timestamp int64 `json:"timestamp,omitempty"` | ||
Tags []string `json:"tags,omitempty"` | ||
} | ||
|
||
// More information on Datadog service checks: https://docs.datadoghq.com/api/latest/service-checks/ | ||
func (mt *MetricsTranslator) TranslateServices(services []ServiceCheck) pmetric.Metrics { | ||
bt := newBatcher() | ||
bt.Metrics = pmetric.NewMetrics() | ||
|
||
for _, service := range services { | ||
metricProperties := parseSeriesProperties("service_check", "service_check", service.Tags, service.HostName, mt.buildInfo.Version, mt.stringPool) | ||
metric, metricID := bt.Lookup(metricProperties) // TODO(alexg): proper name | ||
|
||
dps := metric.Gauge().DataPoints() | ||
dps.EnsureCapacity(1) | ||
|
||
dp := dps.AppendEmpty() | ||
dp.SetTimestamp(pcommon.Timestamp(service.Timestamp * time.Second.Nanoseconds())) // OTel uses nanoseconds, while Datadog uses seconds | ||
metricProperties.dpAttrs.CopyTo(dp.Attributes()) | ||
dp.SetIntValue(int64(service.Status)) | ||
|
||
// TODO(alexg): Do this stream thing for service check metrics? | ||
stream := identity.OfStream(metricID, dp) | ||
ts, ok := mt.streamHasTimestamp(stream) | ||
if ok { | ||
dp.SetStartTimestamp(ts) | ||
} | ||
mt.updateLastTsForStream(stream, dp.Timestamp()) | ||
} | ||
return bt.Metrics | ||
} |
322 changes: 322 additions & 0 deletions
322
receiver/datadogreceiver/internal/translator/service_check_translator_test.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,322 @@ | ||
// Copyright The OpenTelemetry Authors | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package translator | ||
|
||
import ( | ||
"encoding/json" | ||
"testing" | ||
|
||
"github.com/DataDog/datadog-api-client-go/v2/api/datadogV1" | ||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
"go.opentelemetry.io/collector/component" | ||
"go.opentelemetry.io/collector/pdata/pcommon" | ||
"go.opentelemetry.io/collector/pdata/pmetric" | ||
) | ||
|
||
// testTimestamp is the timestamp, in seconds, shared by the service check
// test cases that set one.
var testTimestamp = int64(1700000000)
|
||
func TestHandleStructureParsing(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
checkRunPayload []byte | ||
expectedServices []ServiceCheck | ||
}{ | ||
{ | ||
name: "happy", | ||
checkRunPayload: []byte(`[ | ||
{ | ||
"check": "datadog.agent.check_status", | ||
"host_name": "hosta", | ||
"status": 0, | ||
"message": "", | ||
"tags": [ | ||
"check:container" | ||
] | ||
}, | ||
{ | ||
"check": "app.working", | ||
"host_name": "hosta", | ||
"timestamp": 1700000000, | ||
"status": 0, | ||
"message": "", | ||
"tags": null | ||
}, | ||
{ | ||
"check": "env.test", | ||
"host_name": "hosta", | ||
"status": 0, | ||
"message": "", | ||
"tags": [ | ||
"env:argle", "foo:bargle" | ||
] | ||
} | ||
]`), | ||
expectedServices: []ServiceCheck{ | ||
{ | ||
Check: "datadog.agent.check_status", | ||
HostName: "hosta", | ||
Status: 0, | ||
Tags: []string{"check:container"}, | ||
}, | ||
{ | ||
Check: "app.working", | ||
HostName: "hosta", | ||
Status: 0, | ||
Timestamp: 1700000000, | ||
}, | ||
{ | ||
Check: "env.test", | ||
HostName: "hosta", | ||
Status: 0, | ||
Tags: []string{"env:argle", "foo:bargle"}, | ||
}, | ||
}, | ||
}, | ||
{ | ||
name: "happy no tags", | ||
checkRunPayload: []byte(`[ | ||
{ | ||
"check": "app.working", | ||
"host_name": "hosta", | ||
"timestamp": 1700000000, | ||
"status": 0, | ||
"message": "", | ||
"tags": null | ||
} | ||
]`), | ||
expectedServices: []ServiceCheck{ | ||
{ | ||
Check: "app.working", | ||
HostName: "hosta", | ||
Status: 0, | ||
Timestamp: 1700000000, | ||
}, | ||
}, | ||
}, | ||
{ | ||
name: "happy no timestamp", | ||
checkRunPayload: []byte(`[ | ||
{ | ||
"check": "env.test", | ||
"host_name": "hosta", | ||
"status": 0, | ||
"message": "", | ||
"tags": [ | ||
"env:argle", "foo:bargle" | ||
] | ||
} | ||
]`), | ||
expectedServices: []ServiceCheck{ | ||
{ | ||
Check: "env.test", | ||
HostName: "hosta", | ||
Status: 0, | ||
Tags: []string{"env:argle", "foo:bargle"}, | ||
}, | ||
}, | ||
}, | ||
{ | ||
name: "empty", | ||
checkRunPayload: []byte(`[]`), | ||
expectedServices: []ServiceCheck{}, | ||
}, | ||
{ | ||
name: "happy no hostname", | ||
checkRunPayload: []byte(`[ | ||
{ | ||
"check": "env.test", | ||
"status": 0, | ||
"message": "", | ||
"tags": [ | ||
"env:argle", "foo:bargle" | ||
] | ||
} | ||
]`), | ||
expectedServices: []ServiceCheck{ | ||
{ | ||
Check: "env.test", | ||
Status: 0, | ||
Tags: []string{"env:argle", "foo:bargle"}, | ||
}, | ||
}, | ||
}, | ||
{ | ||
name: "empty", | ||
checkRunPayload: []byte(`[]`), | ||
expectedServices: []ServiceCheck{}, | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
var services []ServiceCheck | ||
err := json.Unmarshal(tt.checkRunPayload, &services) | ||
require.NoError(t, err, "Failed to unmarshal service payload JSON") | ||
assert.Equal(t, tt.expectedServices, services, "Parsed series does not match expected series") | ||
}) | ||
} | ||
} | ||
|
||
func TestTranslateCheckRun(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
services []ServiceCheck | ||
expect func(t *testing.T, result pmetric.Metrics) | ||
}{ | ||
{ | ||
name: "OK status, with TS, no tags, no hostname", | ||
services: []ServiceCheck{ | ||
{ | ||
Check: "app.working", | ||
Timestamp: 1700000000, | ||
Status: datadogV1.SERVICECHECKSTATUS_OK, | ||
Tags: []string{}, | ||
}, | ||
}, | ||
expect: func(t *testing.T, result pmetric.Metrics) { | ||
expectedAttrs := tagsToAttributes([]string{}, "", newStringPool()) | ||
require.Equal(t, 1, result.ResourceMetrics().Len()) | ||
requireResourceAttributes(t, result.ResourceMetrics().At(0).Resource().Attributes(), expectedAttrs.resource) | ||
require.Equal(t, 1, result.MetricCount()) | ||
require.Equal(t, 1, result.DataPointCount()) | ||
|
||
requireScope(t, result, expectedAttrs.scope, component.NewDefaultBuildInfo().Version) | ||
|
||
metric := result.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics().At(0) | ||
requireGauge(t, metric, "service_check", 1) | ||
|
||
dp := metric.Gauge().DataPoints().At(0) | ||
requireDp(t, dp, expectedAttrs.dp, 1700000000, 0) | ||
}, | ||
}, | ||
{ | ||
name: "OK status, no TS", | ||
services: []ServiceCheck{ | ||
{ | ||
Check: "app.working", | ||
HostName: "foo", | ||
Status: datadogV1.SERVICECHECKSTATUS_OK, | ||
Tags: []string{"env:tag1", "version:tag2"}, | ||
}, | ||
}, | ||
expect: func(t *testing.T, result pmetric.Metrics) { | ||
expectedAttrs := tagsToAttributes([]string{"env:tag1", "version:tag2"}, "foo", newStringPool()) | ||
require.Equal(t, 1, result.ResourceMetrics().Len()) | ||
requireResourceAttributes(t, result.ResourceMetrics().At(0).Resource().Attributes(), expectedAttrs.resource) | ||
require.Equal(t, 1, result.MetricCount()) | ||
require.Equal(t, 1, result.DataPointCount()) | ||
|
||
requireScope(t, result, expectedAttrs.scope, component.NewDefaultBuildInfo().Version) | ||
|
||
metric := result.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics().At(0) | ||
requireGauge(t, metric, "service_check", 1) | ||
|
||
dp := metric.Gauge().DataPoints().At(0) | ||
requireDp(t, dp, expectedAttrs.dp, 0, 0) | ||
}, | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
mt := createMetricsTranslator() | ||
mt.buildInfo = component.BuildInfo{ | ||
Command: "otelcol", | ||
Description: "OpenTelemetry Collector", | ||
Version: "latest", | ||
} | ||
result := mt.TranslateServices(tt.services) | ||
|
||
tt.expect(t, result) | ||
}) | ||
} | ||
} | ||
|
||
func TestTranslateCheckRunStatuses(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
services []ServiceCheck | ||
expectedStatus int64 | ||
}{ | ||
{ | ||
name: "OK status, no TS", | ||
services: []ServiceCheck{ | ||
{ | ||
Check: "app.working", | ||
HostName: "foo", | ||
Status: datadogV1.SERVICECHECKSTATUS_OK, | ||
Tags: []string{"env:tag1", "version:tag2"}, | ||
}, | ||
}, | ||
expectedStatus: 0, | ||
}, | ||
{ | ||
name: "Warning status", | ||
services: []ServiceCheck{ | ||
{ | ||
Check: "app.warning", | ||
HostName: "foo", | ||
Status: datadogV1.SERVICECHECKSTATUS_WARNING, | ||
Tags: []string{"env:tag1", "version:tag2"}, | ||
Timestamp: testTimestamp, | ||
}, | ||
}, | ||
expectedStatus: 1, | ||
}, | ||
{ | ||
name: "Critical status", | ||
services: []ServiceCheck{ | ||
{ | ||
Check: "app.critical", | ||
HostName: "foo", | ||
Status: datadogV1.SERVICECHECKSTATUS_CRITICAL, | ||
Tags: []string{"env:tag1", "version:tag2"}, | ||
Timestamp: testTimestamp, | ||
}, | ||
}, | ||
expectedStatus: 2, | ||
}, | ||
{ | ||
name: "Unknown status", | ||
services: []ServiceCheck{ | ||
{ | ||
Check: "app.unknown", | ||
HostName: "foo", | ||
Status: datadogV1.SERVICECHECKSTATUS_UNKNOWN, | ||
Tags: []string{"env:tag1", "version:tag2"}, | ||
Timestamp: testTimestamp, | ||
}, | ||
}, | ||
expectedStatus: 3, | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
mt := createMetricsTranslator() | ||
mt.buildInfo = component.BuildInfo{ | ||
Command: "otelcol", | ||
Description: "OpenTelemetry Collector", | ||
Version: "latest", | ||
} | ||
result := mt.TranslateServices(tt.services) | ||
|
||
require.Equal(t, 1, result.MetricCount()) | ||
require.Equal(t, 1, result.DataPointCount()) | ||
|
||
requireScopeMetrics(t, result, 1, 1) | ||
|
||
requireScope(t, result, pcommon.NewMap(), component.NewDefaultBuildInfo().Version) | ||
|
||
metrics := result.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics() | ||
for i := 0; i < metrics.Len(); i++ { | ||
metric := metrics.At(i) | ||
assert.Equal(t, tt.expectedStatus, metric.Gauge().DataPoints().At(0).IntValue()) | ||
} | ||
}) | ||
} | ||
} |
Oops, something went wrong.