From f01217c60c5ffbd925fae787e777ff95339587fe Mon Sep 17 00:00:00 2001 From: Ruben van Staden Date: Mon, 30 Sep 2024 14:27:56 -0400 Subject: [PATCH 1/8] add sanitization utils for datastream fields --- .../data_stream_router.go | 3 ++ .../elasticsearchexporter/exporter_test.go | 4 +- exporter/elasticsearchexporter/model_test.go | 17 +++++++++ exporter/elasticsearchexporter/util.go | 38 +++++++++++++++++++ 4 files changed, 60 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/data_stream_router.go b/exporter/elasticsearchexporter/data_stream_router.go index 851bb92d9756..55a152f37414 100644 --- a/exporter/elasticsearchexporter/data_stream_router.go +++ b/exporter/elasticsearchexporter/data_stream_router.go @@ -40,6 +40,9 @@ func routeWithDefaults(defaultDSType string) func( } } + dataset = sanitizeDataStreamDataset(dataset) + namespace = sanitizeDataStreamNamespace(namespace) + // The naming convention for datastream is expected to be "logs-[dataset].otel-[namespace]". // This is in order to match the soon to be built-in logs-*.otel-* index template. 
if otel { diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index 3c11272f408f..519dcc5b84f4 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -216,7 +216,7 @@ func TestExporterLogs(t *testing.T) { server := newESTestServer(t, func(docs []itemRequest) ([]itemResponse, error) { rec.Record(docs) - assert.Equal(t, "logs-record.dataset-resource.namespace", actionJSONToIndex(t, docs[0].Action)) + assert.Equal(t, "logs-_record.dataset-resource.namespace", actionJSONToIndex(t, docs[0].Action)) return itemsAllOK(docs) }) @@ -226,7 +226,7 @@ func TestExporterLogs(t *testing.T) { }) logs := newLogsWithAttributes( map[string]any{ - dataStreamDataset: "record.dataset", + dataStreamDataset: "*record.dataset", }, nil, map[string]any{ diff --git a/exporter/elasticsearchexporter/model_test.go b/exporter/elasticsearchexporter/model_test.go index e0e719586b61..67e6ea4c1e74 100644 --- a/exporter/elasticsearchexporter/model_test.go +++ b/exporter/elasticsearchexporter/model_test.go @@ -960,6 +960,9 @@ func decodeOTelID(data []byte) ([]byte, error) { } func TestEncodeLogOtelMode(t *testing.T) { + randomString := strings.Repeat("abcdefghijklmnopqrstuvwxyz0123456789", 10) + maxLenNamespace := maxDataStreamBytes - len(disallowedNamespaceRunes) + maxLenDataset := maxDataStreamBytes - len(disallowedDatasetRunes) tests := []struct { name string @@ -1044,6 +1047,20 @@ func TestEncodeLogOtelMode(t *testing.T) { return assignDatastreamData(or, "", "third.otel") }, }, + { + name: "sanitize dataset/namespace", + rec: buildOTelRecordTestData(t, func(or OTelRecord) OTelRecord { + or.Attributes["data_stream.dataset"] = disallowedDatasetRunes + randomString + or.Attributes["data_stream.namespace"] = disallowedNamespaceRunes + randomString + return or + }), + wantFn: func(or OTelRecord) OTelRecord { + deleteDatasetAttributes(or) + ds := strings.Repeat("_", 
len(disallowedDatasetRunes)) + randomString[:maxLenDataset] + ".otel" + ns := strings.Repeat("_", len(disallowedNamespaceRunes)) + randomString[:maxLenNamespace] + return assignDatastreamData(or, "", ds, ns) + }, + }, } m := encodeModel{ diff --git a/exporter/elasticsearchexporter/util.go b/exporter/elasticsearchexporter/util.go index e5b398082b3b..7c03c5f5b236 100644 --- a/exporter/elasticsearchexporter/util.go +++ b/exporter/elasticsearchexporter/util.go @@ -6,11 +6,49 @@ package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry import ( "bytes" "fmt" + "strings" "time" + "unicode" "github.com/lestrrat-go/strftime" ) +const ( + maxDataStreamBytes = 100 + disallowedNamespaceRunes = "\\/*?\"<>| ,#:" + disallowedDatasetRunes = "-\\/*?\"<>| ,#:" +) + +// Sanitize the datastream fields (dataset, namespace) to apply restrictions +// as outlined in https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html +func sanitizeDataStreamDataset(field string) string { + field = strings.Map(replaceReservedRune(disallowedDatasetRunes), field) + if len(field) > maxDataStreamBytes { + return field[:maxDataStreamBytes] + } + + return field +} + +// Sanitize the datastream fields (dataset, namespace) to apply restrictions +// as outlined in https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html +func sanitizeDataStreamNamespace(field string) string { + field = strings.Map(replaceReservedRune(disallowedNamespaceRunes), field) + if len(field) > maxDataStreamBytes { + return field[:maxDataStreamBytes] + } + return field +} + +func replaceReservedRune(disallowedRunes string) func(r rune) rune { + return func(r rune) rune { + if strings.ContainsRune(disallowedRunes, r) { + return '_' + } + return unicode.ToLower(r) + } +} + func generateIndexWithLogstashFormat(index string, conf *LogstashFormatSettings, t time.Time) (string, error) { if conf.Enabled { partIndex := fmt.Sprintf("%s%s", index, conf.PrefixSeparator) From 
e973b093882de561e5fe5246a559364130f30ced Mon Sep 17 00:00:00 2001 From: Ruben van Staden Date: Mon, 30 Sep 2024 18:27:41 -0400 Subject: [PATCH 2/8] test: add sanitization check for exporting datastream --- exporter/elasticsearchexporter/exporter_test.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index 519dcc5b84f4..b609b927165e 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -11,6 +11,7 @@ import ( "math" "net/http" "runtime" + "strings" "sync" "sync/atomic" "testing" @@ -216,7 +217,10 @@ func TestExporterLogs(t *testing.T) { server := newESTestServer(t, func(docs []itemRequest) ([]itemResponse, error) { rec.Record(docs) - assert.Equal(t, "logs-_record.dataset-resource.namespace", actionJSONToIndex(t, docs[0].Action)) + ds := "record.dataset" + strings.Repeat("_", len(disallowedDatasetRunes)) + ns := "resource.namespace" + strings.Repeat("_", len(disallowedNamespaceRunes)) + + assert.Equal(t, fmt.Sprintf("logs-%s-%s", ds, ns), actionJSONToIndex(t, docs[0].Action)) return itemsAllOK(docs) }) @@ -226,12 +230,12 @@ func TestExporterLogs(t *testing.T) { }) logs := newLogsWithAttributes( map[string]any{ - dataStreamDataset: "*record.dataset", + dataStreamDataset: "record.dataset" + disallowedDatasetRunes, }, nil, map[string]any{ dataStreamDataset: "resource.dataset", - dataStreamNamespace: "resource.namespace", + dataStreamNamespace: "resource.namespace" + disallowedNamespaceRunes, }, ) logs.ResourceLogs().At(0).ScopeLogs().At(0).LogRecords().At(0).Body().SetStr("hello world") From 4383f67e82a26266c2a6d86eaab03da653c619ad Mon Sep 17 00:00:00 2001 From: Ruben van Staden Date: Tue, 1 Oct 2024 10:25:14 -0400 Subject: [PATCH 3/8] improve sanitization func to leverage inlining --- .../data_stream_router.go | 46 +++++++++++++++---- .../elasticsearchexporter/exporter_test.go | 
22 ++++----- exporter/elasticsearchexporter/model_test.go | 2 +- exporter/elasticsearchexporter/util.go | 38 --------------- 4 files changed, 48 insertions(+), 60 deletions(-) diff --git a/exporter/elasticsearchexporter/data_stream_router.go b/exporter/elasticsearchexporter/data_stream_router.go index 7bb0d5bea3cf..da257e8dd98e 100644 --- a/exporter/elasticsearchexporter/data_stream_router.go +++ b/exporter/elasticsearchexporter/data_stream_router.go @@ -6,12 +6,45 @@ package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry import ( "fmt" "regexp" + "strings" + "unicode" "go.opentelemetry.io/collector/pdata/pcommon" ) var receiverRegex = regexp.MustCompile(`/receiver/(\w*receiver)`) +const ( + maxDataStreamBytes = 100 + disallowedNamespaceRunes = "\\/*?\"<>| ,#:" + disallowedDatasetRunes = "-\\/*?\"<>| ,#:" +) + +// Sanitize the datastream fields (dataset, namespace) to apply restrictions +// as outlined in https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html +func sanitizeDataStreamField(field, disallowed string, otel bool) string { + // For Dataset, the naming convention for datastream is expected to be "logs-[dataset].otel-[namespace]". + // This is in order to match the built-in logs-*.otel-* index template. 
+ var suffix string + if otel { + suffix += ".otel" + } + + field = strings.Map(func(r rune) rune { + if strings.ContainsRune(disallowed, r) { + return '_' + } + return unicode.ToLower(r) + }, field) + + if len(field) > maxDataStreamBytes-len(suffix) { + field = field[:maxDataStreamBytes-len(suffix)] + } + field += suffix + + return field +} + func routeWithDefaults(defaultDSType string) func( pcommon.Map, pcommon.Map, @@ -44,10 +77,7 @@ func routeWithDefaults(defaultDSType string) func( } } - dataset = sanitizeDataStreamDataset(dataset) - namespace = sanitizeDataStreamNamespace(namespace) - - // Receiver-based routing + // Receiver-based routing // For example, hostmetricsreceiver (or hostmetricsreceiver.otel in the OTel output mode) // for the scope name // github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/cpuscraper @@ -56,15 +86,13 @@ func routeWithDefaults(defaultDSType string) func( dataset = receiverName } - // The naming convention for datastream is expected to be "logs-[dataset].otel-[namespace]". - // This is in order to match the built-in logs-*.otel-* index template. 
- if otel { - dataset += ".otel" - } + dataset = sanitizeDataStreamField(dataset, disallowedDatasetRunes, otel) + namespace = sanitizeDataStreamField(namespace, disallowedNamespaceRunes, false) recordAttr.PutStr(dataStreamDataset, dataset) recordAttr.PutStr(dataStreamNamespace, namespace) recordAttr.PutStr(dataStreamType, defaultDSType) + return fmt.Sprintf("%s-%s-%s", defaultDSType, dataset, namespace) } } diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index ad8ff01ea62c..83238398863c 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -11,7 +11,6 @@ import ( "math" "net/http" "runtime" - "strings" "sync" "sync/atomic" "testing" @@ -217,10 +216,8 @@ func TestExporterLogs(t *testing.T) { server := newESTestServer(t, func(docs []itemRequest) ([]itemResponse, error) { rec.Record(docs) - ds := "record.dataset" + strings.Repeat("_", len(disallowedDatasetRunes)) - ns := "resource.namespace" + strings.Repeat("_", len(disallowedNamespaceRunes)) - - assert.Equal(t, fmt.Sprintf("logs-%s-%s", ds, ns), actionJSONToIndex(t, docs[0].Action)) + expected := "logs-record.dataset.____________-resource.namespace.-____________" + assert.Equal(t, expected, actionJSONToIndex(t, docs[0].Action)) return itemsAllOK(docs) }) @@ -230,12 +227,12 @@ func TestExporterLogs(t *testing.T) { }) logs := newLogsWithAttributes( map[string]any{ - dataStreamDataset: "record.dataset" + disallowedDatasetRunes, + dataStreamDataset: "record.dataset.\\/*?\"<>| ,#:", }, nil, map[string]any{ dataStreamDataset: "resource.dataset", - dataStreamNamespace: "resource.namespace" + disallowedNamespaceRunes, + dataStreamNamespace: "resource.namespace.-\\/*?\"<>| ,#:", }, ) logs.ResourceLogs().At(0).ScopeLogs().At(0).LogRecords().At(0).Body().SetStr("hello world") @@ -586,7 +583,7 @@ func TestExporterMetrics(t *testing.T) { server := newESTestServer(t, func(docs []itemRequest) 
([]itemResponse, error) { rec.Record(docs) - expected := "metrics-resource.dataset-data.point.namespace" + expected := "metrics-resource.dataset.____________-data.point.namespace.-____________" assert.Equal(t, expected, actionJSONToIndex(t, docs[0].Action)) return itemsAllOK(docs) @@ -598,11 +595,11 @@ func TestExporterMetrics(t *testing.T) { }) metrics := newMetricsWithAttributes( map[string]any{ - dataStreamNamespace: "data.point.namespace", + dataStreamNamespace: "data.point.namespace.-\\/*?\"<>| ,#:", }, nil, map[string]any{ - dataStreamDataset: "resource.dataset", + dataStreamDataset: "resource.dataset.\\/*?\"<>| ,#:", dataStreamNamespace: "resource.namespace", }, ) @@ -1177,7 +1174,8 @@ func TestExporterTraces(t *testing.T) { server := newESTestServer(t, func(docs []itemRequest) ([]itemResponse, error) { rec.Record(docs) - expected := "traces-span.dataset-default" + //expected := "traces-span.dataset-default" + expected := "traces-span.dataset.____________-default" assert.Equal(t, expected, actionJSONToIndex(t, docs[0].Action)) return itemsAllOK(docs) @@ -1189,7 +1187,7 @@ func TestExporterTraces(t *testing.T) { mustSendTraces(t, exporter, newTracesWithAttributes( map[string]any{ - dataStreamDataset: "span.dataset", + dataStreamDataset: "span.dataset.\\/*?\"<>| ,#:", }, nil, map[string]any{ diff --git a/exporter/elasticsearchexporter/model_test.go b/exporter/elasticsearchexporter/model_test.go index 68e8253b48ec..5a7177c58d64 100644 --- a/exporter/elasticsearchexporter/model_test.go +++ b/exporter/elasticsearchexporter/model_test.go @@ -962,7 +962,7 @@ func decodeOTelID(data []byte) ([]byte, error) { func TestEncodeLogOtelMode(t *testing.T) { randomString := strings.Repeat("abcdefghijklmnopqrstuvwxyz0123456789", 10) maxLenNamespace := maxDataStreamBytes - len(disallowedNamespaceRunes) - maxLenDataset := maxDataStreamBytes - len(disallowedDatasetRunes) + maxLenDataset := maxDataStreamBytes - len(disallowedDatasetRunes) - len(".otel") tests := []struct { name 
string diff --git a/exporter/elasticsearchexporter/util.go b/exporter/elasticsearchexporter/util.go index 7c03c5f5b236..e5b398082b3b 100644 --- a/exporter/elasticsearchexporter/util.go +++ b/exporter/elasticsearchexporter/util.go @@ -6,49 +6,11 @@ package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry import ( "bytes" "fmt" - "strings" "time" - "unicode" "github.com/lestrrat-go/strftime" ) -const ( - maxDataStreamBytes = 100 - disallowedNamespaceRunes = "\\/*?\"<>| ,#:" - disallowedDatasetRunes = "-\\/*?\"<>| ,#:" -) - -// Sanitize the datastream fields (dataset, namespace) to apply restrictions -// as outlined in https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html -func sanitizeDataStreamDataset(field string) string { - field = strings.Map(replaceReservedRune(disallowedDatasetRunes), field) - if len(field) > maxDataStreamBytes { - return field[:maxDataStreamBytes] - } - - return field -} - -// Sanitize the datastream fields (dataset, namespace) to apply restrictions -// as outlined in https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html -func sanitizeDataStreamNamespace(field string) string { - field = strings.Map(replaceReservedRune(disallowedNamespaceRunes), field) - if len(field) > maxDataStreamBytes { - return field[:maxDataStreamBytes] - } - return field -} - -func replaceReservedRune(disallowedRunes string) func(r rune) rune { - return func(r rune) rune { - if strings.ContainsRune(disallowedRunes, r) { - return '_' - } - return unicode.ToLower(r) - } -} - func generateIndexWithLogstashFormat(index string, conf *LogstashFormatSettings, t time.Time) (string, error) { if conf.Enabled { partIndex := fmt.Sprintf("%s%s", index, conf.PrefixSeparator) From 86cc2aeeb04255ad4f1fd727c13a1bf83ca3f14b Mon Sep 17 00:00:00 2001 From: Ruben van Staden Date: Tue, 1 Oct 2024 12:38:57 -0400 Subject: [PATCH 4/8] minor change to datastream sanitization func --- .../data_stream_router.go | 27 ++++++++++--------- 1 file changed, 
14 insertions(+), 13 deletions(-) diff --git a/exporter/elasticsearchexporter/data_stream_router.go b/exporter/elasticsearchexporter/data_stream_router.go index da257e8dd98e..26523d183bae 100644 --- a/exporter/elasticsearchexporter/data_stream_router.go +++ b/exporter/elasticsearchexporter/data_stream_router.go @@ -22,14 +22,8 @@ const ( // Sanitize the datastream fields (dataset, namespace) to apply restrictions // as outlined in https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html -func sanitizeDataStreamField(field, disallowed string, otel bool) string { - // For Dataset, the naming convention for datastream is expected to be "logs-[dataset].otel-[namespace]". - // This is in order to match the built-in logs-*.otel-* index template. - var suffix string - if otel { - suffix += ".otel" - } - +// The suffix will be appended after truncation of max bytes. +func sanitizeDataStreamField(field, disallowed, appendSuffix string) string { field = strings.Map(func(r rune) rune { if strings.ContainsRune(disallowed, r) { return '_' @@ -37,10 +31,10 @@ func sanitizeDataStreamField(field, disallowed string, otel bool) string { return unicode.ToLower(r) }, field) - if len(field) > maxDataStreamBytes-len(suffix) { - field = field[:maxDataStreamBytes-len(suffix)] + if len(field) > maxDataStreamBytes-len(appendSuffix) { + field = field[:maxDataStreamBytes-len(appendSuffix)] } - field += suffix + field += appendSuffix return field } @@ -86,8 +80,15 @@ func routeWithDefaults(defaultDSType string) func( dataset = receiverName } - dataset = sanitizeDataStreamField(dataset, disallowedDatasetRunes, otel) - namespace = sanitizeDataStreamField(namespace, disallowedNamespaceRunes, false) + // For Dataset, the naming convention for datastream is expected to be "logs-[dataset].otel-[namespace]". + // This is in order to match the built-in logs-*.otel-* index template. 
+ var datasetSuffix string + if otel { + datasetSuffix += ".otel" + } + + dataset = sanitizeDataStreamField(dataset, disallowedDatasetRunes, datasetSuffix) + namespace = sanitizeDataStreamField(namespace, disallowedNamespaceRunes, "") recordAttr.PutStr(dataStreamDataset, dataset) recordAttr.PutStr(dataStreamNamespace, namespace) From 71ffaca432da2d33b6c89b95e1008741e9d02f63 Mon Sep 17 00:00:00 2001 From: Ruben van Staden Date: Tue, 1 Oct 2024 14:14:46 -0400 Subject: [PATCH 5/8] add changelog --- ...chexporter_sanitize-datastream-fields.yaml | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .chloggen/elasticsearchexporter_sanitize-datastream-fields.yaml diff --git a/.chloggen/elasticsearchexporter_sanitize-datastream-fields.yaml b/.chloggen/elasticsearchexporter_sanitize-datastream-fields.yaml new file mode 100644 index 000000000000..92ea2b187712 --- /dev/null +++ b/.chloggen/elasticsearchexporter_sanitize-datastream-fields.yaml @@ -0,0 +1,28 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: elasticsearchexporter + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Sanitize datastream routing fields + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [34285] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + Sanitize the dataset and namespace fields according to https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html. 
+ +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] From 9e3564ee22e73236917107db2ca688692f026c53 Mon Sep 17 00:00:00 2001 From: Ruben van Staden Date: Tue, 1 Oct 2024 18:51:16 -0400 Subject: [PATCH 6/8] fix linting issue in comment --- exporter/elasticsearchexporter/exporter_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index c04536056d40..4da5380c00cc 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -1224,7 +1224,7 @@ func TestExporterTraces(t *testing.T) { server := newESTestServer(t, func(docs []itemRequest) ([]itemResponse, error) { rec.Record(docs) - //expected := "traces-span.dataset-default" + // expected := "traces-span.dataset-default" expected := "traces-span.dataset.____________-default" assert.Equal(t, expected, actionJSONToIndex(t, docs[0].Action)) From 4020fc7579bd24773a479406a9d8421ae34a111a Mon Sep 17 00:00:00 2001 From: Ruben van Staden Date: Wed, 2 Oct 2024 21:18:28 -0400 Subject: [PATCH 7/8] minor changes --- exporter/elasticsearchexporter/data_stream_router.go | 2 +- exporter/elasticsearchexporter/exporter_test.go | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/data_stream_router.go b/exporter/elasticsearchexporter/data_stream_router.go index 26523d183bae..a64b15d0ad97 100644 --- a/exporter/elasticsearchexporter/data_stream_router.go +++ b/exporter/elasticsearchexporter/data_stream_router.go @@ -80,7 +80,7 @@ 
func routeWithDefaults(defaultDSType string) func( dataset = receiverName } - // For Dataset, the naming convention for datastream is expected to be "logs-[dataset].otel-[namespace]". + // For dataset, the naming convention for datastream is expected to be "logs-[dataset].otel-[namespace]". // This is in order to match the built-in logs-*.otel-* index template. var datasetSuffix string if otel { diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index 4da5380c00cc..dd83212d44ae 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -1224,7 +1224,6 @@ func TestExporterTraces(t *testing.T) { server := newESTestServer(t, func(docs []itemRequest) ([]itemResponse, error) { rec.Record(docs) - // expected := "traces-span.dataset-default" expected := "traces-span.dataset.____________-default" assert.Equal(t, expected, actionJSONToIndex(t, docs[0].Action)) From d89ed9410abbaa84ad6202ea947cfe361d2f228e Mon Sep 17 00:00:00 2001 From: Ruben van Staden Date: Tue, 8 Oct 2024 18:28:11 -0400 Subject: [PATCH 8/8] doc: add reference to data stream field restrictions --- exporter/elasticsearchexporter/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/exporter/elasticsearchexporter/README.md b/exporter/elasticsearchexporter/README.md index 31d2cae89c6f..6a43616455b8 100644 --- a/exporter/elasticsearchexporter/README.md +++ b/exporter/elasticsearchexporter/README.md @@ -121,7 +121,7 @@ This can be customised through the following settings: - `logs_dynamic_index` (optional): uses resource, scope, or log record attributes to dynamically construct index name. - `enabled`(default=false): Enable/Disable dynamic index for log records. 
If `data_stream.dataset` or `data_stream.namespace` exist in attributes (precedence: log record attribute > scope attribute > resource attribute), they will be used to dynamically construct index name in the form `logs-${data_stream.dataset}-${data_stream.namespace}`. Otherwise, if - `elasticsearch.index.prefix` or `elasticsearch.index.suffix` exist in attributes (precedence: resource attribute > scope attribute > log record attribute), they will be used to dynamically construct index name in the form `${elasticsearch.index.prefix}${logs_index}${elasticsearch.index.suffix}`. Otherwise, if scope name matches regex `/receiver/(\w*receiver)`, `data_stream.dataset` will be capture group #1. Otherwise, the index name falls back to `logs-generic-default`, and `logs_index` config will be ignored. Except for prefix/suffix attribute presence, the resulting docs will contain the corresponding `data_stream.*` fields. + `elasticsearch.index.prefix` or `elasticsearch.index.suffix` exist in attributes (precedence: resource attribute > scope attribute > log record attribute), they will be used to dynamically construct index name in the form `${elasticsearch.index.prefix}${logs_index}${elasticsearch.index.suffix}`. Otherwise, if scope name matches regex `/receiver/(\w*receiver)`, `data_stream.dataset` will be capture group #1. Otherwise, the index name falls back to `logs-generic-default`, and `logs_index` config will be ignored. Except for prefix/suffix attribute presence, the resulting docs will contain the corresponding `data_stream.*` fields. See the restrictions applied to [Data Stream Fields](https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html). - `metrics_index` (optional): The [index] or [data stream] name to publish metrics to. The default value is `metrics-generic-default`. ⚠️ Note that metrics support is currently in development. 
@@ -129,13 +129,13 @@ This can be customised through the following settings: - `metrics_dynamic_index` (optional): uses resource, scope or data point attributes to dynamically construct index name. ⚠️ Note that metrics support is currently in development. - `enabled`(default=true): Enable/disable dynamic index for metrics. If `data_stream.dataset` or `data_stream.namespace` exist in attributes (precedence: data point attribute > scope attribute > resource attribute), they will be used to dynamically construct index name in the form `metrics-${data_stream.dataset}-${data_stream.namespace}`. Otherwise, if - `elasticsearch.index.prefix` or `elasticsearch.index.suffix` exist in attributes (precedence: resource attribute > scope attribute > data point attribute), they will be used to dynamically construct index name in the form `${elasticsearch.index.prefix}${metrics_index}${elasticsearch.index.suffix}`. Otherwise, if scope name matches regex `/receiver/(\w*receiver)`, `data_stream.dataset` will be capture group #1. Otherwise, the index name falls back to `metrics-generic-default`, and `metrics_index` config will be ignored. Except for prefix/suffix attribute presence, the resulting docs will contain the corresponding `data_stream.*` fields. + `elasticsearch.index.prefix` or `elasticsearch.index.suffix` exist in attributes (precedence: resource attribute > scope attribute > data point attribute), they will be used to dynamically construct index name in the form `${elasticsearch.index.prefix}${metrics_index}${elasticsearch.index.suffix}`. Otherwise, if scope name matches regex `/receiver/(\w*receiver)`, `data_stream.dataset` will be capture group #1. Otherwise, the index name falls back to `metrics-generic-default`, and `metrics_index` config will be ignored. 
Except for prefix/suffix attribute presence, the resulting docs will contain the corresponding `data_stream.*` fields. See the restrictions applied to [Data Stream Fields](https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html). - `traces_index`: The [index] or [data stream] name to publish traces to. The default value is `traces-generic-default`. - `traces_dynamic_index` (optional): uses resource, scope, or span attributes to dynamically construct index name. - `enabled`(default=false): Enable/Disable dynamic index for trace spans. If `data_stream.dataset` or `data_stream.namespace` exist in attributes (precedence: span attribute > scope attribute > resource attribute), they will be used to dynamically construct index name in the form `traces-${data_stream.dataset}-${data_stream.namespace}`. Otherwise, if - `elasticsearch.index.prefix` or `elasticsearch.index.suffix` exist in attributes (precedence: resource attribute > scope attribute > span attribute), they will be used to dynamically construct index name in the form `${elasticsearch.index.prefix}${traces_index}${elasticsearch.index.suffix}`. Otherwise, if scope name matches regex `/receiver/(\w*receiver)`, `data_stream.dataset` will be capture group #1. Otherwise, the index name falls back to `traces-generic-default`, and `traces_index` config will be ignored. Except for prefix/suffix attribute presence, the resulting docs will contain the corresponding `data_stream.*` fields. There is an exception for span events under OTel mapping mode (`mapping::mode: otel`), where span event attributes instead of span attributes are considered, and `data_stream.type` is always `logs` instead of `traces` such that documents are routed to `logs-${data_stream.dataset}-${data_stream.namespace}`. 
+ `elasticsearch.index.prefix` or `elasticsearch.index.suffix` exist in attributes (precedence: resource attribute > scope attribute > span attribute), they will be used to dynamically construct index name in the form `${elasticsearch.index.prefix}${traces_index}${elasticsearch.index.suffix}`. Otherwise, if scope name matches regex `/receiver/(\w*receiver)`, `data_stream.dataset` will be capture group #1. Otherwise, the index name falls back to `traces-generic-default`, and `traces_index` config will be ignored. Except for prefix/suffix attribute presence, the resulting docs will contain the corresponding `data_stream.*` fields. See the restrictions applied to [Data Stream Fields](https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html). There is an exception for span events under OTel mapping mode (`mapping::mode: otel`), where span event attributes instead of span attributes are considered, and `data_stream.type` is always `logs` instead of `traces` such that documents are routed to `logs-${data_stream.dataset}-${data_stream.namespace}`. - `logstash_format` (optional): Logstash format compatibility. Logs, metrics and traces can be written into an index in Logstash format. - `enabled`(default=false): Enable/disable Logstash format compatibility. When `logstash_format.enabled` is `true`, the index name is composed using `(logs|metrics|traces)_index` or `(logs|metrics|traces)_dynamic_index` as prefix and the date as suffix, @@ -337,4 +337,4 @@ Otherwise, it is mapped to an empty string (""). #### `@timestamp` -In case the record contains `timestamp`, this value is used. Otherwise, the `observed timestamp` is used. \ No newline at end of file +In case the record contains `timestamp`, this value is used. Otherwise, the `observed timestamp` is used.