From d3b327bbb0063b237232fe25b298f3a7b5055694 Mon Sep 17 00:00:00 2001 From: davidvader Date: Mon, 16 Sep 2024 11:01:04 -0500 Subject: [PATCH 1/2] enhance(tracing): customizable tasks sampler --- docker-compose.yml | 3 ++ tracing.tasks.example.json | 8 ++++ tracing/config.go | 30 ++++++++++++ tracing/flags.go | 6 +++ tracing/sampler.go | 19 ++++++++ tracing/sampler_test.go | 93 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 159 insertions(+) create mode 100644 tracing.tasks.example.json create mode 100644 tracing/sampler_test.go diff --git a/docker-compose.yml b/docker-compose.yml index a96b4b87f..6f0b4af8a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -47,11 +47,14 @@ services: VELA_OTEL_TRACING_ENABLE: true VELA_OTEL_EXPORTER_OTLP_ENDPOINT: http://jaeger:4318 VELA_OTEL_TRACING_SAMPLER_RATELIMIT_PER_SECOND: 100 + VELA_OTEL_TRACING_SAMPLER_TASKS_CONFIG_FILEPATH: tracing.json env_file: - .env restart: always ports: - '8080:8080' + volumes: + - './tracing.tasks.example.json:/tracing.json' depends_on: postgres: condition: service_healthy diff --git a/tracing.tasks.example.json b/tracing.tasks.example.json new file mode 100644 index 000000000..d867624fb --- /dev/null +++ b/tracing.tasks.example.json @@ -0,0 +1,8 @@ +{ + "/health": { + "active": false + }, + "gorm.query": { + "active": true + } +} \ No newline at end of file diff --git a/tracing/config.go b/tracing/config.go index 7cbb7b431..4e6ffbe29 100644 --- a/tracing/config.go +++ b/tracing/config.go @@ -3,10 +3,13 @@ package tracing import ( + "encoding/json" + "fmt" "maps" "os" "strings" + "github.com/pkg/errors" "github.com/urfave/cli/v2" sdktrace "go.opentelemetry.io/otel/sdk/trace" ) @@ -34,6 +37,18 @@ type Config struct { // Used to determine if a trace should be sampled. type Sampler struct { PerSecond float64 + Tasks +} + +// Tasks represents a map of task names to per-task configurations. +// A 'task name' is the endpoint or instrumentation scope, depending on the task. +// For example, database trace tasks could be 'gorm.query' and HTTP requests could be 'api/v1/:worker' depending on the endpoint. +type Tasks map[string]Task + +// Task represents the sampler configurations on a per-task basis. +// 'Active' will disable/enable the task. If tracing encounters a task name not present in the map, it is considered Active (true). +type Task struct { + Active bool } // FromCLIContext takes cli context and returns a tracing config to supply to traceable services. @@ -49,9 +64,24 @@ func FromCLIContext(c *cli.Context) (*Client, error) { SpanAttributes: map[string]string{}, Sampler: Sampler{ PerSecond: c.Float64("tracing.sampler.persecond"), + Tasks: Tasks{}, }, } + // read per-endpoint configurations from file + endpointsConfigPath := c.String("tracing.sampler.endpoints") + if len(endpointsConfigPath) > 0 { + f, err := os.ReadFile(endpointsConfigPath) + if err != nil { + return nil, errors.Wrap(err, fmt.Sprintf("unable to read tracing endpoints config file from path %s", endpointsConfigPath)) + } + + err = json.Unmarshal(f, &cfg.Sampler.Tasks) + if err != nil { + return nil, errors.Wrap(err, fmt.Sprintf("unable to parse tracing endpoints config file from path %s", endpointsConfigPath)) + } + } + // identity func used to map a string back to itself identityFn := func(s string) string { return s } diff --git a/tracing/flags.go b/tracing/flags.go index 5e08868b6..7f68d328a 100644 --- a/tracing/flags.go +++ b/tracing/flags.go @@ -73,4 +73,10 @@ var Flags = []cli.Flag{ Usage: "set otel tracing head-sampler rate-limiting to N per second. see: https://opentelemetry.io/docs/concepts/sampling/", Value: 100, }, + + &cli.StringFlag{ + EnvVars: []string{"VELA_OTEL_TRACING_SAMPLER_TASKS_CONFIG_FILEPATH"}, + Name: "tracing.sampler.endpoints", + Usage: "set otel tracing head-sampler endpoint configurations to alter how certain endpoints are sampled. no path indicates all endpoints are sampler using default parameters. see: https://opentelemetry.io/docs/concepts/sampling/", + }, } diff --git a/tracing/sampler.go b/tracing/sampler.go index 198a74700..a3c0037a9 100644 --- a/tracing/sampler.go +++ b/tracing/sampler.go @@ -4,6 +4,7 @@ package tracing import ( "fmt" + "strings" "time" "go.opentelemetry.io/otel/attribute" @@ -54,6 +55,10 @@ func (s *RateLimitSampler) ShouldSample(p sdktrace.SamplingParameters) sdktrace. result.Decision = sdktrace.RecordAndSample } + if !s.ShouldSampleTask(p) { + result.Decision = sdktrace.Drop + } + return result } @@ -61,3 +66,17 @@ func (s *RateLimitSampler) ShouldSample(p sdktrace.SamplingParameters) sdktrace. func (s *RateLimitSampler) Description() string { return fmt.Sprintf("rate-limit-sampler{%v}", s.Config.PerSecond) } + +// ShouldSampleTask returns whether a task should be sampled. +func (s *RateLimitSampler) ShouldSampleTask(p sdktrace.SamplingParameters) bool { + taskName := strings.ToLower(p.Name) + + endpoint, ok := s.Config.Tasks[taskName] + if ok { + if !endpoint.Active { + return false + } + } + + return true +} diff --git a/tracing/sampler_test.go b/tracing/sampler_test.go new file mode 100644 index 000000000..6317488c1 --- /dev/null +++ b/tracing/sampler_test.go @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: Apache-2.0 + +package tracing + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + sdktrace "go.opentelemetry.io/otel/sdk/trace" +) + +func TestTracing_Sampler_ShouldSampleTask(t *testing.T) { + // setup tests + tests := []struct { + sampler RateLimitSampler + tasks Tasks + samplerParams sdktrace.SamplingParameters + want bool + }{ + // no tasks + { + sampler: RateLimitSampler{ + Config: Config{ + Sampler: Sampler{}, + }, + }, + samplerParams: sdktrace.SamplingParameters{ + Name: "/health", + }, + want: true, + }, + // task is active + { + sampler: RateLimitSampler{ + Config: Config{ + Sampler: Sampler{ + Tasks: Tasks{ + "/health": { + Active: true, + }, + }}, + }, + }, + samplerParams: sdktrace.SamplingParameters{ + Name: "/health", + }, + want: true, + }, + // task is inactive + { + sampler: RateLimitSampler{ + Config: Config{ + Sampler: Sampler{ + Tasks: Tasks{ + "/health": { + Active: false, + }, + }}, + }, + }, + samplerParams: sdktrace.SamplingParameters{ + Name: "/health", + }, + want: false, + }, + // task is non-endpoint + { + sampler: RateLimitSampler{ + Config: Config{ + Sampler: Sampler{ + Tasks: Tasks{ + "gorm.query": { + Active: false, + }, + }}, + }, + }, + samplerParams: sdktrace.SamplingParameters{ + Name: "Gorm.Query", + }, + want: false, + }, + } + + // run tests + for _, test := range tests { + got := test.sampler.ShouldSampleTask(test.samplerParams) + + if diff := cmp.Diff(got, test.want); diff != "" { + t.Errorf("ShouldSampleTask mismatch (-want +got):\n%s", diff) + } + } +} From bc7cdf3ef0f57743fff41ea59aea30b340344ee2 Mon Sep 17 00:00:00 2001 From: davidvader Date: Tue, 17 Sep 2024 08:55:26 -0500 Subject: [PATCH 2/2] chore: remove example json --- docker-compose.yml | 3 --- tracing.tasks.example.json | 8 -------- 2 files changed, 11 deletions(-) delete mode 100644 tracing.tasks.example.json diff --git a/docker-compose.yml b/docker-compose.yml index 6f0b4af8a..a96b4b87f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -47,14 +47,11 @@ services: VELA_OTEL_TRACING_ENABLE: true VELA_OTEL_EXPORTER_OTLP_ENDPOINT: http://jaeger:4318 VELA_OTEL_TRACING_SAMPLER_RATELIMIT_PER_SECOND: 100 - VELA_OTEL_TRACING_SAMPLER_TASKS_CONFIG_FILEPATH: tracing.json env_file: - .env restart: always ports: - '8080:8080' - volumes: - - './tracing.tasks.example.json:/tracing.json' depends_on: postgres: condition: service_healthy diff --git a/tracing.tasks.example.json b/tracing.tasks.example.json deleted file mode 100644 index d867624fb..000000000 --- a/tracing.tasks.example.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "/health": { - "active": false - }, - "gorm.query": { - "active": true - } -} \ No newline at end of file