diff --git a/tracing/config.go b/tracing/config.go index 7cbb7b431..4e6ffbe29 100644 --- a/tracing/config.go +++ b/tracing/config.go @@ -3,10 +3,13 @@ package tracing import ( + "encoding/json" + "fmt" "maps" "os" "strings" + "github.com/pkg/errors" "github.com/urfave/cli/v2" sdktrace "go.opentelemetry.io/otel/sdk/trace" ) @@ -34,6 +37,18 @@ type Config struct { // Used to determine if a trace should be sampled. type Sampler struct { PerSecond float64 + Tasks +} + +// Tasks represents a map of task names to per-task configurations. +// A 'task name' is the endpoint or instrumentation scope, depending on the task. +// For example, database trace tasks could be 'gorm.query' and HTTP requests could be 'api/v1/:worker' depending on the endpoint. +type Tasks map[string]Task + +// Task represents the sampler configurations on a per-task basis. +// 'Active' will disable/enable the task. If tracing encounters a task name not present in the map, it is considered Active (true). +type Task struct { + Active bool } // FromCLIContext takes cli context and returns a tracing config to supply to traceable services. @@ -49,9 +64,24 @@ func FromCLIContext(c *cli.Context) (*Client, error) { SpanAttributes: map[string]string{}, Sampler: Sampler{ PerSecond: c.Float64("tracing.sampler.persecond"), + Tasks: Tasks{}, }, } + // read per-endpoint configurations from file + endpointsConfigPath := c.String("tracing.sampler.endpoints") + if len(endpointsConfigPath) > 0 { + f, err := os.ReadFile(endpointsConfigPath) + if err != nil { + return nil, errors.Wrap(err, fmt.Sprintf("unable to read tracing endpoints config file from path %s", endpointsConfigPath)) + } + + err = json.Unmarshal(f, &cfg.Sampler.Tasks) + if err != nil { + return nil, errors.Wrap(err, fmt.Sprintf("unable to parse tracing endpoints config file from path %s", endpointsConfigPath)) + } + } + // identity func used to map a string back to itself identityFn := func(s string) string { return s } diff --git a/tracing/flags.go b/tracing/flags.go index 5e08868b6..7f68d328a 100644 --- a/tracing/flags.go +++ b/tracing/flags.go @@ -73,4 +73,10 @@ var Flags = []cli.Flag{ Usage: "set otel tracing head-sampler rate-limiting to N per second. see: https://opentelemetry.io/docs/concepts/sampling/", Value: 100, }, + + &cli.StringFlag{ + EnvVars: []string{"VELA_OTEL_TRACING_SAMPLER_TASKS_CONFIG_FILEPATH"}, + Name: "tracing.sampler.endpoints", + Usage: "set otel tracing head-sampler endpoint configurations to alter how certain endpoints are sampled. no path indicates all endpoints are sampler using default parameters. see: https://opentelemetry.io/docs/concepts/sampling/", + }, } diff --git a/tracing/sampler.go b/tracing/sampler.go index 198a74700..a3c0037a9 100644 --- a/tracing/sampler.go +++ b/tracing/sampler.go @@ -4,6 +4,7 @@ package tracing import ( "fmt" + "strings" "time" "go.opentelemetry.io/otel/attribute" @@ -54,6 +55,10 @@ func (s *RateLimitSampler) ShouldSample(p sdktrace.SamplingParameters) sdktrace. result.Decision = sdktrace.RecordAndSample } + if !s.ShouldSampleTask(p) { + result.Decision = sdktrace.Drop + } + return result } @@ -61,3 +66,17 @@ func (s *RateLimitSampler) ShouldSample(p sdktrace.SamplingParameters) sdktrace. func (s *RateLimitSampler) Description() string { return fmt.Sprintf("rate-limit-sampler{%v}", s.Config.PerSecond) } + +// ShouldSampleTask returns whether a task should be sampled. +func (s *RateLimitSampler) ShouldSampleTask(p sdktrace.SamplingParameters) bool { + taskName := strings.ToLower(p.Name) + + endpoint, ok := s.Config.Tasks[taskName] + if ok { + if !endpoint.Active { + return false + } + } + + return true +} diff --git a/tracing/sampler_test.go b/tracing/sampler_test.go new file mode 100644 index 000000000..6317488c1 --- /dev/null +++ b/tracing/sampler_test.go @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: Apache-2.0 + +package tracing + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + sdktrace "go.opentelemetry.io/otel/sdk/trace" +) + +func TestTracing_Sampler_ShouldSampleTask(t *testing.T) { + // setup tests + tests := []struct { + sampler RateLimitSampler + tasks Tasks + samplerParams sdktrace.SamplingParameters + want bool + }{ + // no tasks + { + sampler: RateLimitSampler{ + Config: Config{ + Sampler: Sampler{}, + }, + }, + samplerParams: sdktrace.SamplingParameters{ + Name: "/health", + }, + want: true, + }, + // task is active + { + sampler: RateLimitSampler{ + Config: Config{ + Sampler: Sampler{ + Tasks: Tasks{ + "/health": { + Active: true, + }, + }}, + }, + }, + samplerParams: sdktrace.SamplingParameters{ + Name: "/health", + }, + want: true, + }, + // task is inactive + { + sampler: RateLimitSampler{ + Config: Config{ + Sampler: Sampler{ + Tasks: Tasks{ + "/health": { + Active: false, + }, + }}, + }, + }, + samplerParams: sdktrace.SamplingParameters{ + Name: "/health", + }, + want: false, + }, + // task is non-endpoint + { + sampler: RateLimitSampler{ + Config: Config{ + Sampler: Sampler{ + Tasks: Tasks{ + "gorm.query": { + Active: false, + }, + }}, + }, + }, + samplerParams: sdktrace.SamplingParameters{ + Name: "Gorm.Query", + }, + want: false, + }, + } + + // run tests + for _, test := range tests { + got := test.sampler.ShouldSampleTask(test.samplerParams) + + if diff := cmp.Diff(got, test.want); diff != "" { + t.Errorf("ShouldSampleTask mismatch (-want +got):\n%s", diff) + } + } +}