From f6b93339bec6daee126e82d32f49952af0a050ea Mon Sep 17 00:00:00 2001 From: Damien Mehala Date: Tue, 28 May 2024 11:23:28 +0200 Subject: [PATCH] fix: trace sampling rules order (#125) Reset trace sampling rules legacy order. --- src/datadog/config_manager.cpp | 52 +++++++++++++++----- src/datadog/config_manager.h | 2 +- src/datadog/span_matcher.h | 9 ---- src/datadog/trace_sampler.cpp | 20 ++++---- src/datadog/trace_sampler.h | 6 +-- src/datadog/trace_sampler_config.cpp | 21 ++++++-- src/datadog/trace_sampler_config.h | 9 ++-- test/test_tracer_config.cpp | 72 ++++++++++++---------------- 8 files changed, 103 insertions(+), 88 deletions(-) diff --git a/src/datadog/config_manager.cpp b/src/datadog/config_manager.cpp index d8c78128..8a453f36 100644 --- a/src/datadog/config_manager.cpp +++ b/src/datadog/config_manager.cpp @@ -8,8 +8,7 @@ namespace datadog { namespace tracing { namespace { -using Rules = - std::unordered_map; +using Rules = std::vector; Expected parse_trace_sampling_rules(const nlohmann::json& json_rules) { Rules parsed_rules; @@ -27,7 +26,9 @@ Expected parse_trace_sampling_rules(const nlohmann::json& json_rules) { return error->with_prefix(prefix); } - TraceSamplerRate rate; + TraceSamplerRule rule; + rule.matcher = std::move(*matcher); + if (auto sample_rate = json_rule.find("sample_rate"); sample_rate != json_rule.end()) { type = sample_rate->type_name(); @@ -42,7 +43,10 @@ Expected parse_trace_sampling_rules(const nlohmann::json& json_rules) { return *error; } - rate.value = *maybe_rate; + rule.rate = *maybe_rate; + } else { + return Error{Error::TRACE_SAMPLING_RULES_INVALID_JSON, + "Missing \"sample_rate\" field"}; } if (auto provenance_it = json_rule.find("provenance"); @@ -53,15 +57,21 @@ Expected parse_trace_sampling_rules(const nlohmann::json& json_rules) { std::move(message)}; } - auto provenance = provenance_it->get(); + auto provenance = to_lower(provenance_it->get()); if (provenance == "customer") { - rate.mechanism = SamplingMechanism::REMOTE_RULE; + rule.mechanism = SamplingMechanism::REMOTE_RULE; } else if (provenance == "dynamic") { - rate.mechanism = SamplingMechanism::REMOTE_ADAPTIVE_RULE; + rule.mechanism = SamplingMechanism::REMOTE_ADAPTIVE_RULE; + } else { + return Error{Error::TRACE_SAMPLING_RULES_UNKNOWN_PROPERTY, + "Unknown \"provenance\" value"}; } + } else { + return Error{Error::TRACE_SAMPLING_RULES_INVALID_JSON, + "Missing \"provenance\" field"}; } - parsed_rules.emplace(std::move(*matcher), std::move(rate)); + parsed_rules.emplace_back(std::move(rule)); } return parsed_rules; @@ -98,7 +108,19 @@ std::vector ConfigManager::update(const ConfigUpdate& conf) { std::lock_guard lock(mutex_); - decltype(rules_) rules; + // NOTE(@dmehala): Sampling rules are generally not well specified. + // + // Rules are evaluated in the order they are inserted, which means the most + // specific matching rule might not be evaluated, even though it should be. + // For now, we must follow this legacy behavior. + // + // Additionally, I exploit this behavior to avoid a merge operation. + // The resulting array can contain duplicate `SpanMatcher`, but only the first + // encountered one will be evaluated, acting as an override. + // + // Remote Configuration rules will/should always be placed at the begining of + // the array, ensuring they are evaluated first. + auto rules = rules_; if (!conf.trace_sampling_rate) { auto found = default_metadata_.find(ConfigName::TRACE_SAMPLING_RATE); @@ -112,7 +134,12 @@ std::vector ConfigManager::update(const ConfigUpdate& conf) { ConfigMetadata::Origin::REMOTE_CONFIG); auto rate = Rate::from(*conf.trace_sampling_rate); - rules[catch_all] = TraceSamplerRate{*rate, SamplingMechanism::RULE}; + + TraceSamplerRule rule; + rule.rate = *rate; + rule.matcher = catch_all; + rule.mechanism = SamplingMechanism::RULE; + rules.emplace(rules.cbegin(), std::move(rule)); metadata.emplace_back(std::move(trace_sampling_metadata)); } @@ -131,14 +158,13 @@ std::vector ConfigManager::update(const ConfigUpdate& conf) { if (auto error = maybe_rules.if_error()) { trace_sampling_rules_metadata.error = std::move(*error); } else { - rules.merge(*maybe_rules); + rules.insert(rules.cbegin(), maybe_rules->begin(), maybe_rules->end()); } metadata.emplace_back(std::move(trace_sampling_rules_metadata)); } - rules.insert(rules_.cbegin(), rules_.cend()); - trace_sampler_->set_rules(rules); + trace_sampler_->set_rules(std::move(rules)); if (!conf.tags) { reset_config(ConfigName::TAGS, span_defaults_, metadata); diff --git a/src/datadog/config_manager.h b/src/datadog/config_manager.h index 5c0536bc..ce43f2ab 100644 --- a/src/datadog/config_manager.h +++ b/src/datadog/config_manager.h @@ -55,7 +55,7 @@ class ConfigManager { std::unordered_map default_metadata_; std::shared_ptr trace_sampler_; - std::unordered_map rules_; + std::vector rules_; DynamicConfig> span_defaults_; DynamicConfig report_traces_; diff --git a/src/datadog/span_matcher.h b/src/datadog/span_matcher.h index 2ff0f4c8..ab70c91e 100644 --- a/src/datadog/span_matcher.h +++ b/src/datadog/span_matcher.h @@ -37,15 +37,6 @@ struct SpanMatcher { return (service == other.service && name == other.name && resource == other.resource && tags == other.tags); } - - // TODO: add tags - struct Hash { - size_t operator()(const SpanMatcher& rule) const { - return std::hash()(rule.service) ^ - (std::hash()(rule.name) << 1) ^ - (std::hash()(rule.resource) << 2); - } - }; }; static const SpanMatcher catch_all; diff --git a/src/datadog/trace_sampler.cpp b/src/datadog/trace_sampler.cpp index 356381a5..7688e237 100644 --- a/src/datadog/trace_sampler.cpp +++ b/src/datadog/trace_sampler.cpp @@ -21,9 +21,7 @@ TraceSampler::TraceSampler(const FinalizedTraceSamplerConfig& config, limiter_(clock, config.max_per_second), limiter_max_per_second_(config.max_per_second) {} -void TraceSampler::set_rules( - std::unordered_map - rules) { +void TraceSampler::set_rules(std::vector rules) { std::lock_guard lock(mutex_); rules_ = std::move(rules); } @@ -35,18 +33,18 @@ SamplingDecision TraceSampler::decide(const SpanData& span) { // First check sampling rules. const auto found_rule = std::find_if(rules_.cbegin(), rules_.cend(), - [&](const auto& it) { return it.first.match(span); }); + [&](const auto& it) { return it.matcher.match(span); }); // `mutex_` protects `limiter_`, `collector_sample_rates_`, and // `collector_default_sample_rate_`, so let's lock it here. std::lock_guard lock(mutex_); if (found_rule != rules_.end()) { - const auto& [rule, rate] = *found_rule; - decision.mechanism = int(rate.mechanism); + const auto& rule = *found_rule; + decision.mechanism = int(rule.mechanism); decision.limiter_max_per_second = limiter_max_per_second_; - decision.configured_rate = rate.value; - const std::uint64_t threshold = max_id_from_rate(rate.value); + decision.configured_rate = rule.rate; + const std::uint64_t threshold = max_id_from_rate(rule.rate); if (knuth_hash(span.trace_id.low) < threshold) { const auto result = limiter_.allow(); if (result.allowed) { @@ -106,10 +104,8 @@ void TraceSampler::handle_collector_response( nlohmann::json TraceSampler::config_json() const { std::vector rules; - for (const auto& [rule, rate] : rules_) { - nlohmann::json j = rule.to_json(); - j["sampling_rate"] = rate.value.value(); - rules.push_back(std::move(j)); + for (const auto& rule : rules_) { + rules.push_back(rule.to_json()); } return nlohmann::json::object({ diff --git a/src/datadog/trace_sampler.h b/src/datadog/trace_sampler.h index 03d19fa5..30a1259c 100644 --- a/src/datadog/trace_sampler.h +++ b/src/datadog/trace_sampler.h @@ -107,16 +107,14 @@ class TraceSampler { Optional collector_default_sample_rate_; std::unordered_map collector_sample_rates_; - std::unordered_map rules_; + std::vector rules_; Limiter limiter_; double limiter_max_per_second_; public: TraceSampler(const FinalizedTraceSamplerConfig& config, const Clock& clock); - void set_rules( - std::unordered_map - rules); + void set_rules(std::vector rules); // Return a sampling decision for the specified root span. SamplingDecision decide(const SpanData&); diff --git a/src/datadog/trace_sampler_config.cpp b/src/datadog/trace_sampler_config.cpp index 4e39914e..8c0b0dcc 100644 --- a/src/datadog/trace_sampler_config.cpp +++ b/src/datadog/trace_sampler_config.cpp @@ -144,6 +144,12 @@ std::string to_string(const std::vector &rules) { } // namespace +nlohmann::json TraceSamplerRule::to_json() const { + auto j = matcher.to_json(); + j["sample_rate"] = rate.value(); + return j; +} + TraceSamplerConfig::Rule::Rule(const SpanMatcher &base) : SpanMatcher(base) {} Expected finalize_config( @@ -181,9 +187,11 @@ Expected finalize_config( return error->with_prefix(prefix); } - SpanMatcher matcher = rule; - result.rules.emplace( - matcher, TraceSamplerRate{*maybe_rate, SamplingMechanism::RULE}); + TraceSamplerRule finalized_rule; + finalized_rule.matcher = rule; + finalized_rule.rate = *maybe_rate; + finalized_rule.mechanism = SamplingMechanism::RULE; + result.rules.emplace_back(std::move(finalized_rule)); } Optional sample_rate; @@ -213,8 +221,11 @@ Expected finalize_config( "Unable to parse overall sample_rate for trace sampling: "); } - result.rules.emplace( - catch_all, TraceSamplerRate{*maybe_rate, SamplingMechanism::RULE}); + TraceSamplerRule finalized_rule; + finalized_rule.rate = *maybe_rate; + finalized_rule.matcher = catch_all; + finalized_rule.mechanism = SamplingMechanism::RULE; + result.rules.emplace_back(std::move(finalized_rule)); } const auto [origin, max_per_second] = diff --git a/src/datadog/trace_sampler_config.h b/src/datadog/trace_sampler_config.h index 2e0f65b7..5dae211a 100644 --- a/src/datadog/trace_sampler_config.h +++ b/src/datadog/trace_sampler_config.h @@ -21,9 +21,12 @@ namespace datadog { namespace tracing { -struct TraceSamplerRate final { - Rate value; +struct TraceSamplerRule final { + Rate rate; + SpanMatcher matcher; SamplingMechanism mechanism; + + nlohmann::json to_json() const; }; struct TraceSamplerConfig { @@ -48,8 +51,8 @@ class FinalizedTraceSamplerConfig { public: double max_per_second; + std::vector rules; std::unordered_map metadata; - std::unordered_map rules; }; Expected finalize_config( diff --git a/test/test_tracer_config.cpp b/test/test_tracer_config.cpp index ed5a2df1..3aa536a1 100644 --- a/test/test_tracer_config.cpp +++ b/test/test_tracer_config.cpp @@ -606,11 +606,11 @@ TEST_CASE("TracerConfig::trace_sampler") { SECTION("yields one sampling rule") { auto finalized = finalize_config(config); REQUIRE(finalized); - REQUIRE(finalized->trace_sampler.rules.count(catch_all)); + REQUIRE(finalized->trace_sampler.rules.size() == 1); // and the default sample_rate is 100% - const auto& rate = finalized->trace_sampler.rules[catch_all]; - CHECK(rate.value == 1.0); - CHECK(rate.mechanism == SamplingMechanism::RULE); + const auto& rule = finalized->trace_sampler.rules.front(); + CHECK(rule.rate == 1.0); + CHECK(rule.mechanism == SamplingMechanism::RULE); } SECTION("has to have a valid sample_rate") { @@ -631,21 +631,24 @@ TEST_CASE("TracerConfig::trace_sampler") { rules[1].sample_rate = 0.6; auto finalized = finalize_config(config); REQUIRE(finalized); - REQUIRE(finalized->trace_sampler.rules.count(catch_all)); + REQUIRE(finalized->trace_sampler.rules.size() == 2); - const auto& rate = finalized->trace_sampler.rules[catch_all]; - CHECK(rate.value == 0.5); - CHECK(rate.mechanism == SamplingMechanism::RULE); + const auto& rule = finalized->trace_sampler.rules.front(); + CHECK(rule.rate == 0.5); + CHECK(rule.mechanism == SamplingMechanism::RULE); } SECTION("global sample_rate creates a catch-all rule") { config.trace_sampler.sample_rate = 0.25; auto finalized = finalize_config(config); REQUIRE(finalized); - REQUIRE(finalized->trace_sampler.rules.count(catch_all)); - const auto& rate = finalized->trace_sampler.rules[catch_all]; - CHECK(rate.value == 0.25); - CHECK(rate.mechanism == SamplingMechanism::RULE); + REQUIRE(finalized->trace_sampler.rules.size() == 1); + const auto& rule = finalized->trace_sampler.rules.front(); + REQUIRE(rule.rate == 0.25); + REQUIRE(rule.matcher.service == "*"); + REQUIRE(rule.matcher.name == "*"); + REQUIRE(rule.matcher.resource == "*"); + REQUIRE(rule.matcher.tags.empty()); } SECTION("DD_TRACE_SAMPLE_RATE") { @@ -653,10 +656,10 @@ TEST_CASE("TracerConfig::trace_sampler") { const EnvGuard guard{"DD_TRACE_SAMPLE_RATE", "0.5"}; auto finalized = finalize_config(config); REQUIRE(finalized); - REQUIRE(finalized->trace_sampler.rules.count(catch_all)); - const auto& rate = finalized->trace_sampler.rules[catch_all]; - CHECK(rate.value == 0.5); - CHECK(rate.mechanism == SamplingMechanism::RULE); + REQUIRE(finalized->trace_sampler.rules.size() == 1); + REQUIRE(finalized->trace_sampler.rules.front().rate == 0.5); + REQUIRE(finalized->trace_sampler.rules.front().mechanism == + SamplingMechanism::RULE); } SECTION("overrides TraceSamplerConfig::sample_rate") { @@ -664,10 +667,8 @@ TEST_CASE("TracerConfig::trace_sampler") { const EnvGuard guard{"DD_TRACE_SAMPLE_RATE", "0.5"}; auto finalized = finalize_config(config); REQUIRE(finalized); - REQUIRE(finalized->trace_sampler.rules.count(catch_all)); - const auto& rate = finalized->trace_sampler.rules[catch_all]; - CHECK(rate.value == 0.5); - CHECK(rate.mechanism == SamplingMechanism::RULE); + REQUIRE(finalized->trace_sampler.rules.size() == 1); + REQUIRE(finalized->trace_sampler.rules.front().rate == 0.5); } SECTION("has to have a valid value") { @@ -799,27 +800,16 @@ TEST_CASE("TracerConfig::trace_sampler") { CAPTURE(rules_json); CAPTURE(rules); REQUIRE(rules.size() == 2); - - SpanMatcher matcher; - matcher.service = "poohbear"; - matcher.name = "get.honey"; - - auto found = rules.find(matcher); - REQUIRE(found != rules.cend()); - - CHECK(found->second.value == 0); - CHECK(found->second.mechanism == SamplingMechanism::RULE); - - SpanMatcher matcher2; - matcher2.service = "*"; - matcher2.name = "*"; - matcher2.tags.emplace("error", "*"); - matcher2.resource = "/admin/*"; - - found = rules.find(matcher2); - REQUIRE(found != rules.cend()); - CHECK(found->second.value == 1); - CHECK(found->second.mechanism == SamplingMechanism::RULE); + REQUIRE(rules[0].matcher.service == "poohbear"); + REQUIRE(rules[0].matcher.name == "get.honey"); + REQUIRE(rules[0].rate == 0); + REQUIRE(rules[0].matcher.tags.size() == 0); + REQUIRE(rules[1].matcher.service == "*"); + REQUIRE(rules[1].matcher.name == "*"); + REQUIRE(rules[1].rate == 1); + REQUIRE(rules[1].matcher.tags.size() == 1); + REQUIRE(rules[1].matcher.tags.at("error") == "*"); + REQUIRE(rules[1].matcher.resource == "/admin/*"); } SECTION("must be valid") {