Skip to content

Commit

Permalink
fix: trace sampling rules order (#125)
Browse files Browse the repository at this point in the history
Reset trace sampling rules legacy order.
  • Loading branch information
dmehala authored May 28, 2024
1 parent 0ada79d commit f6b9333
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 88 deletions.
52 changes: 39 additions & 13 deletions src/datadog/config_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ namespace datadog {
namespace tracing {
namespace {

using Rules =
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash>;
using Rules = std::vector<TraceSamplerRule>;

Expected<Rules> parse_trace_sampling_rules(const nlohmann::json& json_rules) {
Rules parsed_rules;
Expand All @@ -27,7 +26,9 @@ Expected<Rules> parse_trace_sampling_rules(const nlohmann::json& json_rules) {
return error->with_prefix(prefix);
}

TraceSamplerRate rate;
TraceSamplerRule rule;
rule.matcher = std::move(*matcher);

if (auto sample_rate = json_rule.find("sample_rate");
sample_rate != json_rule.end()) {
type = sample_rate->type_name();
Expand All @@ -42,7 +43,10 @@ Expected<Rules> parse_trace_sampling_rules(const nlohmann::json& json_rules) {
return *error;
}

rate.value = *maybe_rate;
rule.rate = *maybe_rate;
} else {
return Error{Error::TRACE_SAMPLING_RULES_INVALID_JSON,
"Missing \"sample_rate\" field"};
}

if (auto provenance_it = json_rule.find("provenance");
Expand All @@ -53,15 +57,21 @@ Expected<Rules> parse_trace_sampling_rules(const nlohmann::json& json_rules) {
std::move(message)};
}

auto provenance = provenance_it->get<std::string_view>();
auto provenance = to_lower(provenance_it->get<StringView>());
if (provenance == "customer") {
rate.mechanism = SamplingMechanism::REMOTE_RULE;
rule.mechanism = SamplingMechanism::REMOTE_RULE;
} else if (provenance == "dynamic") {
rate.mechanism = SamplingMechanism::REMOTE_ADAPTIVE_RULE;
rule.mechanism = SamplingMechanism::REMOTE_ADAPTIVE_RULE;
} else {
return Error{Error::TRACE_SAMPLING_RULES_UNKNOWN_PROPERTY,
"Unknown \"provenance\" value"};
}
} else {
return Error{Error::TRACE_SAMPLING_RULES_INVALID_JSON,
"Missing \"provenance\" field"};
}

parsed_rules.emplace(std::move(*matcher), std::move(rate));
parsed_rules.emplace_back(std::move(rule));
}

return parsed_rules;
Expand Down Expand Up @@ -98,7 +108,19 @@ std::vector<ConfigMetadata> ConfigManager::update(const ConfigUpdate& conf) {

std::lock_guard<std::mutex> lock(mutex_);

decltype(rules_) rules;
// NOTE(@dmehala): Sampling rules are generally not well specified.
//
// Rules are evaluated in the order they are inserted, which means the most
// specific matching rule might not be evaluated, even though it should be.
// For now, we must follow this legacy behavior.
//
// Additionally, I exploit this behavior to avoid a merge operation.
// The resulting array can contain duplicate `SpanMatcher`, but only the first
// encountered one will be evaluated, acting as an override.
//
// Remote Configuration rules will/should always be placed at the beginning of
// the array, ensuring they are evaluated first.
auto rules = rules_;

if (!conf.trace_sampling_rate) {
auto found = default_metadata_.find(ConfigName::TRACE_SAMPLING_RATE);
Expand All @@ -112,7 +134,12 @@ std::vector<ConfigMetadata> ConfigManager::update(const ConfigUpdate& conf) {
ConfigMetadata::Origin::REMOTE_CONFIG);

auto rate = Rate::from(*conf.trace_sampling_rate);
rules[catch_all] = TraceSamplerRate{*rate, SamplingMechanism::RULE};

TraceSamplerRule rule;
rule.rate = *rate;
rule.matcher = catch_all;
rule.mechanism = SamplingMechanism::RULE;
rules.emplace(rules.cbegin(), std::move(rule));

metadata.emplace_back(std::move(trace_sampling_metadata));
}
Expand All @@ -131,14 +158,13 @@ std::vector<ConfigMetadata> ConfigManager::update(const ConfigUpdate& conf) {
if (auto error = maybe_rules.if_error()) {
trace_sampling_rules_metadata.error = std::move(*error);
} else {
rules.merge(*maybe_rules);
rules.insert(rules.cbegin(), maybe_rules->begin(), maybe_rules->end());
}

metadata.emplace_back(std::move(trace_sampling_rules_metadata));
}

rules.insert(rules_.cbegin(), rules_.cend());
trace_sampler_->set_rules(rules);
trace_sampler_->set_rules(std::move(rules));

if (!conf.tags) {
reset_config(ConfigName::TAGS, span_defaults_, metadata);
Expand Down
2 changes: 1 addition & 1 deletion src/datadog/config_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class ConfigManager {
std::unordered_map<ConfigName, ConfigMetadata> default_metadata_;

std::shared_ptr<TraceSampler> trace_sampler_;
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash> rules_;
std::vector<TraceSamplerRule> rules_;

DynamicConfig<std::shared_ptr<const SpanDefaults>> span_defaults_;
DynamicConfig<bool> report_traces_;
Expand Down
9 changes: 0 additions & 9 deletions src/datadog/span_matcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,6 @@ struct SpanMatcher {
return (service == other.service && name == other.name &&
resource == other.resource && tags == other.tags);
}

// TODO: add tags
// Hash functor for `SpanMatcher`. Only `service`, `name` and `resource`
// contribute to the result; `tags` are not part of the hash.
struct Hash {
  size_t operator()(const SpanMatcher& rule) const {
    const std::hash<std::string> hasher{};
    // Each field's hash is shifted by a different amount before the XOR.
    const size_t service_part = hasher(rule.service);
    const size_t name_part = hasher(rule.name) << 1;
    const size_t resource_part = hasher(rule.resource) << 2;
    return service_part ^ name_part ^ resource_part;
  }
};
};

static const SpanMatcher catch_all;
Expand Down
20 changes: 8 additions & 12 deletions src/datadog/trace_sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@ TraceSampler::TraceSampler(const FinalizedTraceSamplerConfig& config,
limiter_(clock, config.max_per_second),
limiter_max_per_second_(config.max_per_second) {}

void TraceSampler::set_rules(
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash>
rules) {
void TraceSampler::set_rules(std::vector<TraceSamplerRule> rules) {
std::lock_guard lock(mutex_);
rules_ = std::move(rules);
}
Expand All @@ -35,18 +33,18 @@ SamplingDecision TraceSampler::decide(const SpanData& span) {
// First check sampling rules.
const auto found_rule =
std::find_if(rules_.cbegin(), rules_.cend(),
[&](const auto& it) { return it.first.match(span); });
[&](const auto& it) { return it.matcher.match(span); });

// `mutex_` protects `limiter_`, `collector_sample_rates_`, and
// `collector_default_sample_rate_`, so let's lock it here.
std::lock_guard lock(mutex_);

if (found_rule != rules_.end()) {
const auto& [rule, rate] = *found_rule;
decision.mechanism = int(rate.mechanism);
const auto& rule = *found_rule;
decision.mechanism = int(rule.mechanism);
decision.limiter_max_per_second = limiter_max_per_second_;
decision.configured_rate = rate.value;
const std::uint64_t threshold = max_id_from_rate(rate.value);
decision.configured_rate = rule.rate;
const std::uint64_t threshold = max_id_from_rate(rule.rate);
if (knuth_hash(span.trace_id.low) < threshold) {
const auto result = limiter_.allow();
if (result.allowed) {
Expand Down Expand Up @@ -106,10 +104,8 @@ void TraceSampler::handle_collector_response(

nlohmann::json TraceSampler::config_json() const {
std::vector<nlohmann::json> rules;
for (const auto& [rule, rate] : rules_) {
nlohmann::json j = rule.to_json();
j["sampling_rate"] = rate.value.value();
rules.push_back(std::move(j));
for (const auto& rule : rules_) {
rules.push_back(rule.to_json());
}

return nlohmann::json::object({
Expand Down
6 changes: 2 additions & 4 deletions src/datadog/trace_sampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,16 +107,14 @@ class TraceSampler {

Optional<Rate> collector_default_sample_rate_;
std::unordered_map<std::string, Rate> collector_sample_rates_;
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash> rules_;
std::vector<TraceSamplerRule> rules_;
Limiter limiter_;
double limiter_max_per_second_;

public:
TraceSampler(const FinalizedTraceSamplerConfig& config, const Clock& clock);

void set_rules(
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash>
rules);
void set_rules(std::vector<TraceSamplerRule> rules);

// Return a sampling decision for the specified root span.
SamplingDecision decide(const SpanData&);
Expand Down
21 changes: 16 additions & 5 deletions src/datadog/trace_sampler_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,12 @@ std::string to_string(const std::vector<TraceSamplerConfig::Rule> &rules) {

} // namespace

// Serialize this rule: start from the matcher's JSON representation and add
// the rule's rate under the "sample_rate" key.
nlohmann::json TraceSamplerRule::to_json() const {
  auto serialized = matcher.to_json();
  serialized["sample_rate"] = rate.value();
  return serialized;
}

TraceSamplerConfig::Rule::Rule(const SpanMatcher &base) : SpanMatcher(base) {}

Expected<FinalizedTraceSamplerConfig> finalize_config(
Expand Down Expand Up @@ -181,9 +187,11 @@ Expected<FinalizedTraceSamplerConfig> finalize_config(
return error->with_prefix(prefix);
}

SpanMatcher matcher = rule;
result.rules.emplace(
matcher, TraceSamplerRate{*maybe_rate, SamplingMechanism::RULE});
TraceSamplerRule finalized_rule;
finalized_rule.matcher = rule;
finalized_rule.rate = *maybe_rate;
finalized_rule.mechanism = SamplingMechanism::RULE;
result.rules.emplace_back(std::move(finalized_rule));
}

Optional<double> sample_rate;
Expand Down Expand Up @@ -213,8 +221,11 @@ Expected<FinalizedTraceSamplerConfig> finalize_config(
"Unable to parse overall sample_rate for trace sampling: ");
}

result.rules.emplace(
catch_all, TraceSamplerRate{*maybe_rate, SamplingMechanism::RULE});
TraceSamplerRule finalized_rule;
finalized_rule.rate = *maybe_rate;
finalized_rule.matcher = catch_all;
finalized_rule.mechanism = SamplingMechanism::RULE;
result.rules.emplace_back(std::move(finalized_rule));
}

const auto [origin, max_per_second] =
Expand Down
9 changes: 6 additions & 3 deletions src/datadog/trace_sampler_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@
namespace datadog {
namespace tracing {

struct TraceSamplerRate final {
Rate value;
struct TraceSamplerRule final {
Rate rate;
SpanMatcher matcher;
SamplingMechanism mechanism;

nlohmann::json to_json() const;
};

struct TraceSamplerConfig {
Expand All @@ -48,8 +51,8 @@ class FinalizedTraceSamplerConfig {

public:
double max_per_second;
std::vector<TraceSamplerRule> rules;
std::unordered_map<ConfigName, ConfigMetadata> metadata;
std::unordered_map<SpanMatcher, TraceSamplerRate, SpanMatcher::Hash> rules;
};

Expected<FinalizedTraceSamplerConfig> finalize_config(
Expand Down
72 changes: 31 additions & 41 deletions test/test_tracer_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -606,11 +606,11 @@ TEST_CASE("TracerConfig::trace_sampler") {
SECTION("yields one sampling rule") {
auto finalized = finalize_config(config);
REQUIRE(finalized);
REQUIRE(finalized->trace_sampler.rules.count(catch_all));
REQUIRE(finalized->trace_sampler.rules.size() == 1);
// and the default sample_rate is 100%
const auto& rate = finalized->trace_sampler.rules[catch_all];
CHECK(rate.value == 1.0);
CHECK(rate.mechanism == SamplingMechanism::RULE);
const auto& rule = finalized->trace_sampler.rules.front();
CHECK(rule.rate == 1.0);
CHECK(rule.mechanism == SamplingMechanism::RULE);
}

SECTION("has to have a valid sample_rate") {
Expand All @@ -631,43 +631,44 @@ TEST_CASE("TracerConfig::trace_sampler") {
rules[1].sample_rate = 0.6;
auto finalized = finalize_config(config);
REQUIRE(finalized);
REQUIRE(finalized->trace_sampler.rules.count(catch_all));
REQUIRE(finalized->trace_sampler.rules.size() == 2);

const auto& rate = finalized->trace_sampler.rules[catch_all];
CHECK(rate.value == 0.5);
CHECK(rate.mechanism == SamplingMechanism::RULE);
const auto& rule = finalized->trace_sampler.rules.front();
CHECK(rule.rate == 0.5);
CHECK(rule.mechanism == SamplingMechanism::RULE);
}

SECTION("global sample_rate creates a catch-all rule") {
config.trace_sampler.sample_rate = 0.25;
auto finalized = finalize_config(config);
REQUIRE(finalized);
REQUIRE(finalized->trace_sampler.rules.count(catch_all));
const auto& rate = finalized->trace_sampler.rules[catch_all];
CHECK(rate.value == 0.25);
CHECK(rate.mechanism == SamplingMechanism::RULE);
REQUIRE(finalized->trace_sampler.rules.size() == 1);
const auto& rule = finalized->trace_sampler.rules.front();
REQUIRE(rule.rate == 0.25);
REQUIRE(rule.matcher.service == "*");
REQUIRE(rule.matcher.name == "*");
REQUIRE(rule.matcher.resource == "*");
REQUIRE(rule.matcher.tags.empty());
}

SECTION("DD_TRACE_SAMPLE_RATE") {
SECTION("sets the global sample_rate") {
const EnvGuard guard{"DD_TRACE_SAMPLE_RATE", "0.5"};
auto finalized = finalize_config(config);
REQUIRE(finalized);
REQUIRE(finalized->trace_sampler.rules.count(catch_all));
const auto& rate = finalized->trace_sampler.rules[catch_all];
CHECK(rate.value == 0.5);
CHECK(rate.mechanism == SamplingMechanism::RULE);
REQUIRE(finalized->trace_sampler.rules.size() == 1);
REQUIRE(finalized->trace_sampler.rules.front().rate == 0.5);
REQUIRE(finalized->trace_sampler.rules.front().mechanism ==
SamplingMechanism::RULE);
}

SECTION("overrides TraceSamplerConfig::sample_rate") {
config.trace_sampler.sample_rate = 0.25;
const EnvGuard guard{"DD_TRACE_SAMPLE_RATE", "0.5"};
auto finalized = finalize_config(config);
REQUIRE(finalized);
REQUIRE(finalized->trace_sampler.rules.count(catch_all));
const auto& rate = finalized->trace_sampler.rules[catch_all];
CHECK(rate.value == 0.5);
CHECK(rate.mechanism == SamplingMechanism::RULE);
REQUIRE(finalized->trace_sampler.rules.size() == 1);
REQUIRE(finalized->trace_sampler.rules.front().rate == 0.5);
}

SECTION("has to have a valid value") {
Expand Down Expand Up @@ -799,27 +800,16 @@ TEST_CASE("TracerConfig::trace_sampler") {
CAPTURE(rules_json);
CAPTURE(rules);
REQUIRE(rules.size() == 2);

SpanMatcher matcher;
matcher.service = "poohbear";
matcher.name = "get.honey";

auto found = rules.find(matcher);
REQUIRE(found != rules.cend());

CHECK(found->second.value == 0);
CHECK(found->second.mechanism == SamplingMechanism::RULE);

SpanMatcher matcher2;
matcher2.service = "*";
matcher2.name = "*";
matcher2.tags.emplace("error", "*");
matcher2.resource = "/admin/*";

found = rules.find(matcher2);
REQUIRE(found != rules.cend());
CHECK(found->second.value == 1);
CHECK(found->second.mechanism == SamplingMechanism::RULE);
REQUIRE(rules[0].matcher.service == "poohbear");
REQUIRE(rules[0].matcher.name == "get.honey");
REQUIRE(rules[0].rate == 0);
REQUIRE(rules[0].matcher.tags.size() == 0);
REQUIRE(rules[1].matcher.service == "*");
REQUIRE(rules[1].matcher.name == "*");
REQUIRE(rules[1].rate == 1);
REQUIRE(rules[1].matcher.tags.size() == 1);
REQUIRE(rules[1].matcher.tags.at("error") == "*");
REQUIRE(rules[1].matcher.resource == "/admin/*");
}

SECTION("must be valid") {
Expand Down

0 comments on commit f6b9333

Please sign in to comment.