Skip to content

Commit

Permalink
heterogeneous trace context extraction (#72)
Browse files Browse the repository at this point in the history
* more context in extraction error messages

* (unrelated) update library version in example

* move extraction code into its own component

* remove copy/pasted comments

* eats, shoots, and leaves

* bin/format should forward arguments to clang-format

This is used by bin/check to do a "dry run" format.

* default propagation style is now [Datadog, W3C]

* heterogeneous extraction, untested

* unit test for heterogeneous extraction

* missed a spot when moving code into extraction_util

* don't lie to your teammates

* trade a conditional jump for an addition
  • Loading branch information
dgoffredo authored Nov 17, 2023
1 parent 9781acf commit 4080fa5
Show file tree
Hide file tree
Showing 12 changed files with 538 additions and 198 deletions.
2 changes: 2 additions & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ cc_library(
"src/datadog/default_http_client_null.cpp",
"src/datadog/environment.cpp",
"src/datadog/error.cpp",
"src/datadog/extraction_util.cpp",
"src/datadog/glob.cpp",
"src/datadog/id_generator.cpp",
"src/datadog/limiter.cpp",
Expand Down Expand Up @@ -59,6 +60,7 @@ cc_library(
"src/datadog/event_scheduler.h",
"src/datadog/expected.h",
"src/datadog/extracted_data.h",
"src/datadog/extraction_util.h",
"src/datadog/glob.h",
"src/datadog/hex.h",
"src/datadog/http_client.h",
Expand Down
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ target_sources(dd_trace_cpp-objects PRIVATE
# src/datadog/default_http_client_null.cpp use libcurl
src/datadog/environment.cpp
src/datadog/error.cpp
src/datadog/extraction_util.cpp
src/datadog/glob.cpp
src/datadog/id_generator.cpp
src/datadog/limiter.cpp
Expand Down Expand Up @@ -160,6 +161,7 @@ target_sources(dd_trace_cpp-objects PUBLIC
src/datadog/event_scheduler.h
src/datadog/expected.h
src/datadog/extracted_data.h
src/datadog/extraction_util.h
src/datadog/glob.h
src/datadog/hex.h
src/datadog/http_client.h
Expand Down
2 changes: 1 addition & 1 deletion bin/format
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ cd "$(dirname "$0")"/..
# occasionally bumps the required version, reformatting everything.
version=14
formatter=clang-format-$version
formatter_options="--style=file -i"
formatter_options="--style=file -i $*"

find_sources() {
find src/ examples/ test/ fuzz/ -type f \( -name '*.h' -o -name '*.cpp' \) "$@"
Expand Down
2 changes: 1 addition & 1 deletion examples/http-server/server/install-dd-trace-cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -e

# Adjust for the latest release.
# See <https://github.com/DataDog/dd-trace-cpp/releases/latest>.
VERSION_TAG=v0.1.9
VERSION_TAG=v0.1.10

cd /tmp
git clone --branch "$VERSION_TAG" 'https://github.com/datadog/dd-trace-cpp'
Expand Down
8 changes: 8 additions & 0 deletions src/datadog/extracted_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <vector>

#include "optional.h"
#include "propagation_style.h"
#include "trace_id.h"

namespace datadog {
Expand All @@ -33,6 +34,13 @@ struct ExtractedData {
// `additional_datadog_w3c_tracestate` is null.
// `additional_datadog_w3c_tracestate` is used for the `W3C` injection style.
Optional<std::string> additional_datadog_w3c_tracestate;
// `style` is the extraction style used to obtain this `ExtractedData`. It's
// for diagnostics.
Optional<PropagationStyle> style;
// `headers_examined` are the name/value pairs of HTTP headers (or equivalent
// request meta-data) that were looked up and had values during the
// preparation of this `ExtractedData`. It's for diagnostics.
std::vector<std::pair<std::string, std::string>> headers_examined;
};

} // namespace tracing
Expand Down
283 changes: 283 additions & 0 deletions src/datadog/extraction_util.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@
#include "extraction_util.h"

#include <algorithm>
#include <cstdint>
#include <sstream>
#include <string>
#include <unordered_map>

#include "extracted_data.h"
#include "json.hpp"
#include "logger.h"
#include "parse_util.h"
#include "tag_propagation.h"
#include "tags.h"

namespace datadog {
namespace tracing {

Optional<std::uint64_t> parse_trace_id_high(const std::string& value) {
if (value.size() != 16) {
return nullopt;
}

auto high = parse_uint64(value, 16);
if (high) {
return *high;
}

return nullopt;
}

void handle_trace_tags(StringView trace_tags, ExtractedData& result,
std::unordered_map<std::string, std::string>& span_tags,
Logger& logger) {
auto maybe_trace_tags = decode_tags(trace_tags);
if (auto* error = maybe_trace_tags.if_error()) {
logger.log_error(*error);
span_tags[tags::internal::propagation_error] = "decoding_error";
return;
}

for (auto& [key, value] : *maybe_trace_tags) {
if (!starts_with(key, "_dd.p.")) {
continue;
}

if (key == tags::internal::trace_id_high) {
// _dd.p.tid contains the high 64 bits of the trace ID.
const Optional<std::uint64_t> high = parse_trace_id_high(value);
if (!high) {
span_tags[tags::internal::propagation_error] = "malformed_tid " + value;
continue;
}

if (result.trace_id) {
// Note that this assumes the lower 64 bits of the trace ID have already
// been extracted (i.e. we look for X-Datadog-Trace-ID first).
result.trace_id->high = *high;
}
}

result.trace_tags.emplace_back(std::move(key), std::move(value));
}
}

Expected<Optional<std::uint64_t>> extract_id_header(const DictReader& headers,
StringView header,
StringView header_kind,
StringView style_name,
int base) {
auto found = headers.lookup(header);
if (!found) {
return nullopt;
}
auto result = parse_uint64(*found, base);
if (auto* error = result.if_error()) {
std::string prefix;
prefix += "Could not extract ";
append(prefix, style_name);
prefix += "-style ";
append(prefix, header_kind);
prefix += "ID from ";
append(prefix, header);
prefix += ": ";
append(prefix, *found);
prefix += ' ';
return error->with_prefix(prefix);
}
return *result;
}

Expected<ExtractedData> extract_datadog(
const DictReader& headers,
std::unordered_map<std::string, std::string>& span_tags, Logger& logger) {
ExtractedData result;
result.style = PropagationStyle::DATADOG;

auto trace_id =
extract_id_header(headers, "x-datadog-trace-id", "trace", "Datadog", 10);
if (auto* error = trace_id.if_error()) {
return std::move(*error);
}
if (*trace_id) {
result.trace_id = TraceID(**trace_id);
}

auto parent_id = extract_id_header(headers, "x-datadog-parent-id",
"parent span", "Datadog", 10);
if (auto* error = parent_id.if_error()) {
return std::move(*error);
}
result.parent_id = *parent_id;

const StringView sampling_priority_header = "x-datadog-sampling-priority";
if (auto found = headers.lookup(sampling_priority_header)) {
auto sampling_priority = parse_int(*found, 10);
if (auto* error = sampling_priority.if_error()) {
std::string prefix;
prefix += "Could not extract Datadog-style sampling priority from ";
append(prefix, sampling_priority_header);
prefix += ": ";
append(prefix, *found);
prefix += ' ';
return error->with_prefix(prefix);
}
result.sampling_priority = *sampling_priority;
}

auto origin = headers.lookup("x-datadog-origin");
if (origin) {
result.origin = std::string(*origin);
}

auto trace_tags = headers.lookup("x-datadog-tags");
if (trace_tags) {
handle_trace_tags(*trace_tags, result, span_tags, logger);
}

return result;
}

Expected<ExtractedData> extract_b3(
const DictReader& headers, std::unordered_map<std::string, std::string>&,
Logger&) {
ExtractedData result;
result.style = PropagationStyle::B3;

if (auto found = headers.lookup("x-b3-traceid")) {
auto parsed = TraceID::parse_hex(*found);
if (auto* error = parsed.if_error()) {
std::string prefix = "Could not extract B3-style trace ID from \"";
append(prefix, *found);
prefix += "\": ";
return error->with_prefix(prefix);
}
result.trace_id = *parsed;
}

auto parent_id =
extract_id_header(headers, "x-b3-spanid", "parent span", "B3", 16);
if (auto* error = parent_id.if_error()) {
return std::move(*error);
}
result.parent_id = *parent_id;

const StringView sampling_priority_header = "x-b3-sampled";
if (auto found = headers.lookup(sampling_priority_header)) {
auto sampling_priority = parse_int(*found, 10);
if (auto* error = sampling_priority.if_error()) {
std::string prefix;
prefix += "Could not extract B3-style sampling priority from ";
append(prefix, sampling_priority_header);
prefix += ": ";
append(prefix, *found);
prefix += ' ';
return error->with_prefix(prefix);
}
result.sampling_priority = *sampling_priority;
}

return result;
}

Expected<ExtractedData> extract_none(
const DictReader&, std::unordered_map<std::string, std::string>&, Logger&) {
ExtractedData result;
result.style = PropagationStyle::NONE;
return result;
}

std::string extraction_error_prefix(
const Optional<PropagationStyle>& style,
const std::vector<std::pair<std::string, std::string>>& headers_examined) {
std::ostringstream stream;
stream << "While extracting trace context";
if (style) {
stream << " in the " << to_json(*style) << " propagation style";
}
auto it = headers_examined.begin();
if (it != headers_examined.end()) {
stream << " from the following headers: [";
stream << nlohmann::json(it->first + ": " + it->second);
for (++it; it != headers_examined.end(); ++it) {
stream << ", ";
stream << nlohmann::json(it->first + ": " + it->second);
}
stream << "]";
}
stream << ", an error occurred: ";
return stream.str();
}

AuditedReader::AuditedReader(const DictReader& underlying)
: underlying(underlying) {}

Optional<StringView> AuditedReader::lookup(StringView key) const {
auto value = underlying.lookup(key);
if (value) {
entries_found.emplace_back(key, *value);
}
return value;
}

void AuditedReader::visit(
const std::function<void(StringView key, StringView value)>& visitor)
const {
underlying.visit([&, this](StringView key, StringView value) {
entries_found.emplace_back(key, value);
visitor(key, value);
});
}

ExtractedData merge(const std::vector<ExtractedData>& contexts) {
ExtractedData result;

const auto found = std::find_if(
contexts.begin(), contexts.end(),
[](const ExtractedData& data) { return data.trace_id.has_value(); });

if (found == contexts.end()) {
// Nothing extracted a trace ID. Return the first context that includes a
// parent ID, if any, or otherwise just return an empty `ExtractedData`.
// The purpose of looking for a parent ID is to allow for the error
// "extracted a parent ID without a trace ID," if that's what happened.
const auto other = std::find_if(
contexts.begin(), contexts.end(),
[](const ExtractedData& data) { return data.parent_id.has_value(); });
if (other != contexts.end()) {
result = *other;
}
return result;
}

// `found` refers to the first extracted context that yielded a trace ID.
// This will be our main context.
//
// If the style of `found` is not W3C, then examine the remaining contexts
// for W3C-style tracestate that we might want to include in `result`.
result = *found;
if (result.style == PropagationStyle::W3C) {
return result;
}

const auto other =
std::find_if(found + 1, contexts.end(), [&](const ExtractedData& data) {
return data.style == PropagationStyle::W3C &&
data.trace_id == found->trace_id;
});

if (other != contexts.end()) {
result.additional_w3c_tracestate = other->additional_w3c_tracestate;
result.additional_datadog_w3c_tracestate =
other->additional_datadog_w3c_tracestate;
result.headers_examined.insert(result.headers_examined.end(),
other->headers_examined.begin(),
other->headers_examined.end());
}

return result;
}

} // namespace tracing
} // namespace datadog
Loading

0 comments on commit 4080fa5

Please sign in to comment.