heterogeneous trace context extraction (#72)

* more context in extraction error messages
* (unrelated) update library version in example
* move extraction code into its own component
* remove copy/pasted comments
* eats, shoots, and leaves
* bin/format should forward arguments to clang-format
  This is used by bin/check to do a "dry run" format.
* default propagation style is now [Datadog, W3C]
* heterogeneous extraction, untested
* unit test for heterogeneous extraction
* missed a spot when moving code into extraction_util
* don't lie to your teammates
* trade a conditional jump for an addition
DataDog · Nov 17, 2023 · 4080fa5 · 4080fa5
1 parent 9781acf
commit 4080fa5
Show file tree

Hide file tree

Showing 12 changed files with 538 additions and 198 deletions.
diff --git a/BUILD.bazel b/BUILD.bazel
@@ -11,6 +11,7 @@ cc_library(
     "src/datadog/default_http_client_null.cpp",
     "src/datadog/environment.cpp",
     "src/datadog/error.cpp",
+    "src/datadog/extraction_util.cpp",
     "src/datadog/glob.cpp",
     "src/datadog/id_generator.cpp",
     "src/datadog/limiter.cpp",
@@ -59,6 +60,7 @@ cc_library(
     "src/datadog/event_scheduler.h",
     "src/datadog/expected.h",
     "src/datadog/extracted_data.h",
+    "src/datadog/extraction_util.h",
     "src/datadog/glob.h",
     "src/datadog/hex.h",
     "src/datadog/http_client.h",

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -106,6 +106,7 @@ target_sources(dd_trace_cpp-objects PRIVATE
 #     src/datadog/default_http_client_null.cpp use libcurl
     src/datadog/environment.cpp
     src/datadog/error.cpp
+    src/datadog/extraction_util.cpp
     src/datadog/glob.cpp
     src/datadog/id_generator.cpp
     src/datadog/limiter.cpp
@@ -160,6 +161,7 @@ target_sources(dd_trace_cpp-objects PUBLIC
   src/datadog/event_scheduler.h
   src/datadog/expected.h
   src/datadog/extracted_data.h
+  src/datadog/extraction_util.h
   src/datadog/glob.h
   src/datadog/hex.h
   src/datadog/http_client.h

diff --git a/bin/format b/bin/format
@@ -11,7 +11,7 @@ cd "$(dirname "$0")"/..
 # occasionally bumps the required version, reformatting everything.
 version=14
 formatter=clang-format-$version
-formatter_options="--style=file -i"
+formatter_options="--style=file -i $*"
 
 find_sources() {
     find src/ examples/ test/ fuzz/ -type f \( -name '*.h' -o -name '*.cpp' \) "$@"

diff --git a/examples/http-server/server/install-dd-trace-cpp b/examples/http-server/server/install-dd-trace-cpp
@@ -5,7 +5,7 @@ set -e
 
 # Adjust for the latest release.
 # See <https://github.com/DataDog/dd-trace-cpp/releases/latest>.
-VERSION_TAG=v0.1.9
+VERSION_TAG=v0.1.10
 
 cd /tmp
 git clone --branch "$VERSION_TAG" 'https://github.com/datadog/dd-trace-cpp'

diff --git a/src/datadog/extracted_data.h b/src/datadog/extracted_data.h
@@ -9,6 +9,7 @@
 #include <vector>
 
 #include "optional.h"
+#include "propagation_style.h"
 #include "trace_id.h"
 
 namespace datadog {
@@ -33,6 +34,13 @@ struct ExtractedData {
   // `additional_datadog_w3c_tracestate` is null.
   // `additional_datadog_w3c_tracestate` is used for the `W3C` injection style.
   Optional<std::string> additional_datadog_w3c_tracestate;
+  // `style` is the extraction style used to obtain this `ExtractedData`. It's
+  // for diagnostics.
+  Optional<PropagationStyle> style;
+  // `headers_examined` are the name/value pairs of HTTP headers (or equivalent
+  // request meta-data) that were looked up and had values during the
+  // preparation of this `ExtractedData`. It's for diagnostics.
+  std::vector<std::pair<std::string, std::string>> headers_examined;
 };
 
 }  // namespace tracing

diff --git a/src/datadog/extraction_util.cpp b/src/datadog/extraction_util.cpp
@@ -0,0 +1,283 @@
+#include "extraction_util.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+
+#include "extracted_data.h"
+#include "json.hpp"
+#include "logger.h"
+#include "parse_util.h"
+#include "tag_propagation.h"
+#include "tags.h"
+
+namespace datadog {
+namespace tracing {
+
+// Parse `value` as the higher 64 bits of a trace ID. `value` must be exactly
+// 16 characters long and must parse as a base-16 unsigned 64-bit integer;
+// otherwise, return null.
+Optional<std::uint64_t> parse_trace_id_high(const std::string& value) {
+  if (value.size() == 16) {
+    if (auto parsed = parse_uint64(value, 16)) {
+      return *parsed;
+    }
+  }
+
+  // Either the length was wrong or the digits did not parse.
+  return nullopt;
+}
+
+// Decode the encoded `trace_tags` and append each tag whose name begins with
+// "_dd.p." to `result.trace_tags`.
+// If `trace_tags` cannot be decoded, log the error using `logger`, note the
+// failure in `span_tags`, and leave `result` untouched.
+// The tag holding the higher 64 bits of the trace ID is additionally applied
+// to `result.trace_id`, if there is one. If that tag is malformed, it is
+// noted in `span_tags` and is not appended to `result.trace_tags`.
+void handle_trace_tags(StringView trace_tags, ExtractedData& result,
+                       std::unordered_map<std::string, std::string>& span_tags,
+                       Logger& logger) {
+  auto decoded = decode_tags(trace_tags);
+  if (auto* error = decoded.if_error()) {
+    logger.log_error(*error);
+    span_tags[tags::internal::propagation_error] = "decoding_error";
+    return;
+  }
+
+  for (auto& [name, content] : *decoded) {
+    const bool is_propagated = starts_with(name, "_dd.p.");
+    if (!is_propagated) {
+      continue;
+    }
+
+    if (name == tags::internal::trace_id_high) {
+      // _dd.p.tid contains the high 64 bits of the trace ID.
+      const Optional<std::uint64_t> upper_bits = parse_trace_id_high(content);
+      if (!upper_bits) {
+        span_tags[tags::internal::propagation_error] =
+            "malformed_tid " + content;
+        continue;
+      }
+
+      // This relies on the lower 64 bits of the trace ID having been
+      // extracted already (i.e. X-Datadog-Trace-ID is examined before the
+      // trace tags are). If there's no trace ID yet, the high bits are not
+      // applied to it.
+      if (result.trace_id) {
+        result.trace_id->high = *upper_bits;
+      }
+    }
+
+    result.trace_tags.emplace_back(std::move(name), std::move(content));
+  }
+}
+
+// Look up `header` in `headers` and parse its value as an unsigned 64-bit
+// integer in the specified `base`.
+// - If `headers` has no value for `header`, return null.
+// - If the value parses, return the resulting integer.
+// - Otherwise, return the parsing error prefixed with a description of what
+//   was being extracted, e.g.
+//   "Could not extract Datadog-style trace ID from x-datadog-trace-id: <value> ".
+// `header_kind` (e.g. "trace", "parent span") and `style_name` (e.g.
+// "Datadog", "B3") are used only in that error prefix.
+Expected<Optional<std::uint64_t>> extract_id_header(const DictReader& headers,
+                                                    StringView header,
+                                                    StringView header_kind,
+                                                    StringView style_name,
+                                                    int base) {
+  auto found = headers.lookup(header);
+  if (!found) {
+    return nullopt;
+  }
+  auto result = parse_uint64(*found, base);
+  if (auto* error = result.if_error()) {
+    std::string prefix;
+    prefix += "Could not extract ";
+    append(prefix, style_name);
+    prefix += "-style ";
+    append(prefix, header_kind);
+    // The leading space separates `header_kind` from "ID"; without it the
+    // message reads "traceID" or "parent spanID".
+    prefix += " ID from ";
+    append(prefix, header);
+    prefix += ": ";
+    append(prefix, *found);
+    prefix += ' ';
+    return error->with_prefix(prefix);
+  }
+  return *result;
+}
+
+// Extract Datadog-style trace context from `headers`. The headers are
+// examined in this order: x-datadog-trace-id, x-datadog-parent-id,
+// x-datadog-sampling-priority, x-datadog-origin, x-datadog-tags.
+// Return an error if the trace ID, parent ID, or sampling priority is present
+// but cannot be parsed. Problems with x-datadog-tags are handled by
+// `handle_trace_tags`, which is given `span_tags` and `logger`.
+Expected<ExtractedData> extract_datadog(
+    const DictReader& headers,
+    std::unordered_map<std::string, std::string>& span_tags, Logger& logger) {
+  ExtractedData extracted;
+  extracted.style = PropagationStyle::DATADOG;
+
+  auto maybe_trace_id =
+      extract_id_header(headers, "x-datadog-trace-id", "trace", "Datadog", 10);
+  if (auto* error = maybe_trace_id.if_error()) {
+    return std::move(*error);
+  }
+  if (*maybe_trace_id) {
+    extracted.trace_id = TraceID(**maybe_trace_id);
+  }
+
+  auto maybe_parent_id = extract_id_header(headers, "x-datadog-parent-id",
+                                           "parent span", "Datadog", 10);
+  if (auto* error = maybe_parent_id.if_error()) {
+    return std::move(*error);
+  }
+  extracted.parent_id = *maybe_parent_id;
+
+  const StringView priority_header = "x-datadog-sampling-priority";
+  if (auto raw_priority = headers.lookup(priority_header)) {
+    auto priority = parse_int(*raw_priority, 10);
+    if (auto* error = priority.if_error()) {
+      std::string prefix =
+          "Could not extract Datadog-style sampling priority from ";
+      append(prefix, priority_header);
+      prefix += ": ";
+      append(prefix, *raw_priority);
+      prefix += ' ';
+      return error->with_prefix(prefix);
+    }
+    extracted.sampling_priority = *priority;
+  }
+
+  if (auto origin = headers.lookup("x-datadog-origin")) {
+    extracted.origin = std::string(*origin);
+  }
+
+  // The trace tags are handled last, after any trace ID has been stored in
+  // `extracted`.
+  if (auto encoded_tags = headers.lookup("x-datadog-tags")) {
+    handle_trace_tags(*encoded_tags, extracted, span_tags, logger);
+  }
+
+  return extracted;
+}
+
+// Extract B3-style (multi-header) trace context from `headers`. The headers
+// are examined in this order: x-b3-traceid, x-b3-spanid, x-b3-sampled.
+// Return an error if any of those headers is present but cannot be parsed.
+// The span tags and logger parameters are unused.
+Expected<ExtractedData> extract_b3(
+    const DictReader& headers, std::unordered_map<std::string, std::string>&,
+    Logger&) {
+  ExtractedData extracted;
+  extracted.style = PropagationStyle::B3;
+
+  if (auto raw_trace_id = headers.lookup("x-b3-traceid")) {
+    auto trace_id = TraceID::parse_hex(*raw_trace_id);
+    if (auto* error = trace_id.if_error()) {
+      std::string prefix = "Could not extract B3-style trace ID from \"";
+      append(prefix, *raw_trace_id);
+      prefix += "\": ";
+      return error->with_prefix(prefix);
+    }
+    extracted.trace_id = *trace_id;
+  }
+
+  auto maybe_parent_id =
+      extract_id_header(headers, "x-b3-spanid", "parent span", "B3", 16);
+  if (auto* error = maybe_parent_id.if_error()) {
+    return std::move(*error);
+  }
+  extracted.parent_id = *maybe_parent_id;
+
+  const StringView sampled_header = "x-b3-sampled";
+  if (auto raw_sampled = headers.lookup(sampled_header)) {
+    auto priority = parse_int(*raw_sampled, 10);
+    if (auto* error = priority.if_error()) {
+      std::string prefix = "Could not extract B3-style sampling priority from ";
+      append(prefix, sampled_header);
+      prefix += ": ";
+      append(prefix, *raw_sampled);
+      prefix += ' ';
+      return error->with_prefix(prefix);
+    }
+    extracted.sampling_priority = *priority;
+  }
+
+  return extracted;
+}
+
+// The "none" extraction style: examine no headers, and return an
+// `ExtractedData` that has only its `style` set. All parameters are unused.
+Expected<ExtractedData> extract_none(
+    const DictReader&, std::unordered_map<std::string, std::string>&, Logger&) {
+  ExtractedData nothing_extracted;
+  nothing_extracted.style = PropagationStyle::NONE;
+  return nothing_extracted;
+}
+
+std::string extraction_error_prefix(
+    const Optional<PropagationStyle>& style,
+    const std::vector<std::pair<std::string, std::string>>& headers_examined) {
+  std::ostringstream stream;
+  stream << "While extracting trace context";
+  if (style) {
+    stream << " in the " << to_json(*style) << " propagation style";
+  }
+  auto it = headers_examined.begin();
+  if (it != headers_examined.end()) {
+    stream << " from the following headers: [";
+    stream << nlohmann::json(it->first + ": " + it->second);
+    for (++it; it != headers_examined.end(); ++it) {
+      stream << ", ";
+      stream << nlohmann::json(it->first + ": " + it->second);
+    }
+    stream << "]";
+  }
+  stream << ", an error occurred: ";
+  return stream.str();
+}
+
+AuditedReader::AuditedReader(const DictReader& underlying)
+    : underlying(underlying) {}
+
+// Look up `key` in the underlying reader and return the result. If a value
+// was found, first append the (key, value) pair to `entries_found`.
+Optional<StringView> AuditedReader::lookup(StringView key) const {
+  auto found = underlying.lookup(key);
+  if (!found) {
+    // Misses are not recorded.
+    return found;
+  }
+
+  entries_found.emplace_back(key, *found);
+  return found;
+}
+
+// Visit every entry of the underlying reader. For each entry, append the
+// (key, value) pair to `entries_found` and then invoke `visitor` with it.
+void AuditedReader::visit(
+    const std::function<void(StringView key, StringView value)>& visitor)
+    const {
+  const auto record_then_forward = [this, &visitor](StringView name,
+                                                    StringView content) {
+    entries_found.emplace_back(name, content);
+    visitor(name, content);
+  };
+
+  underlying.visit(record_then_forward);
+}
+
+// Combine the specified `contexts`, each the result of a different extraction
+// style and listed in extraction order, into one `ExtractedData`.
+//
+// - If no context has a trace ID, return a copy of the first context that has
+//   a parent ID, or a default `ExtractedData` if there is none.
+// - Otherwise, the first context that has a trace ID is the main context. If
+//   its style is W3C, return a copy of it as is.
+// - Otherwise, look among the later contexts for a W3C-style context with the
+//   same trace ID. If there is one, copy its tracestate-related fields into
+//   the result and append its `headers_examined` to the result's.
+ExtractedData merge(const std::vector<ExtractedData>& contexts) {
+  const auto has_trace_id = [](const ExtractedData& data) {
+    return data.trace_id.has_value();
+  };
+  const auto primary =
+      std::find_if(contexts.begin(), contexts.end(), has_trace_id);
+
+  if (primary == contexts.end()) {
+    // Nothing extracted a trace ID. Prefer a context that has a parent ID, so
+    // that the error "extracted a parent ID without a trace ID" remains
+    // possible, if that's what happened.
+    ExtractedData fallback;
+    const auto has_parent_id = [](const ExtractedData& data) {
+      return data.parent_id.has_value();
+    };
+    const auto with_parent =
+        std::find_if(contexts.begin(), contexts.end(), has_parent_id);
+    if (with_parent != contexts.end()) {
+      fallback = *with_parent;
+    }
+    return fallback;
+  }
+
+  // `primary` is the first context that yielded a trace ID. It's the basis of
+  // the result.
+  ExtractedData result = *primary;
+  if (result.style == PropagationStyle::W3C) {
+    return result;
+  }
+
+  // The main context is not W3C. A later W3C context for the same trace might
+  // carry tracestate that we want to include.
+  const auto is_matching_w3c = [&](const ExtractedData& data) {
+    return data.style == PropagationStyle::W3C &&
+           data.trace_id == primary->trace_id;
+  };
+  const auto w3c = std::find_if(primary + 1, contexts.end(), is_matching_w3c);
+  if (w3c == contexts.end()) {
+    return result;
+  }
+
+  result.additional_w3c_tracestate = w3c->additional_w3c_tracestate;
+  result.additional_datadog_w3c_tracestate =
+      w3c->additional_datadog_w3c_tracestate;
+  result.headers_examined.insert(result.headers_examined.end(),
+                                 w3c->headers_examined.begin(),
+                                 w3c->headers_examined.end());
+  return result;
+}
+
+}  // namespace tracing
+}  // namespace datadog