Merge pull request #3162 from DataDog/ivoanjo/prof-7440-timeline-stat…

…es-v1 [PROF-7440] Add approximate thread state categorization for timeline
DataDog · Oct 3, 2023 · 0b0dd29 · 0b0dd29
2 parents 6a3a9e7 + f508e71
commit 0b0dd29
Show file tree

Hide file tree

Showing 9 changed files with 351 additions and 46 deletions.
diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.c b/ext/ddtrace_profiling_native_extension/collectors_stack.c
@@ -40,7 +40,7 @@ static void record_placeholder_stack_in_native_code(
   sampling_buffer* buffer,
   VALUE recorder_instance,
   sample_values values,
-  ddog_prof_Slice_Label labels,
+  sample_labels labels,
   sampling_buffer *record_buffer,
   int extra_frames_in_record_buffer
 );
@@ -49,7 +49,7 @@ static void sample_thread_internal(
   sampling_buffer* buffer,
   VALUE recorder_instance,
   sample_values values,
-  ddog_prof_Slice_Label labels,
+  sample_labels labels,
   sampling_buffer *record_buffer,
   int extra_frames_in_record_buffer
 );
@@ -85,13 +85,14 @@ static VALUE _native_sample(
   VALUE zero = INT2NUM(0);
   sample_values values = {
     .cpu_time_ns   = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-time"),      zero)),
-    .cpu_samples   = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-samples"),   zero)),
+    .cpu_or_wall_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-samples"), zero)),
     .wall_time_ns  = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("wall-time"),     zero)),
     .alloc_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("alloc-samples"), zero)),
   };
 
   long labels_count = RARRAY_LEN(labels_array) + RARRAY_LEN(numeric_labels_array);
   ddog_prof_Label labels[labels_count];
+  ddog_prof_Label *state_label = NULL;
 
   for (int i = 0; i < RARRAY_LEN(labels_array); i++) {
     VALUE key_str_pair = rb_ary_entry(labels_array, i);
@@ -100,6 +101,10 @@ static VALUE _native_sample(
       .key = char_slice_from_ruby_string(rb_ary_entry(key_str_pair, 0)),
       .str = char_slice_from_ruby_string(rb_ary_entry(key_str_pair, 1))
     };
+
+    if (rb_str_equal(rb_ary_entry(key_str_pair, 0), rb_str_new_cstr("state"))) {
+      state_label = &labels[i];
+    }
   }
   for (int i = 0; i < RARRAY_LEN(numeric_labels_array); i++) {
     VALUE key_str_pair = rb_ary_entry(numeric_labels_array, i);
@@ -115,12 +120,14 @@ static VALUE _native_sample(
 
   sampling_buffer *buffer = sampling_buffer_new(max_frames_requested);
 
+  ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = labels_count};
+
   sample_thread(
     thread,
     buffer,
     recorder_instance,
     values,
-    (ddog_prof_Slice_Label) {.ptr = labels, .len = labels_count},
+    (sample_labels) {.labels = slice_labels, .state_label = state_label},
     RTEST(in_gc) ? SAMPLE_IN_GC : SAMPLE_REGULAR
   );
 
@@ -134,7 +141,7 @@ void sample_thread(
   sampling_buffer* buffer,
   VALUE recorder_instance,
   sample_values values,
-  ddog_prof_Slice_Label labels,
+  sample_labels labels,
   sample_type type
 ) {
   // Samples thread into recorder
@@ -171,6 +178,8 @@ void sample_thread(
   rb_raise(rb_eArgError, "Unexpected value for sample_type: %d", type);
 }
 
+#define CHARSLICE_EQUALS(must_be_a_literal, charslice) (strlen("" must_be_a_literal) == charslice.len && strncmp(must_be_a_literal, charslice.ptr, charslice.len) == 0)
+
 // Idea: Should we release the global vm lock (GVL) after we get the data from `rb_profile_frames`? That way other Ruby threads
 // could continue making progress while the sample was ingested into the profile.
 //
@@ -195,7 +204,7 @@ static void sample_thread_internal(
   sampling_buffer* buffer,
   VALUE recorder_instance,
   sample_values values,
-  ddog_prof_Slice_Label labels,
+  sample_labels labels,
   sampling_buffer *record_buffer,
   int extra_frames_in_record_buffer
 ) {
@@ -228,6 +237,15 @@ static void sample_thread_internal(
   VALUE last_ruby_frame = Qnil;
   int last_ruby_line = 0;
 
+  ddog_prof_Label *state_label = labels.state_label;
+  bool cpu_or_wall_sample = values.cpu_or_wall_samples > 0;
+  bool has_cpu_time = cpu_or_wall_sample && values.cpu_time_ns > 0;
+  bool only_wall_time = cpu_or_wall_sample && values.cpu_time_ns == 0 && values.wall_time_ns > 0;
+
+  if (cpu_or_wall_sample && state_label == NULL) rb_raise(rb_eRuntimeError, "BUG: Unexpected missing state_label");
+
+  if (has_cpu_time) state_label->str = DDOG_CHARSLICE_C("had cpu");
+
   for (int i = captured_frames - 1; i >= 0; i--) {
     VALUE name, filename;
     int line;
@@ -248,10 +266,55 @@ static void sample_thread_internal(
     name = NIL_P(name) ? missing_string : name;
     filename = NIL_P(filename) ? missing_string : filename;
 
+    ddog_CharSlice name_slice = char_slice_from_ruby_string(name);
+    ddog_CharSlice filename_slice = char_slice_from_ruby_string(filename);
+
+    bool top_of_the_stack = i == 0;
+
+    // When there's only wall-time in a sample, this means that the thread was not active in the sampled period.
+    //
+    // We try to categorize what it was doing based on what we observe at the top of the stack. This is a very rough
+    // approximation, and in the future we hope to replace this with a more accurate approach (such as using the
+    // GVL instrumentation API.)
+    if (top_of_the_stack && only_wall_time) {
+      if (!buffer->is_ruby_frame[i]) {
+        // We know that known versions of Ruby implement these using native code; thus if we find a method with the
+        // same name that is not native code, we ignore it, as it's probably a user method that coincidentally
+        // has the same name. Thus, even though "matching just by method name" is kinda weak,
+        // "matching by method name" + is native code seems actually to be good enough for a lot of cases.
+
+        if (CHARSLICE_EQUALS("sleep", name_slice)) { // Expected to be Kernel.sleep
+          state_label->str  = DDOG_CHARSLICE_C("sleeping");
+        } else if (CHARSLICE_EQUALS("select", name_slice)) { // Expected to be Kernel.select
+          state_label->str  = DDOG_CHARSLICE_C("waiting");
+        } else if (
+            CHARSLICE_EQUALS("synchronize", name_slice) || // Expected to be Monitor/Mutex#synchronize
+            CHARSLICE_EQUALS("lock", name_slice) ||        // Expected to be Mutex#lock
+            CHARSLICE_EQUALS("join", name_slice)           // Expected to be Thread#join
+        ) {
+          state_label->str  = DDOG_CHARSLICE_C("blocked");
+        } else if (CHARSLICE_EQUALS("wait_readable", name_slice)) { // Expected to be IO#wait_readable
+          state_label->str  = DDOG_CHARSLICE_C("network");
+        }
+        #ifdef NO_PRIMITIVE_POP // Ruby < 3.2
+          else if (CHARSLICE_EQUALS("pop", name_slice)) { // Expected to be Queue/SizedQueue#pop
+            state_label->str  = DDOG_CHARSLICE_C("waiting");
+          }
+        #endif
+      } else {
+        #ifndef NO_PRIMITIVE_POP // Ruby >= 3.2
+          // Unlike the above, Ruby actually treats this one specially and gives it a nice file name we can match on!
+          if (CHARSLICE_EQUALS("pop", name_slice) && CHARSLICE_EQUALS("<internal:thread_sync>", filename_slice)) { // Expected to be Queue/SizedQueue#pop
+            state_label->str  = DDOG_CHARSLICE_C("waiting");
+          }
+        #endif
+      }
+    }
+
     buffer->locations[i] = (ddog_prof_Location) {
       .function = (ddog_prof_Function) {
-        .name = char_slice_from_ruby_string(name),
-        .filename = char_slice_from_ruby_string(filename)
+        .name = name_slice,
+        .filename = filename_slice,
       },
       .line = line,
     };
@@ -320,7 +383,7 @@ static void record_placeholder_stack_in_native_code(
   sampling_buffer* buffer,
   VALUE recorder_instance,
   sample_values values,
-  ddog_prof_Slice_Label labels,
+  sample_labels labels,
   sampling_buffer *record_buffer,
   int extra_frames_in_record_buffer
 ) {

diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.h b/ext/ddtrace_profiling_native_extension/collectors_stack.h
@@ -13,7 +13,7 @@ void sample_thread(
   sampling_buffer* buffer,
   VALUE recorder_instance,
   sample_values values,
-  ddog_prof_Slice_Label labels,
+  sample_labels labels,
   sample_type type
 );
 sampling_buffer *sampling_buffer_new(unsigned int max_frames);

diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c
@@ -471,7 +471,7 @@ void update_metrics_and_sample(
     thread_being_sampled,
     stack_from_thread,
     thread_context,
-    (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
+    (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
     SAMPLE_REGULAR,
     current_monotonic_wall_time_ns,
     NULL,
@@ -616,7 +616,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
       /* thread: */  thread,
       /* stack_from_thread: */ thread,
       thread_context,
-      (sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns},
+      (sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns},
       SAMPLE_IN_GC,
       INVALID_TIME, // For now we're not collecting timestamps for these events
       NULL,
@@ -662,6 +662,7 @@ static void trigger_sample_for_thread(
     1 + // profiler overhead
     1 + // end_timestamp_ns
     2 + // ruby vm type and allocation class
+    1 + // state (only set for cpu/wall-time samples)
     2;  // local root span id and span id
   ddog_prof_Label labels[max_label_count];
   int label_pos = 0;
@@ -745,6 +746,20 @@ static void trigger_sample_for_thread(
     };
   }
 
+  // This label is handled specially:
+  // 1. It's only set for cpu/wall-time samples
+  // 2. We set it here to its default state of "unknown", but the `Collectors::Stack` may choose to override it with
+  //    something more interesting.
+  ddog_prof_Label *state_label = NULL;
+  if (values.cpu_or_wall_samples > 0) {
+    state_label = &labels[label_pos++];
+    *state_label = (ddog_prof_Label) {
+      .key = DDOG_CHARSLICE_C("state"),
+      .str = DDOG_CHARSLICE_C("unknown"),
+      .num = 0, // This shouldn't be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
+    };
+  }
+
   // The number of times `label_pos++` shows up in this function needs to match `max_label_count`. To avoid "oops I
   // forgot to update max_label_count" in the future, we've also added this validation.
   // @ivoanjo: I wonder if C compilers are smart enough to statically prove this check never triggers unless someone
@@ -753,12 +768,14 @@ static void trigger_sample_for_thread(
     rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
   }
 
+  ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos};
+
   sample_thread(
     stack_from_thread,
     state->sampling_buffer,
     state->recorder_instance,
     values,
-    (ddog_prof_Slice_Label) {.ptr = labels, .len = label_pos},
+    (sample_labels) {.labels = slice_labels, .state_label = state_label},
     type
   );
 }

diff --git a/ext/ddtrace_profiling_native_extension/extconf.rb b/ext/ddtrace_profiling_native_extension/extconf.rb
@@ -146,6 +146,9 @@ def add_compiler_flag(flag)
 # On older Rubies, the first_lineno inside a location was a VALUE and not a int (https://github.com/ruby/ruby/pull/6430)
 $defs << '-DNO_INT_FIRST_LINENO' if RUBY_VERSION < '3.2'
 
+# On older Rubies, "pop" was not a primitive operation
+$defs << '-DNO_PRIMITIVE_POP' if RUBY_VERSION < '3.2'
+
 # On older Rubies, there was no tid member in the internal thread structure
 $defs << '-DNO_THREAD_TID' if RUBY_VERSION < '3.1'
 

diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c
@@ -399,7 +399,7 @@ static VALUE ruby_time_from(ddog_Timespec ddprof_time) {
   return rb_time_timespec_new(&time, utc);
 }
 
-void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, ddog_prof_Slice_Label labels) {
+void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels) {
   struct stack_recorder_state *state;
   TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
 
@@ -413,7 +413,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
   uint8_t *position_for = state->position_for;
 
   metric_values[position_for[CPU_TIME_VALUE_ID]]      = values.cpu_time_ns;
-  metric_values[position_for[CPU_SAMPLES_VALUE_ID]]   = values.cpu_samples;
+  metric_values[position_for[CPU_SAMPLES_VALUE_ID]]   = values.cpu_or_wall_samples;
   metric_values[position_for[WALL_TIME_VALUE_ID]]     = values.wall_time_ns;
   metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples;
 
@@ -422,7 +422,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
     (ddog_prof_Sample) {
       .locations = locations,
       .values = (ddog_Slice_I64) {.ptr = metric_values, .len = state->enabled_values_count},
-      .labels = labels
+      .labels = labels.labels
     }
   );
 

diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.h b/ext/ddtrace_profiling_native_extension/stack_recorder.h
@@ -2,13 +2,21 @@
 
 #include <datadog/profiling.h>
 
-typedef struct sample_values {
+typedef struct {
   int64_t cpu_time_ns;
   int64_t wall_time_ns;
-  uint32_t cpu_samples;
+  uint32_t cpu_or_wall_samples;
   uint32_t alloc_samples;
 } sample_values;
 
-void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, ddog_prof_Slice_Label labels);
+typedef struct sample_labels {
+  ddog_prof_Slice_Label labels;
+
+  // This is used to allow the `Collectors::Stack` to modify the existing label, if any. This MUST be NULL or point
+  // somewhere inside the labels slice above.
+  ddog_prof_Label *state_label;
+} sample_labels;
+
+void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels);
 void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint);
 VALUE enforce_recorder_instance(VALUE object);