Skip to content

Commit

Permalink
Merge pull request #3162 from DataDog/ivoanjo/prof-7440-timeline-stat…
Browse files Browse the repository at this point in the history
…es-v1

[PROF-7440] Add approximate thread state categorization for timeline
  • Loading branch information
ivoanjo authored Oct 3, 2023
2 parents 6a3a9e7 + f508e71 commit 0b0dd29
Show file tree
Hide file tree
Showing 9 changed files with 351 additions and 46 deletions.
81 changes: 72 additions & 9 deletions ext/ddtrace_profiling_native_extension/collectors_stack.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ static void record_placeholder_stack_in_native_code(
sampling_buffer* buffer,
VALUE recorder_instance,
sample_values values,
ddog_prof_Slice_Label labels,
sample_labels labels,
sampling_buffer *record_buffer,
int extra_frames_in_record_buffer
);
Expand All @@ -49,7 +49,7 @@ static void sample_thread_internal(
sampling_buffer* buffer,
VALUE recorder_instance,
sample_values values,
ddog_prof_Slice_Label labels,
sample_labels labels,
sampling_buffer *record_buffer,
int extra_frames_in_record_buffer
);
Expand Down Expand Up @@ -85,13 +85,14 @@ static VALUE _native_sample(
VALUE zero = INT2NUM(0);
sample_values values = {
.cpu_time_ns = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-time"), zero)),
.cpu_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-samples"), zero)),
.cpu_or_wall_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-samples"), zero)),
.wall_time_ns = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("wall-time"), zero)),
.alloc_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("alloc-samples"), zero)),
};

long labels_count = RARRAY_LEN(labels_array) + RARRAY_LEN(numeric_labels_array);
ddog_prof_Label labels[labels_count];
ddog_prof_Label *state_label = NULL;

for (int i = 0; i < RARRAY_LEN(labels_array); i++) {
VALUE key_str_pair = rb_ary_entry(labels_array, i);
Expand All @@ -100,6 +101,10 @@ static VALUE _native_sample(
.key = char_slice_from_ruby_string(rb_ary_entry(key_str_pair, 0)),
.str = char_slice_from_ruby_string(rb_ary_entry(key_str_pair, 1))
};

if (rb_str_equal(rb_ary_entry(key_str_pair, 0), rb_str_new_cstr("state"))) {
state_label = &labels[i];
}
}
for (int i = 0; i < RARRAY_LEN(numeric_labels_array); i++) {
VALUE key_str_pair = rb_ary_entry(numeric_labels_array, i);
Expand All @@ -115,12 +120,14 @@ static VALUE _native_sample(

sampling_buffer *buffer = sampling_buffer_new(max_frames_requested);

ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = labels_count};

sample_thread(
thread,
buffer,
recorder_instance,
values,
(ddog_prof_Slice_Label) {.ptr = labels, .len = labels_count},
(sample_labels) {.labels = slice_labels, .state_label = state_label},
RTEST(in_gc) ? SAMPLE_IN_GC : SAMPLE_REGULAR
);

Expand All @@ -134,7 +141,7 @@ void sample_thread(
sampling_buffer* buffer,
VALUE recorder_instance,
sample_values values,
ddog_prof_Slice_Label labels,
sample_labels labels,
sample_type type
) {
// Samples thread into recorder
Expand Down Expand Up @@ -171,6 +178,8 @@ void sample_thread(
rb_raise(rb_eArgError, "Unexpected value for sample_type: %d", type);
}

#define CHARSLICE_EQUALS(must_be_a_literal, charslice) (strlen("" must_be_a_literal) == charslice.len && strncmp(must_be_a_literal, charslice.ptr, charslice.len) == 0)

// Idea: Should we release the global vm lock (GVL) after we get the data from `rb_profile_frames`? That way other Ruby threads
// could continue making progress while the sample was ingested into the profile.
//
Expand All @@ -195,7 +204,7 @@ static void sample_thread_internal(
sampling_buffer* buffer,
VALUE recorder_instance,
sample_values values,
ddog_prof_Slice_Label labels,
sample_labels labels,
sampling_buffer *record_buffer,
int extra_frames_in_record_buffer
) {
Expand Down Expand Up @@ -228,6 +237,15 @@ static void sample_thread_internal(
VALUE last_ruby_frame = Qnil;
int last_ruby_line = 0;

ddog_prof_Label *state_label = labels.state_label;
bool cpu_or_wall_sample = values.cpu_or_wall_samples > 0;
bool has_cpu_time = cpu_or_wall_sample && values.cpu_time_ns > 0;
bool only_wall_time = cpu_or_wall_sample && values.cpu_time_ns == 0 && values.wall_time_ns > 0;

if (cpu_or_wall_sample && state_label == NULL) rb_raise(rb_eRuntimeError, "BUG: Unexpected missing state_label");

if (has_cpu_time) state_label->str = DDOG_CHARSLICE_C("had cpu");

for (int i = captured_frames - 1; i >= 0; i--) {
VALUE name, filename;
int line;
Expand All @@ -248,10 +266,55 @@ static void sample_thread_internal(
name = NIL_P(name) ? missing_string : name;
filename = NIL_P(filename) ? missing_string : filename;

ddog_CharSlice name_slice = char_slice_from_ruby_string(name);
ddog_CharSlice filename_slice = char_slice_from_ruby_string(filename);

bool top_of_the_stack = i == 0;

// When there's only wall-time in a sample, this means that the thread was not active in the sampled period.
//
// We try to categorize what it was doing based on what we observe at the top of the stack. This is a very rough
// approximation, and in the future we hope to replace this with a more accurate approach (such as using the
// GVL instrumentation API.)
if (top_of_the_stack && only_wall_time) {
if (!buffer->is_ruby_frame[i]) {
// We know that known versions of Ruby implement these using native code; thus if we find a method with the
// same name that is not native code, we ignore it, as it's probably a user method that coincidentally
// has the same name. Thus, even though "matching just by method name" is kinda weak,
// "matching by method name" + is native code seems actually to be good enough for a lot of cases.

if (CHARSLICE_EQUALS("sleep", name_slice)) { // Expected to be Kernel.sleep
state_label->str = DDOG_CHARSLICE_C("sleeping");
} else if (CHARSLICE_EQUALS("select", name_slice)) { // Expected to be Kernel.select
state_label->str = DDOG_CHARSLICE_C("waiting");
} else if (
CHARSLICE_EQUALS("synchronize", name_slice) || // Expected to be Monitor/Mutex#synchronize
CHARSLICE_EQUALS("lock", name_slice) || // Expected to be Mutex#lock
CHARSLICE_EQUALS("join", name_slice) // Expected to be Thread#join
) {
state_label->str = DDOG_CHARSLICE_C("blocked");
} else if (CHARSLICE_EQUALS("wait_readable", name_slice)) { // Expected to be IO#wait_readable
state_label->str = DDOG_CHARSLICE_C("network");
}
#ifdef NO_PRIMITIVE_POP // Ruby < 3.2
else if (CHARSLICE_EQUALS("pop", name_slice)) { // Expected to be Queue/SizedQueue#pop
state_label->str = DDOG_CHARSLICE_C("waiting");
}
#endif
} else {
#ifndef NO_PRIMITIVE_POP // Ruby >= 3.2
// Unlike the above, Ruby actually treats this one specially and gives it a nice file name we can match on!
if (CHARSLICE_EQUALS("pop", name_slice) && CHARSLICE_EQUALS("<internal:thread_sync>", filename_slice)) { // Expected to be Queue/SizedQueue#pop
state_label->str = DDOG_CHARSLICE_C("waiting");
}
#endif
}
}

buffer->locations[i] = (ddog_prof_Location) {
.function = (ddog_prof_Function) {
.name = char_slice_from_ruby_string(name),
.filename = char_slice_from_ruby_string(filename)
.name = name_slice,
.filename = filename_slice,
},
.line = line,
};
Expand Down Expand Up @@ -320,7 +383,7 @@ static void record_placeholder_stack_in_native_code(
sampling_buffer* buffer,
VALUE recorder_instance,
sample_values values,
ddog_prof_Slice_Label labels,
sample_labels labels,
sampling_buffer *record_buffer,
int extra_frames_in_record_buffer
) {
Expand Down
2 changes: 1 addition & 1 deletion ext/ddtrace_profiling_native_extension/collectors_stack.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ void sample_thread(
sampling_buffer* buffer,
VALUE recorder_instance,
sample_values values,
ddog_prof_Slice_Label labels,
sample_labels labels,
sample_type type
);
sampling_buffer *sampling_buffer_new(unsigned int max_frames);
Expand Down
23 changes: 20 additions & 3 deletions ext/ddtrace_profiling_native_extension/collectors_thread_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ void update_metrics_and_sample(
thread_being_sampled,
stack_from_thread,
thread_context,
(sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
(sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
SAMPLE_REGULAR,
current_monotonic_wall_time_ns,
NULL,
Expand Down Expand Up @@ -616,7 +616,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
/* thread: */ thread,
/* stack_from_thread: */ thread,
thread_context,
(sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns},
(sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns},
SAMPLE_IN_GC,
INVALID_TIME, // For now we're not collecting timestamps for these events
NULL,
Expand Down Expand Up @@ -662,6 +662,7 @@ static void trigger_sample_for_thread(
1 + // profiler overhead
1 + // end_timestamp_ns
2 + // ruby vm type and allocation class
1 + // state (only set for cpu/wall-time samples)
2; // local root span id and span id
ddog_prof_Label labels[max_label_count];
int label_pos = 0;
Expand Down Expand Up @@ -745,6 +746,20 @@ static void trigger_sample_for_thread(
};
}

// This label is handled specially:
// 1. It's only set for cpu/wall-time samples
// 2. We set it here to its default state of "unknown", but the `Collectors::Stack` may choose to override it with
// something more interesting.
ddog_prof_Label *state_label = NULL;
if (values.cpu_or_wall_samples > 0) {
state_label = &labels[label_pos++];
*state_label = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("state"),
.str = DDOG_CHARSLICE_C("unknown"),
.num = 0, // This shouldn't be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
};
}

// The number of times `label_pos++` shows up in this function needs to match `max_label_count`. To avoid "oops I
// forgot to update max_label_count" in the future, we've also added this validation.
// @ivoanjo: I wonder if C compilers are smart enough to statically prove this check never triggers unless someone
Expand All @@ -753,12 +768,14 @@ static void trigger_sample_for_thread(
rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
}

ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos};

sample_thread(
stack_from_thread,
state->sampling_buffer,
state->recorder_instance,
values,
(ddog_prof_Slice_Label) {.ptr = labels, .len = label_pos},
(sample_labels) {.labels = slice_labels, .state_label = state_label},
type
);
}
Expand Down
3 changes: 3 additions & 0 deletions ext/ddtrace_profiling_native_extension/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ def add_compiler_flag(flag)
# On older Rubies, the first_lineno inside a location was a VALUE and not a int (https://github.com/ruby/ruby/pull/6430)
$defs << '-DNO_INT_FIRST_LINENO' if RUBY_VERSION < '3.2'

# On older Rubies, "pop" was not a primitive operation
$defs << '-DNO_PRIMITIVE_POP' if RUBY_VERSION < '3.2'

# On older Rubies, there was no tid member in the internal thread structure
$defs << '-DNO_THREAD_TID' if RUBY_VERSION < '3.1'

Expand Down
6 changes: 3 additions & 3 deletions ext/ddtrace_profiling_native_extension/stack_recorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ static VALUE ruby_time_from(ddog_Timespec ddprof_time) {
return rb_time_timespec_new(&time, utc);
}

void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, ddog_prof_Slice_Label labels) {
void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels) {
struct stack_recorder_state *state;
TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);

Expand All @@ -413,7 +413,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
uint8_t *position_for = state->position_for;

metric_values[position_for[CPU_TIME_VALUE_ID]] = values.cpu_time_ns;
metric_values[position_for[CPU_SAMPLES_VALUE_ID]] = values.cpu_samples;
metric_values[position_for[CPU_SAMPLES_VALUE_ID]] = values.cpu_or_wall_samples;
metric_values[position_for[WALL_TIME_VALUE_ID]] = values.wall_time_ns;
metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples;

Expand All @@ -422,7 +422,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
(ddog_prof_Sample) {
.locations = locations,
.values = (ddog_Slice_I64) {.ptr = metric_values, .len = state->enabled_values_count},
.labels = labels
.labels = labels.labels
}
);

Expand Down
14 changes: 11 additions & 3 deletions ext/ddtrace_profiling_native_extension/stack_recorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,21 @@

#include <datadog/profiling.h>

typedef struct sample_values {
typedef struct {
int64_t cpu_time_ns;
int64_t wall_time_ns;
uint32_t cpu_samples;
uint32_t cpu_or_wall_samples;
uint32_t alloc_samples;
} sample_values;

void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, ddog_prof_Slice_Label labels);
typedef struct sample_labels {
ddog_prof_Slice_Label labels;

// This is used to allow the `Collectors::Stack` to modify the existing label, if any. This MUST be NULL or point
// somewhere inside the labels slice above.
ddog_prof_Label *state_label;
} sample_labels;

void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels);
void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint);
VALUE enforce_recorder_instance(VALUE object);
Loading

0 comments on commit 0b0dd29

Please sign in to comment.