diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.c b/ext/ddtrace_profiling_native_extension/collectors_stack.c index d4d8061901d..7611f13e41e 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_stack.c +++ b/ext/ddtrace_profiling_native_extension/collectors_stack.c @@ -40,7 +40,7 @@ static void record_placeholder_stack_in_native_code( sampling_buffer* buffer, VALUE recorder_instance, sample_values values, - ddog_prof_Slice_Label labels, + sample_labels labels, sampling_buffer *record_buffer, int extra_frames_in_record_buffer ); @@ -49,7 +49,7 @@ static void sample_thread_internal( sampling_buffer* buffer, VALUE recorder_instance, sample_values values, - ddog_prof_Slice_Label labels, + sample_labels labels, sampling_buffer *record_buffer, int extra_frames_in_record_buffer ); @@ -85,13 +85,14 @@ static VALUE _native_sample( VALUE zero = INT2NUM(0); sample_values values = { .cpu_time_ns = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-time"), zero)), - .cpu_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-samples"), zero)), + .cpu_or_wall_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-samples"), zero)), .wall_time_ns = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("wall-time"), zero)), .alloc_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("alloc-samples"), zero)), }; long labels_count = RARRAY_LEN(labels_array) + RARRAY_LEN(numeric_labels_array); ddog_prof_Label labels[labels_count]; + ddog_prof_Label *state_label = NULL; for (int i = 0; i < RARRAY_LEN(labels_array); i++) { VALUE key_str_pair = rb_ary_entry(labels_array, i); @@ -100,6 +101,10 @@ static VALUE _native_sample( .key = char_slice_from_ruby_string(rb_ary_entry(key_str_pair, 0)), .str = char_slice_from_ruby_string(rb_ary_entry(key_str_pair, 1)) }; + + if (rb_str_equal(rb_ary_entry(key_str_pair, 0), rb_str_new_cstr("state"))) { + state_label = &labels[i]; + } } for (int i = 0; i < RARRAY_LEN(numeric_labels_array); i++) { VALUE key_str_pair = rb_ary_entry(numeric_labels_array, i); @@ -115,12 +120,14 @@ static VALUE _native_sample( sampling_buffer *buffer = sampling_buffer_new(max_frames_requested); + ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = labels_count}; + sample_thread( thread, buffer, recorder_instance, values, - (ddog_prof_Slice_Label) {.ptr = labels, .len = labels_count}, + (sample_labels) {.labels = slice_labels, .state_label = state_label}, RTEST(in_gc) ? SAMPLE_IN_GC : SAMPLE_REGULAR ); @@ -134,7 +141,7 @@ void sample_thread( sampling_buffer* buffer, VALUE recorder_instance, sample_values values, - ddog_prof_Slice_Label labels, + sample_labels labels, sample_type type ) { // Samples thread into recorder @@ -171,6 +178,8 @@ void sample_thread( rb_raise(rb_eArgError, "Unexpected value for sample_type: %d", type); } +#define CHARSLICE_EQUALS(must_be_a_literal, charslice) (strlen("" must_be_a_literal) == charslice.len && strncmp(must_be_a_literal, charslice.ptr, charslice.len) == 0) + // Idea: Should we release the global vm lock (GVL) after we get the data from `rb_profile_frames`? That way other Ruby threads // could continue making progress while the sample was ingested into the profile. // @@ -195,7 +204,7 @@ static void sample_thread_internal( sampling_buffer* buffer, VALUE recorder_instance, sample_values values, - ddog_prof_Slice_Label labels, + sample_labels labels, sampling_buffer *record_buffer, int extra_frames_in_record_buffer ) { @@ -228,6 +237,15 @@ static void sample_thread_internal( VALUE last_ruby_frame = Qnil; int last_ruby_line = 0; + ddog_prof_Label *state_label = labels.state_label; + bool cpu_or_wall_sample = values.cpu_or_wall_samples > 0; + bool has_cpu_time = cpu_or_wall_sample && values.cpu_time_ns > 0; + bool only_wall_time = cpu_or_wall_sample && values.cpu_time_ns == 0 && values.wall_time_ns > 0; + + if (cpu_or_wall_sample && state_label == NULL) rb_raise(rb_eRuntimeError, "BUG: Unexpected missing state_label"); + + if (has_cpu_time) state_label->str = DDOG_CHARSLICE_C("had cpu"); + for (int i = captured_frames - 1; i >= 0; i--) { VALUE name, filename; int line; @@ -248,10 +266,55 @@ static void sample_thread_internal( name = NIL_P(name) ? missing_string : name; filename = NIL_P(filename) ? missing_string : filename; + ddog_CharSlice name_slice = char_slice_from_ruby_string(name); + ddog_CharSlice filename_slice = char_slice_from_ruby_string(filename); + + bool top_of_the_stack = i == 0; + + // When there's only wall-time in a sample, this means that the thread was not active in the sampled period. + // + // We try to categorize what it was doing based on what we observe at the top of the stack. This is a very rough + // approximation, and in the future we hope to replace this with a more accurate approach (such as using the + // GVL instrumentation API.) + if (top_of_the_stack && only_wall_time) { + if (!buffer->is_ruby_frame[i]) { + // We know that known versions of Ruby implement these using native code; thus if we find a method with the + // same name that is not native code, we ignore it, as it's probably a user method that coincidentally + // has the same name. Thus, even though "matching just by method name" is kinda weak, + // "matching by method name" + is native code seems actually to be good enough for a lot of cases. + + if (CHARSLICE_EQUALS("sleep", name_slice)) { // Expected to be Kernel.sleep + state_label->str = DDOG_CHARSLICE_C("sleeping"); + } else if (CHARSLICE_EQUALS("select", name_slice)) { // Expected to be Kernel.select + state_label->str = DDOG_CHARSLICE_C("waiting"); + } else if ( + CHARSLICE_EQUALS("synchronize", name_slice) || // Expected to be Monitor/Mutex#synchronize + CHARSLICE_EQUALS("lock", name_slice) || // Expected to be Mutex#lock + CHARSLICE_EQUALS("join", name_slice) // Expected to be Thread#join + ) { + state_label->str = DDOG_CHARSLICE_C("blocked"); + } else if (CHARSLICE_EQUALS("wait_readable", name_slice)) { // Expected to be IO#wait_readable + state_label->str = DDOG_CHARSLICE_C("network"); + } + #ifdef NO_PRIMITIVE_POP // Ruby < 3.2 + else if (CHARSLICE_EQUALS("pop", name_slice)) { // Expected to be Queue/SizedQueue#pop + state_label->str = DDOG_CHARSLICE_C("waiting"); + } + #endif + } else { + #ifndef NO_PRIMITIVE_POP // Ruby >= 3.2 + // Unlike the above, Ruby actually treats this one specially and gives it a nice file name we can match on! + if (CHARSLICE_EQUALS("pop", name_slice) && CHARSLICE_EQUALS("", filename_slice)) { // Expected to be Queue/SizedQueue#pop + state_label->str = DDOG_CHARSLICE_C("waiting"); + } + #endif + } + } + buffer->locations[i] = (ddog_prof_Location) { .function = (ddog_prof_Function) { - .name = char_slice_from_ruby_string(name), - .filename = char_slice_from_ruby_string(filename) + .name = name_slice, + .filename = filename_slice, }, .line = line, }; @@ -320,7 +383,7 @@ static void record_placeholder_stack_in_native_code( sampling_buffer* buffer, VALUE recorder_instance, sample_values values, - ddog_prof_Slice_Label labels, + sample_labels labels, sampling_buffer *record_buffer, int extra_frames_in_record_buffer ) { diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.h b/ext/ddtrace_profiling_native_extension/collectors_stack.h index ee5f9a0b1c1..e4175ae35e8 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_stack.h +++ b/ext/ddtrace_profiling_native_extension/collectors_stack.h @@ -13,7 +13,7 @@ void sample_thread( sampling_buffer* buffer, VALUE recorder_instance, sample_values values, - ddog_prof_Slice_Label labels, + sample_labels labels, sample_type type ); sampling_buffer *sampling_buffer_new(unsigned int max_frames); diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 4938c611d05..7f52653b8eb 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -471,7 +471,7 @@ void update_metrics_and_sample( thread_being_sampled, stack_from_thread, thread_context, - (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_samples = 1, .wall_time_ns = wall_time_elapsed_ns}, + (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns}, SAMPLE_REGULAR, current_monotonic_wall_time_ns, NULL, @@ -616,7 +616,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) { /* thread: */ thread, /* stack_from_thread: */ thread, thread_context, - (sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns}, + (sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns}, SAMPLE_IN_GC, INVALID_TIME, // For now we're not collecting timestamps for these events NULL, @@ -662,6 +662,7 @@ static void trigger_sample_for_thread( 1 + // profiler overhead 1 + // end_timestamp_ns 2 + // ruby vm type and allocation class + 1 + // state (only set for cpu/wall-time samples) 2; // local root span id and span id ddog_prof_Label labels[max_label_count]; int label_pos = 0; @@ -745,6 +746,20 @@ static void trigger_sample_for_thread( }; } + // This label is handled specially: + // 1. It's only set for cpu/wall-time samples + // 2. We set it here to its default state of "unknown", but the `Collectors::Stack` may choose to override it with + // something more interesting. + ddog_prof_Label *state_label = NULL; + if (values.cpu_or_wall_samples > 0) { + state_label = &labels[label_pos++]; + *state_label = (ddog_prof_Label) { + .key = DDOG_CHARSLICE_C("state"), + .str = DDOG_CHARSLICE_C("unknown"), + .num = 0, // This shouldn't be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this + }; + } + // The number of times `label_pos++` shows up in this function needs to match `max_label_count`. To avoid "oops I // forgot to update max_label_count" in the future, we've also added this validation. // @ivoanjo: I wonder if C compilers are smart enough to statically prove this check never triggers unless someone @@ -753,12 +768,14 @@ static void trigger_sample_for_thread( rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); } + ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos}; + sample_thread( stack_from_thread, state->sampling_buffer, state->recorder_instance, values, - (ddog_prof_Slice_Label) {.ptr = labels, .len = label_pos}, + (sample_labels) {.labels = slice_labels, .state_label = state_label}, type ); } diff --git a/ext/ddtrace_profiling_native_extension/extconf.rb b/ext/ddtrace_profiling_native_extension/extconf.rb index a7cb23e9a94..933fc87d157 100644 --- a/ext/ddtrace_profiling_native_extension/extconf.rb +++ b/ext/ddtrace_profiling_native_extension/extconf.rb @@ -146,6 +146,9 @@ def add_compiler_flag(flag) # On older Rubies, the first_lineno inside a location was a VALUE and not a int (https://github.com/ruby/ruby/pull/6430) $defs << '-DNO_INT_FIRST_LINENO' if RUBY_VERSION < '3.2' +# On older Rubies, "pop" was not a primitive operation +$defs << '-DNO_PRIMITIVE_POP' if RUBY_VERSION < '3.2' + # On older Rubies, there was no tid member in the internal thread structure $defs << '-DNO_THREAD_TID' if RUBY_VERSION < '3.1' diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index 60d71146ccb..32f5340dad9 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -399,7 +399,7 @@ static VALUE ruby_time_from(ddog_Timespec ddprof_time) { return rb_time_timespec_new(&time, utc); } -void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, ddog_prof_Slice_Label labels) { +void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); @@ -413,7 +413,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, uint8_t *position_for = state->position_for; metric_values[position_for[CPU_TIME_VALUE_ID]] = values.cpu_time_ns; - metric_values[position_for[CPU_SAMPLES_VALUE_ID]] = values.cpu_samples; + metric_values[position_for[CPU_SAMPLES_VALUE_ID]] = values.cpu_or_wall_samples; metric_values[position_for[WALL_TIME_VALUE_ID]] = values.wall_time_ns; metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples; @@ -422,7 +422,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, (ddog_prof_Sample) { .locations = locations, .values = (ddog_Slice_I64) {.ptr = metric_values, .len = state->enabled_values_count}, - .labels = labels + .labels = labels.labels } ); diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.h b/ext/ddtrace_profiling_native_extension/stack_recorder.h index 64a95c43469..fc1602c383d 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.h +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.h @@ -2,13 +2,21 @@ #include -typedef struct sample_values { +typedef struct { int64_t cpu_time_ns; int64_t wall_time_ns; - uint32_t cpu_samples; + uint32_t cpu_or_wall_samples; uint32_t alloc_samples; } sample_values; -void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, ddog_prof_Slice_Label labels); +typedef struct sample_labels { + ddog_prof_Slice_Label labels; + + // This is used to allow the `Collectors::Stack` to modify the existing label, if any. This MUST be NULL or point + // somewhere inside the labels slice above. + ddog_prof_Label *state_label; +} sample_labels; + +void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels); void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint); VALUE enforce_recorder_instance(VALUE object); diff --git a/spec/datadog/profiling/collectors/stack_spec.rb b/spec/datadog/profiling/collectors/stack_spec.rb index 9c681cdc08a..a55be303849 100644 --- a/spec/datadog/profiling/collectors/stack_spec.rb +++ b/spec/datadog/profiling/collectors/stack_spec.rb @@ -11,7 +11,7 @@ subject(:collectors_stack) { described_class.new } let(:metric_values) { { 'cpu-time' => 123, 'cpu-samples' => 456, 'wall-time' => 789 } } - let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b' }.to_a } + let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a } let(:raw_reference_stack) { stacks.fetch(:reference) } let(:reference_stack) { convert_reference_stack(raw_reference_stack) } @@ -219,6 +219,203 @@ def call_sleep expect(reference_stack.first.base_label).to eq 'sleep' end end + + describe 'approximate thread state categorization based on current stack' do + describe 'state label validation' do + let(:do_in_background_thread) do + proc do |ready_queue| + ready_queue << true + sleep + end + end + let(:labels) { [] } + + context 'when taking a cpu/wall-time sample and the state label is missing' do + let(:metric_values) { { 'cpu-samples' => 1 } } + + it 'raises an exception' do + expect { gathered_stack }.to raise_error(RuntimeError, /BUG: Unexpected missing state_label/) + end + end + + context 'when taking a non-cpu/wall-time sample and the state label is missing' do + let(:metric_values) { { 'cpu-samples' => 0 } } + + it 'does not raise an exception' do + expect(gathered_stack).to be_truthy + end + end + end + + context 'when sampling a thread with cpu-time' do + let(:do_in_background_thread) do + proc do |ready_queue| + ready_queue << true + sleep + end + end + let(:metric_values) { { 'cpu-time' => 123, 'cpu-samples' => 456, 'wall-time' => 789 } } + + it do + expect(sample_and_decode(background_thread, :labels)).to include(state: 'had cpu') + end + end + + context 'when sampling a sleeping thread with no cpu-time' do + let(:do_in_background_thread) do + proc do |ready_queue| + ready_queue << true + sleep + end + end + let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } } + + it do + expect(sample_and_decode(background_thread, :labels)).to include(state: 'sleeping') + end + end + + context 'when sampling a thread waiting on a select' do + let(:server_socket) { TCPServer.new(6006) } + let(:background_thread) { Thread.new(ready_queue, server_socket, &do_in_background_thread) } + let(:do_in_background_thread) do + proc do |ready_queue, server_socket| + ready_queue << true + IO.select([server_socket]) + end + end + let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } } + + after do + background_thread.kill + background_thread.join + server_socket.close + end + + it do + expect(sample_and_decode(background_thread, :labels)).to include(state: 'waiting') + end + end + + context 'when sampling a thread blocked on Thread#join' do + let(:another_thread) { Thread.new { sleep } } + let(:background_thread) { Thread.new(ready_queue, another_thread, &do_in_background_thread) } + let(:do_in_background_thread) do + proc do |ready_queue, another_thread| + ready_queue << true + another_thread.join + end + end + let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } } + + after do + another_thread.kill + another_thread.join + end + + it do + expect(sample_and_decode(background_thread, :labels)).to include(state: 'blocked') + end + end + + context 'when sampling a thread blocked on Mutex#synchronize' do + let(:locked_mutex) { Mutex.new.tap(&:lock) } + let(:background_thread) { Thread.new(ready_queue, locked_mutex, &do_in_background_thread) } + let(:do_in_background_thread) do + proc do |ready_queue, locked_mutex| + ready_queue << true + locked_mutex.synchronize {} + end + end + let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } } + + it do + expect(sample_and_decode(background_thread, :labels)).to include(state: 'blocked') + end + end + + context 'when sampling a thread blocked on Mutex#lock' do + let(:locked_mutex) { Mutex.new.tap(&:lock) } + let(:background_thread) { Thread.new(ready_queue, locked_mutex, &do_in_background_thread) } + let(:do_in_background_thread) do + proc do |ready_queue, locked_mutex| + ready_queue << true + locked_mutex.lock + end + end + let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } } + + it do + expect(sample_and_decode(background_thread, :labels)).to include(state: 'blocked') + end + end + + context 'when sampling a thread blocked on Monitor#synchronize' do + let(:locked_monitor) { Monitor.new.tap(&:enter) } + let(:background_thread) { Thread.new(ready_queue, locked_monitor, &do_in_background_thread) } + let(:do_in_background_thread) do + proc do |ready_queue, locked_monitor| + ready_queue << true + locked_monitor.synchronize {} + end + end + let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } } + + it do + expect(sample_and_decode(background_thread, :labels)).to include(state: 'blocked') + end + end + + context 'when sampling a thread waiting on a IO object' do + let(:server_socket) { TCPServer.new(6006) } + let(:background_thread) { Thread.new(ready_queue, server_socket, &do_in_background_thread) } + let(:do_in_background_thread) do + proc do |ready_queue, server_socket| + ready_queue << true + server_socket.wait_readable + end + end + let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } } + + after do + background_thread.kill + background_thread.join + server_socket.close + end + + it do + expect(sample_and_decode(background_thread, :labels)).to include(state: 'network') + end + end + + context 'when sampling a thread waiting on a Queue object' do + let(:do_in_background_thread) do + proc do |ready_queue| + ready_queue << true + Queue.new.pop + end + end + let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } } + + it do + expect(sample_and_decode(background_thread, :labels)).to include(state: 'waiting') + end + end + + context 'when sampling a thread in an unknown state' do + let(:do_in_background_thread) do + proc do |ready_queue| + ready_queue << true + Thread.stop + end + end + let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } } + + it do + expect(sample_and_decode(background_thread, :labels)).to include(state: 'unknown') + end + end + end end context 'when sampling a thread with a stack that is deeper than the configured max_frames' do @@ -415,13 +612,13 @@ def convert_reference_stack(raw_reference_stack) end end - def sample_and_decode(thread, max_frames: 400, recorder: build_stack_recorder, in_gc: false) + def sample_and_decode(thread, data = :locations, max_frames: 400, recorder: build_stack_recorder, in_gc: false) sample(thread, recorder, metric_values, labels, max_frames: max_frames, in_gc: in_gc) samples = samples_from_pprof(recorder.serialize!) expect(samples.size).to be 1 - samples.first.locations + samples.first.public_send(data) end end diff --git a/spec/datadog/profiling/collectors/thread_context_spec.rb b/spec/datadog/profiling/collectors/thread_context_spec.rb index b2aa16cf490..37a57348355 100644 --- a/spec/datadog/profiling/collectors/thread_context_spec.rb +++ b/spec/datadog/profiling/collectors/thread_context_spec.rb @@ -108,7 +108,10 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current sample expect(Thread.list).to eq(all_threads), 'Threads finished during this spec, causing flakiness!' - expect(samples.size).to be all_threads.size + + seen_threads = samples.map(&:labels).map { |it| it.fetch(:'thread id') }.uniq + + expect(seen_threads.size).to be all_threads.size end it 'tags the samples with the object ids of the Threads they belong to' do @@ -167,20 +170,17 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current per_thread_context.fetch(t1).fetch(:wall_time_at_previous_sample_ns) t1_samples = samples_for_thread(samples, t1) - wall_time = t1_samples.first.values.fetch(:'wall-time') - - expect(t1_samples.size) - .to be(1), "Expected thread t1 to always have same stack trace (because it's sleeping), got #{t1_samples.inspect}" + wall_time = t1_samples.map(&:values).map { |it| it.fetch(:'wall-time') }.reduce(:+) expect(wall_time).to be(wall_time_at_second_sample - wall_time_at_first_sample) end it 'tags samples with how many times they were seen' do 5.times { sample } - t1_sample = samples_for_thread(samples, t1).first + t1_samples = samples_for_thread(samples, t1) - expect(t1_sample.values).to include(:'cpu-samples' => 5) + expect(t1_samples.map(&:values).map { |it| it.fetch(:'cpu-samples') }.reduce(:+)).to eq 5 end [:before, :after].each do |on_gc_finish_order| @@ -239,7 +239,7 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current it 'sets the cpu-time on every sample to zero' do 5.times { sample } - expect(samples).to all include(values: include(:'cpu-time' => 0)) + expect(samples).to all have_attributes(values: include(:'cpu-time' => 0)) end end @@ -499,9 +499,9 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current t1_samples = samples_for_thread(samples, t1) - expect(t1_samples).to have(1).item - expect(t1_samples.first.labels).to include(:'trace endpoint' => 'changed_after_first_sample') - expect(t1_samples.first.values).to include(:'cpu-samples' => 2) + expect(t1_samples) + .to all have_attributes(labels: include(:'trace endpoint' => 'changed_after_first_sample')) + expect(t1_samples.map(&:values).map { |it| it.fetch(:'cpu-samples') }.reduce(:+)).to eq 2 end context 'when the resource is changed multiple times' do @@ -514,9 +514,9 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current t1_samples = samples_for_thread(samples, t1) - expect(t1_samples).to have(1).item - expect(t1_samples.first.labels).to include(:'trace endpoint' => 'changed_after_second_sample') - expect(t1_samples.first.values).to include(:'cpu-samples' => 3) + expect(t1_samples) + .to all have_attributes(labels: include(:'trace endpoint' => 'changed_after_second_sample')) + expect(t1_samples.map(&:values).map { |it| it.fetch(:'cpu-samples') }.reduce(:+)).to eq 3 end end end diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index baeafbb50d1..9ed86ae1223 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -186,7 +186,7 @@ def sample_types_from(decoded_profile) context 'when profile has a sample' do let(:metric_values) { { 'cpu-time' => 123, 'cpu-samples' => 456, 'wall-time' => 789, 'alloc-samples' => 4242 } } - let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b' }.to_a } + let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a } let(:samples) { samples_from_pprof(encoded_pprof) } @@ -211,7 +211,10 @@ def sample_types_from(decoded_profile) end it 'encodes the sample with the labels provided' do - expect(samples.first.labels).to eq(label_a: 'value_a', label_b: 'value_b') + labels = samples.first.labels + labels.delete(:state) # We test this separately! + + expect(labels).to eq(label_a: 'value_a', label_b: 'value_b') end it 'encodes a single empty mapping' do @@ -240,7 +243,13 @@ def sample_types_from(decoded_profile) # We're using `_native_sample` here to test the behavior of `record_sample` in `stack_recorder.c` expect do Datadog::Profiling::Collectors::Stack::Testing._native_sample( - Thread.current, stack_recorder, metric_values, { 'local root span id' => 'incorrect' }.to_a, [], 400, false + Thread.current, + stack_recorder, + metric_values, + { 'local root span id' => 'incorrect', 'state' => 'unknown' }.to_a, + [], + 400, + false, ) end.to raise_error(ArgumentError) end @@ -256,8 +265,9 @@ def sample_types_from(decoded_profile) local_root_span_id_without_endpoint = { 'local root span id' => 456 } sample = proc do |numeric_labels = {}| - Datadog::Profiling::Collectors::Stack::Testing - ._native_sample(Thread.current, stack_recorder, metric_values, [], numeric_labels.to_a, 400, false) + Datadog::Profiling::Collectors::Stack::Testing._native_sample( + Thread.current, stack_recorder, metric_values, { 'state' => 'unknown' }.to_a, numeric_labels.to_a, 400, false + ) end sample.call @@ -270,16 +280,23 @@ def sample_types_from(decoded_profile) sample.call(local_root_span_id_without_endpoint) sample.call(local_root_span_id_with_endpoint) - expect(samples).to have(6).items + expect(samples).to have(6).items # Samples are guaranteed unique since each sample call is on a different line + + labels_without_state = proc { |labels| labels.reject { |key| key == :state } } # Other samples have not been changed - expect(samples.select { |it| it[:labels].empty? }).to have(2).items - expect(samples.select { |it| it[:labels] == { :'local root span id' => 456 } }).to have(2).items + expect(samples.select { |it| labels_without_state.call(it[:labels]).empty? }).to have(2).items + expect( + samples.select do |it| + labels_without_state.call(it[:labels]) == { :'local root span id' => 456 } + end + ).to have(2).items # Matching samples taken before and after recording the endpoint have been changed expect( samples.select do |it| - it[:labels] == { :'local root span id' => 123, :'trace endpoint' => 'recorded-endpoint' } + labels_without_state.call(it[:labels]) == + { :'local root span id' => 123, :'trace endpoint' => 'recorded-endpoint' } end ).to have(2).items end @@ -382,7 +399,7 @@ def sample_types_from(decoded_profile) context 'when profile has a sample' do let(:metric_values) { { 'cpu-time' => 123, 'cpu-samples' => 456, 'wall-time' => 789 } } - let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b' }.to_a } + let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a } it 'makes the next calls to serialize return no data' do # Add some data