Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PROF-10123] Add placeholder for skipped samples in allocation profiler #3760

Merged
merged 9 commits into from
Jul 9, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -1140,9 +1140,15 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
discrete_dynamic_sampler_events_since_last_sample(&state->allocation_sampler) :
// if we aren't, then we're sampling every event
1;
// TODO: Signal in the profile that clamping happened?

// To control bias from sampling, we clamp the maximum weight attributed to a single allocation sample. This avoids
// assigning a very large number to a sample, if for instance the dynamic sampling mechanism chose a really big interval.
unsigned int weight = allocations_since_last_sample > MAX_ALLOC_WEIGHT ? MAX_ALLOC_WEIGHT : (unsigned int) allocations_since_last_sample;
thread_context_collector_sample_allocation(state->thread_context_collector_instance, weight, new_object);
// ...but we still represent the skipped samples in the profile, thus the data will account for all allocations.
if (weight < allocations_since_last_sample) {
thread_context_collector_sample_skipped_allocation_samples(state->thread_context_collector_instance, allocations_since_last_sample - weight);
}

// Return a dummy VALUE because we're called from rb_rescue2 which requires it
return Qnil;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ static void ddtrace_otel_trace_identifiers_for(
VALUE active_span,
VALUE otel_values
);
static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE skipped_samples);

void collectors_thread_context_init(VALUE profiling_module) {
VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
Expand Down Expand Up @@ -261,6 +262,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);
rb_define_singleton_method(testing_module, "_native_sample_skipped_allocation_samples", _native_sample_skipped_allocation_samples, 2);

at_active_span_id = rb_intern_const("@active_span");
at_active_trace_id = rb_intern_const("@active_trace");
Expand Down Expand Up @@ -1400,3 +1402,33 @@ static void ddtrace_otel_trace_identifiers_for(
*active_trace = current_trace;
*numeric_span_id = resolved_numeric_span_id;
}

void thread_context_collector_sample_skipped_allocation_samples(VALUE self_instance, unsigned int skipped_samples) {
struct thread_context_collector_state *state;
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);

ddog_prof_Label labels[] = {
// Providing .num = 0 should not be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
{.key = DDOG_CHARSLICE_C("thread id"), .str = DDOG_CHARSLICE_C("SA"), .num = 0},
{.key = DDOG_CHARSLICE_C("thread name"), .str = DDOG_CHARSLICE_C("Skipped Samples"), .num = 0},
{.key = DDOG_CHARSLICE_C("allocation class"), .str = DDOG_CHARSLICE_C("(Skipped Samples)"), .num = 0},
};
ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = sizeof(labels) / sizeof(labels[0])};

record_placeholder_stack(
state->sampling_buffer,
state->recorder_instance,
(sample_values) {.alloc_samples = skipped_samples},
(sample_labels) {
.labels = slice_labels,
.state_label = NULL,
.end_timestamp_ns = 0, // For now we're not collecting timestamps for allocation events
},
DDOG_CHARSLICE_C("Skipped Samples")
);
}

static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE skipped_samples) {
thread_context_collector_sample_skipped_allocation_samples(collector_instance, NUM2UINT(skipped_samples));
return Qtrue;
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ void thread_context_collector_sample(
VALUE profiler_overhead_stack_thread
);
void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object);
void thread_context_collector_sample_skipped_allocation_samples(VALUE self_instance, unsigned int skipped_samples);
VALUE thread_context_collector_sample_after_gc(VALUE self_instance);
void thread_context_collector_on_gc_start(VALUE self_instance);
bool thread_context_collector_on_gc_finish(VALUE self_instance);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,7 @@

context 'with dynamic_sampling_rate_enabled' do
let(:options) { { dynamic_sampling_rate_enabled: true } }

it 'keeps statistics on how allocation sampling is doing' do
stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))

Expand Down Expand Up @@ -536,6 +537,30 @@
one_second_in_ns = 1_000_000_000
expect(sampling_time_ns_max).to be < one_second_in_ns, "A single sample should not take longer than 1s, #{stats}"
end

# When large numbers of objects are allocated, the dynamic sampling rate kicks in, and we don't sample every
# object.
# We then assign a weight to every sample to compensate for this; to avoid bias, we have a limit on this weight,
# and we clamp it if it goes over the limit.
# But the total amount of allocations recorded should match the number we observed, and thus we record the
# remainder above the clamped value as a separate "Skipped Samples" step.
it 'records skipped allocation samples when weights are clamped' do
start

thread_that_allocates_as_fast_as_possible = Thread.new { loop { BasicObject.new } }

allocation_samples = try_wait_until do
samples = samples_from_pprof(recorder.serialize!).select { |it| it.values[:'alloc-samples'] > 0 }
samples if samples.any? { |it| it.labels[:'thread name'] == 'Skipped Samples' }
end

thread_that_allocates_as_fast_as_possible.kill
thread_that_allocates_as_fast_as_possible.join

cpu_and_wall_time_worker.stop

expect(allocation_samples).to_not be_empty
end
end

context 'when sampling optimized Ruby strings' do
Expand Down
29 changes: 29 additions & 0 deletions spec/datadog/profiling/collectors/thread_context_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ def sample_allocation(weight:, new_object: Object.new)
described_class::Testing._native_sample_allocation(cpu_and_wall_time_collector, weight, new_object)
end

def sample_skipped_allocation_samples(skipped_samples)
described_class::Testing._native_sample_skipped_allocation_samples(cpu_and_wall_time_collector, skipped_samples)
end

def thread_list
described_class::Testing._native_thread_list
end
Expand Down Expand Up @@ -1210,6 +1214,31 @@ def self.otel_sdk_available?
end
end

describe '#sample_skipped_allocation_samples' do
let(:single_sample) do
expect(samples.size).to be 1
samples.first
end
before { sample_skipped_allocation_samples(123) }

it 'records the number of skipped allocations' do
expect(single_sample.values).to include('alloc-samples': 123)
end

it 'attributes the skipped samples to a "Skipped Samples" thread' do
expect(single_sample.labels).to include('thread id': 'SA', 'thread name': 'Skipped Samples')
end

it 'attributes the skipped samples to a "(Skipped Samples)" allocation class' do
expect(single_sample.labels).to include('allocation class': '(Skipped Samples)')
end

it 'includes a placeholder stack attributed to "Skipped Samples"' do
expect(single_sample.locations.size).to be 1
expect(single_sample.locations.first.path).to eq 'Skipped Samples'
end
end

describe '#thread_list' do
it "returns the same as Ruby's Thread.list" do
expect(thread_list).to eq Thread.list
Expand Down
Loading