DataDog · ivoanjo · Jul 9, 2024 · Jul 5, 2024 · Jul 8, 2024 · Jul 8, 2024
@@ -1140,9 +1140,15 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
     discrete_dynamic_sampler_events_since_last_sample(&state->allocation_sampler) :
     // if we aren't, then we're sampling every event
     1;
-  // TODO: Signal in the profile that clamping happened?
+
+  // To control bias from sampling, we clamp the maximum weight attributed to a single allocation sample. This avoids
+  // assigning a very large number to a sample, if for instance the dynamic sampling mechanism chose a really big interval.
   unsigned int weight = allocations_since_last_sample > MAX_ALLOC_WEIGHT ? MAX_ALLOC_WEIGHT : (unsigned int) allocations_since_last_sample;
   thread_context_collector_sample_allocation(state->thread_context_collector_instance, weight, new_object);
+  // ...but we still represent the skipped samples in the profile, thus the data will account for all allocations.
+  if (weight < allocations_since_last_sample) {
+    thread_context_collector_sample_skipped_allocation_samples(state->thread_context_collector_instance, allocations_since_last_sample - weight);
+  }
 
   // Return a dummy VALUE because we're called from rb_rescue2 which requires it
   return Qnil;

@@ -231,6 +231,7 @@ static void ddtrace_otel_trace_identifiers_for(
   VALUE active_span,
   VALUE otel_values
 );
+static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE skipped_samples);
 
 void collectors_thread_context_init(VALUE profiling_module) {
   VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -261,6 +262,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
   rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
   rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
   rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);
+  rb_define_singleton_method(testing_module, "_native_sample_skipped_allocation_samples", _native_sample_skipped_allocation_samples, 2);
 
   at_active_span_id = rb_intern_const("@active_span");
   at_active_trace_id = rb_intern_const("@active_trace");
@@ -1400,3 +1402,33 @@ static void ddtrace_otel_trace_identifiers_for(
   *active_trace = current_trace;
   *numeric_span_id = resolved_numeric_span_id;
 }
+
+void thread_context_collector_sample_skipped_allocation_samples(VALUE self_instance, unsigned int skipped_samples) {
+  struct thread_context_collector_state *state;
+  TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
+
+  ddog_prof_Label labels[] = {
+    // Providing .num = 0 should not be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
+    {.key = DDOG_CHARSLICE_C("thread id"),        .str = DDOG_CHARSLICE_C("SA"),                .num = 0},
+    {.key = DDOG_CHARSLICE_C("thread name"),      .str = DDOG_CHARSLICE_C("Skipped Samples"),   .num = 0},
+    {.key = DDOG_CHARSLICE_C("allocation class"), .str = DDOG_CHARSLICE_C("(Skipped Samples)"), .num = 0},
+  };
+  ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = sizeof(labels) / sizeof(labels[0])};
+
+  record_placeholder_stack(
+    state->sampling_buffer,
+    state->recorder_instance,
+    (sample_values) {.alloc_samples = skipped_samples},
+    (sample_labels) {
+      .labels = slice_labels,
+      .state_label = NULL,
+      .end_timestamp_ns = 0, // For now we're not collecting timestamps for allocation events
+    },
+    DDOG_CHARSLICE_C("Skipped Samples")
+  );
+}
+
+static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE skipped_samples) {
+  thread_context_collector_sample_skipped_allocation_samples(collector_instance, NUM2UINT(skipped_samples));
+  return Qtrue;
+}
@@ -9,6 +9,7 @@ void thread_context_collector_sample(
   VALUE profiler_overhead_stack_thread
 );
 void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object);
+void thread_context_collector_sample_skipped_allocation_samples(VALUE self_instance, unsigned int skipped_samples);
 VALUE thread_context_collector_sample_after_gc(VALUE self_instance);
 void thread_context_collector_on_gc_start(VALUE self_instance);
 bool thread_context_collector_on_gc_finish(VALUE self_instance);

@@ -507,6 +507,7 @@
 
       context 'with dynamic_sampling_rate_enabled' do
         let(:options) { { dynamic_sampling_rate_enabled: true } }
+
         it 'keeps statistics on how allocation sampling is doing' do
           stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))
 
@@ -536,6 +537,30 @@
           one_second_in_ns = 1_000_000_000
           expect(sampling_time_ns_max).to be < one_second_in_ns, "A single sample should not take longer than 1s, #{stats}"
         end
+
+        # When large numbers of objects are allocated, the dynamic sampling rate kicks in, and we don't sample every
+        # object.
+        # We then assign a weight to every sample to compensate for this; to avoid bias, we have a limit on this weight,
+        # and we clamp it if it goes over the limit.
+        # But the total amount of allocations recorded should match the number we observed, and thus we record the
+        # remainder above the clamped value as a separate "Skipped Samples" step.
+        it 'records skipped allocation samples when weights are clamped' do
+          start
+
+          thread_that_allocates_as_fast_as_possible = Thread.new { loop { BasicObject.new } }
+
+          allocation_samples = try_wait_until do
+            samples = samples_from_pprof(recorder.serialize!).select { |it| it.values[:'alloc-samples'] > 0 }
+            samples if samples.any? { |it| it.labels[:'thread name'] == 'Skipped Samples' }
+          end
+
+          thread_that_allocates_as_fast_as_possible.kill
+          thread_that_allocates_as_fast_as_possible.join
+
+          cpu_and_wall_time_worker.stop
+
+          expect(allocation_samples).to_not be_empty
+        end
       end
 
       context 'when sampling optimized Ruby strings' do

@@ -79,6 +79,10 @@ def sample_allocation(weight:, new_object: Object.new)
     described_class::Testing._native_sample_allocation(cpu_and_wall_time_collector, weight, new_object)
   end
 
+  def sample_skipped_allocation_samples(skipped_samples)
+    described_class::Testing._native_sample_skipped_allocation_samples(cpu_and_wall_time_collector, skipped_samples)
+  end
+
   def thread_list
     described_class::Testing._native_thread_list
   end
@@ -1210,6 +1214,31 @@ def self.otel_sdk_available?
     end
   end
 
+  describe '#sample_skipped_allocation_samples' do
+    let(:single_sample) do
+      expect(samples.size).to be 1
+      samples.first
+    end
+    before { sample_skipped_allocation_samples(123) }
+
+    it 'records the number of skipped allocations' do
+      expect(single_sample.values).to include('alloc-samples': 123)
+    end
+
+    it 'attributes the skipped samples to a "Skipped Samples" thread' do
+      expect(single_sample.labels).to include('thread id': 'SA', 'thread name': 'Skipped Samples')
+    end
+
+    it 'attributes the skipped samples to a "(Skipped Samples)" allocation class' do
+      expect(single_sample.labels).to include('allocation class': '(Skipped Samples)')
+    end
+
+    it 'includes a placeholder stack attributed to "Skipped Samples"' do
+      expect(single_sample.locations.size).to be 1
+      expect(single_sample.locations.first.path).to eq 'Skipped Samples'
+    end
+  end
+
   describe '#thread_list' do
     it "returns the same as Ruby's Thread.list" do
       expect(thread_list).to eq Thread.list