Merge pull request #3770 from DataDog/ivoanjo/prof-10125-add-unscaled…

…-allocation-counts [PROF-10125] Track unscaled allocation counts in allocation profiler
DataDog · Jul 9, 2024 · d9912ed · d9912ed
2 parents 7732142 + 87eeb0f
commit d9912ed
Show file tree

Hide file tree

Showing 6 changed files with 47 additions and 19 deletions.
diff --git a/ext/datadog_profiling_native_extension/collectors_stack.c b/ext/datadog_profiling_native_extension/collectors_stack.c
@@ -70,6 +70,7 @@ static VALUE _native_sample(
     .cpu_or_wall_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-samples"), zero)),
     .wall_time_ns  = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("wall-time"),     zero)),
     .alloc_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("alloc-samples"), zero)),
+    .alloc_samples_unscaled = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("alloc-samples-unscaled"), zero)),
     .timeline_wall_time_ns = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("timeline"), zero)),
   };
 

diff --git a/ext/datadog_profiling_native_extension/collectors_thread_context.c b/ext/datadog_profiling_native_extension/collectors_thread_context.c
@@ -1292,7 +1292,7 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
     /* thread: */  current_thread,
     /* stack_from_thread: */ current_thread,
     get_or_create_context_for(current_thread, state),
-    (sample_values) {.alloc_samples = sample_weight},
+    (sample_values) {.alloc_samples = sample_weight, .alloc_samples_unscaled = 1},
     INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
     &ruby_vm_type,
     optional_class_name

diff --git a/ext/datadog_profiling_native_extension/stack_recorder.c b/ext/datadog_profiling_native_extension/stack_recorder.c
@@ -151,21 +151,23 @@ static VALUE error_symbol = Qnil; // :error in Ruby
 #define WALL_TIME_VALUE_ID 2
 #define ALLOC_SAMPLES_VALUE     {.type_ = VALUE_STRING("alloc-samples"),     .unit = VALUE_STRING("count")}
 #define ALLOC_SAMPLES_VALUE_ID 3
+#define ALLOC_SAMPLES_UNSCALED_VALUE {.type_ = VALUE_STRING("alloc-samples-unscaled"), .unit = VALUE_STRING("count")}
+#define ALLOC_SAMPLES_UNSCALED_VALUE_ID 4
 #define HEAP_SAMPLES_VALUE      {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")}
-#define HEAP_SAMPLES_VALUE_ID 4
+#define HEAP_SAMPLES_VALUE_ID 5
 #define HEAP_SIZE_VALUE         {.type_ = VALUE_STRING("heap-live-size"),    .unit = VALUE_STRING("bytes")}
-#define HEAP_SIZE_VALUE_ID 5
+#define HEAP_SIZE_VALUE_ID 6
 #define TIMELINE_VALUE          {.type_ = VALUE_STRING("timeline"),          .unit = VALUE_STRING("nanoseconds")}
-#define TIMELINE_VALUE_ID 6
+#define TIMELINE_VALUE_ID 7
 
 static const ddog_prof_ValueType all_value_types[] =
-  {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, HEAP_SIZE_VALUE, TIMELINE_VALUE};
+  {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, ALLOC_SAMPLES_UNSCALED_VALUE, HEAP_SAMPLES_VALUE, HEAP_SIZE_VALUE, TIMELINE_VALUE};
 
 // This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it
 // occupies on the all_value_types array.
 // E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types.
 static const uint8_t all_value_types_positions[] =
-  {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, HEAP_SIZE_VALUE_ID, TIMELINE_VALUE_ID};
+  {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, ALLOC_SAMPLES_UNSCALED_VALUE_ID, HEAP_SAMPLES_VALUE_ID, HEAP_SIZE_VALUE_ID, TIMELINE_VALUE_ID};
 
 #define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType))
 
@@ -429,7 +431,7 @@ static VALUE _native_initialize(
 
   uint8_t requested_values_count = ALL_VALUE_TYPES_COUNT -
     (cpu_time_enabled == Qtrue ? 0 : 1) -
-    (alloc_samples_enabled == Qtrue? 0 : 1) -
+    (alloc_samples_enabled == Qtrue? 0 : 2) -
     (heap_samples_enabled == Qtrue ? 0 : 1) -
     (heap_size_enabled == Qtrue ? 0 : 1) -
     (timeline_enabled == Qtrue ? 0 : 1);
@@ -464,8 +466,12 @@ static VALUE _native_initialize(
   if (alloc_samples_enabled == Qtrue) {
     enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) ALLOC_SAMPLES_VALUE;
     state->position_for[ALLOC_SAMPLES_VALUE_ID] = next_enabled_pos++;
+
+    enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) ALLOC_SAMPLES_UNSCALED_VALUE;
+    state->position_for[ALLOC_SAMPLES_UNSCALED_VALUE_ID] = next_enabled_pos++;
   } else {
     state->position_for[ALLOC_SAMPLES_VALUE_ID] = next_disabled_pos++;
+    state->position_for[ALLOC_SAMPLES_UNSCALED_VALUE_ID] = next_disabled_pos++;
   }
 
   if (heap_samples_enabled == Qtrue) {
@@ -603,6 +609,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
   metric_values[position_for[CPU_SAMPLES_VALUE_ID]]   = values.cpu_or_wall_samples;
   metric_values[position_for[WALL_TIME_VALUE_ID]]     = values.wall_time_ns;
   metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples;
+  metric_values[position_for[ALLOC_SAMPLES_UNSCALED_VALUE_ID]] = values.alloc_samples_unscaled;
   metric_values[position_for[TIMELINE_VALUE_ID]]      = values.timeline_wall_time_ns;
 
   if (!placeholder && values.alloc_samples > 0) {

diff --git a/ext/datadog_profiling_native_extension/stack_recorder.h b/ext/datadog_profiling_native_extension/stack_recorder.h
@@ -8,6 +8,7 @@ typedef struct {
   int64_t wall_time_ns;
   uint32_t cpu_or_wall_samples;
   uint32_t alloc_samples;
+  uint32_t alloc_samples_unscaled;
   int64_t timeline_wall_time_ns;
 } sample_values;
 

diff --git a/spec/datadog/profiling/collectors/thread_context_spec.rb b/spec/datadog/profiling/collectors/thread_context_spec.rb
@@ -1048,6 +1048,12 @@ def self.otel_sdk_available?
       expect(single_sample.values).to include(:'alloc-samples' => 123)
     end
 
+    it 'tags the sample with the unscaled weight' do
+      sample_allocation(weight: 123)
+
+      expect(single_sample.values).to include('alloc-samples-unscaled': 1)
+    end
+
     it 'includes the thread names, if available' do
       thread_with_name = Thread.new do
         Thread.current.name = 'thread_with_name'

diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb
@@ -137,14 +137,17 @@ def slot_two_mutex_locked?
             'cpu-samples' => 'count',
             'wall-time' => 'nanoseconds',
             'alloc-samples' => 'count',
+            'alloc-samples-unscaled' => 'count',
             'heap-live-samples' => 'count',
             'heap-live-size' => 'bytes',
             'timeline' => 'nanoseconds',
           }
         end
 
-        def profile_types_without(type)
-          all_profile_types.dup.tap { |it| it.delete(type) { raise 'Missing key' } }
+        def profile_types_without(*types)
+          result = all_profile_types.dup
+          types.each { |type| result.delete(type) { raise 'Missing key' } }
+          result
         end
 
         context 'when all profile types are enabled' do
@@ -165,7 +168,8 @@ def profile_types_without(type)
           let(:alloc_samples_enabled) { false }
 
           it 'returns a pprof without the alloc-samples type' do
-            expect(sample_types_from(decoded_profile)).to eq(profile_types_without('alloc-samples'))
+            expect(sample_types_from(decoded_profile))
+              .to eq(profile_types_without('alloc-samples', 'alloc-samples-unscaled'))
           end
         end
 
@@ -243,7 +247,14 @@ def sample_types_from(decoded_profile)
 
     context 'when profile has a sample' do
       let(:metric_values) do
-        { 'cpu-time' => 123, 'cpu-samples' => 456, 'wall-time' => 789, 'alloc-samples' => 4242, 'timeline' => 1111 }
+        {
+          'cpu-time' => 123,
+          'cpu-samples' => 456,
+          'wall-time' => 789,
+          'alloc-samples' => 4242,
+          'alloc-samples-unscaled' => 2222,
+          'timeline' => 1111,
+        }
       end
       let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a }
 
@@ -258,20 +269,22 @@ def sample_types_from(decoded_profile)
       it 'encodes the sample with the metrics provided' do
         expect(samples.first.values)
           .to eq(
-            :'cpu-time' => 123,
-            :'cpu-samples' => 456,
-            :'wall-time' => 789,
-            :'alloc-samples' => 4242,
-            :timeline => 1111,
+            'cpu-time': 123,
+            'cpu-samples': 456,
+            'wall-time': 789,
+            'alloc-samples': 4242,
+            'alloc-samples-unscaled': 2222,
+            timeline: 1111,
           )
       end
 
       context 'when disabling an optional profile sample type' do
         let(:cpu_time_enabled) { false }
 
         it 'encodes the sample with the metrics provided, ignoring the disabled ones' do
-          expect(samples.first.values)
-            .to eq(:'cpu-samples' => 456, :'wall-time' => 789, :'alloc-samples' => 4242, :timeline => 1111)
+          expect(samples.first.values).to eq(
+            'cpu-samples': 456, 'wall-time': 789, 'alloc-samples': 4242, 'alloc-samples-unscaled': 2222, timeline: 1111
+          )
         end
       end
 
@@ -526,7 +539,7 @@ def sample_allocation(obj)
           # We use the same metric_values in all sample calls in before. So we'd expect
           # the summed values to match `@num_allocations * metric_values[profile-type]`
           # for each profile-type there in.
-          expected_summed_values = { :'heap-live-samples' => 0, :'heap-live-size' => 0, }
+          expected_summed_values = { 'heap-live-samples': 0, 'heap-live-size': 0, 'alloc-samples-unscaled': 0 }
           metric_values.each_pair do |k, v|
             expected_summed_values[k.to_sym] = v * @num_allocations
           end