From 78d7d27c78167919acaf4bf976aed917243307ef Mon Sep 17 00:00:00 2001
From: Ivo Anjo <ivo.anjo@datadoghq.com>
Date: Mon, 8 Jul 2024 17:56:52 +0100
Subject: [PATCH 1/2] Bootstrap new profile value type for unscaled counts

---
 .../collectors_stack.c                        |  1 +
 .../stack_recorder.c                          | 19 +++++++---
 .../stack_recorder.h                          |  1 +
 spec/datadog/profiling/stack_recorder_spec.rb | 37 +++++++++++++------
 4 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/ext/datadog_profiling_native_extension/collectors_stack.c b/ext/datadog_profiling_native_extension/collectors_stack.c
index 87549050e4e..28d7988cc61 100644
--- a/ext/datadog_profiling_native_extension/collectors_stack.c
+++ b/ext/datadog_profiling_native_extension/collectors_stack.c
@@ -70,6 +70,7 @@ static VALUE _native_sample(
     .cpu_or_wall_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-samples"), zero)),
     .wall_time_ns  = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("wall-time"),     zero)),
     .alloc_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("alloc-samples"), zero)),
+    .alloc_samples_unscaled = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("alloc-samples-unscaled"), zero)),
     .timeline_wall_time_ns = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("timeline"), zero)),
   };
 
diff --git a/ext/datadog_profiling_native_extension/stack_recorder.c b/ext/datadog_profiling_native_extension/stack_recorder.c
index 52322e95716..d16471ce496 100644
--- a/ext/datadog_profiling_native_extension/stack_recorder.c
+++ b/ext/datadog_profiling_native_extension/stack_recorder.c
@@ -151,21 +151,23 @@ static VALUE error_symbol = Qnil; // :error in Ruby
 #define WALL_TIME_VALUE_ID 2
 #define ALLOC_SAMPLES_VALUE     {.type_ = VALUE_STRING("alloc-samples"),     .unit = VALUE_STRING("count")}
 #define ALLOC_SAMPLES_VALUE_ID 3
+#define ALLOC_SAMPLES_UNSCALED_VALUE {.type_ = VALUE_STRING("alloc-samples-unscaled"), .unit = VALUE_STRING("count")}
+#define ALLOC_SAMPLES_UNSCALED_VALUE_ID 4
 #define HEAP_SAMPLES_VALUE      {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")}
-#define HEAP_SAMPLES_VALUE_ID 4
+#define HEAP_SAMPLES_VALUE_ID 5
 #define HEAP_SIZE_VALUE         {.type_ = VALUE_STRING("heap-live-size"),    .unit = VALUE_STRING("bytes")}
-#define HEAP_SIZE_VALUE_ID 5
+#define HEAP_SIZE_VALUE_ID 6
 #define TIMELINE_VALUE          {.type_ = VALUE_STRING("timeline"),          .unit = VALUE_STRING("nanoseconds")}
-#define TIMELINE_VALUE_ID 6
+#define TIMELINE_VALUE_ID 7
 
 static const ddog_prof_ValueType all_value_types[] =
-  {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, HEAP_SIZE_VALUE, TIMELINE_VALUE};
+  {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, ALLOC_SAMPLES_UNSCALED_VALUE, HEAP_SAMPLES_VALUE, HEAP_SIZE_VALUE, TIMELINE_VALUE};
 
 // This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it
 // occupies on the all_value_types array.
 // E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types.
 static const uint8_t all_value_types_positions[] =
-  {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, HEAP_SIZE_VALUE_ID, TIMELINE_VALUE_ID};
+  {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, ALLOC_SAMPLES_UNSCALED_VALUE_ID, HEAP_SAMPLES_VALUE_ID, HEAP_SIZE_VALUE_ID, TIMELINE_VALUE_ID};
 
 #define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType))
 
@@ -429,7 +431,7 @@ static VALUE _native_initialize(
 
   uint8_t requested_values_count = ALL_VALUE_TYPES_COUNT -
     (cpu_time_enabled == Qtrue ? 0 : 1) -
-    (alloc_samples_enabled == Qtrue? 0 : 1) -
+    (alloc_samples_enabled == Qtrue? 0 : 2) -
     (heap_samples_enabled == Qtrue ? 0 : 1) -
     (heap_size_enabled == Qtrue ? 0 : 1) -
     (timeline_enabled == Qtrue ? 0 : 1);
@@ -464,8 +466,12 @@ static VALUE _native_initialize(
   if (alloc_samples_enabled == Qtrue) {
     enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) ALLOC_SAMPLES_VALUE;
     state->position_for[ALLOC_SAMPLES_VALUE_ID] = next_enabled_pos++;
+
+    enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) ALLOC_SAMPLES_UNSCALED_VALUE;
+    state->position_for[ALLOC_SAMPLES_UNSCALED_VALUE_ID] = next_enabled_pos++;
   } else {
     state->position_for[ALLOC_SAMPLES_VALUE_ID] = next_disabled_pos++;
+    state->position_for[ALLOC_SAMPLES_UNSCALED_VALUE_ID] = next_disabled_pos++;
   }
 
   if (heap_samples_enabled == Qtrue) {
@@ -603,6 +609,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
   metric_values[position_for[CPU_SAMPLES_VALUE_ID]]   = values.cpu_or_wall_samples;
   metric_values[position_for[WALL_TIME_VALUE_ID]]     = values.wall_time_ns;
   metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples;
+  metric_values[position_for[ALLOC_SAMPLES_UNSCALED_VALUE_ID]] = values.alloc_samples_unscaled;
   metric_values[position_for[TIMELINE_VALUE_ID]]      = values.timeline_wall_time_ns;
 
   if (!placeholder && values.alloc_samples > 0) {
diff --git a/ext/datadog_profiling_native_extension/stack_recorder.h b/ext/datadog_profiling_native_extension/stack_recorder.h
index 949b737ca71..3908bdb0e7b 100644
--- a/ext/datadog_profiling_native_extension/stack_recorder.h
+++ b/ext/datadog_profiling_native_extension/stack_recorder.h
@@ -8,6 +8,7 @@ typedef struct {
   int64_t wall_time_ns;
   uint32_t cpu_or_wall_samples;
   uint32_t alloc_samples;
+  uint32_t alloc_samples_unscaled;
   int64_t timeline_wall_time_ns;
 } sample_values;
 
diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb
index c9a2efe432e..1731e0cce6d 100644
--- a/spec/datadog/profiling/stack_recorder_spec.rb
+++ b/spec/datadog/profiling/stack_recorder_spec.rb
@@ -137,14 +137,17 @@ def slot_two_mutex_locked?
             'cpu-samples' => 'count',
             'wall-time' => 'nanoseconds',
             'alloc-samples' => 'count',
+            'alloc-samples-unscaled' => 'count',
             'heap-live-samples' => 'count',
             'heap-live-size' => 'bytes',
             'timeline' => 'nanoseconds',
           }
         end
 
-        def profile_types_without(type)
-          all_profile_types.dup.tap { |it| it.delete(type) { raise 'Missing key' } }
+        def profile_types_without(*types)
+          result = all_profile_types.dup
+          types.each { |type| result.delete(type) { raise 'Missing key' } }
+          result
         end
 
         context 'when all profile types are enabled' do
@@ -165,7 +168,8 @@ def profile_types_without(type)
           let(:alloc_samples_enabled) { false }
 
           it 'returns a pprof without the alloc-samples type' do
-            expect(sample_types_from(decoded_profile)).to eq(profile_types_without('alloc-samples'))
+            expect(sample_types_from(decoded_profile))
+              .to eq(profile_types_without('alloc-samples', 'alloc-samples-unscaled'))
           end
         end
 
@@ -243,7 +247,14 @@ def sample_types_from(decoded_profile)
 
     context 'when profile has a sample' do
       let(:metric_values) do
-        { 'cpu-time' => 123, 'cpu-samples' => 456, 'wall-time' => 789, 'alloc-samples' => 4242, 'timeline' => 1111 }
+        {
+          'cpu-time' => 123,
+          'cpu-samples' => 456,
+          'wall-time' => 789,
+          'alloc-samples' => 4242,
+          'alloc-samples-unscaled' => 2222,
+          'timeline' => 1111,
+        }
       end
       let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a }
 
@@ -258,11 +269,12 @@ def sample_types_from(decoded_profile)
       it 'encodes the sample with the metrics provided' do
         expect(samples.first.values)
           .to eq(
-            :'cpu-time' => 123,
-            :'cpu-samples' => 456,
-            :'wall-time' => 789,
-            :'alloc-samples' => 4242,
-            :timeline => 1111,
+            'cpu-time': 123,
+            'cpu-samples': 456,
+            'wall-time': 789,
+            'alloc-samples': 4242,
+            'alloc-samples-unscaled': 2222,
+            timeline: 1111,
           )
       end
 
@@ -270,8 +282,9 @@ def sample_types_from(decoded_profile)
         let(:cpu_time_enabled) { false }
 
         it 'encodes the sample with the metrics provided, ignoring the disabled ones' do
-          expect(samples.first.values)
-            .to eq(:'cpu-samples' => 456, :'wall-time' => 789, :'alloc-samples' => 4242, :timeline => 1111)
+          expect(samples.first.values).to eq(
+            'cpu-samples': 456, 'wall-time': 789, 'alloc-samples': 4242, 'alloc-samples-unscaled': 2222, timeline: 1111
+          )
         end
       end
 
@@ -526,7 +539,7 @@ def sample_allocation(obj)
           # We use the same metric_values in all sample calls in before. So we'd expect
           # the summed values to match `@num_allocations * metric_values[profile-type]`
           # for each profile-type there in.
-          expected_summed_values = { :'heap-live-samples' => 0, :'heap-live-size' => 0, }
+          expected_summed_values = { 'heap-live-samples': 0, 'heap-live-size': 0, 'alloc-samples-unscaled': 0 }
           metric_values.each_pair do |k, v|
             expected_summed_values[k.to_sym] = v * @num_allocations
           end

From 87eeb0f91c475b3b957120512499aaaffaeceab8 Mon Sep 17 00:00:00 2001
From: Ivo Anjo <ivo.anjo@datadoghq.com>
Date: Tue, 9 Jul 2024 14:57:44 +0100
Subject: [PATCH 2/2] [PROF-10125] Track unscaled allocation counts in
 allocation profiler

**What does this PR do?**

This PR extends the allocation profiler to also track the unscaled
allocation counts.

Specifically, like other profile types, the allocation profiler assigns
a weight to every sample, making it "represent" all the objects that
weren't sampled.

**Motivation:**

For debugging, in corner cases, it comes in handy to know exactly how
many samples the profiler observed, and what was the impact of scaling.

As part of preparing the allocation profiler feature for GA, I'm adding
this feature so we can use it to confirm the exact sample counts
whenever needed.

Note also this is something we do for the cpu/wall-time profiler, where
we track the `cpu_or_wall_samples` as an exact count of how many samples
were taken (and again, without the weight -- which in the case of those
profilers, represents time).

**Additional Notes:**

N/A

**How to test the change?**

This change includes test coverage.
---
 .../collectors_thread_context.c                             | 2 +-
 spec/datadog/profiling/collectors/thread_context_spec.rb    | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/ext/datadog_profiling_native_extension/collectors_thread_context.c b/ext/datadog_profiling_native_extension/collectors_thread_context.c
index bb78763be68..6b675ee4d58 100644
--- a/ext/datadog_profiling_native_extension/collectors_thread_context.c
+++ b/ext/datadog_profiling_native_extension/collectors_thread_context.c
@@ -1292,7 +1292,7 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
     /* thread: */  current_thread,
     /* stack_from_thread: */ current_thread,
     get_or_create_context_for(current_thread, state),
-    (sample_values) {.alloc_samples = sample_weight},
+    (sample_values) {.alloc_samples = sample_weight, .alloc_samples_unscaled = 1},
     INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
     &ruby_vm_type,
     optional_class_name
diff --git a/spec/datadog/profiling/collectors/thread_context_spec.rb b/spec/datadog/profiling/collectors/thread_context_spec.rb
index 44de2feb97d..a6e3ccaa385 100644
--- a/spec/datadog/profiling/collectors/thread_context_spec.rb
+++ b/spec/datadog/profiling/collectors/thread_context_spec.rb
@@ -1048,6 +1048,12 @@ def self.otel_sdk_available?
       expect(single_sample.values).to include(:'alloc-samples' => 123)
     end
 
+    it 'tags the sample with the unscaled weight' do
+      sample_allocation(weight: 123)
+
+      expect(single_sample.values).to include('alloc-samples-unscaled': 1)
+    end
+
     it 'includes the thread names, if available' do
       thread_with_name = Thread.new do
         Thread.current.name = 'thread_with_name'