diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.c b/ext/ddtrace_profiling_native_extension/collectors_stack.c
index d4d8061901d..7611f13e41e 100644
--- a/ext/ddtrace_profiling_native_extension/collectors_stack.c
+++ b/ext/ddtrace_profiling_native_extension/collectors_stack.c
@@ -40,7 +40,7 @@ static void record_placeholder_stack_in_native_code(
   sampling_buffer* buffer,
   VALUE recorder_instance,
   sample_values values,
-  ddog_prof_Slice_Label labels,
+  sample_labels labels,
   sampling_buffer *record_buffer,
   int extra_frames_in_record_buffer
 );
@@ -49,7 +49,7 @@ static void sample_thread_internal(
   sampling_buffer* buffer,
   VALUE recorder_instance,
   sample_values values,
-  ddog_prof_Slice_Label labels,
+  sample_labels labels,
   sampling_buffer *record_buffer,
   int extra_frames_in_record_buffer
 );
@@ -85,13 +85,14 @@ static VALUE _native_sample(
   VALUE zero = INT2NUM(0);
   sample_values values = {
     .cpu_time_ns   = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-time"),      zero)),
-    .cpu_samples   = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-samples"),   zero)),
+    .cpu_or_wall_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-samples"), zero)),
     .wall_time_ns  = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("wall-time"),     zero)),
     .alloc_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("alloc-samples"), zero)),
   };
 
   long labels_count = RARRAY_LEN(labels_array) + RARRAY_LEN(numeric_labels_array);
   ddog_prof_Label labels[labels_count];
+  ddog_prof_Label *state_label = NULL;
 
   for (int i = 0; i < RARRAY_LEN(labels_array); i++) {
     VALUE key_str_pair = rb_ary_entry(labels_array, i);
@@ -100,6 +101,10 @@ static VALUE _native_sample(
       .key = char_slice_from_ruby_string(rb_ary_entry(key_str_pair, 0)),
       .str = char_slice_from_ruby_string(rb_ary_entry(key_str_pair, 1))
     };
+
+    if (rb_str_equal(rb_ary_entry(key_str_pair, 0), rb_str_new_cstr("state"))) {
+      state_label = &labels[i];
+    }
   }
   for (int i = 0; i < RARRAY_LEN(numeric_labels_array); i++) {
     VALUE key_str_pair = rb_ary_entry(numeric_labels_array, i);
@@ -115,12 +120,14 @@ static VALUE _native_sample(
 
   sampling_buffer *buffer = sampling_buffer_new(max_frames_requested);
 
+  ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = labels_count};
+
   sample_thread(
     thread,
     buffer,
     recorder_instance,
     values,
-    (ddog_prof_Slice_Label) {.ptr = labels, .len = labels_count},
+    (sample_labels) {.labels = slice_labels, .state_label = state_label},
     RTEST(in_gc) ? SAMPLE_IN_GC : SAMPLE_REGULAR
   );
 
@@ -134,7 +141,7 @@ void sample_thread(
   sampling_buffer* buffer,
   VALUE recorder_instance,
   sample_values values,
-  ddog_prof_Slice_Label labels,
+  sample_labels labels,
   sample_type type
 ) {
   // Samples thread into recorder
@@ -171,6 +178,8 @@ void sample_thread(
   rb_raise(rb_eArgError, "Unexpected value for sample_type: %d", type);
 }
 
+#define CHARSLICE_EQUALS(must_be_a_literal, charslice) (strlen("" must_be_a_literal) == charslice.len && strncmp(must_be_a_literal, charslice.ptr, charslice.len) == 0)
+
 // Idea: Should we release the global vm lock (GVL) after we get the data from `rb_profile_frames`? That way other Ruby threads
 // could continue making progress while the sample was ingested into the profile.
 //
@@ -195,7 +204,7 @@ static void sample_thread_internal(
   sampling_buffer* buffer,
   VALUE recorder_instance,
   sample_values values,
-  ddog_prof_Slice_Label labels,
+  sample_labels labels,
   sampling_buffer *record_buffer,
   int extra_frames_in_record_buffer
 ) {
@@ -228,6 +237,15 @@ static void sample_thread_internal(
   VALUE last_ruby_frame = Qnil;
   int last_ruby_line = 0;
 
+  ddog_prof_Label *state_label = labels.state_label;
+  bool cpu_or_wall_sample = values.cpu_or_wall_samples > 0;
+  bool has_cpu_time = cpu_or_wall_sample && values.cpu_time_ns > 0;
+  bool only_wall_time = cpu_or_wall_sample && values.cpu_time_ns == 0 && values.wall_time_ns > 0;
+
+  if (cpu_or_wall_sample && state_label == NULL) rb_raise(rb_eRuntimeError, "BUG: Unexpected missing state_label");
+
+  if (has_cpu_time) state_label->str = DDOG_CHARSLICE_C("had cpu");
+
   for (int i = captured_frames - 1; i >= 0; i--) {
     VALUE name, filename;
     int line;
@@ -248,10 +266,55 @@ static void sample_thread_internal(
     name = NIL_P(name) ? missing_string : name;
     filename = NIL_P(filename) ? missing_string : filename;
 
+    ddog_CharSlice name_slice = char_slice_from_ruby_string(name);
+    ddog_CharSlice filename_slice = char_slice_from_ruby_string(filename);
+
+    bool top_of_the_stack = i == 0;
+
+    // When there's only wall-time in a sample, this means that the thread was not active in the sampled period.
+    //
+    // We try to categorize what it was doing based on what we observe at the top of the stack. This is a very rough
+    // approximation, and in the future we hope to replace this with a more accurate approach (such as using the
+    // GVL instrumentation API.)
+    if (top_of_the_stack && only_wall_time) {
+      if (!buffer->is_ruby_frame[i]) {
+        // We know that known versions of Ruby implement these using native code; thus if we find a method with the
+        // same name that is not native code, we ignore it, as it's probably a user method that coincidentally
+        // has the same name. Thus, even though "matching just by method name" is kinda weak,
+        // "matching by method name" + is native code seems actually to be good enough for a lot of cases.
+
+        if (CHARSLICE_EQUALS("sleep", name_slice)) { // Expected to be Kernel.sleep
+          state_label->str  = DDOG_CHARSLICE_C("sleeping");
+        } else if (CHARSLICE_EQUALS("select", name_slice)) { // Expected to be Kernel.select
+          state_label->str  = DDOG_CHARSLICE_C("waiting");
+        } else if (
+            CHARSLICE_EQUALS("synchronize", name_slice) || // Expected to be Monitor/Mutex#synchronize
+            CHARSLICE_EQUALS("lock", name_slice) ||        // Expected to be Mutex#lock
+            CHARSLICE_EQUALS("join", name_slice)           // Expected to be Thread#join
+        ) {
+          state_label->str  = DDOG_CHARSLICE_C("blocked");
+        } else if (CHARSLICE_EQUALS("wait_readable", name_slice)) { // Expected to be IO#wait_readable
+          state_label->str  = DDOG_CHARSLICE_C("network");
+        }
+        #ifdef NO_PRIMITIVE_POP // Ruby < 3.2
+          else if (CHARSLICE_EQUALS("pop", name_slice)) { // Expected to be Queue/SizedQueue#pop
+            state_label->str  = DDOG_CHARSLICE_C("waiting");
+          }
+        #endif
+      } else {
+        #ifndef NO_PRIMITIVE_POP // Ruby >= 3.2
+          // Unlike the above, Ruby actually treats this one specially and gives it a nice file name we can match on!
+          if (CHARSLICE_EQUALS("pop", name_slice) && CHARSLICE_EQUALS("<internal:thread_sync>", filename_slice)) { // Expected to be Queue/SizedQueue#pop
+            state_label->str  = DDOG_CHARSLICE_C("waiting");
+          }
+        #endif
+      }
+    }
+
     buffer->locations[i] = (ddog_prof_Location) {
       .function = (ddog_prof_Function) {
-        .name = char_slice_from_ruby_string(name),
-        .filename = char_slice_from_ruby_string(filename)
+        .name = name_slice,
+        .filename = filename_slice,
       },
       .line = line,
     };
@@ -320,7 +383,7 @@ static void record_placeholder_stack_in_native_code(
   sampling_buffer* buffer,
   VALUE recorder_instance,
   sample_values values,
-  ddog_prof_Slice_Label labels,
+  sample_labels labels,
   sampling_buffer *record_buffer,
   int extra_frames_in_record_buffer
 ) {
diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.h b/ext/ddtrace_profiling_native_extension/collectors_stack.h
index ee5f9a0b1c1..e4175ae35e8 100644
--- a/ext/ddtrace_profiling_native_extension/collectors_stack.h
+++ b/ext/ddtrace_profiling_native_extension/collectors_stack.h
@@ -13,7 +13,7 @@ void sample_thread(
   sampling_buffer* buffer,
   VALUE recorder_instance,
   sample_values values,
-  ddog_prof_Slice_Label labels,
+  sample_labels labels,
   sample_type type
 );
 sampling_buffer *sampling_buffer_new(unsigned int max_frames);
diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c
index 4938c611d05..7f52653b8eb 100644
--- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c
+++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c
@@ -471,7 +471,7 @@ void update_metrics_and_sample(
     thread_being_sampled,
     stack_from_thread,
     thread_context,
-    (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
+    (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
     SAMPLE_REGULAR,
     current_monotonic_wall_time_ns,
     NULL,
@@ -616,7 +616,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
       /* thread: */  thread,
       /* stack_from_thread: */ thread,
       thread_context,
-      (sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns},
+      (sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns},
       SAMPLE_IN_GC,
       INVALID_TIME, // For now we're not collecting timestamps for these events
       NULL,
@@ -662,6 +662,7 @@ static void trigger_sample_for_thread(
     1 + // profiler overhead
     1 + // end_timestamp_ns
     2 + // ruby vm type and allocation class
+    1 + // state (only set for cpu/wall-time samples)
     2;  // local root span id and span id
   ddog_prof_Label labels[max_label_count];
   int label_pos = 0;
@@ -745,6 +746,20 @@ static void trigger_sample_for_thread(
     };
   }
 
+  // This label is handled specially:
+  // 1. It's only set for cpu/wall-time samples
+  // 2. We set it here to its default state of "unknown", but the `Collectors::Stack` may choose to override it with
+  //    something more interesting.
+  ddog_prof_Label *state_label = NULL;
+  if (values.cpu_or_wall_samples > 0) {
+    state_label = &labels[label_pos++];
+    *state_label = (ddog_prof_Label) {
+      .key = DDOG_CHARSLICE_C("state"),
+      .str = DDOG_CHARSLICE_C("unknown"),
+      .num = 0, // This shouldn't be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
+    };
+  }
+
   // The number of times `label_pos++` shows up in this function needs to match `max_label_count`. To avoid "oops I
   // forgot to update max_label_count" in the future, we've also added this validation.
   // @ivoanjo: I wonder if C compilers are smart enough to statically prove this check never triggers unless someone
@@ -753,12 +768,14 @@ static void trigger_sample_for_thread(
     rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
   }
 
+  ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos};
+
   sample_thread(
     stack_from_thread,
     state->sampling_buffer,
     state->recorder_instance,
     values,
-    (ddog_prof_Slice_Label) {.ptr = labels, .len = label_pos},
+    (sample_labels) {.labels = slice_labels, .state_label = state_label},
     type
   );
 }
diff --git a/ext/ddtrace_profiling_native_extension/extconf.rb b/ext/ddtrace_profiling_native_extension/extconf.rb
index a7cb23e9a94..933fc87d157 100644
--- a/ext/ddtrace_profiling_native_extension/extconf.rb
+++ b/ext/ddtrace_profiling_native_extension/extconf.rb
@@ -146,6 +146,9 @@ def add_compiler_flag(flag)
 # On older Rubies, the first_lineno inside a location was a VALUE and not a int (https://github.com/ruby/ruby/pull/6430)
 $defs << '-DNO_INT_FIRST_LINENO' if RUBY_VERSION < '3.2'
 
+# On older Rubies, "pop" was not a primitive operation
+$defs << '-DNO_PRIMITIVE_POP' if RUBY_VERSION < '3.2'
+
 # On older Rubies, there was no tid member in the internal thread structure
 $defs << '-DNO_THREAD_TID' if RUBY_VERSION < '3.1'
 
diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c
index 60d71146ccb..32f5340dad9 100644
--- a/ext/ddtrace_profiling_native_extension/stack_recorder.c
+++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c
@@ -399,7 +399,7 @@ static VALUE ruby_time_from(ddog_Timespec ddprof_time) {
   return rb_time_timespec_new(&time, utc);
 }
 
-void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, ddog_prof_Slice_Label labels) {
+void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels) {
   struct stack_recorder_state *state;
   TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
 
@@ -413,7 +413,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
   uint8_t *position_for = state->position_for;
 
   metric_values[position_for[CPU_TIME_VALUE_ID]]      = values.cpu_time_ns;
-  metric_values[position_for[CPU_SAMPLES_VALUE_ID]]   = values.cpu_samples;
+  metric_values[position_for[CPU_SAMPLES_VALUE_ID]]   = values.cpu_or_wall_samples;
   metric_values[position_for[WALL_TIME_VALUE_ID]]     = values.wall_time_ns;
   metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples;
 
@@ -422,7 +422,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
     (ddog_prof_Sample) {
       .locations = locations,
       .values = (ddog_Slice_I64) {.ptr = metric_values, .len = state->enabled_values_count},
-      .labels = labels
+      .labels = labels.labels
     }
   );
 
diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.h b/ext/ddtrace_profiling_native_extension/stack_recorder.h
index 64a95c43469..fc1602c383d 100644
--- a/ext/ddtrace_profiling_native_extension/stack_recorder.h
+++ b/ext/ddtrace_profiling_native_extension/stack_recorder.h
@@ -2,13 +2,21 @@
 
 #include <datadog/profiling.h>
 
-typedef struct sample_values {
+typedef struct {
   int64_t cpu_time_ns;
   int64_t wall_time_ns;
-  uint32_t cpu_samples;
+  uint32_t cpu_or_wall_samples;
   uint32_t alloc_samples;
 } sample_values;
 
-void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, ddog_prof_Slice_Label labels);
+typedef struct sample_labels {
+  ddog_prof_Slice_Label labels;
+
+  // This is used to allow the `Collectors::Stack` to modify the existing label, if any. This MUST be NULL or point
+  // somewhere inside the labels slice above.
+  ddog_prof_Label *state_label;
+} sample_labels;
+
+void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels);
 void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint);
 VALUE enforce_recorder_instance(VALUE object);
diff --git a/spec/datadog/profiling/collectors/stack_spec.rb b/spec/datadog/profiling/collectors/stack_spec.rb
index 9c681cdc08a..a55be303849 100644
--- a/spec/datadog/profiling/collectors/stack_spec.rb
+++ b/spec/datadog/profiling/collectors/stack_spec.rb
@@ -11,7 +11,7 @@
   subject(:collectors_stack) { described_class.new }
 
   let(:metric_values) { { 'cpu-time' => 123, 'cpu-samples' => 456, 'wall-time' => 789 } }
-  let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b' }.to_a }
+  let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a }
 
   let(:raw_reference_stack) { stacks.fetch(:reference) }
   let(:reference_stack) { convert_reference_stack(raw_reference_stack) }
@@ -219,6 +219,203 @@ def call_sleep
         expect(reference_stack.first.base_label).to eq 'sleep'
       end
     end
+
+    describe 'approximate thread state categorization based on current stack' do
+      describe 'state label validation' do
+        let(:do_in_background_thread) do
+          proc do |ready_queue|
+            ready_queue << true
+            sleep
+          end
+        end
+        let(:labels) { [] }
+
+        context 'when taking a cpu/wall-time sample and the state label is missing' do
+          let(:metric_values) { { 'cpu-samples' => 1 } }
+
+          it 'raises an exception' do
+            expect { gathered_stack }.to raise_error(RuntimeError, /BUG: Unexpected missing state_label/)
+          end
+        end
+
+        context 'when taking a non-cpu/wall-time sample and the state label is missing' do
+          let(:metric_values) { { 'cpu-samples' => 0 } }
+
+          it 'does not raise an exception' do
+            expect(gathered_stack).to be_truthy
+          end
+        end
+      end
+
+      context 'when sampling a thread with cpu-time' do
+        let(:do_in_background_thread) do
+          proc do |ready_queue|
+            ready_queue << true
+            sleep
+          end
+        end
+        let(:metric_values) { { 'cpu-time' => 123, 'cpu-samples' => 456, 'wall-time' => 789 } }
+
+        it do
+          expect(sample_and_decode(background_thread, :labels)).to include(state: 'had cpu')
+        end
+      end
+
+      context 'when sampling a sleeping thread with no cpu-time' do
+        let(:do_in_background_thread) do
+          proc do |ready_queue|
+            ready_queue << true
+            sleep
+          end
+        end
+        let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } }
+
+        it do
+          expect(sample_and_decode(background_thread, :labels)).to include(state: 'sleeping')
+        end
+      end
+
+      context 'when sampling a thread waiting on a select' do
+        let(:server_socket) { TCPServer.new(6006) }
+        let(:background_thread) { Thread.new(ready_queue, server_socket, &do_in_background_thread) }
+        let(:do_in_background_thread) do
+          proc do |ready_queue, server_socket|
+            ready_queue << true
+            IO.select([server_socket])
+          end
+        end
+        let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } }
+
+        after do
+          background_thread.kill
+          background_thread.join
+          server_socket.close
+        end
+
+        it do
+          expect(sample_and_decode(background_thread, :labels)).to include(state: 'waiting')
+        end
+      end
+
+      context 'when sampling a thread blocked on Thread#join' do
+        let(:another_thread) { Thread.new { sleep } }
+        let(:background_thread) { Thread.new(ready_queue, another_thread, &do_in_background_thread) }
+        let(:do_in_background_thread) do
+          proc do |ready_queue, another_thread|
+            ready_queue << true
+            another_thread.join
+          end
+        end
+        let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } }
+
+        after do
+          another_thread.kill
+          another_thread.join
+        end
+
+        it do
+          expect(sample_and_decode(background_thread, :labels)).to include(state: 'blocked')
+        end
+      end
+
+      context 'when sampling a thread blocked on Mutex#synchronize' do
+        let(:locked_mutex) { Mutex.new.tap(&:lock) }
+        let(:background_thread) { Thread.new(ready_queue, locked_mutex, &do_in_background_thread) }
+        let(:do_in_background_thread) do
+          proc do |ready_queue, locked_mutex|
+            ready_queue << true
+            locked_mutex.synchronize {}
+          end
+        end
+        let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } }
+
+        it do
+          expect(sample_and_decode(background_thread, :labels)).to include(state: 'blocked')
+        end
+      end
+
+      context 'when sampling a thread blocked on Mutex#lock' do
+        let(:locked_mutex) { Mutex.new.tap(&:lock) }
+        let(:background_thread) { Thread.new(ready_queue, locked_mutex, &do_in_background_thread) }
+        let(:do_in_background_thread) do
+          proc do |ready_queue, locked_mutex|
+            ready_queue << true
+            locked_mutex.lock
+          end
+        end
+        let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } }
+
+        it do
+          expect(sample_and_decode(background_thread, :labels)).to include(state: 'blocked')
+        end
+      end
+
+      context 'when sampling a thread blocked on Monitor#synchronize' do
+        let(:locked_monitor) { Monitor.new.tap(&:enter) }
+        let(:background_thread) { Thread.new(ready_queue, locked_monitor, &do_in_background_thread) }
+        let(:do_in_background_thread) do
+          proc do |ready_queue, locked_monitor|
+            ready_queue << true
+            locked_monitor.synchronize {}
+          end
+        end
+        let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } }
+
+        it do
+          expect(sample_and_decode(background_thread, :labels)).to include(state: 'blocked')
+        end
+      end
+
+      context 'when sampling a thread waiting on a IO object' do
+        let(:server_socket) { TCPServer.new(6006) }
+        let(:background_thread) { Thread.new(ready_queue, server_socket, &do_in_background_thread) }
+        let(:do_in_background_thread) do
+          proc do |ready_queue, server_socket|
+            ready_queue << true
+            server_socket.wait_readable
+          end
+        end
+        let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } }
+
+        after do
+          background_thread.kill
+          background_thread.join
+          server_socket.close
+        end
+
+        it do
+          expect(sample_and_decode(background_thread, :labels)).to include(state: 'network')
+        end
+      end
+
+      context 'when sampling a thread waiting on a Queue object' do
+        let(:do_in_background_thread) do
+          proc do |ready_queue|
+            ready_queue << true
+            Queue.new.pop
+          end
+        end
+        let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } }
+
+        it do
+          expect(sample_and_decode(background_thread, :labels)).to include(state: 'waiting')
+        end
+      end
+
+      context 'when sampling a thread in an unknown state' do
+        let(:do_in_background_thread) do
+          proc do |ready_queue|
+            ready_queue << true
+            Thread.stop
+          end
+        end
+        let(:metric_values) { { 'cpu-time' => 0, 'cpu-samples' => 1, 'wall-time' => 1 } }
+
+        it do
+          expect(sample_and_decode(background_thread, :labels)).to include(state: 'unknown')
+        end
+      end
+    end
   end
 
   context 'when sampling a thread with a stack that is deeper than the configured max_frames' do
@@ -415,13 +612,13 @@ def convert_reference_stack(raw_reference_stack)
     end
   end
 
-  def sample_and_decode(thread, max_frames: 400, recorder: build_stack_recorder, in_gc: false)
+  def sample_and_decode(thread, data = :locations, max_frames: 400, recorder: build_stack_recorder, in_gc: false)
     sample(thread, recorder, metric_values, labels, max_frames: max_frames, in_gc: in_gc)
 
     samples = samples_from_pprof(recorder.serialize!)
 
     expect(samples.size).to be 1
-    samples.first.locations
+    samples.first.public_send(data)
   end
 end
 
diff --git a/spec/datadog/profiling/collectors/thread_context_spec.rb b/spec/datadog/profiling/collectors/thread_context_spec.rb
index b2aa16cf490..37a57348355 100644
--- a/spec/datadog/profiling/collectors/thread_context_spec.rb
+++ b/spec/datadog/profiling/collectors/thread_context_spec.rb
@@ -108,7 +108,10 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current
       sample
 
       expect(Thread.list).to eq(all_threads), 'Threads finished during this spec, causing flakiness!'
-      expect(samples.size).to be all_threads.size
+
+      seen_threads = samples.map(&:labels).map { |it| it.fetch(:'thread id') }.uniq
+
+      expect(seen_threads.size).to be all_threads.size
     end
 
     it 'tags the samples with the object ids of the Threads they belong to' do
@@ -167,20 +170,17 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current
         per_thread_context.fetch(t1).fetch(:wall_time_at_previous_sample_ns)
 
       t1_samples = samples_for_thread(samples, t1)
-      wall_time = t1_samples.first.values.fetch(:'wall-time')
-
-      expect(t1_samples.size)
-        .to be(1), "Expected thread t1 to always have same stack trace (because it's sleeping), got #{t1_samples.inspect}"
 
+      wall_time = t1_samples.map(&:values).map { |it| it.fetch(:'wall-time') }.reduce(:+)
       expect(wall_time).to be(wall_time_at_second_sample - wall_time_at_first_sample)
     end
 
     it 'tags samples with how many times they were seen' do
       5.times { sample }
 
-      t1_sample = samples_for_thread(samples, t1).first
+      t1_samples = samples_for_thread(samples, t1)
 
-      expect(t1_sample.values).to include(:'cpu-samples' => 5)
+      expect(t1_samples.map(&:values).map { |it| it.fetch(:'cpu-samples') }.reduce(:+)).to eq 5
     end
 
     [:before, :after].each do |on_gc_finish_order|
@@ -239,7 +239,7 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current
         it 'sets the cpu-time on every sample to zero' do
           5.times { sample }
 
-          expect(samples).to all include(values: include(:'cpu-time' => 0))
+          expect(samples).to all have_attributes(values: include(:'cpu-time' => 0))
         end
       end
 
@@ -499,9 +499,9 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current
 
                 t1_samples = samples_for_thread(samples, t1)
 
-                expect(t1_samples).to have(1).item
-                expect(t1_samples.first.labels).to include(:'trace endpoint' => 'changed_after_first_sample')
-                expect(t1_samples.first.values).to include(:'cpu-samples' => 2)
+                expect(t1_samples)
+                  .to all have_attributes(labels: include(:'trace endpoint' => 'changed_after_first_sample'))
+                expect(t1_samples.map(&:values).map { |it| it.fetch(:'cpu-samples') }.reduce(:+)).to eq 2
               end
 
               context 'when the resource is changed multiple times' do
@@ -514,9 +514,9 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current
 
                   t1_samples = samples_for_thread(samples, t1)
 
-                  expect(t1_samples).to have(1).item
-                  expect(t1_samples.first.labels).to include(:'trace endpoint' => 'changed_after_second_sample')
-                  expect(t1_samples.first.values).to include(:'cpu-samples' => 3)
+                  expect(t1_samples)
+                    .to all have_attributes(labels: include(:'trace endpoint' => 'changed_after_second_sample'))
+                  expect(t1_samples.map(&:values).map { |it| it.fetch(:'cpu-samples') }.reduce(:+)).to eq 3
                 end
               end
             end
diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb
index baeafbb50d1..9ed86ae1223 100644
--- a/spec/datadog/profiling/stack_recorder_spec.rb
+++ b/spec/datadog/profiling/stack_recorder_spec.rb
@@ -186,7 +186,7 @@ def sample_types_from(decoded_profile)
 
     context 'when profile has a sample' do
       let(:metric_values) { { 'cpu-time' => 123, 'cpu-samples' => 456, 'wall-time' => 789, 'alloc-samples' => 4242 } }
-      let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b' }.to_a }
+      let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a }
 
       let(:samples) { samples_from_pprof(encoded_pprof) }
 
@@ -211,7 +211,10 @@ def sample_types_from(decoded_profile)
       end
 
       it 'encodes the sample with the labels provided' do
-        expect(samples.first.labels).to eq(label_a: 'value_a', label_b: 'value_b')
+        labels = samples.first.labels
+        labels.delete(:state) # We test this separately!
+
+        expect(labels).to eq(label_a: 'value_a', label_b: 'value_b')
       end
 
       it 'encodes a single empty mapping' do
@@ -240,7 +243,13 @@ def sample_types_from(decoded_profile)
           # We're using `_native_sample` here to test the behavior of `record_sample` in `stack_recorder.c`
           expect do
             Datadog::Profiling::Collectors::Stack::Testing._native_sample(
-              Thread.current, stack_recorder, metric_values, { 'local root span id' => 'incorrect' }.to_a, [], 400, false
+              Thread.current,
+              stack_recorder,
+              metric_values,
+              { 'local root span id' => 'incorrect', 'state' => 'unknown' }.to_a,
+              [],
+              400,
+              false,
             )
           end.to raise_error(ArgumentError)
         end
@@ -256,8 +265,9 @@ def sample_types_from(decoded_profile)
         local_root_span_id_without_endpoint = { 'local root span id' => 456 }
 
         sample = proc do |numeric_labels = {}|
-          Datadog::Profiling::Collectors::Stack::Testing
-            ._native_sample(Thread.current, stack_recorder, metric_values, [], numeric_labels.to_a, 400, false)
+          Datadog::Profiling::Collectors::Stack::Testing._native_sample(
+            Thread.current, stack_recorder, metric_values, { 'state' => 'unknown' }.to_a, numeric_labels.to_a, 400, false
+          )
         end
 
         sample.call
@@ -270,16 +280,23 @@ def sample_types_from(decoded_profile)
         sample.call(local_root_span_id_without_endpoint)
         sample.call(local_root_span_id_with_endpoint)
 
-        expect(samples).to have(6).items
+        expect(samples).to have(6).items # Samples are guaranteed unique since each sample call is on a different line
+
+        labels_without_state = proc { |labels| labels.reject { |key| key == :state } }
 
         # Other samples have not been changed
-        expect(samples.select { |it| it[:labels].empty? }).to have(2).items
-        expect(samples.select { |it| it[:labels] == { :'local root span id' => 456 } }).to have(2).items
+        expect(samples.select { |it| labels_without_state.call(it[:labels]).empty? }).to have(2).items
+        expect(
+          samples.select do |it|
+            labels_without_state.call(it[:labels]) == { :'local root span id' => 456 }
+          end
+        ).to have(2).items
 
         # Matching samples taken before and after recording the endpoint have been changed
         expect(
           samples.select do |it|
-            it[:labels] == { :'local root span id' => 123, :'trace endpoint' => 'recorded-endpoint' }
+            labels_without_state.call(it[:labels]) ==
+              { :'local root span id' => 123, :'trace endpoint' => 'recorded-endpoint' }
           end
         ).to have(2).items
       end
@@ -382,7 +399,7 @@ def sample_types_from(decoded_profile)
 
     context 'when profile has a sample' do
       let(:metric_values) { { 'cpu-time' => 123, 'cpu-samples' => 456, 'wall-time' => 789 } }
-      let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b' }.to_a }
+      let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a }
 
       it 'makes the next calls to serialize return no data' do
         # Add some data