Skip to content

[PROF-10201] Disable profiler allocation counting feature by default #3798

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 24, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -96,6 +96,7 @@ struct cpu_and_wall_time_worker_state {
bool no_signals_workaround_enabled;
bool dynamic_sampling_rate_enabled;
bool allocation_profiling_enabled;
bool allocation_counting_enabled;
bool skip_idle_samples_for_testing;
VALUE self_instance;
VALUE thread_context_collector_instance;
@@ -181,6 +182,7 @@ static VALUE _native_initialize(
VALUE dynamic_sampling_rate_enabled,
VALUE dynamic_sampling_rate_overhead_target_percentage,
VALUE allocation_profiling_enabled,
VALUE allocation_counting_enabled,
VALUE skip_idle_samples_for_testing
);
static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
@@ -278,7 +280,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
// https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);

rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 10);
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
@@ -325,6 +327,7 @@ static VALUE _native_new(VALUE klass) {
state->no_signals_workaround_enabled = false;
state->dynamic_sampling_rate_enabled = true;
state->allocation_profiling_enabled = false;
state->allocation_counting_enabled = false;
state->skip_idle_samples_for_testing = false;
state->thread_context_collector_instance = Qnil;
state->idle_sampling_helper_instance = Qnil;
@@ -366,13 +369,15 @@ static VALUE _native_initialize(
VALUE dynamic_sampling_rate_enabled,
VALUE dynamic_sampling_rate_overhead_target_percentage,
VALUE allocation_profiling_enabled,
VALUE allocation_counting_enabled,
VALUE skip_idle_samples_for_testing
) {
ENFORCE_BOOLEAN(gc_profiling_enabled);
ENFORCE_BOOLEAN(no_signals_workaround_enabled);
ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
ENFORCE_BOOLEAN(allocation_profiling_enabled);
ENFORCE_BOOLEAN(allocation_counting_enabled);
ENFORCE_BOOLEAN(skip_idle_samples_for_testing)

struct cpu_and_wall_time_worker_state *state;
@@ -382,6 +387,7 @@ static VALUE _native_initialize(
state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);

double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
@@ -1040,7 +1046,9 @@ static void sleep_for(uint64_t time_ns) {
}

static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
bool are_allocations_being_tracked = active_sampler_instance_state != NULL && active_sampler_instance_state->allocation_profiling_enabled;
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state;

bool are_allocations_being_tracked = state != NULL && state->allocation_profiling_enabled && state->allocation_counting_enabled;

return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
}
@@ -1067,19 +1075,21 @@ static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
// On big applications, path 1. is the hottest, since we don't sample every option. So it's quite important for it to
// be as fast as possible.
static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
// Update thread-local allocation count
if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
allocation_count = 0;
} else {
allocation_count++;
}

struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above

// This should not happen in a normal situation because the tracepoint is always enabled after the instance is set
// and disabled before it is cleared, but just in case...
if (state == NULL) return;

if (RB_UNLIKELY(state->allocation_counting_enabled)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is RB_UNLIKELY some kind of compiler optimisation macro to optimise resulting code for the false outcome?
And is allocation_counting_enabled unlikely because it is off by default and you don't expect many users to enable it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes and yes!

(Here's where the wrapper is defined)

TBH I don't expect it to have a huge difference between having and not having this macro, but I liked using it in this situation to have more self-describing code in terms of what's the expected common path and the exception code.

// Update thread-local allocation count
if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
allocation_count = 0;
} else {
allocation_count++;
}
}

// In rare cases, we may actually be allocating an object as part of profiler sampling. We don't want to recursively
// sample, so we just return early
if (state->during_sample) {
10 changes: 10 additions & 0 deletions lib/datadog/core/configuration/settings.rb
Original file line number Diff line number Diff line change
@@ -301,6 +301,16 @@ def initialize(*_)
o.default true
end

# Can be used to enable/disable the Datadog::Profiling.allocation_count feature.
#
# Requires allocation profiling to be enabled.
#
# @default false
option :allocation_counting_enabled do |o|
o.type :bool
o.default false
end

# Can be used to enable/disable the collection of heap profiles.
#
# This feature is alpha and disabled by default
1 change: 1 addition & 0 deletions lib/datadog/profiling.rb
Original file line number Diff line number Diff line change
@@ -47,6 +47,7 @@ def self.start_if_enabled
# (This is similar to some OS-based time representations.)
#
# Note 2: All fibers in the same thread will share the same counter values.
# Note 3: This counter is not accurate when using the M:N scheduler.
#
# Only available when the profiler is running, and allocation-related features are not disabled via configuration.
#
Original file line number Diff line number Diff line change
@@ -21,6 +21,7 @@ def initialize(
thread_context_collector:,
dynamic_sampling_rate_overhead_target_percentage:,
allocation_profiling_enabled:,
allocation_counting_enabled:,
# **NOTE**: This should only be used for testing; disabling the dynamic sampling rate will increase the
# profiler overhead!
dynamic_sampling_rate_enabled: true,
@@ -42,6 +43,7 @@ def initialize(
dynamic_sampling_rate_enabled,
dynamic_sampling_rate_overhead_target_percentage,
allocation_profiling_enabled,
allocation_counting_enabled,
skip_idle_samples_for_testing,
)
@worker_thread = nil
1 change: 1 addition & 0 deletions lib/datadog/profiling/component.rb
Original file line number Diff line number Diff line change
@@ -61,6 +61,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
thread_context_collector: thread_context_collector,
dynamic_sampling_rate_overhead_target_percentage: overhead_target_percentage,
allocation_profiling_enabled: allocation_profiling_enabled,
allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled,
)

internal_metadata = {
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@ module Datadog
?idle_sampling_helper: Datadog::Profiling::Collectors::IdleSamplingHelper,
?dynamic_sampling_rate_enabled: bool,
allocation_profiling_enabled: bool,
allocation_counting_enabled: bool,
?skip_idle_samples_for_testing: false,
) -> void

@@ -27,6 +28,7 @@ module Datadog
bool dynamic_sampling_rate_enabled,
Float dynamic_sampling_rate_overhead_target_percentage,
bool allocation_profiling_enabled,
bool allocation_counting_enabled,
bool skip_idle_samples_for_testing,
) -> true

15 changes: 15 additions & 0 deletions spec/datadog/core/configuration/settings_spec.rb
Original file line number Diff line number Diff line change
@@ -526,6 +526,21 @@
end
end

describe '#allocation_counting_enabled' do
subject(:allocation_counting_enabled) { settings.profiling.advanced.allocation_counting_enabled }

it { is_expected.to be false }
end

describe '#allocation_counting_enabled=' do
it 'updates the #allocation_counting_enabled setting' do
expect { settings.profiling.advanced.allocation_counting_enabled = true }
.to change { settings.profiling.advanced.allocation_counting_enabled }
.from(false)
.to(true)
end
end

describe '#experimental_heap_enabled' do
subject(:experimental_heap_enabled) { settings.profiling.advanced.experimental_heap_enabled }

Original file line number Diff line number Diff line change
@@ -15,13 +15,15 @@
let(:no_signals_workaround_enabled) { false }
let(:timeline_enabled) { false }
let(:options) { {} }
let(:allocation_counting_enabled) { false }
let(:worker_settings) do
{
gc_profiling_enabled: gc_profiling_enabled,
no_signals_workaround_enabled: no_signals_workaround_enabled,
thread_context_collector: build_thread_context_collector(recorder),
dynamic_sampling_rate_overhead_target_percentage: 2.0,
allocation_profiling_enabled: allocation_profiling_enabled,
allocation_counting_enabled: allocation_counting_enabled,
**options
}
end
@@ -961,8 +963,9 @@
cpu_and_wall_time_worker.stop
end

context 'when allocation profiling is enabled' do
context 'when allocation profiling and allocation counting is enabled' do
let(:allocation_profiling_enabled) { true }
let(:allocation_counting_enabled) { true }

it 'always returns a >= 0 value' do
expect(described_class._native_allocation_count).to be >= 0
@@ -1040,10 +1043,21 @@
expect(after_t1 - before_t1).to be 100
expect(after_allocations - before_allocations).to be < 10
end

context 'when allocation profiling is enabled but allocation counting is disabled' do
let(:allocation_counting_enabled) { false }

it 'always returns a nil value' do
100.times { Object.new }

expect(described_class._native_allocation_count).to be nil
end
end
end

context 'when allocation profiling is disabled' do
let(:allocation_profiling_enabled) { false }

it 'always returns a nil value' do
100.times { Object.new }

3 changes: 3 additions & 0 deletions spec/datadog/profiling/component_spec.rb
Original file line number Diff line number Diff line change
@@ -84,13 +84,16 @@
.and_return(:overhead_target_percentage_config)
expect(described_class).to receive(:valid_overhead_target)
.with(:overhead_target_percentage_config).and_return(:overhead_target_percentage_config)
expect(settings.profiling.advanced)
.to receive(:allocation_counting_enabled).and_return(:allocation_counting_enabled_config)

expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with(
gc_profiling_enabled: anything,
no_signals_workaround_enabled: :no_signals_result,
thread_context_collector: instance_of(Datadog::Profiling::Collectors::ThreadContext),
dynamic_sampling_rate_overhead_target_percentage: :overhead_target_percentage_config,
allocation_profiling_enabled: false,
allocation_counting_enabled: :allocation_counting_enabled_config,
)

build_profiler_component