Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PROF-10201] Disable profiler allocation counting feature by default #3798

Merged
merged 1 commit into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ struct cpu_and_wall_time_worker_state {
bool no_signals_workaround_enabled;
bool dynamic_sampling_rate_enabled;
bool allocation_profiling_enabled;
bool allocation_counting_enabled;
bool skip_idle_samples_for_testing;
VALUE self_instance;
VALUE thread_context_collector_instance;
Expand Down Expand Up @@ -181,6 +182,7 @@ static VALUE _native_initialize(
VALUE dynamic_sampling_rate_enabled,
VALUE dynamic_sampling_rate_overhead_target_percentage,
VALUE allocation_profiling_enabled,
VALUE allocation_counting_enabled,
VALUE skip_idle_samples_for_testing
);
static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
Expand Down Expand Up @@ -278,7 +280,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
// https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);

rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 10);
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
Expand Down Expand Up @@ -325,6 +327,7 @@ static VALUE _native_new(VALUE klass) {
state->no_signals_workaround_enabled = false;
state->dynamic_sampling_rate_enabled = true;
state->allocation_profiling_enabled = false;
state->allocation_counting_enabled = false;
state->skip_idle_samples_for_testing = false;
state->thread_context_collector_instance = Qnil;
state->idle_sampling_helper_instance = Qnil;
Expand Down Expand Up @@ -366,13 +369,15 @@ static VALUE _native_initialize(
VALUE dynamic_sampling_rate_enabled,
VALUE dynamic_sampling_rate_overhead_target_percentage,
VALUE allocation_profiling_enabled,
VALUE allocation_counting_enabled,
VALUE skip_idle_samples_for_testing
) {
ENFORCE_BOOLEAN(gc_profiling_enabled);
ENFORCE_BOOLEAN(no_signals_workaround_enabled);
ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
ENFORCE_BOOLEAN(allocation_profiling_enabled);
ENFORCE_BOOLEAN(allocation_counting_enabled);
ENFORCE_BOOLEAN(skip_idle_samples_for_testing)

struct cpu_and_wall_time_worker_state *state;
Expand All @@ -382,6 +387,7 @@ static VALUE _native_initialize(
state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);

double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
Expand Down Expand Up @@ -1040,7 +1046,9 @@ static void sleep_for(uint64_t time_ns) {
}

// Returns the number of objects allocated since the profiler started tracking, or nil when the
// feature is not active.
//
// Reads the global active_sampler_instance_state (only set while a worker instance is active — see
// the "sampler global state safety" note elsewhere in this file). Returns Qnil when no worker is
// active, or when either allocation profiling or allocation counting is disabled; both flags must
// be on for the counter to be meaningful.
static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
  struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state;

  bool are_allocations_being_tracked = state != NULL && state->allocation_profiling_enabled && state->allocation_counting_enabled;

  return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
}
Expand All @@ -1067,19 +1075,21 @@ static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
// On big applications, path 1. is the hottest, since we don't sample every object. So it's quite important for it to
// be as fast as possible.
static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
// Update thread-local allocation count
if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
allocation_count = 0;
} else {
allocation_count++;
}

struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above

// This should not happen in a normal situation because the tracepoint is always enabled after the instance is set
// and disabled before it is cleared, but just in case...
if (state == NULL) return;

if (RB_UNLIKELY(state->allocation_counting_enabled)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is RB_UNLIKELY some kind of compiler optimisation macro to optimise resulting code for the false outcome?
And is allocation_counting_enabled unlikely because it is off by default and you don't expect many users to enable it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes and yes!

(Here's where the wrapper is defined)

TBH I don't expect it to have a huge difference between having and not having this macro, but I liked using it in this situation to have more self-describing code in terms of what's the expected common path and the exception code.

// Update thread-local allocation count
if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
allocation_count = 0;
} else {
allocation_count++;
}
}

// In rare cases, we may actually be allocating an object as part of profiler sampling. We don't want to recursively
// sample, so we just return early
if (state->during_sample) {
Expand Down
10 changes: 10 additions & 0 deletions lib/datadog/core/configuration/settings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,16 @@ def initialize(*_)
o.default true
end

# Can be used to enable/disable the Datadog::Profiling.allocation_count feature.
#
# Requires allocation profiling to be enabled.
#
# @default false
option :allocation_counting_enabled do |o|
o.type :bool
o.default false
end

# Can be used to enable/disable the collection of heap profiles.
#
# This feature is alpha and disabled by default
Expand Down
1 change: 1 addition & 0 deletions lib/datadog/profiling.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def self.start_if_enabled
# (This is similar to some OS-based time representations.)
#
# Note 2: All fibers in the same thread will share the same counter values.
# Note 3: This counter is not accurate when using the M:N scheduler.
#
# Only available when the profiler is running, and allocation-related features are not disabled via configuration.
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def initialize(
thread_context_collector:,
dynamic_sampling_rate_overhead_target_percentage:,
allocation_profiling_enabled:,
allocation_counting_enabled:,
# **NOTE**: This should only be used for testing; disabling the dynamic sampling rate will increase the
# profiler overhead!
dynamic_sampling_rate_enabled: true,
Expand All @@ -42,6 +43,7 @@ def initialize(
dynamic_sampling_rate_enabled,
dynamic_sampling_rate_overhead_target_percentage,
allocation_profiling_enabled,
allocation_counting_enabled,
skip_idle_samples_for_testing,
)
@worker_thread = nil
Expand Down
1 change: 1 addition & 0 deletions lib/datadog/profiling/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
thread_context_collector: thread_context_collector,
dynamic_sampling_rate_overhead_target_percentage: overhead_target_percentage,
allocation_profiling_enabled: allocation_profiling_enabled,
allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled,
)

internal_metadata = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ module Datadog
?idle_sampling_helper: Datadog::Profiling::Collectors::IdleSamplingHelper,
?dynamic_sampling_rate_enabled: bool,
allocation_profiling_enabled: bool,
allocation_counting_enabled: bool,
?skip_idle_samples_for_testing: false,
) -> void

Expand All @@ -27,6 +28,7 @@ module Datadog
bool dynamic_sampling_rate_enabled,
Float dynamic_sampling_rate_overhead_target_percentage,
bool allocation_profiling_enabled,
bool allocation_counting_enabled,
bool skip_idle_samples_for_testing,
) -> true

Expand Down
15 changes: 15 additions & 0 deletions spec/datadog/core/configuration/settings_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,21 @@
end
end

describe '#allocation_counting_enabled' do
  subject(:allocation_counting_enabled) do
    settings.profiling.advanced.allocation_counting_enabled
  end

  # The feature ships disabled; see the PR that turned it off by default.
  it { is_expected.to be false }
end

describe '#allocation_counting_enabled=' do
  # Writing the setting must be observable through the reader.
  it 'updates the #allocation_counting_enabled setting' do
    expect { settings.profiling.advanced.allocation_counting_enabled = true }
      .to change { settings.profiling.advanced.allocation_counting_enabled }
      .from(false).to(true)
  end
end

describe '#experimental_heap_enabled' do
subject(:experimental_heap_enabled) { settings.profiling.advanced.experimental_heap_enabled }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@
let(:no_signals_workaround_enabled) { false }
let(:timeline_enabled) { false }
let(:options) { {} }
let(:allocation_counting_enabled) { false }
let(:worker_settings) do
{
gc_profiling_enabled: gc_profiling_enabled,
no_signals_workaround_enabled: no_signals_workaround_enabled,
thread_context_collector: build_thread_context_collector(recorder),
dynamic_sampling_rate_overhead_target_percentage: 2.0,
allocation_profiling_enabled: allocation_profiling_enabled,
allocation_counting_enabled: allocation_counting_enabled,
**options
}
end
Expand Down Expand Up @@ -961,8 +963,9 @@
cpu_and_wall_time_worker.stop
end

context 'when allocation profiling is enabled' do
context 'when allocation profiling and allocation counting is enabled' do
let(:allocation_profiling_enabled) { true }
let(:allocation_counting_enabled) { true }

it 'always returns a >= 0 value' do
expect(described_class._native_allocation_count).to be >= 0
Expand Down Expand Up @@ -1040,10 +1043,21 @@
expect(after_t1 - before_t1).to be 100
expect(after_allocations - before_allocations).to be < 10
end

context 'when allocation profiling is enabled but allocation counting is disabled' do
  let(:allocation_counting_enabled) { false }

  # With counting off, the native API reports nil even while allocations happen.
  it 'always returns a nil value' do
    100.times { Object.new }

    expect(described_class._native_allocation_count).to be_nil
  end
end
end

context 'when allocation profiling is disabled' do
let(:allocation_profiling_enabled) { false }

it 'always returns a nil value' do
100.times { Object.new }

Expand Down
3 changes: 3 additions & 0 deletions spec/datadog/profiling/component_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,16 @@
.and_return(:overhead_target_percentage_config)
expect(described_class).to receive(:valid_overhead_target)
.with(:overhead_target_percentage_config).and_return(:overhead_target_percentage_config)
expect(settings.profiling.advanced)
.to receive(:allocation_counting_enabled).and_return(:allocation_counting_enabled_config)

expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with(
gc_profiling_enabled: anything,
no_signals_workaround_enabled: :no_signals_result,
thread_context_collector: instance_of(Datadog::Profiling::Collectors::ThreadContext),
dynamic_sampling_rate_overhead_target_percentage: :overhead_target_percentage_config,
allocation_profiling_enabled: false,
allocation_counting_enabled: :allocation_counting_enabled_config,
)

build_profiler_component
Expand Down
Loading