diff --git a/benchmarks/profiler_allocation.rb b/benchmarks/profiler_allocation.rb
new file mode 100644
index 00000000000..baa9c9f5584
--- /dev/null
+++ b/benchmarks/profiler_allocation.rb
@@ -0,0 +1,66 @@
+# Used to quickly run benchmark under RSpec as part of the usual test suite, to validate it didn't bitrot
+VALIDATE_BENCHMARK_MODE = ENV['VALIDATE_BENCHMARK'] == 'true'
+
+return unless __FILE__ == $PROGRAM_NAME || VALIDATE_BENCHMARK_MODE
+
+require 'benchmark/ips'
+require 'datadog'
+require 'pry'
+require_relative 'dogstatsd_reporter'
+
+# This benchmark measures the performance of allocation profiling
+
+class ExportToFile
+  PPROF_PREFIX = ENV.fetch('DD_PROFILING_PPROF_PREFIX', 'profiler-allocation')
+
+  def export(flush)
+    File.write("#{PPROF_PREFIX}#{flush.start.strftime('%Y%m%dT%H%M%SZ')}.pprof", flush.pprof_data)
+    true
+  end
+end
+
+class ProfilerAllocationBenchmark
+  def run_benchmark
+    Benchmark.ips do |x|
+      benchmark_time = VALIDATE_BENCHMARK_MODE ? { time: 0.01, warmup: 0 } : { time: 10, warmup: 2 }
+      x.config(
+        **benchmark_time,
+        suite: report_to_dogstatsd_if_enabled_via_environment_variable(benchmark_name: 'profiler_allocation')
+      )
+
+      x.report('Allocations (baseline)', 'BasicObject.new')
+
+      x.save! 'profiler-allocation-results.json' unless VALIDATE_BENCHMARK_MODE
+      x.compare!
+    end
+
+    Datadog.configure do |c|
+      c.profiling.enabled = true
+      c.profiling.allocation_enabled = true
+      c.profiling.advanced.gc_enabled = false
+      c.profiling.exporter.transport = ExportToFile.new unless VALIDATE_BENCHMARK_MODE
+    end
+    Datadog::Profiling.wait_until_running
+
+    3.times { GC.start }
+
+    Benchmark.ips do |x|
+      benchmark_time = VALIDATE_BENCHMARK_MODE ? { time: 0.01, warmup: 0 } : { time: 10, warmup: 2 }
+      x.config(
+        **benchmark_time,
+        suite: report_to_dogstatsd_if_enabled_via_environment_variable(benchmark_name: 'profiler_allocation')
+      )
+
+      x.report("Allocations (#{ENV['CONFIG']})", 'BasicObject.new')
+
+      x.save! 'profiler-allocation-results.json' unless VALIDATE_BENCHMARK_MODE
+      x.compare!
+    end
+  end
+end
+
+puts "Current pid is #{Process.pid}"
+
+ProfilerAllocationBenchmark.new.instance_exec do
+  run_benchmark
+end
diff --git a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c
index bde8fc7281c..30a05e4f55a 100644
--- a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c
+++ b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c
@@ -1045,8 +1045,27 @@ static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
   return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
 }
 
+#define HANDLE_CLOCK_FAILURE(call) ({ \
+  long _result = (call); \
+  if (_result == 0) { \
+    delayed_error(state, ERR_CLOCK_FAIL); \
+    return; \
+  } \
+  _result; \
+})
+
 // Implements memory-related profiling events. This function is called by Ruby via the `object_allocation_tracepoint`
 // when the RUBY_INTERNAL_EVENT_NEWOBJ event is triggered.
+//
+// When allocation sampling is enabled, this function gets called for almost all* objects allocated by the Ruby VM.
+// (*In some weird cases the VM may skip this tracepoint.)
+//
+// At a high level, there are two paths through this function:
+// 1. should_sample == false -> return
+// 2. should_sample == true -> sample
+//
+// On big applications, path 1. is the hottest, since we don't sample every allocation. So it's quite important for it to
+// be as fast as possible.
 static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
   // Update thread-local allocation count
   if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
@@ -1061,25 +1080,39 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)
   // and disabled before it is cleared, but just in case...
   if (state == NULL) return;
 
-  // In a few cases, we may actually be allocating an object as part of profiler sampling. We don't want to recursively
+  // In rare cases, we may actually be allocating an object as part of profiler sampling. We don't want to recursively
   // sample, so we just return early
   if (state->during_sample) {
     state->stats.allocations_during_sample++;
    return;
   }
 
-  if (state->dynamic_sampling_rate_enabled) {
-    long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
-    if (now == 0) {
-      delayed_error(state, ERR_CLOCK_FAIL);
-      return;
-    }
-    if (!discrete_dynamic_sampler_should_sample(&state->allocation_sampler, now)) {
-      state->stats.allocation_skipped++;
-      return;
+  // Hot path: Dynamic sampling rate is usually enabled and the sampling decision is usually false
+  if (RB_LIKELY(state->dynamic_sampling_rate_enabled && !discrete_dynamic_sampler_should_sample(&state->allocation_sampler))) {
+    state->stats.allocation_skipped++;
+
+    coarse_instant now = monotonic_coarse_wall_time_now_ns();
+    HANDLE_CLOCK_FAILURE(now.timestamp_ns);
+
+    bool needs_readjust = discrete_dynamic_sampler_skipped_sample(&state->allocation_sampler, now);
+    if (RB_UNLIKELY(needs_readjust)) {
+      // We rarely readjust, so this is a cold path
+      // Also, while above we used the cheaper monotonic_coarse, for this call we want the regular monotonic call,
+      // which is why we end up getting time "again".
+      discrete_dynamic_sampler_readjust(
+        &state->allocation_sampler, HANDLE_CLOCK_FAILURE(monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE))
+      );
     }
+
+    return;
   }
 
+  // From here on, we've decided to go ahead with the sample, which is way less common than skipping it
+
+  discrete_dynamic_sampler_before_sample(
+    &state->allocation_sampler, HANDLE_CLOCK_FAILURE(monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE))
+  );
+
   // @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous
   // invocation is still pending, (e.g. it wouldn't call `on_newobj_event` while it's already running), but I decided
   // to keep this here for consistency -- every call to the thread context (other than the special gc calls which are
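Worth pausing on `HANDLE_CLOCK_FAILURE` above: it relies on the GCC/Clang statement-expression extension (`({ ... })`), so on a failed clock read (a 0 return) it records a delayed error and `return`s from the enclosing `void` function, while on success the whole expression evaluates to the timestamp. A minimal standalone sketch of the same pattern — all names here are illustrative, not part of the extension:

```c
#include <stdio.h>

static void record_error(const char *what) { fprintf(stderr, "error: %s\n", what); }

// Pretend clock read; like retrieve_clock_as_ns, failure is signalled by returning 0.
static long read_clock_ns(void) { return 123456789L; }

// GCC/Clang statement expression: evaluates to _result on success, but bails out of
// the *enclosing function* (not just the macro) when the clock read failed.
#define CHECKED_CLOCK(call) ({ \
  long _result = (call); \
  if (_result == 0) { \
    record_error("clock failed"); \
    return; \
  } \
  _result; \
})

static void sample_event(void) {
  long now_ns = CHECKED_CLOCK(read_clock_ns());
  printf("sampling at %ld\n", now_ns);
}

int main(void) {
  sample_event();
  return 0;
}
```

This keeps the hot path free of repeated `if (now == 0) { ... return; }` boilerplate, at the cost of relying on a compiler extension supported by GCC and Clang.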
diff --git a/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c b/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c
index 6e188b8e83a..2db303e07f4 100644
--- a/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c
+++ b/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c
@@ -20,6 +20,9 @@
 
 #define EMA_SMOOTHING_FACTOR 0.6
 
+static void maybe_readjust(discrete_dynamic_sampler *sampler, long now_ns);
+inline bool should_readjust(discrete_dynamic_sampler *sampler, coarse_instant now);
+
 void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name, long now_ns) {
   sampler->debug_name = debug_name;
   discrete_dynamic_sampler_set_overhead_target_percentage(sampler, BASE_OVERHEAD_PCT, now_ns);
@@ -54,27 +57,25 @@ void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sa
   return discrete_dynamic_sampler_reset(sampler, now_ns);
 }
 
-static void maybe_readjust(discrete_dynamic_sampler *sampler, long now);
-
-bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns) {
+// NOTE: See header for an explanation of when this should get used
+__attribute__((warn_unused_result))
+bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler) {
   // For efficiency reasons we don't do true random sampling but rather systematic
   // sampling following a sample interval/skip. This can be biased and hide patterns
-  // but the dynamic interval and rather indeterministic pattern of allocations in
+  // but the dynamic interval and rather nondeterministic pattern of allocations in
   // most real applications should help reduce the bias impact.
   sampler->events_since_last_sample++;
   sampler->events_since_last_readjustment++;
 
-  bool should_sample = sampler->sampling_interval > 0 && sampler->events_since_last_sample >= sampler->sampling_interval;
-  if (should_sample) {
-    sampler->sample_start_time_ns = now_ns;
-  } else {
-    // check if we should readjust our sampler after this event, even if we didn't sample it
-    maybe_readjust(sampler, now_ns);
-  }
+  return sampler->sampling_interval > 0 && sampler->events_since_last_sample >= sampler->sampling_interval;
+}
 
-  return should_sample;
+// NOTE: See header for an explanation of when this should get used
+void discrete_dynamic_sampler_before_sample(discrete_dynamic_sampler *sampler, long now_ns) {
+  sampler->sample_start_time_ns = now_ns;
 }
 
+// NOTE: See header for an explanation of when this should get used
 long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns) {
   long last_sampling_time_ns = sampler->sample_start_time_ns == 0 ? 0 : long_max_of(0, now_ns - sampler->sample_start_time_ns);
   sampler->samples_since_last_readjustment++;
@@ -95,6 +96,11 @@ size_t discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sample
   return sampler->events_since_last_sample;
 }
 
+// NOTE: See header for an explanation of when this should get used
+bool discrete_dynamic_sampler_skipped_sample(discrete_dynamic_sampler *sampler, coarse_instant now) {
+  return should_readjust(sampler, now);
+}
+
 static double ewma_adj_window(double latest_value, double avg, long current_window_time_ns, bool is_first) {
   if (is_first) {
     return latest_value;
@@ -109,19 +115,27 @@ static double ewma_adj_window(double latest_value, double avg, long current_wind
   return (1-alpha) * avg + alpha * latest_value;
 }
 
-static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
-  long this_window_time_ns = sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now - sampler->last_readjust_time_ns;
+static void maybe_readjust(discrete_dynamic_sampler *sampler, long now_ns) {
+  if (should_readjust(sampler, to_coarse_instant(now_ns))) discrete_dynamic_sampler_readjust(sampler, now_ns);
+}
+
+inline bool should_readjust(discrete_dynamic_sampler *sampler, coarse_instant now) {
+  long this_window_time_ns =
+    sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now.timestamp_ns - sampler->last_readjust_time_ns;
 
   bool should_readjust_based_on_time = this_window_time_ns >= ADJUSTMENT_WINDOW_NS;
   bool should_readjust_based_on_samples = sampler->samples_since_last_readjustment >= ADJUSTMENT_WINDOW_SAMPLES;
 
-  if (!should_readjust_based_on_time && !should_readjust_based_on_samples) {
-    // not enough time or samples have passed to perform a readjustment
-    return;
-  }
+  return should_readjust_based_on_time || should_readjust_based_on_samples;
+}
 
-  if (this_window_time_ns == 0) {
-    // should not be possible given previous condition but lets protect against div by 0 below.
+// NOTE: This method ASSUMES should_readjust == true
+// NOTE: See header for an explanation of when this should get used
+void discrete_dynamic_sampler_readjust(discrete_dynamic_sampler *sampler, long now_ns) {
+  long this_window_time_ns = sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now_ns - sampler->last_readjust_time_ns;
+
+  if (this_window_time_ns <= 0) {
+    // should not be possible given should_readjust but let's protect against div by 0 below.
     return;
   }
 
@@ -143,7 +157,7 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
   // Lets update our average sampling time per event
   long avg_sampling_time_in_window_ns = sampler->samples_since_last_readjustment == 0 ? 0 : sampler->sampling_time_since_last_readjustment_ns / sampler->samples_since_last_readjustment;
   if (avg_sampling_time_in_window_ns > sampler->max_sampling_time_ns) {
-    // If the average sampling time in the previous window was deemed unnacceptable, clamp it to the
+    // If the average sampling time in the previous window was deemed unacceptable, clamp it to the
     // maximum acceptable value and register this operation in our counter.
     // NOTE: This is important so that events like suspensions or system overloads do not lead us to
     //       learn arbitrarily big sampling times which may then result in us not sampling anything
@@ -286,7 +300,7 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
   sampler->events_since_last_readjustment = 0;
   sampler->samples_since_last_readjustment = 0;
   sampler->sampling_time_since_last_readjustment_ns = 0;
-  sampler->last_readjust_time_ns = now;
+  sampler->last_readjust_time_ns = now_ns;
   sampler->has_completed_full_adjustment_window = true;
 }
 
@@ -406,7 +420,14 @@ VALUE _native_should_sample(VALUE self, VALUE now_ns) {
   sampler_state *state;
   TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);
 
-  return discrete_dynamic_sampler_should_sample(&state->sampler, NUM2LONG(now_ns)) ? Qtrue : Qfalse;
+  if (discrete_dynamic_sampler_should_sample(&state->sampler)) {
+    discrete_dynamic_sampler_before_sample(&state->sampler, NUM2LONG(now_ns));
+    return Qtrue;
+  } else {
+    bool needs_readjust = discrete_dynamic_sampler_skipped_sample(&state->sampler, to_coarse_instant(NUM2LONG(now_ns)));
+    if (needs_readjust) discrete_dynamic_sampler_readjust(&state->sampler, NUM2LONG(now_ns));
+    return Qfalse;
+  }
 }
 
 VALUE _native_after_sample(VALUE self, VALUE now_ns) {
diff --git a/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h b/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h
index 374dc453b6d..7e1edfb5c42 100644
--- a/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h
+++ b/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h
@@ -5,6 +5,8 @@
 
 #include <ruby.h>
 
+#include "time_helpers.h"
+
 // A sampler that will sample discrete events based on the overhead of their
 // sampling.
 //
@@ -62,7 +64,6 @@ typedef struct discrete_dynamic_sampler {
   unsigned long sampling_time_clamps;
 } discrete_dynamic_sampler;
 
-
 // Init a new sampler with sane defaults.
 void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name, long now_ns);
 
@@ -80,9 +81,38 @@ void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sa
 // @return True if the event associated with this decision should be sampled, false
 //         otherwise.
 //
-// NOTE: If true is returned we implicitly assume the start of a sampling operation
-//       and it is expected that a follow-up after_sample call is issued.
-bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns);
+// IMPORTANT: A call to this method MUST be followed by a call to either
+// `discrete_dynamic_sampler_before_sample` if return is `true` or
+// `discrete_dynamic_sampler_skipped_sample` if return is `false`.
+//
+// In the past, this method took care of before_sample/skipped_sample/readjust as well.
+// We've had to split it up because we wanted to both use coarse time and regular monotonic time depending on the
+// situation, but also wanted time to come as an argument from the outside.
+//
+// TL;DR here's how they should be used as Ruby code:
+//   if discrete_dynamic_sampler_should_sample
+//     discrete_dynamic_sampler_before_sample(monotonic_wall_time_now_ns())
+//   else
+//     needs_readjust = discrete_dynamic_sampler_skipped_sample(monotonic_coarse_wall_time_now_ns())
+//     discrete_dynamic_sampler_readjust(monotonic_wall_time_now_ns()) if needs_readjust
+//   end
+__attribute__((warn_unused_result))
+bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler);
+
+// Signal the start of a sampling operation.
+// MUST be called after `discrete_dynamic_sampler_should_sample` returns `true`.
+void discrete_dynamic_sampler_before_sample(discrete_dynamic_sampler *sampler, long now_ns);
+
+// Signals that sampling did not happen.
+// MUST be called after `discrete_dynamic_sampler_should_sample` returns `false`.
+//
+// @return True if the sampler needs to be readjusted.
+//
+// IMPORTANT: A call to this method MUST be followed by a call to `discrete_dynamic_sampler_readjust` if return is `true`.
+__attribute__((warn_unused_result))
+bool discrete_dynamic_sampler_skipped_sample(discrete_dynamic_sampler *sampler, coarse_instant now);
+
+void discrete_dynamic_sampler_readjust(discrete_dynamic_sampler *sampler, long now_ns);
 
 // Signal the end of a sampling operation.
 //
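Putting the new header contract together, the C equivalent of the Ruby TL;DR above would look roughly like the following sketch. The `sampler_wants_event` wrapper is hypothetical (not part of the patch), and handling of failed clock reads is elided — the real caller, `on_newobj_event`, routes those through `HANDLE_CLOCK_FAILURE`:

```c
#include <stdbool.h>

#include "collectors_discrete_dynamic_sampler.h"
#include "time_helpers.h"

// Hypothetical caller driving the split should_sample/before_sample/skipped_sample/readjust API.
static bool sampler_wants_event(discrete_dynamic_sampler *sampler) {
  if (discrete_dynamic_sampler_should_sample(sampler)) {
    // Sampling path: record the (precise) start time so after_sample can measure sampling cost
    discrete_dynamic_sampler_before_sample(sampler, monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE));
    return true;
  }

  // Skip path: the cheap coarse clock is enough to decide whether a readjustment is due...
  bool needs_readjust = discrete_dynamic_sampler_skipped_sample(sampler, monotonic_coarse_wall_time_now_ns());
  if (needs_readjust) {
    // ...but the (rare) readjustment itself uses the regular monotonic clock
    discrete_dynamic_sampler_readjust(sampler, monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE));
  }
  return false;
}
```

The design point is that the overwhelmingly common skip path pays only for a coarse clock read, while the more expensive precise clock is reserved for the rare sample and readjust paths.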
diff --git a/ext/datadog_profiling_native_extension/extconf.rb b/ext/datadog_profiling_native_extension/extconf.rb
index 9a1a7718ca6..bdb833622d2 100644
--- a/ext/datadog_profiling_native_extension/extconf.rb
+++ b/ext/datadog_profiling_native_extension/extconf.rb
@@ -129,6 +129,9 @@ def add_compiler_flag(flag)
   # but it's slower to build
   # so instead we just assume that we have the function we need on Linux, and nowhere else
   $defs << '-DHAVE_PTHREAD_GETCPUCLOCKID'
+
+  # Not available on macOS
+  $defs << '-DHAVE_CLOCK_MONOTONIC_COARSE'
 end
 
 have_func 'malloc_stats'
diff --git a/ext/datadog_profiling_native_extension/time_helpers.c b/ext/datadog_profiling_native_extension/time_helpers.c
index 5d761f0b127..15d499348a6 100644
--- a/ext/datadog_profiling_native_extension/time_helpers.c
+++ b/ext/datadog_profiling_native_extension/time_helpers.c
@@ -4,21 +4,6 @@
 #include "ruby_helpers.h"
 #include "time_helpers.h"
 
-// Safety: This function is assumed never to raise exceptions by callers when raise_on_failure == false
-long retrieve_clock_as_ns(clockid_t clock_id, bool raise_on_failure) {
-  struct timespec clock_value;
-
-  if (clock_gettime(clock_id, &clock_value) != 0) {
-    if (raise_on_failure) ENFORCE_SUCCESS_GVL(errno);
-    return 0;
-  }
-
-  return clock_value.tv_nsec + SECONDS_AS_NS(clock_value.tv_sec);
-}
-
-long monotonic_wall_time_now_ns(bool raise_on_failure) { return retrieve_clock_as_ns(CLOCK_MONOTONIC, raise_on_failure); }
-long system_epoch_time_now_ns(bool raise_on_failure) { return retrieve_clock_as_ns(CLOCK_REALTIME, raise_on_failure); }
-
 // Design: The monotonic_to_system_epoch_state struct is kept somewhere by the caller, and MUST be initialized to
 //         MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER.
 //
diff --git a/ext/datadog_profiling_native_extension/time_helpers.h b/ext/datadog_profiling_native_extension/time_helpers.h
index 82ed1eb2a0d..1475ed3aad8 100644
--- a/ext/datadog_profiling_native_extension/time_helpers.h
+++ b/ext/datadog_profiling_native_extension/time_helpers.h
@@ -1,12 +1,16 @@
 #pragma once
 
 #include <stdbool.h>
+#include <time.h>
+#include <errno.h>
+
+#include "extconf.h" // Needed for HAVE_CLOCK_MONOTONIC_COARSE
+#include "ruby_helpers.h"
 
 #define SECONDS_AS_NS(value) (value * 1000 * 1000 * 1000L)
 #define MILLIS_AS_NS(value) (value * 1000 * 1000L)
 
-#define RAISE_ON_FAILURE true
-#define DO_NOT_RAISE_ON_FAILURE false
+typedef enum { RAISE_ON_FAILURE, DO_NOT_RAISE_ON_FAILURE } raise_on_failure_setting;
 
 #define INVALID_TIME -1
 
@@ -17,10 +21,36 @@ typedef struct {
 
 #define MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER {.system_epoch_ns_reference = INVALID_TIME, .delta_to_epoch_ns = INVALID_TIME}
 
-// Safety: This function is assumed never to raise exceptions by callers when raise_on_failure == false
-long monotonic_wall_time_now_ns(bool raise_on_failure);
+inline long retrieve_clock_as_ns(clockid_t clock_id, raise_on_failure_setting raise_on_failure) {
+  struct timespec clock_value;
+
+  if (clock_gettime(clock_id, &clock_value) != 0) {
+    if (raise_on_failure == RAISE_ON_FAILURE) ENFORCE_SUCCESS_GVL(errno);
+    return 0;
+  }
+
+  return clock_value.tv_nsec + SECONDS_AS_NS(clock_value.tv_sec);
+}
+
+inline long monotonic_wall_time_now_ns(raise_on_failure_setting raise_on_failure) { return retrieve_clock_as_ns(CLOCK_MONOTONIC, raise_on_failure); }
+inline long system_epoch_time_now_ns(raise_on_failure_setting raise_on_failure) { return retrieve_clock_as_ns(CLOCK_REALTIME, raise_on_failure); }
+
+// Coarse instants use CLOCK_MONOTONIC_COARSE on Linux which is expected to provide resolution in the millisecond range:
+// https://docs.redhat.com/en/documentation/red_hat_enterprise_linux_for_real_time/7/html/reference_guide/sect-posix_clocks#Using_clock_getres_to_compare_clock_resolution
+// We introduce here a separate type for it, so as to make it harder to misuse/more explicit when these timestamps are used
+
+typedef struct coarse_instant {
+  long timestamp_ns;
+} coarse_instant;
+
+inline coarse_instant to_coarse_instant(long timestamp_ns) { return (coarse_instant) {.timestamp_ns = timestamp_ns}; }
 
-// Safety: This function is assumed never to raise exceptions by callers when raise_on_failure == false
-long system_epoch_time_now_ns(bool raise_on_failure);
+inline coarse_instant monotonic_coarse_wall_time_now_ns(void) {
+  #ifdef HAVE_CLOCK_MONOTONIC_COARSE // Linux
+    return to_coarse_instant(retrieve_clock_as_ns(CLOCK_MONOTONIC_COARSE, DO_NOT_RAISE_ON_FAILURE));
+  #else // macOS
+    return to_coarse_instant(retrieve_clock_as_ns(CLOCK_MONOTONIC, DO_NOT_RAISE_ON_FAILURE));
+  #endif
+}
 
 long monotonic_to_system_epoch_ns(monotonic_to_system_epoch_state *state, long monotonic_wall_time_ns);
diff --git a/spec/datadog/profiling/validate_benchmarks_spec.rb b/spec/datadog/profiling/validate_benchmarks_spec.rb
index 74cad9583bb..07146fe3825 100644
--- a/spec/datadog/profiling/validate_benchmarks_spec.rb
+++ b/spec/datadog/profiling/validate_benchmarks_spec.rb
@@ -16,6 +16,7 @@
     'profiler_memory_sample_serialize',
     'profiler_gc',
     'profiler_hold_resume_interruptions',
+    'profiler_allocation',
   ].freeze
 
   benchmarks_to_validate.each do |benchmark|
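To see the resolution trade-off that motivates `coarse_instant` (per the Red Hat documentation linked in the header comment), one can compare what the kernel reports for the two clocks via `clock_getres`. A small standalone program, not part of the patch:

```c
#include <stdio.h>
#include <time.h>

// Prints the resolution a given POSIX clock advertises.
static void print_resolution(const char *name, clockid_t clock_id) {
  struct timespec res;
  if (clock_getres(clock_id, &res) != 0) {
    printf("%s: unavailable\n", name);
    return;
  }
  printf("%s: %ld.%09ld s\n", name, (long) res.tv_sec, res.tv_nsec);
}

int main(void) {
  print_resolution("CLOCK_MONOTONIC       ", CLOCK_MONOTONIC);
#ifdef CLOCK_MONOTONIC_COARSE // Linux-only, mirroring the fallback in time_helpers.h
  print_resolution("CLOCK_MONOTONIC_COARSE", CLOCK_MONOTONIC_COARSE);
#endif
  return 0;
}
```

On a typical Linux kernel the coarse clock reports the timer tick (e.g. 4ms at CONFIG_HZ=250) versus 1ns for CLOCK_MONOTONIC — plenty of precision for deciding whether an ADJUSTMENT_WINDOW_NS-sized readjustment window has elapsed, while being cheaper to read on the allocation hot path.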