Merge pull request #3797 from DataDog/ivoanjo/prof-10201-coarse-timestamps

[PROF-10201] Reduce allocation profiling overhead by using coarse timestamps
ivoanjo authored Jul 24, 2024
2 parents 5d1295c + 446f2ad commit 2cdc371
Showing 8 changed files with 227 additions and 58 deletions.
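
A note on the idea behind this change before the diffs: CLOCK_MONOTONIC_COARSE is a Linux-specific clock that is substantially cheaper to read than CLOCK_MONOTONIC, at the cost of precision (its resolution is the kernel tick, typically 1-4 ms, rather than nanoseconds). The diffs below use it on the allocation hot path, where a timestamp is only needed to decide whether a readjustment window has elapsed. A minimal standalone C sketch (not part of the diff) that prints both clocks' resolutions:

#include <stdio.h>
#include <time.h>

// Prints the resolution of a clock, as reported by clock_getres().
static void print_resolution(const char *name, clockid_t clock_id) {
  struct timespec res;
  if (clock_getres(clock_id, &res) != 0) {
    printf("%s: unavailable\n", name);
    return;
  }
  printf("%s: %ld ns resolution\n", name, (long) res.tv_sec * 1000000000L + res.tv_nsec);
}

int main(void) {
  print_resolution("CLOCK_MONOTONIC", CLOCK_MONOTONIC); // typically reports 1 ns
#ifdef CLOCK_MONOTONIC_COARSE
  print_resolution("CLOCK_MONOTONIC_COARSE", CLOCK_MONOTONIC_COARSE); // typically the kernel tick, ~1-4 ms
#endif
  return 0;
}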
66 changes: 66 additions & 0 deletions benchmarks/profiler_allocation.rb
@@ -0,0 +1,66 @@
# Used to quickly run this benchmark under RSpec as part of the usual test suite, to validate it hasn't bitrotted
VALIDATE_BENCHMARK_MODE = ENV['VALIDATE_BENCHMARK'] == 'true'

return unless __FILE__ == $PROGRAM_NAME || VALIDATE_BENCHMARK_MODE

require 'benchmark/ips'
require 'datadog'
require 'pry'
require_relative 'dogstatsd_reporter'

# This benchmark measures the performance of allocation profiling

class ExportToFile
PPROF_PREFIX = ENV.fetch('DD_PROFILING_PPROF_PREFIX', 'profiler-allocation')

def export(flush)
File.write("#{PPROF_PREFIX}#{flush.start.strftime('%Y%m%dT%H%M%SZ')}.pprof", flush.pprof_data)
true
end
end

class ProfilerAllocationBenchmark
def run_benchmark
Benchmark.ips do |x|
benchmark_time = VALIDATE_BENCHMARK_MODE ? { time: 0.01, warmup: 0 } : { time: 10, warmup: 2 }
x.config(
**benchmark_time,
suite: report_to_dogstatsd_if_enabled_via_environment_variable(benchmark_name: 'profiler_allocation')
)

x.report('Allocations (baseline)', 'BasicObject.new')

x.save! 'profiler-allocation-results.json' unless VALIDATE_BENCHMARK_MODE
x.compare!
end

Datadog.configure do |c|
c.profiling.enabled = true
c.profiling.allocation_enabled = true
c.profiling.advanced.gc_enabled = false
c.profiling.exporter.transport = ExportToFile.new unless VALIDATE_BENCHMARK_MODE
end
Datadog::Profiling.wait_until_running

3.times { GC.start }

Benchmark.ips do |x|
benchmark_time = VALIDATE_BENCHMARK_MODE ? { time: 0.01, warmup: 0 } : { time: 10, warmup: 2 }
x.config(
**benchmark_time,
suite: report_to_dogstatsd_if_enabled_via_environment_variable(benchmark_name: 'profiler_allocation')
)

x.report("Allocations (#{ENV['CONFIG']})", 'BasicObject.new')

x.save! 'profiler-allocation-results.json' unless VALIDATE_BENCHMARK_MODE
x.compare!
end
end
end

puts "Current pid is #{Process.pid}"

ProfilerAllocationBenchmark.new.instance_exec do
run_benchmark
end
@@ -1045,8 +1045,27 @@ static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
}

+#define HANDLE_CLOCK_FAILURE(call) ({ \
+  long _result = (call); \
+  if (_result == 0) { \
+    delayed_error(state, ERR_CLOCK_FAIL); \
+    return; \
+  } \
+  _result; \
+})

+// Implements memory-related profiling events. This function is called by Ruby via the `object_allocation_tracepoint`
+// when the RUBY_INTERNAL_EVENT_NEWOBJ event is triggered.
+//
+// When allocation sampling is enabled, this function gets called for almost all* objects allocated by the Ruby VM.
+// (*In some weird cases the VM may skip this tracepoint.)
+//
+// At a high level, there are two paths through this function:
+// 1. should_sample == false -> return
+// 2. should_sample == true -> sample
+//
+// On big applications, path 1. is the hottest, since we don't sample every object. So it's quite important for it to
+// be as fast as possible.
static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
// Update thread-local allocation count
if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
@@ -1061,25 +1080,39 @@ static void on_newobj_event(DDTRACE_UNUSED VALUE tracepoint_data, DDTRACE_UNUSED void *unused)
// and disabled before it is cleared, but just in case...
if (state == NULL) return;

-  // In a few cases, we may actually be allocating an object as part of profiler sampling. We don't want to recursively
+  // In rare cases, we may actually be allocating an object as part of profiler sampling. We don't want to recursively
// sample, so we just return early
if (state->during_sample) {
state->stats.allocations_during_sample++;
return;
}

-  if (state->dynamic_sampling_rate_enabled) {
-    long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
-    if (now == 0) {
-      delayed_error(state, ERR_CLOCK_FAIL);
-      return;
-    }
-    if (!discrete_dynamic_sampler_should_sample(&state->allocation_sampler, now)) {
-      state->stats.allocation_skipped++;
-      return;
-    }
+  // Hot path: Dynamic sampling rate is usually enabled and the sampling decision is usually false
+  if (RB_LIKELY(state->dynamic_sampling_rate_enabled && !discrete_dynamic_sampler_should_sample(&state->allocation_sampler))) {
+    state->stats.allocation_skipped++;
+
+    coarse_instant now = monotonic_coarse_wall_time_now_ns();
+    HANDLE_CLOCK_FAILURE(now.timestamp_ns);
+
+    bool needs_readjust = discrete_dynamic_sampler_skipped_sample(&state->allocation_sampler, now);
+    if (RB_UNLIKELY(needs_readjust)) {
+      // We rarely readjust, so this is a cold path
+      // Also, while above we used the cheaper monotonic_coarse, for this call we want the regular monotonic call,
+      // which is why we end up getting time "again".
+      discrete_dynamic_sampler_readjust(
+        &state->allocation_sampler, HANDLE_CLOCK_FAILURE(monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE))
+      );
+    }
+
+    return;
  }

+  // From here on, we've decided to go ahead with the sample, which is way less common than skipping it
+
+  discrete_dynamic_sampler_before_sample(
+    &state->allocation_sampler, HANDLE_CLOCK_FAILURE(monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE))
+  );

// @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous
// invocation is still pending, (e.g. it wouldn't call `on_newobj_event` while it's already running), but I decided
// to keep this here for consistency -- every call to the thread context (other than the special gc calls which are
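
An aside on the HANDLE_CLOCK_FAILURE macro introduced above: it relies on the GCC/Clang statement-expression extension (`({ ... })`), where the block evaluates to its last expression. That lets a single macro both early-return from the enclosing function when the clock read failed (returned 0) and yield the timestamp to the surrounding expression when it succeeded. A standalone sketch of the pattern, with hypothetical read_clock_maybe_failing/record_failure helpers that are not from the diff:

#include <stdio.h>

// Statement expression: the ({ ... }) block is an expression whose value is its
// last statement; the `return` inside exits the function the macro is used in.
#define CHECKED_CLOCK(call) ({ \
  long _value = (call); \
  if (_value == 0) { \
    record_failure(); \
    return; \
  } \
  _value; \
})

static void record_failure(void) { fprintf(stderr, "clock read failed\n"); }
static long read_clock_maybe_failing(void) { return 123456789; } // hypothetical; 0 would signal failure

static void do_work(void) {
  long now = CHECKED_CLOCK(read_clock_maybe_failing()); // early-returns from do_work on failure
  printf("timestamp: %ld\n", now);
}

int main(void) { do_work(); return 0; }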
@@ -20,6 +20,9 @@

#define EMA_SMOOTHING_FACTOR 0.6

+static void maybe_readjust(discrete_dynamic_sampler *sampler, long now_ns);
+inline bool should_readjust(discrete_dynamic_sampler *sampler, coarse_instant now);

void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name, long now_ns) {
sampler->debug_name = debug_name;
discrete_dynamic_sampler_set_overhead_target_percentage(sampler, BASE_OVERHEAD_PCT, now_ns);
@@ -54,27 +57,25 @@ void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sa
return discrete_dynamic_sampler_reset(sampler, now_ns);
}

-static void maybe_readjust(discrete_dynamic_sampler *sampler, long now);
-
-bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns) {
+// NOTE: See header for an explanation of when this should get used
+__attribute__((warn_unused_result))
+bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler) {
// For efficiency reasons we don't do true random sampling but rather systematic
// sampling following a sample interval/skip. This can be biased and hide patterns
-  // but the dynamic interval and rather indeterministic pattern of allocations in
+  // but the dynamic interval and rather nondeterministic pattern of allocations in
// most real applications should help reduce the bias impact.
sampler->events_since_last_sample++;
sampler->events_since_last_readjustment++;
-  bool should_sample = sampler->sampling_interval > 0 && sampler->events_since_last_sample >= sampler->sampling_interval;
-
-  if (should_sample) {
-    sampler->sample_start_time_ns = now_ns;
-  } else {
-    // check if we should readjust our sampler after this event, even if we didn't sample it
-    maybe_readjust(sampler, now_ns);
-  }
-
-  return should_sample;
+  return sampler->sampling_interval > 0 && sampler->events_since_last_sample >= sampler->sampling_interval;
 }

+// NOTE: See header for an explanation of when this should get used
+void discrete_dynamic_sampler_before_sample(discrete_dynamic_sampler *sampler, long now_ns) {
+  sampler->sample_start_time_ns = now_ns;
+}

// NOTE: See header for an explanation of when this should get used
long discrete_dynamic_sampler_after_sample(discrete_dynamic_sampler *sampler, long now_ns) {
long last_sampling_time_ns = sampler->sample_start_time_ns == 0 ? 0 : long_max_of(0, now_ns - sampler->sample_start_time_ns);
sampler->samples_since_last_readjustment++;
@@ -95,6 +96,11 @@ size_t discrete_dynamic_sampler_events_since_last_sample(discrete_dynamic_sample
return sampler->events_since_last_sample;
}

+// NOTE: See header for an explanation of when this should get used
+bool discrete_dynamic_sampler_skipped_sample(discrete_dynamic_sampler *sampler, coarse_instant now) {
+  return should_readjust(sampler, now);
+}

static double ewma_adj_window(double latest_value, double avg, long current_window_time_ns, bool is_first) {
if (is_first) {
return latest_value;
@@ -109,19 +115,27 @@ static double ewma_adj_window(double latest_value, double avg, long current_wind
return (1-alpha) * avg + alpha * latest_value;
}

-static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
-  long this_window_time_ns = sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now - sampler->last_readjust_time_ns;
+static void maybe_readjust(discrete_dynamic_sampler *sampler, long now_ns) {
+  if (should_readjust(sampler, to_coarse_instant(now_ns))) discrete_dynamic_sampler_readjust(sampler, now_ns);
+}
+
+inline bool should_readjust(discrete_dynamic_sampler *sampler, coarse_instant now) {
+  long this_window_time_ns =
+    sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now.timestamp_ns - sampler->last_readjust_time_ns;

   bool should_readjust_based_on_time = this_window_time_ns >= ADJUSTMENT_WINDOW_NS;
   bool should_readjust_based_on_samples = sampler->samples_since_last_readjustment >= ADJUSTMENT_WINDOW_SAMPLES;

-  if (!should_readjust_based_on_time && !should_readjust_based_on_samples) {
-    // not enough time or samples have passed to perform a readjustment
-    return;
-  }
+  return should_readjust_based_on_time || should_readjust_based_on_samples;
+}

-  if (this_window_time_ns == 0) {
-    // should not be possible given previous condition but lets protect against div by 0 below.
+// NOTE: This method ASSUMES should_readjust == true
+// NOTE: See header for an explanation of when this should get used
+void discrete_dynamic_sampler_readjust(discrete_dynamic_sampler *sampler, long now_ns) {
+  long this_window_time_ns = sampler->last_readjust_time_ns == 0 ? ADJUSTMENT_WINDOW_NS : now_ns - sampler->last_readjust_time_ns;
+
+  if (this_window_time_ns <= 0) {
+    // should not be possible given should_readjust, but let's protect against div by 0 below.
     return;
   }

@@ -143,7 +157,7 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
// Lets update our average sampling time per event
long avg_sampling_time_in_window_ns = sampler->samples_since_last_readjustment == 0 ? 0 : sampler->sampling_time_since_last_readjustment_ns / sampler->samples_since_last_readjustment;
if (avg_sampling_time_in_window_ns > sampler->max_sampling_time_ns) {
-    // If the average sampling time in the previous window was deemed unnacceptable, clamp it to the
+    // If the average sampling time in the previous window was deemed unacceptable, clamp it to the
// maximum acceptable value and register this operation in our counter.
// NOTE: This is important so that events like suspensions or system overloads do not lead us to
// learn arbitrarily big sampling times which may then result in us not sampling anything
@@ -286,7 +300,7 @@ static void maybe_readjust(discrete_dynamic_sampler *sampler, long now) {
sampler->events_since_last_readjustment = 0;
sampler->samples_since_last_readjustment = 0;
sampler->sampling_time_since_last_readjustment_ns = 0;
-  sampler->last_readjust_time_ns = now;
+  sampler->last_readjust_time_ns = now_ns;
sampler->has_completed_full_adjustment_window = true;
}

@@ -406,7 +420,14 @@ VALUE _native_should_sample(VALUE self, VALUE now_ns) {
sampler_state *state;
TypedData_Get_Struct(self, sampler_state, &sampler_typed_data, state);

-  return discrete_dynamic_sampler_should_sample(&state->sampler, NUM2LONG(now_ns)) ? Qtrue : Qfalse;
+  if (discrete_dynamic_sampler_should_sample(&state->sampler)) {
+    discrete_dynamic_sampler_before_sample(&state->sampler, NUM2LONG(now_ns));
+    return Qtrue;
+  } else {
+    bool needs_readjust = discrete_dynamic_sampler_skipped_sample(&state->sampler, to_coarse_instant(NUM2LONG(now_ns)));
+    if (needs_readjust) discrete_dynamic_sampler_readjust(&state->sampler, NUM2LONG(now_ns));
+    return Qfalse;
+  }
}

VALUE _native_after_sample(VALUE self, VALUE now_ns) {
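
For readers tracing the math: ewma_adj_window (partially visible above) smooths per-window measurements with an exponentially weighted moving average whose weight is scaled by how much of a full adjustment window the measurement covers, ending in the (1-alpha) * avg + alpha * latest_value line shown. A simplified standalone sketch of that idea; the exact alpha computation in the real function may differ:

#include <stdio.h>

#define EMA_SMOOTHING_FACTOR 0.6
#define ADJUSTMENT_WINDOW_NS 1000000000L // assumption for this sketch: 1-second window

// EWMA where a shorter-than-full window contributes proportionally less.
static double ewma_adj_window_sketch(double latest_value, double avg, long current_window_time_ns, int is_first) {
  if (is_first) return latest_value;
  double window_fraction = (double) current_window_time_ns / ADJUSTMENT_WINDOW_NS;
  if (window_fraction > 1.0) window_fraction = 1.0;
  double alpha = EMA_SMOOTHING_FACTOR * window_fraction;
  return (1 - alpha) * avg + alpha * latest_value;
}

int main(void) {
  double avg = ewma_adj_window_sketch(100.0, 0.0, 0, 1); // first value is taken as-is
  avg = ewma_adj_window_sketch(50.0, avg, 500000000L, 0); // half window -> half-strength alpha
  printf("smoothed: %f\n", avg); // 0.7 * 100 + 0.3 * 50 = 85
  return 0;
}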
@@ -5,6 +5,8 @@

#include <ruby.h>

#include "time_helpers.h"

// A sampler that will sample discrete events based on the overhead of their
// sampling.
//
@@ -62,7 +64,6 @@ typedef struct discrete_dynamic_sampler {
unsigned long sampling_time_clamps;
} discrete_dynamic_sampler;


// Init a new sampler with sane defaults.
void discrete_dynamic_sampler_init(discrete_dynamic_sampler *sampler, const char *debug_name, long now_ns);

@@ -80,9 +81,38 @@ void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sa
// @return True if the event associated with this decision should be sampled, false
// otherwise.
//
-// NOTE: If true is returned we implicitly assume the start of a sampling operation
-// and it is expected that a follow-up after_sample call is issued.
-bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler, long now_ns);
+// IMPORTANT: A call to this method MUST be followed by a call to either
+// `discrete_dynamic_sampler_before_sample` if return is `true` or
+// `discrete_dynamic_sampler_skipped_sample` if return is `false`.
+//
+// In the past, this method took care of before_sample/skipped_sample/readjust as well.
+// We've had to split it up because we wanted to both use coarse time and regular monotonic time depending on the
+// situation, but also wanted time to come as an argument from the outside.
+//
+// TL;DR here's how they should be used as Ruby code:
+//   if discrete_dynamic_sampler_should_sample
+//     discrete_dynamic_sampler_before_sample(monotonic_wall_time_now_ns())
+//   else
+//     needs_readjust = discrete_dynamic_sampler_skipped_sample(monotonic_coarse_wall_time_now_ns())
+//     discrete_dynamic_sampler_readjust(monotonic_wall_time_now_ns()) if needs_readjust
+//   end
+__attribute__((warn_unused_result))
+bool discrete_dynamic_sampler_should_sample(discrete_dynamic_sampler *sampler);

+// Signal the start of a sampling operation.
+// MUST be called after `discrete_dynamic_sampler_should_sample` returns `true`.
+void discrete_dynamic_sampler_before_sample(discrete_dynamic_sampler *sampler, long now_ns);

+// Signals that sampling did not happen.
+// MUST be called after `discrete_dynamic_sampler_should_sample` returns `false`.
+//
+// @return True if the sampler needs to be readjusted.
+//
+// IMPORTANT: A call to this method MUST be followed by a call to `discrete_dynamic_sampler_readjust` if return is `true`.
+__attribute__((warn_unused_result))
+bool discrete_dynamic_sampler_skipped_sample(discrete_dynamic_sampler *sampler, coarse_instant now);

+void discrete_dynamic_sampler_readjust(discrete_dynamic_sampler *sampler, long now_ns);

// Signal the end of a sampling operation.
//
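
Restating the header's Ruby TL;DR as a C caller sketch, using the function names from this diff (error handling for failed clock reads, which on_newobj_event does via HANDLE_CLOCK_FAILURE, is omitted here for brevity):

#include <stdbool.h>
#include "discrete_dynamic_sampler.h"
#include "time_helpers.h"

// The call protocol required by the split-up sampler API.
static void on_event(discrete_dynamic_sampler *sampler) {
  if (discrete_dynamic_sampler_should_sample(sampler)) {
    // Sampling path (rare): pay for the precise monotonic clock.
    discrete_dynamic_sampler_before_sample(sampler, monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE));
    // ... take the sample, then report its duration via discrete_dynamic_sampler_after_sample ...
  } else {
    // Skipping path (hot): the cheap coarse clock is enough to check whether a
    // readjustment window has elapsed.
    bool needs_readjust = discrete_dynamic_sampler_skipped_sample(sampler, monotonic_coarse_wall_time_now_ns());
    if (needs_readjust) {
      // Readjustment is rare, so the precise clock is affordable here.
      discrete_dynamic_sampler_readjust(sampler, monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE));
    }
  }
}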
3 changes: 3 additions & 0 deletions ext/datadog_profiling_native_extension/extconf.rb
@@ -129,6 +129,9 @@ def add_compiler_flag(flag)
# but it's slower to build
# so instead we just assume that we have the function we need on Linux, and nowhere else
$defs << '-DHAVE_PTHREAD_GETCPUCLOCKID'

+  # Not available on macOS
+  $defs << '-DHAVE_CLOCK_MONOTONIC_COARSE'
end

have_func 'malloc_stats'
15 changes: 0 additions & 15 deletions ext/datadog_profiling_native_extension/time_helpers.c
@@ -4,21 +4,6 @@
#include "ruby_helpers.h"
#include "time_helpers.h"

-// Safety: This function is assumed never to raise exceptions by callers when raise_on_failure == false
-long retrieve_clock_as_ns(clockid_t clock_id, bool raise_on_failure) {
-  struct timespec clock_value;
-
-  if (clock_gettime(clock_id, &clock_value) != 0) {
-    if (raise_on_failure) ENFORCE_SUCCESS_GVL(errno);
-    return 0;
-  }
-
-  return clock_value.tv_nsec + SECONDS_AS_NS(clock_value.tv_sec);
-}
-
-long monotonic_wall_time_now_ns(bool raise_on_failure) { return retrieve_clock_as_ns(CLOCK_MONOTONIC, raise_on_failure); }
-long system_epoch_time_now_ns(bool raise_on_failure) { return retrieve_clock_as_ns(CLOCK_REALTIME, raise_on_failure); }

// Design: The monotonic_to_system_epoch_state struct is kept somewhere by the caller, and MUST be initialized to
// MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER.
//
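
These helpers leave the .c file, but the matching time_helpers.h diff is not shown on this page, so their destination isn't visible here. Given that monotonic_wall_time_now_ns is still called in the collector diff above, they were presumably moved into the header (plausibly as static inline, so the hot path avoids function-call overhead), alongside the new coarse_instant/monotonic_coarse_wall_time_now_ns helpers whose names do appear in the diffs. A hedged sketch of what the coarse variant could look like; the bodies and the clock_as_ns_or_zero helper name below are assumptions, and the real helpers also handle the raise-on-failure mode:

#include <time.h>

// Assumed shape after the move; only the names coarse_instant, to_coarse_instant,
// and monotonic_coarse_wall_time_now_ns appear in the diffs above.
typedef struct { long timestamp_ns; } coarse_instant;

static inline coarse_instant to_coarse_instant(long timestamp_ns) {
  return (coarse_instant) {.timestamp_ns = timestamp_ns};
}

static inline long clock_as_ns_or_zero(clockid_t clock_id) {
  struct timespec clock_value;
  // Keep the "0 means failure" convention that HANDLE_CLOCK_FAILURE checks for.
  if (clock_gettime(clock_id, &clock_value) != 0) return 0;
  return (long) clock_value.tv_sec * 1000000000L + clock_value.tv_nsec;
}

static inline coarse_instant monotonic_coarse_wall_time_now_ns(void) {
#ifdef HAVE_CLOCK_MONOTONIC_COARSE
  return to_coarse_instant(clock_as_ns_or_zero(CLOCK_MONOTONIC_COARSE));
#else
  // Fallback for platforms without the coarse clock (e.g. macOS).
  return to_coarse_instant(clock_as_ns_or_zero(CLOCK_MONOTONIC));
#endif
}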
