From 039079521ac26cc33f30da01eb7dc4291a67e741 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 8 Nov 2024 11:29:56 -0500 Subject: [PATCH 01/14] DEBUG-2334 DI probe manager class --- lib/datadog/di/probe_manager.rb | 180 +++++++ lib/datadog/di/probe_notification_builder.rb | 16 +- lib/datadog/di/transport.rb | 6 + .../instrumentation_integration_test_class.rb | 12 + ...nstrumentation_integration_test_class_2.rb | 12 + ...nstrumentation_integration_test_class_3.rb | 12 + ...nstrumentation_integration_test_class_4.rb | 12 + .../di/integration/instrumentation_spec.rb | 457 ++++++++++++++++++ .../probe_notification_builder_spec.rb | 2 + spec/datadog/di/probe_manager_spec.rb | 311 ++++++++++++ .../di/probe_notification_builder_spec.rb | 1 + 11 files changed, 1009 insertions(+), 12 deletions(-) create mode 100644 lib/datadog/di/probe_manager.rb create mode 100644 spec/datadog/di/integration/instrumentation_integration_test_class.rb create mode 100644 spec/datadog/di/integration/instrumentation_integration_test_class_2.rb create mode 100644 spec/datadog/di/integration/instrumentation_integration_test_class_3.rb create mode 100644 spec/datadog/di/integration/instrumentation_integration_test_class_4.rb create mode 100644 spec/datadog/di/integration/instrumentation_spec.rb create mode 100644 spec/datadog/di/probe_manager_spec.rb diff --git a/lib/datadog/di/probe_manager.rb b/lib/datadog/di/probe_manager.rb new file mode 100644 index 00000000000..38b25e1d3d8 --- /dev/null +++ b/lib/datadog/di/probe_manager.rb @@ -0,0 +1,180 @@ +# frozen_string_literal: true + +module Datadog + module DI + # Stores probes received from remote config (that we can parse, in other + # words, whose type/attributes we support), requests needed instrumentation + # for the probes via Instrumenter, and stores pending probes (those which + # haven't yet been instrumented successfully due to their targets not + # existing) and failed probes (where we are certain the target will not + # ever be loaded, or otherwise become valid). + # + # @api private + class ProbeManager + def initialize(settings, instrumenter, probe_notification_builder, + probe_notifier_worker, logger, telemetry: nil) + @settings = settings + @instrumenter = instrumenter + @probe_notification_builder = probe_notification_builder + @probe_notifier_worker = probe_notifier_worker + @logger = logger + @telemetry = telemetry + @installed_probes = {} + @pending_probes = {} + @failed_probes = {} + + @definition_trace_point = TracePoint.trace(:end) do |tp| + begin + install_pending_method_probes(tp.self) + rescue => exc + raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions + logger.warn("Unhandled exception in definition trace point: #{exc.class}: #{exc}") + telemetry&.report(exc, description: "Unhandled exception in definition trace point") + # TODO test this path + end + end + end + + attr_reader :logger + attr_reader :telemetry + + # TODO test that close is called during component teardown and + # the trace point is cleared + def close + definition_trace_point.disable + clear_hooks + end + + def clear_hooks + pending_probes.clear + installed_probes.each do |probe_id, probe| + instrumenter.unhook(probe) + end + installed_probes.clear + end + + attr_reader :settings + attr_reader :instrumenter + attr_reader :probe_notification_builder + attr_reader :probe_notifier_worker + attr_reader :installed_probes + attr_reader :pending_probes + + # Probes that failed to instrument for reasons other than the target is + # not yet loaded are added to this collection, so that we do not try + # to instrument them every time remote configuration is processed. + attr_reader :failed_probes + + def add_probe(probe) + # TODO lock here? + + # Probe failed to install previously, do not try to install it again. + if msg = failed_probes[probe.id] + # TODO test this path + raise Error::ProbePreviouslyFailed, msg + end + + begin + instrumenter.hook(probe, &method(:probe_executed_callback)) + + installed_probes[probe.id] = probe + payload = probe_notification_builder.build_installed(probe) + probe_notifier_worker.add_status(payload) + # The probe would only be in the pending probes list if it was + # previously attempted to be installed and the target was not loaded. + # Always remove from pending list here because it makes the + # API smaller and shouldn't cause any actual problems. + pending_probes.delete(probe.id) + true + rescue Error::DITargetNotDefined => exc + pending_probes[probe.id] = probe + false + end + rescue => exc + # In "propagate all exceptions" mode we will try to instrument again. + raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions + + logger.warn("Error processing probe configuration: #{exc.class}: #{exc}") + telemetry&.report(exc, description: "Error processing probe configuration") + # TODO report probe as failed to agent since we won't attempt to + # install it again. + + # TODO add top stack frame to message + failed_probes[probe.id] = "#{exc.class}: #{exc}" + + raise + end + + def remove_other_probes(probe_ids) + pending_probes.values.each do |probe| + unless probe_ids.include?(probe.id) + pending_probes.delete(probe.id) + end + end + installed_probes.values.each do |probe| + unless probe_ids.include?(probe.id) + begin + instrumenter.unhook(probe) + # Only remove the probe from installed list if it was + # successfully de-instrumented. Active probes do incur overhead + # for the running application, and if the error is ephemeral + # we want to try removing the probe again at the next opportunity. + # + # TODO give up after some time? + installed_probes.delete(probe.id) + rescue => exc + raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions + # Silence all exceptions? + # TODO should we propagate here and rescue upstream? + logger.warn("Error removing probe #{probe.id}: #{exc.class}: #{exc}") + telemetry&.report(exc, description: "Error removing probe #{probe.id}") + end + end + end + end + + private def install_pending_method_probes(cls) + # TODO search more efficiently than linearly + pending_probes.each do |probe_id, probe| + if probe.method? + # TODO move this stringification elsewhere + if probe.type_name == cls.name + # TODO is it OK to hook from trace point handler? + # TODO the class is now defined, but can hooking still fail? + hook_method(probe.type_name, probe.method_name, + rate_limiter: probe.rate_limiter, &instance_method(:probe_executed_callback)) + pending_probes.delete(probe.id) + break + end + end + end + end + + def install_pending_line_probes(file) + pending_probes.values.each do |probe| + if probe.line? + if probe.file_matches?(file) + add_probe(probe) + end + end + end + end + + def probe_executed_callback(probe:, **opts) + unless probe.emitting_notified? + payload = probe_notification_builder.build_emitting(probe) + probe_notifier_worker.add_status(payload) + probe.emitting_notified = true + end + + payload = probe_notification_builder.build_executed(probe, **opts) + probe_notifier_worker.add_snapshot(payload) + end + + # Class/module definition trace point (:end type). + # Used to install hooks when the target classes/modules aren't yet + # defined when the hook request is received. + attr_reader :definition_trace_point + end + end +end diff --git a/lib/datadog/di/probe_notification_builder.rb b/lib/datadog/di/probe_notification_builder.rb index 3160b7e8d70..4c5ad19d23c 100644 --- a/lib/datadog/di/probe_notification_builder.rb +++ b/lib/datadog/di/probe_notification_builder.rb @@ -46,11 +46,13 @@ def build_executed(probe, # this should be all frames for enriched probes and no frames for # non-enriched probes? build_snapshot(probe, rv: rv, snapshot: snapshot, + # Actual path of the instrumented file. + path: trace_point&.path, duration: duration, caller_locations: caller_locations, args: args, kwargs: kwargs, serialized_entry_args: serialized_entry_args) end - def build_snapshot(probe, rv: nil, snapshot: nil, + def build_snapshot(probe, rv: nil, snapshot: nil, path: nil, duration: nil, caller_locations: nil, args: nil, kwargs: nil, serialized_entry_args: nil) # TODO also verify that non-capturing probe does not pass @@ -85,18 +87,8 @@ def build_snapshot(probe, rv: nil, snapshot: nil, end location = if probe.line? - actual_file = if probe.file - # Normally caller_locations should always be filled for a line probe - # but in the test suite we don't always provide all arguments. - actual_file_basename = File.basename(probe.file) - caller_locations&.detect do |loc| - # TODO record actual path that probe was installed into, - # perform exact match here against that path. - File.basename(loc.path) == actual_file_basename - end&.path || probe.file - end { - file: actual_file, + file: path, lines: [probe.line_no], } elsif probe.method? diff --git a/lib/datadog/di/transport.rb b/lib/datadog/di/transport.rb index b6cf3ed1ddf..d4529e92b02 100644 --- a/lib/datadog/di/transport.rb +++ b/lib/datadog/di/transport.rb @@ -48,6 +48,12 @@ def send_input(payload) headers: {'content-type' => 'application/json'},) end + # TODO status should use either input or diagnostics endpoints + # depending on agent version. + alias send_status send_diagnostics + + alias send_snapshot send_input + private attr_reader :client diff --git a/spec/datadog/di/integration/instrumentation_integration_test_class.rb b/spec/datadog/di/integration/instrumentation_integration_test_class.rb new file mode 100644 index 00000000000..194761dab4a --- /dev/null +++ b/spec/datadog/di/integration/instrumentation_integration_test_class.rb @@ -0,0 +1,12 @@ +class InstrumentationIntegrationTestClass + def test_method + a = 21 + password = 'password' + redacted = {b: 33, session: 'blah'} + # padding + # padding + # padding + # padding + a * 2 # line 10 + end +end diff --git a/spec/datadog/di/integration/instrumentation_integration_test_class_2.rb b/spec/datadog/di/integration/instrumentation_integration_test_class_2.rb new file mode 100644 index 00000000000..941a6b94670 --- /dev/null +++ b/spec/datadog/di/integration/instrumentation_integration_test_class_2.rb @@ -0,0 +1,12 @@ +class InstrumentationIntegrationTestClass2 + def test_method + a = 21 + password = 'password' + redacted = {b: 33, session: 'blah'} + # padding + # padding + # padding + # padding + a * 2 # line 10 + end +end diff --git a/spec/datadog/di/integration/instrumentation_integration_test_class_3.rb b/spec/datadog/di/integration/instrumentation_integration_test_class_3.rb new file mode 100644 index 00000000000..d227a1e7dc9 --- /dev/null +++ b/spec/datadog/di/integration/instrumentation_integration_test_class_3.rb @@ -0,0 +1,12 @@ +class InstrumentationIntegrationTestClass3 + def test_method + a = 21 + password = 'password' + redacted = {b: 33, session: 'blah'} + # padding + # padding + # padding + # padding + a * 2 # line 10 + end +end diff --git a/spec/datadog/di/integration/instrumentation_integration_test_class_4.rb b/spec/datadog/di/integration/instrumentation_integration_test_class_4.rb new file mode 100644 index 00000000000..558fe1c5014 --- /dev/null +++ b/spec/datadog/di/integration/instrumentation_integration_test_class_4.rb @@ -0,0 +1,12 @@ +class InstrumentationIntegrationTestClass4 + def test_method + a = 21 + password = 'password' + redacted = {b: 33, session: 'blah'} + # padding + # padding + # padding + # padding + a * 2 # line 10 + end +end diff --git a/spec/datadog/di/integration/instrumentation_spec.rb b/spec/datadog/di/integration/instrumentation_spec.rb new file mode 100644 index 00000000000..0c35cafe80a --- /dev/null +++ b/spec/datadog/di/integration/instrumentation_spec.rb @@ -0,0 +1,457 @@ +require 'datadog/di' + +# Note: this file contains integration tests for instrumentation. +# This level of testing requires using ProbeManager in addition to Instrumenter. +# For Instrumenter-only tests, use instrumenter_spec.rb in the parent +# directory. + +class InstrumentationSpecTestClass + def test_method(a = 1) + 42 + end + + def mutating_method(greeting) + greeting.sub!('hello', 'bye') + end +end + +RSpec.describe 'Instrumentation integration' do + di_test + + before(:all) do + Datadog::DI.activate_tracking! + require_relative 'instrumentation_integration_test_class' + end + + after do + component.shutdown! + end + + let(:settings) do + Datadog::Core::Configuration::Settings.new.tap do |settings| + settings.remote.enabled = true + settings.dynamic_instrumentation.enabled = true + settings.dynamic_instrumentation.internal.development = true + settings.dynamic_instrumentation.internal.propagate_all_exceptions = true + end + end + + let(:instrumenter) do + component.instrumenter + end + + let(:probe_manager) do + component.probe_manager + end + + let(:agent_settings) do + double('agent settings') + end + + let(:component) do + Datadog::DI::Component.build!(settings, agent_settings) + end + + let(:expected_installed_payload) do + {ddsource: 'dd_debugger', + debugger: { + diagnostics: { + parentId: nil, + probeId: String, + probeVersion: 0, + runtimeId: String, + status: 'INSTALLED', + }}, + message: String, + service: 'rspec', + timestamp: Integer, + } + end + + let(:expected_emitting_payload) do + {ddsource: 'dd_debugger', + debugger: { + diagnostics: { + parentId: nil, + probeId: String, + probeVersion: 0, + runtimeId: String, + status: 'EMITTING', + }}, + message: String, + service: 'rspec', + timestamp: Integer, + } + end + + context 'log probe' do + before do + allow(agent_settings).to receive(:hostname) + allow(agent_settings).to receive(:port) + allow(agent_settings).to receive(:timeout_seconds).and_return(1) + allow(agent_settings).to receive(:ssl) + + allow(Datadog::DI).to receive(:component).and_return(component) + end + + context 'method probe' do + context 'enriched probe' do + let(:probe) do + Datadog::DI::Probe.new(id: "1234", type: :log, + type_name: 'InstrumentationSpecTestClass', method_name: 'test_method', + capture_snapshot: true,) + end + + let(:expected_captures) do + {entry: {arguments: {}, throwable: nil}, + return: {arguments: {"@return": {type: 'Integer', value: '42'}}, throwable: nil},} + end + + it 'invokes probe' do + expect(component.transport).to receive(:send_request).at_least(:once) + probe_manager.add_probe(probe) + expect(component.probe_notifier_worker).to receive(:add_snapshot).once.and_call_original + expect(InstrumentationSpecTestClass.new.test_method).to eq(42) + component.probe_notifier_worker.flush + end + + def run_test + expect(component.transport).to receive(:send_request).at_least(:once) + probe_manager.add_probe(probe) + payload = nil + expect(component.probe_notifier_worker).to receive(:add_snapshot) do |_payload| + payload = _payload + end + + yield + + component.probe_notifier_worker.flush + + expect(payload).to be_a(Hash) + captures = payload.fetch(:"debugger.snapshot").fetch(:captures) + expect(captures).to eq(expected_captures) + end + + it 'assembles expected notification payload' do + run_test do + expect(InstrumentationSpecTestClass.new.test_method).to eq(42) + end + end + + context 'when argument is mutated by method' do + let(:probe) do + Datadog::DI::Probe.new(id: "1234", type: :log, + type_name: 'InstrumentationSpecTestClass', method_name: 'mutating_method', + capture_snapshot: true,) + end + + let(:expected_captures) do + {entry: {arguments: { + arg1: {type: 'String', value: 'hello world'}, + }, throwable: nil}, + return: {arguments: { + "@return": {type: 'String', value: 'bye world'}, + }, throwable: nil},} + end + + it 'captures original argument value at entry' do + run_test do + expect(InstrumentationSpecTestClass.new.mutating_method('hello world')).to eq('bye world') + end + end + end + end + + context 'when target is invoked' do + let(:probe) do + Datadog::DI::Probe.new(id: "1234", type: :log, + type_name: 'InstrumentationSpecTestClass', method_name: 'test_method') + end + + it 'notifies agent that probe is emitting' do + expect(component.probe_notifier_worker).to receive(:add_status) do |status| + expect(status).to match(expected_installed_payload) + end + probe_manager.add_probe(probe) + expect(component.probe_notifier_worker).to receive(:add_status) do |status| + expect(status).to match(expected_emitting_payload) + end + allow(component.probe_notifier_worker).to receive(:add_snapshot) + expect(InstrumentationSpecTestClass.new.test_method).to eq(42) + component.probe_notifier_worker.flush + end + + context 'when target is invoked multiple times' do + it 'notifies that probe is emitting only once at first invocation' do + expect(component.probe_notifier_worker).to receive(:add_status) do |status| + expect(status).to match(expected_installed_payload) + end + probe_manager.add_probe(probe) + + expect(component.probe_notifier_worker).to receive(:add_status) do |status| + expect(status).to match(expected_emitting_payload) + end + expect(component.probe_notifier_worker).to receive(:add_snapshot) + expect(InstrumentationSpecTestClass.new.test_method).to eq(42) + component.probe_notifier_worker.flush + + expect(component.probe_notifier_worker).not_to receive(:add_status) + expect(component.probe_notifier_worker).to receive(:add_snapshot) + expect(InstrumentationSpecTestClass.new.test_method).to eq(42) + component.probe_notifier_worker.flush + end + end + end + end + + context 'line probe' do + context 'simple log probe' do + let(:probe) do + Datadog::DI::Probe.new(id: "1234", type: :log, + file: 'instrumentation_integration_test_class.rb', line_no: 10, + capture_snapshot: false,) + end + + it 'invokes probe' do + expect(component.transport).to receive(:send_request).at_least(:once) + probe_manager.add_probe(probe) + component.probe_notifier_worker.flush + expect(probe_manager.installed_probes.length).to eq 1 + expect(component.probe_notifier_worker).to receive(:add_snapshot).once.and_call_original + expect(InstrumentationIntegrationTestClass.new.test_method).to eq(42) + end + + describe 'payload' do + let(:payload) do + probe_manager.add_probe(probe) + payload = nil + expect(component.probe_notifier_worker).to receive(:add_snapshot) do |_payload| + payload = _payload + end + expect(InstrumentationIntegrationTestClass.new.test_method).to eq(42) + component.probe_notifier_worker.flush + expect(payload).to be_a(Hash) + payload + end + + let(:snapshot) do + payload.fetch(:"debugger.snapshot") + end + + it 'does not have captures' do + expect(component.transport).to receive(:send_request).at_least(:once) + expect(snapshot.fetch(:captures)).to be nil + end + + let(:stack) do + snapshot.fetch(:stack) + end + + let(:top_stack_frame) do + stack.first + end + + it 'has instrumented location as top stack frame' do + expect(component.transport).to receive(:send_request).at_least(:once) + expect(File.basename(top_stack_frame.fetch(:fileName))).to eq 'instrumentation_integration_test_class.rb' + end + end + end + + context 'enriched probe' do + let(:probe) do + Datadog::DI::Probe.new(id: "1234", type: :log, + file: 'instrumentation_integration_test_class.rb', line_no: 10, + capture_snapshot: true,) + end + + let(:expected_captures) do + {lines: {10 => {locals: { + a: {type: 'Integer', value: '21'}, + password: {type: 'String', notCapturedReason: 'redactedIdent'}, + redacted: {type: 'Hash', entries: [ + [{type: 'Symbol', value: 'b'}, {type: 'Integer', value: '33'}], + [{type: 'Symbol', value: 'session'}, {type: 'String', notCapturedReason: 'redactedIdent'}], + ]}, + }}}} + end + + it 'invokes probe' do + expect(component.transport).to receive(:send_request).at_least(:once) + probe_manager.add_probe(probe) + expect(component.probe_notifier_worker).to receive(:add_snapshot).once.and_call_original + expect(InstrumentationIntegrationTestClass.new.test_method).to eq(42) + component.probe_notifier_worker.flush + end + + it 'assembles expected notification payload' do + expect(component.transport).to receive(:send_request).at_least(:once) + probe_manager.add_probe(probe) + payload = nil + expect(component.probe_notifier_worker).to receive(:add_snapshot) do |_payload| + payload = _payload + end + expect(InstrumentationIntegrationTestClass.new.test_method).to eq(42) + component.probe_notifier_worker.flush + + expect(payload).to be_a(Hash) + captures = payload.fetch(:"debugger.snapshot").fetch(:captures) + expect(captures).to eq(expected_captures) + end + end + + context 'when target file is not loaded initially and is loaded later' do + context 'when code tracking is available' do + before do + Datadog::DI.activate_tracking! + end + + let(:probe) do + Datadog::DI::Probe.new(id: "1234", type: :log, + file: 'instrumentation_integration_test_class_2.rb', line_no: 10,) + end + + it 'instruments file when it is loaded' do + probe_manager.add_probe(probe) + + expect(probe_manager.pending_probes.length).to eq 1 + expect(probe_manager.installed_probes.length).to eq 0 + + expect(component.probe_notification_builder).to receive(:build_installed).and_call_original + expect(component.transport).to receive(:send_request).at_least(:once) + + require_relative 'instrumentation_integration_test_class_2' + + expect(probe_manager.pending_probes.length).to eq 0 + expect(probe_manager.installed_probes.length).to eq 1 + + expect(component.probe_notification_builder).to receive(:build_executed).and_call_original + + InstrumentationIntegrationTestClass2.new.test_method + + component.probe_notifier_worker.flush + end + end + + context 'when code tracking is not available' do + before do + Datadog::DI.deactivate_tracking! + end + + context 'untargeted trace points enabled' do + let(:probe) do + Datadog::DI::Probe.new(id: "1234", type: :log, + file: 'instrumentation_integration_test_class_3.rb', line_no: 10,) + end + + before do + settings.dynamic_instrumentation.internal.untargeted_trace_points = true + end + + after do + settings.dynamic_instrumentation.internal.untargeted_trace_points = false + end + + it 'instruments file immediately' do + expect(component.transport).to receive(:send_request).at_least(:once) + + probe_manager.add_probe(probe) + + expect(probe_manager.pending_probes.length).to eq 0 + expect(probe_manager.installed_probes.length).to eq 1 + + # This require does not change instrumentation + require_relative 'instrumentation_integration_test_class_3' + + expect(probe_manager.pending_probes.length).to eq 0 + expect(probe_manager.installed_probes.length).to eq 1 + + expect(component.probe_notification_builder).to receive(:build_executed).and_call_original + + InstrumentationIntegrationTestClass3.new.test_method + + component.probe_notifier_worker.flush + end + end + + context 'untargeted trace points disabled' do + let(:probe) do + Datadog::DI::Probe.new(id: "1234", type: :log, + file: 'instrumentation_integration_test_class_4.rb', line_no: 10,) + end + + before do + settings.dynamic_instrumentation.internal.untargeted_trace_points = false + end + + it 'does not instrument file when it is loaded' do + probe_manager.add_probe(probe) + + expect(probe_manager.pending_probes.length).to eq 1 + expect(probe_manager.installed_probes.length).to eq 0 + + require_relative 'instrumentation_integration_test_class_4' + + expect(probe_manager.pending_probes.length).to eq 1 + expect(probe_manager.installed_probes.length).to eq 0 + + expect(component.probe_notification_builder).not_to receive(:build_executed).and_call_original + + InstrumentationIntegrationTestClass4.new.test_method + + component.probe_notifier_worker.flush + end + end + end + end + + context 'when target is invoked' do + before do + Datadog::DI.activate_tracking! + load File.join(File.dirname(__FILE__), 'instrumentation_integration_test_class.rb') + end + + let(:probe) do + Datadog::DI::Probe.new(id: "1234", type: :log, + file: 'instrumentation_integration_test_class.rb', line_no: 10, + capture_snapshot: false,) + end + + it 'notifies agent that probe is emitting' do + expect(component.probe_notifier_worker).to receive(:add_status) do |status| + expect(status).to match(expected_installed_payload) + end + expect(probe_manager.add_probe(probe)).to be true + + expect(component.probe_notifier_worker).to receive(:add_status) do |status| + expect(status).to match(expected_emitting_payload) + end + allow(component.probe_notifier_worker).to receive(:add_snapshot) + expect(InstrumentationIntegrationTestClass.new.test_method).to eq(42) + component.probe_notifier_worker.flush + end + + context 'when target is invoked multiple times' do + it 'notifies that probe is emitting only once at first invocation' do + expect(component.probe_notifier_worker).to receive(:add_status) do |status| + expect(status).to match(expected_installed_payload) + end + expect(probe_manager.add_probe(probe)).to be true + + expect(component.probe_notifier_worker).to receive(:add_status) do |status| + expect(status).to match(expected_emitting_payload) + end + expect(component.probe_notifier_worker).to receive(:add_snapshot) + expect(InstrumentationIntegrationTestClass.new.test_method).to eq(42) + component.probe_notifier_worker.flush + + expect(component.probe_notifier_worker).not_to receive(:add_status) + expect(component.probe_notifier_worker).to receive(:add_snapshot) + expect(InstrumentationIntegrationTestClass.new.test_method).to eq(42) + component.probe_notifier_worker.flush + end + end + end + end + end +end diff --git a/spec/datadog/di/integration/probe_notification_builder_spec.rb b/spec/datadog/di/integration/probe_notification_builder_spec.rb index 4a052b95564..373a23d661f 100644 --- a/spec/datadog/di/integration/probe_notification_builder_spec.rb +++ b/spec/datadog/di/integration/probe_notification_builder_spec.rb @@ -3,6 +3,8 @@ require 'datadog/di/probe_notification_builder' RSpec.describe Datadog::DI::ProbeNotificationBuilder do + di_test + describe 'log probe' do let(:redactor) { Datadog::DI::Redactor.new(settings) } let(:serializer) { Datadog::DI::Serializer.new(settings, redactor) } diff --git a/spec/datadog/di/probe_manager_spec.rb b/spec/datadog/di/probe_manager_spec.rb new file mode 100644 index 00000000000..6971c1ea839 --- /dev/null +++ b/spec/datadog/di/probe_manager_spec.rb @@ -0,0 +1,311 @@ +require "datadog/di/spec_helper" +require 'datadog/di/probe_manager' + +class ProbeManagerSpecTestClass; end + +RSpec.describe Datadog::DI::ProbeManager do + di_test + + mock_settings_for_di do |settings| + allow(settings.dynamic_instrumentation).to receive(:enabled).and_return(true) + allow(settings.dynamic_instrumentation.internal).to receive(:propagate_all_exceptions).and_return(false) + end + + let(:instrumenter) do + instance_double(Datadog::DI::Instrumenter) + end + + let(:probe_notification_builder) do + instance_double(Datadog::DI::ProbeNotificationBuilder) + end + + let(:probe_notifier_worker) do + instance_double(Datadog::DI::ProbeNotifierWorker) + end + + let(:logger) do + instance_double(Logger) + end + + let(:manager) do + described_class.new(settings, instrumenter, probe_notification_builder, probe_notifier_worker, logger) + end + + describe '.new' do + after do + manager.close + end + + it 'creates an instance' do + expect(manager).to be_a(described_class) + end + end + + describe '#add_probe' do + after do + allow(instrumenter).to receive(:unhook) + manager.close + end + + context 'log probe' do + let(:probe) do + Datadog::DI::Probe.new( + id: '3ecfd456-2d7c-4359-a51f-d4cc44141ffe', type: :log, file: 'xx', line_no: 123, + ) + end + + context 'when probe is installed successfully' do + it 'returns true and adds probe to the installed probe list' do + expect(instrumenter).to receive(:hook) do |probe_| + expect(probe_).to be(probe) + end + + expect(probe_notification_builder).to receive(:build_installed) + expect(probe_notifier_worker).to receive(:add_status) + + expect(manager.add_probe(probe)).to be true + + expect(manager.pending_probes.length).to eq 0 + expect(manager.failed_probes.length).to eq 0 + + expect(manager.installed_probes.length).to eq 1 + expect(manager.installed_probes["3ecfd456-2d7c-4359-a51f-d4cc44141ffe"]).to be(probe) + end + end + + context 'when instrumentation target is missing' do + it 'returns false and adds probe to the pending probe list' do + expect(instrumenter).to receive(:hook) do |probe_| + expect(probe_).to be(probe) + raise Datadog::DI::Error::DITargetNotDefined + end + + expect(probe_notification_builder).not_to receive(:build_installed) + expect(probe_notifier_worker).not_to receive(:add_status) + + expect(manager.add_probe(probe)).to be false + + expect(manager.pending_probes.length).to eq 1 + expect(manager.pending_probes["3ecfd456-2d7c-4359-a51f-d4cc44141ffe"]).to be(probe) + + expect(manager.installed_probes.length).to eq 0 + expect(manager.failed_probes.length).to eq 0 + end + end + + context 'when there is an exception during instrumentation' do + it 'logs warning, drops probe and reraises the exception' do + expect(logger).to receive(:warn) do |msg| + expect(msg).to match(/Error processing probe configuration.*Instrumentation error/) + end + + expect(instrumenter).to receive(:hook) do |probe_| + expect(probe_).to be(probe) + raise "Instrumentation error" + end + + expect(probe_notification_builder).not_to receive(:build_installed) + expect(probe_notifier_worker).not_to receive(:add_status) + + expect do + manager.add_probe(probe) + end.to raise_error(RuntimeError, 'Instrumentation error') + + expect(manager.pending_probes.length).to eq 0 + + expect(manager.installed_probes.length).to eq 0 + + expect(manager.failed_probes.length).to eq 1 + expect(manager.failed_probes[probe.id]).to match(/Instrumentation error/) + end + end + end + end + + describe '#remove_other_probes' do + let(:probe) do + Datadog::DI::Probe.new( + id: '3ecfd456-2d7c-4359-a51f-d4cc44141ffe', type: :log, file: 'xx', line_no: 123, + ) + end + + context 'when there are pending probes' do + before do + manager.pending_probes[probe.id] = probe + end + + context 'when pending probe id is in probe ids' do + it 'does not remove pending probe' do + manager.remove_other_probes([probe.id]) + + expect(manager.pending_probes).to eq(probe.id => probe) + end + end + + context 'when pending probe id is not in probe ids' do + it 'removes pending probe' do + manager.remove_other_probes(['123']) + + expect(manager.pending_probes).to eq({}) + end + end + end + + context 'when there are installed probes' do + before do + manager.installed_probes[probe.id] = probe + end + + context 'when installed probe id is in probe ids' do + it 'does not remove installed probe' do + manager.remove_other_probes([probe.id]) + + expect(manager.installed_probes).to eq(probe.id => probe) + end + end + + context 'when installed probe id is not in probe ids' do + it 'removes installed probe' do + expect(instrumenter).to receive(:unhook).with(probe) + + manager.remove_other_probes(['123']) + + expect(manager.installed_probes).to eq({}) + end + end + + context 'when there is an exception during de-instrumentation' do + it 'logs warning and keeps probe in installed list' do + expect(instrumenter).to receive(:unhook).with(probe).and_raise("Deinstrumentation error") + + expect(logger).to receive(:warn) do |msg| + expect(msg).to match(/Error removing probe.*Deinstrumentation error/) + end + + manager.remove_other_probes(['123']) + + expect(manager.pending_probes.length).to eq 0 + + expect(manager.installed_probes.length).to eq 1 + end + + context 'when there are two probes to be unhooked' do + let(:probe2) do + Datadog::DI::Probe.new( + id: '3ecfd456-2d7c-ffff-a51f-d4cc44141ffe', type: :log, file: 'xx', line_no: 123, + ) + end + + before do + manager.installed_probes[probe2.id] = probe2 + expect(manager.installed_probes.length).to eq 2 + end + + it 'logs warning and unhooks the second probe' do + expect(instrumenter).to receive(:unhook).with(probe).and_raise("Deinstrumentation error") + expect(instrumenter).to receive(:unhook).with(probe2) + + expect(logger).to receive(:warn) do |msg| + expect(msg).to match(/Error removing probe.*Deinstrumentation error/) + end + + manager.remove_other_probes(['123']) + + expect(manager.pending_probes.length).to eq 0 + + expect(manager.installed_probes.length).to eq 1 + end + end + end + end + end + + describe '#close' do + let(:trace_point) do + instance_double(TracePoint) + end + + it 'disables the trace point' do + expect(TracePoint).to receive(:trace).with(:end).and_return(trace_point) + + manager + + expect(trace_point).to receive(:disable) + manager.close + end + + it 'clears hooks' do + expect(manager).to receive(:clear_hooks) + manager.close + end + end + + describe '#clear_hooks' do + context 'pending probes' do + let(:probe) do + Datadog::DI::Probe.new(id: 1, type: :log, + type_name: 'foo', method_name: 'bar') + end + + before do + manager.pending_probes[probe.id] = probe + end + + it 'does not unhook' do + expect(instrumenter).not_to receive(:unhook) + + manager.clear_hooks + end + + it 'clears pending probes list' do + expect(instrumenter).not_to receive(:unhook) + + manager.clear_hooks + + expect(manager.pending_probes).to be_empty + end + end + + context 'installed probes' do + let(:probe) do + Datadog::DI::Probe.new(id: 1, type: :log, + type_name: 'ProbeManagerSpecTestClass', method_name: 'bar') + end + + before do + manager.installed_probes[probe.id] = probe + end + + it 'unhooks' do + expect(instrumenter).to receive(:unhook).with(probe) + + manager.clear_hooks + end + + it 'clears installed probes list' do + expect(instrumenter).to receive(:unhook).with(probe) + + manager.clear_hooks + + expect(manager.installed_probes).to be_empty + end + end + end + + describe '#install_pending_method_probes' do + context 'when a class with the same name as target type is defined' do + let(:probe) do + Datadog::DI::Probe.new(id: 1, type: :log, type_name: 'ProbeManagerSpecTestClass', method_name: 'bar') + end + + it 'is invoked' do + expect(manager).to receive(:install_pending_method_probes) do |cls| + expect(cls).to be_a(Class) + expect(cls.name).to eq 'ProbeManagerSpecTestClass' + end + + class ProbeManagerSpecTestClass; end + end + end + end +end diff --git a/spec/datadog/di/probe_notification_builder_spec.rb b/spec/datadog/di/probe_notification_builder_spec.rb index c83319e16bc..7cbd5e0f603 100644 --- a/spec/datadog/di/probe_notification_builder_spec.rb +++ b/spec/datadog/di/probe_notification_builder_spec.rb @@ -93,6 +93,7 @@ instance_double(TracePoint).tap do |tp| # Returns an empty binding expect(tp).to receive(:binding).and_return(binding) + expect(tp).to receive(:path).and_return('/foo.rb') end end From f7569237529acd266143d5694d889b3c7115dc3c Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 8 Nov 2024 11:34:05 -0500 Subject: [PATCH 02/14] di component --- lib/datadog/di.rb | 47 ++++++++++++- lib/datadog/di/component.rb | 108 ++++++++++++++++++++++++++++++ spec/datadog/di/component_spec.rb | 62 +++++++++++++++++ 3 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 lib/datadog/di/component.rb create mode 100644 spec/datadog/di/component_spec.rb diff --git a/lib/datadog/di.rb b/lib/datadog/di.rb index 082808f1786..18faee29a91 100644 --- a/lib/datadog/di.rb +++ b/lib/datadog/di.rb @@ -1,21 +1,44 @@ # frozen_string_literal: true require_relative 'di/error' -require_relative 'di/configuration' require_relative 'di/code_tracker' +require_relative 'di/component' +require_relative 'di/configuration' require_relative 'di/extensions' require_relative 'di/instrumenter' require_relative 'di/probe' +require_relative 'di/probe_builder' +require_relative 'di/probe_manager' +require_relative 'di/probe_notification_builder' +require_relative 'di/probe_notifier_worker' require_relative 'di/redactor' require_relative 'di/serializer' require_relative 'di/transport' require_relative 'di/utils' +if defined?(ActiveRecord::Base) + # The third-party library integrations need to be loaded after the + # third-party libraries are loaded. Tracing and appsec use Railtie + # to delay integrations until all of the application's dependencies + # are loaded, when running under Rails. We should do the same here in + # principle, however DI currently only has an ActiveRecord integration + # and AR should be loaded before any application code is loaded, being + # part of Rails, therefore for now we should be OK to just require the + # AR integration from here. + require_relative 'di/contrib/active_record' +end + module Datadog # Namespace for Datadog dynamic instrumentation. # # @api private module DI + class << self + def enabled? + Datadog.configuration.dynamic_instrumentation.enabled + end + end + # Expose DI to global shared objects Extensions.activate! @@ -52,6 +75,28 @@ def deactivate_tracking! def code_tracking_active? code_tracker&.active? || false end + + def component + Datadog.send(:components).dynamic_instrumentation + end + end + end +end + +# :script_compiled trace point was added in Ruby 2.6. +if RUBY_VERSION >= '2.6' + begin + # Activate code tracking by default because line trace points will not work + # without it. + Datadog::DI.activate_tracking! + rescue => exc + if defined?(Datadog.logger) + Datadog.logger.warn("Failed to activate code tracking for DI: #{exc.class}: #{exc}") + else + # We do not have Datadog logger potentially because DI code tracker is + # being loaded early in application boot process and the rest of datadog + # wasn't loaded yet. Output to standard error. + warn("Failed to activate code tracking for DI: #{exc.class}: #{exc}") end end end diff --git a/lib/datadog/di/component.rb b/lib/datadog/di/component.rb new file mode 100644 index 00000000000..eb96e0ff499 --- /dev/null +++ b/lib/datadog/di/component.rb @@ -0,0 +1,108 @@ +# frozen_string_literal: true + +module Datadog + module DI + # Component for dynamic instrumentation. + # + # Only one instance of the Component should ever be active; + # if configuration is changed, the old distance should be shut down + # prior to the new instance being created. + # + # The Component instance stores all state related to DI, for example + # which probes have been retrieved via remote config, + # intalled tracepoints and so on. Component will clean up all + # resources and installed tracepoints upon shutdown. + class Component + class << self + def build(settings, agent_settings, telemetry: nil) + return unless settings.respond_to?(:dynamic_instrumentation) && settings.dynamic_instrumentation.enabled + + unless settings.respond_to?(:remote) && settings.remote.enabled + Datadog.logger.debug("Dynamic Instrumentation could not be enabled because Remote Configuration Management is not available. To enable Remote Configuration, see https://docs.datadoghq.com/agent/remote_config") + return + end + + return unless environment_supported?(settings) + + new(settings, agent_settings, Datadog.logger, code_tracker: DI.code_tracker, telemetry: telemetry) + end + + def build!(settings, agent_settings, telemetry: nil) + unless settings.respond_to?(:dynamic_instrumentation) && settings.dynamic_instrumentation.enabled + raise "Requested DI component but DI is not enabled in settings" + end + + unless settings.respond_to?(:remote) && settings.remote.enabled + raise "Requested DI component but remote config is not enabled in settings" + end + + unless environment_supported?(settings) + raise "DI does not support the environment (development or Ruby version too low or not MRI)" + end + + new(settings, agent_settings, Datadog.logger, code_tracker: DI.code_tracker, telemetry: telemetry) + end + + # Checks whether the runtime environment is supported by + # dynamic instrumentation. Currently we only require that, if Rails + # is used, that Rails environment is not development because + # DI does not currently support code unloading and reloading. + def environment_supported?(settings) + # TODO add tests? + unless settings.dynamic_instrumentation.internal.development + if Datadog::Core::Environment::Execution.development? + Datadog.logger.debug("Not enabling dynamic instrumentation because we are in development environment") + return false + end + end + if RUBY_ENGINE != 'ruby' || RUBY_VERSION < '2.6' + Datadog.logger.debug("Not enabling dynamic instrumentation because of unsupported Ruby version") + return false + end + true + end + end + + def initialize(settings, agent_settings, logger, code_tracker: nil, telemetry: nil) + @settings = settings + @agent_settings = agent_settings + @logger = logger + @telemetry = telemetry + @redactor = Redactor.new(settings) + @serializer = Serializer.new(settings, redactor, telemetry: telemetry) + @instrumenter = Instrumenter.new(settings, serializer, logger, code_tracker: code_tracker, telemetry: telemetry) + @transport = Transport.new(agent_settings) + @probe_notifier_worker = ProbeNotifierWorker.new(settings, transport, logger, telemetry: telemetry) + @probe_notification_builder = ProbeNotificationBuilder.new(settings, serializer) + @probe_manager = ProbeManager.new(settings, instrumenter, probe_notification_builder, probe_notifier_worker, logger, telemetry: telemetry) + probe_notifier_worker.start + end + + attr_reader :settings + attr_reader :agent_settings + attr_reader :logger + attr_reader :telemetry + attr_reader :instrumenter + attr_reader :transport + attr_reader :probe_notifier_worker + attr_reader :probe_notification_builder + attr_reader :probe_manager + attr_reader :redactor + attr_reader :serializer + + # Shuts down dynamic instrumentation. + # + # Removes all code hooks and stops background threads. + # + # Does not clear out the code tracker, because it's only populated + # by code when code is compiled and therefore, if the code tracker + # was replaced by a new instance, the new instance of it wouldn't have + # any of the already loaded code tracked. + def shutdown!(replacement = nil) + probe_manager.clear_hooks + probe_manager.close + probe_notifier_worker.stop + end + end + end +end diff --git a/spec/datadog/di/component_spec.rb b/spec/datadog/di/component_spec.rb new file mode 100644 index 00000000000..7e77b2c5fc2 --- /dev/null +++ b/spec/datadog/di/component_spec.rb @@ -0,0 +1,62 @@ +require "datadog/di/spec_helper" +require 'datadog/di/component' + +RSpec.describe Datadog::DI::Component do + di_test + + describe '.build' do + let(:settings) do + settings = Datadog::Core::Configuration::Settings.new + settings.dynamic_instrumentation.enabled = dynamic_instrumentation_enabled + settings.dynamic_instrumentation.internal.development = true + settings + end + + let(:agent_settings) do + double('agent settings') + end + + context 'when dynamic instrumentation is enabled' do + let(:dynamic_instrumentation_enabled) { true } + + before do + allow(agent_settings).to receive(:hostname) + allow(agent_settings).to receive(:port) + allow(agent_settings).to receive(:timeout_seconds).and_return(1) + allow(agent_settings).to receive(:ssl) + end + + context 'when remote config is enabled' do + before do + settings.remote.enabled = true + end + + it 'returns a Datadog::DI::Component instance' do + component = described_class.build(settings, agent_settings) + expect(component).to be_a(described_class) + component.shutdown! + end + end + + context 'when remote config is disabled' do + before do + settings.remote.enabled = false + end + + it 'returns nil' do + component = described_class.build(settings, agent_settings) + expect(component).to be nil + end + end + end + + context 'when dynamic instrumentation is disabled' do + let(:dynamic_instrumentation_enabled) { false } + + it 'returns nil' do + component = described_class.build(settings, agent_settings) + expect(component).to be nil + end + end + end +end From ba4fcd5150ef51f4b094dd778a0ceebb4e95dde4 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 10 Nov 2024 00:26:51 -0500 Subject: [PATCH 03/14] do not enable code tracker yet --- lib/datadog/di.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/datadog/di.rb b/lib/datadog/di.rb index 18faee29a91..f9335ca2558 100644 --- a/lib/datadog/di.rb +++ b/lib/datadog/di.rb @@ -83,6 +83,7 @@ def component end end +=begin not yet enabled # :script_compiled trace point was added in Ruby 2.6. if RUBY_VERSION >= '2.6' begin @@ -100,3 +101,4 @@ def component end end end +=end From 5daaa6e39a1e9c8e7b94551aac0eb71bf0e05855 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 10 Nov 2024 00:39:02 -0500 Subject: [PATCH 04/14] benchmarks require more DI code to be merged --- spec/datadog/di/validate_benchmarks_spec.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spec/datadog/di/validate_benchmarks_spec.rb b/spec/datadog/di/validate_benchmarks_spec.rb index e84472b4fc9..8785eced16b 100644 --- a/spec/datadog/di/validate_benchmarks_spec.rb +++ b/spec/datadog/di/validate_benchmarks_spec.rb @@ -1,5 +1,6 @@ require "datadog/di/spec_helper" +=begin benchmarks require DI code to be merged RSpec.describe "Dynamic instrumentation benchmarks", :memcheck_valgrind_skip do di_test @@ -26,3 +27,4 @@ expect(benchmarks_to_validate).to contain_exactly(*all_benchmarks) end end +=end From d92677812e884c186b940400025661dabc6230bd Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Fri, 8 Nov 2024 11:40:30 -0500 Subject: [PATCH 05/14] repair tests --- lib/datadog/di.rb | 3 +- lib/datadog/di/code_tracker.rb | 3 ++ .../di/integration/instrumentation_spec.rb | 28 +++++++++++-------- .../probe_notification_builder_spec.rb | 1 + spec/datadog/di/spec_helper.rb | 14 ++++++++++ 5 files changed, 36 insertions(+), 13 deletions(-) diff --git a/lib/datadog/di.rb b/lib/datadog/di.rb index f9335ca2558..2ce3e1d8754 100644 --- a/lib/datadog/di.rb +++ b/lib/datadog/di.rb @@ -77,7 +77,8 @@ def code_tracking_active? end def component - Datadog.send(:components).dynamic_instrumentation + # TODO uncomment when remote is merged + #Datadog.send(:components).dynamic_instrumentation end end end diff --git a/lib/datadog/di/code_tracker.rb b/lib/datadog/di/code_tracker.rb index dbf3d394c45..8a64ac8a29a 100644 --- a/lib/datadog/di/code_tracker.rb +++ b/lib/datadog/di/code_tracker.rb @@ -79,6 +79,9 @@ def start registry[path] = tp.instruction_sequence end end + + DI.component&.probe_manager&.install_pending_line_probes(path) + # Since this method normally is called from customer applications, # rescue any exceptions that might not be handled to not break said # customer applications. diff --git a/spec/datadog/di/integration/instrumentation_spec.rb b/spec/datadog/di/integration/instrumentation_spec.rb index 0c35cafe80a..40d7260adf2 100644 --- a/spec/datadog/di/integration/instrumentation_spec.rb +++ b/spec/datadog/di/integration/instrumentation_spec.rb @@ -1,3 +1,4 @@ +require "datadog/di/spec_helper" require 'datadog/di' # Note: this file contains integration tests for instrumentation. @@ -18,11 +19,6 @@ def mutating_method(greeting) RSpec.describe 'Instrumentation integration' do di_test - before(:all) do - Datadog::DI.activate_tracking! - require_relative 'instrumentation_integration_test_class' - end - after do component.shutdown! end @@ -205,6 +201,8 @@ def run_test end context 'line probe' do + with_code_tracking + context 'simple log probe' do let(:probe) do Datadog::DI::Probe.new(id: "1234", type: :log, @@ -212,6 +210,11 @@ def run_test capture_snapshot: false,) end + before do + Object.send(:remove_const, :InstrumentationIntegrationTestClass) rescue nil + load File.join(File.dirname(__FILE__), 'instrumentation_integration_test_class.rb') + end + it 'invokes probe' do expect(component.transport).to receive(:send_request).at_least(:once) probe_manager.add_probe(probe) @@ -276,6 +279,11 @@ def run_test }}}} end + before do + Object.send(:remove_const, :InstrumentationIntegrationTestClass) rescue nil + load File.join(File.dirname(__FILE__), 'instrumentation_integration_test_class.rb') + end + it 'invokes probe' do expect(component.transport).to receive(:send_request).at_least(:once) probe_manager.add_probe(probe) @@ -302,9 +310,7 @@ def run_test context 'when target file is not loaded initially and is loaded later' do context 'when code tracking is available' do - before do - Datadog::DI.activate_tracking! - end + with_code_tracking let(:probe) do Datadog::DI::Probe.new(id: "1234", type: :log, @@ -334,9 +340,7 @@ def run_test end context 'when code tracking is not available' do - before do - Datadog::DI.deactivate_tracking! - end + without_code_tracking context 'untargeted trace points enabled' do let(:probe) do @@ -407,7 +411,7 @@ def run_test context 'when target is invoked' do before do - Datadog::DI.activate_tracking! + Object.send(:remove_const, :InstrumentationIntegrationTestClass) rescue nil load File.join(File.dirname(__FILE__), 'instrumentation_integration_test_class.rb') end diff --git a/spec/datadog/di/integration/probe_notification_builder_spec.rb b/spec/datadog/di/integration/probe_notification_builder_spec.rb index 373a23d661f..48a408c3fac 100644 --- a/spec/datadog/di/integration/probe_notification_builder_spec.rb +++ b/spec/datadog/di/integration/probe_notification_builder_spec.rb @@ -1,3 +1,4 @@ +require "datadog/di/spec_helper" require 'datadog/di/serializer' require 'datadog/di/probe' require 'datadog/di/probe_notification_builder' diff --git a/spec/datadog/di/spec_helper.rb b/spec/datadog/di/spec_helper.rb index 3ce872c2943..9b088c67622 100644 --- a/spec/datadog/di/spec_helper.rb +++ b/spec/datadog/di/spec_helper.rb @@ -33,6 +33,20 @@ def mock_settings_for_di(&block) double('di internal settings') end end + + def with_code_tracking + around do |example| + Datadog::DI.activate_tracking! + example.run + Datadog::DI.deactivate_tracking! + end + end + + def without_code_tracking + before do + Datadog::DI.deactivate_tracking! + end + end end module InstanceMethods From 0d9b137684980248058900294d4226c7e7be58b6 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 10 Nov 2024 01:18:12 -0500 Subject: [PATCH 06/14] standard --- .standard_todo.yml | 1 + lib/datadog/di/probe_manager.rb | 20 +++--- lib/datadog/di/transport.rb | 4 +- .../di/integration/instrumentation_spec.rb | 64 ++++++++++--------- spec/datadog/di/probe_manager_spec.rb | 2 +- spec/datadog/di/validate_benchmarks_spec.rb | 4 ++ 6 files changed, 53 insertions(+), 42 deletions(-) diff --git a/.standard_todo.yml b/.standard_todo.yml index 088c8d995b6..b8e966743ad 100644 --- a/.standard_todo.yml +++ b/.standard_todo.yml @@ -38,6 +38,7 @@ ignore: - spec/datadog/appsec/**/** - spec/datadog/benchmark/**/** - spec/datadog/core/**/** +- spec/datadog/di/integration/*_test_class*.rb - spec/datadog/kit/**/** - spec/datadog/tracing/**/** - spec/support/**/** diff --git a/lib/datadog/di/probe_manager.rb b/lib/datadog/di/probe_manager.rb index 38b25e1d3d8..3b1d4160047 100644 --- a/lib/datadog/di/probe_manager.rb +++ b/lib/datadog/di/probe_manager.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +# rubocop:disable Lint/AssignmentInCondition + module Datadog module DI # Stores probes received from remote config (that we can parse, in other @@ -24,14 +26,12 @@ def initialize(settings, instrumenter, probe_notification_builder, @failed_probes = {} @definition_trace_point = TracePoint.trace(:end) do |tp| - begin - install_pending_method_probes(tp.self) - rescue => exc - raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions - logger.warn("Unhandled exception in definition trace point: #{exc.class}: #{exc}") - telemetry&.report(exc, description: "Unhandled exception in definition trace point") - # TODO test this path - end + install_pending_method_probes(tp.self) + rescue => exc + raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions + logger.warn("Unhandled exception in definition trace point: #{exc.class}: #{exc}") + telemetry&.report(exc, description: "Unhandled exception in definition trace point") + # TODO test this path end end @@ -86,7 +86,7 @@ def add_probe(probe) # API smaller and shouldn't cause any actual problems. pending_probes.delete(probe.id) true - rescue Error::DITargetNotDefined => exc + rescue Error::DITargetNotDefined pending_probes[probe.id] = probe false end @@ -178,3 +178,5 @@ def probe_executed_callback(probe:, **opts) end end end + +# rubocop:enable Lint/AssignmentInCondition diff --git a/lib/datadog/di/transport.rb b/lib/datadog/di/transport.rb index d4529e92b02..2907caa1fdb 100644 --- a/lib/datadog/di/transport.rb +++ b/lib/datadog/di/transport.rb @@ -50,9 +50,9 @@ def send_input(payload) # TODO status should use either input or diagnostics endpoints # depending on agent version. - alias send_status send_diagnostics + alias_method :send_status, :send_diagnostics - alias send_snapshot send_input + alias_method :send_snapshot, :send_input private diff --git a/spec/datadog/di/integration/instrumentation_spec.rb b/spec/datadog/di/integration/instrumentation_spec.rb index 40d7260adf2..0b299d20d54 100644 --- a/spec/datadog/di/integration/instrumentation_spec.rb +++ b/spec/datadog/di/integration/instrumentation_spec.rb @@ -6,6 +6,8 @@ # For Instrumenter-only tests, use instrumenter_spec.rb in the parent # directory. +# rubocop:disable Style/RescueModifier + class InstrumentationSpecTestClass def test_method(a = 1) 42 @@ -50,34 +52,34 @@ def mutating_method(greeting) let(:expected_installed_payload) do {ddsource: 'dd_debugger', - debugger: { - diagnostics: { - parentId: nil, - probeId: String, - probeVersion: 0, - runtimeId: String, - status: 'INSTALLED', - }}, - message: String, - service: 'rspec', - timestamp: Integer, - } + debugger: { + diagnostics: { + parentId: nil, + probeId: String, + probeVersion: 0, + runtimeId: String, + status: 'INSTALLED', + } + }, + message: String, + service: 'rspec', + timestamp: Integer,} end let(:expected_emitting_payload) do {ddsource: 'dd_debugger', - debugger: { - diagnostics: { - parentId: nil, - probeId: String, - probeVersion: 0, - runtimeId: String, - status: 'EMITTING', - }}, - message: String, - service: 'rspec', - timestamp: Integer, - } + debugger: { + diagnostics: { + parentId: nil, + probeId: String, + probeVersion: 0, + runtimeId: String, + status: 'EMITTING', + } + }, + message: String, + service: 'rspec', + timestamp: Integer,} end context 'log probe' do @@ -115,8 +117,8 @@ def run_test expect(component.transport).to receive(:send_request).at_least(:once) probe_manager.add_probe(probe) payload = nil - expect(component.probe_notifier_worker).to receive(:add_snapshot) do |_payload| - payload = _payload + expect(component.probe_notifier_worker).to receive(:add_snapshot) do |payload_| + payload = payload_ end yield @@ -228,8 +230,8 @@ def run_test let(:payload) do probe_manager.add_probe(probe) payload = nil - expect(component.probe_notifier_worker).to receive(:add_snapshot) do |_payload| - payload = _payload + expect(component.probe_notifier_worker).to receive(:add_snapshot) do |payload_| + payload = payload_ end expect(InstrumentationIntegrationTestClass.new.test_method).to eq(42) component.probe_notifier_worker.flush @@ -296,8 +298,8 @@ def run_test expect(component.transport).to receive(:send_request).at_least(:once) probe_manager.add_probe(probe) payload = nil - expect(component.probe_notifier_worker).to receive(:add_snapshot) do |_payload| - payload = _payload + expect(component.probe_notifier_worker).to receive(:add_snapshot) do |payload_| + payload = payload_ end expect(InstrumentationIntegrationTestClass.new.test_method).to eq(42) component.probe_notifier_worker.flush @@ -459,3 +461,5 @@ def run_test end end end + +# rubocop:enable Style/RescueModifier diff --git a/spec/datadog/di/probe_manager_spec.rb b/spec/datadog/di/probe_manager_spec.rb index 6971c1ea839..f32cb71f577 100644 --- a/spec/datadog/di/probe_manager_spec.rb +++ b/spec/datadog/di/probe_manager_spec.rb @@ -304,7 +304,7 @@ class ProbeManagerSpecTestClass; end expect(cls.name).to eq 'ProbeManagerSpecTestClass' end - class ProbeManagerSpecTestClass; end + class ProbeManagerSpecTestClass; end # rubocop:disable Lint/ConstantDefinitionInBlock end end end diff --git a/spec/datadog/di/validate_benchmarks_spec.rb b/spec/datadog/di/validate_benchmarks_spec.rb index 8785eced16b..be590bf4ed9 100644 --- a/spec/datadog/di/validate_benchmarks_spec.rb +++ b/spec/datadog/di/validate_benchmarks_spec.rb @@ -1,5 +1,7 @@ require "datadog/di/spec_helper" +# rubocop:disable Style/BlockComments + =begin benchmarks require DI code to be merged RSpec.describe "Dynamic instrumentation benchmarks", :memcheck_valgrind_skip do di_test @@ -28,3 +30,5 @@ end end =end + +# rubocop:enable Style/BlockComments From 8a10e873f4e7bc53b5b0c30be86a6fd789a09bb5 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 10 Nov 2024 01:27:16 -0500 Subject: [PATCH 07/14] types --- lib/datadog/di/probe_manager.rb | 2 +- sig/datadog/di.rbs | 2 + sig/datadog/di/component.rbs | 57 +++++++++++++++++ sig/datadog/di/probe_manager.rbs | 64 +++++++++++++++++++ sig/datadog/di/probe_notification_builder.rbs | 2 +- 5 files changed, 125 insertions(+), 2 deletions(-) create mode 100644 sig/datadog/di/component.rbs create mode 100644 sig/datadog/di/probe_manager.rbs diff --git a/lib/datadog/di/probe_manager.rb b/lib/datadog/di/probe_manager.rb index 3b1d4160047..ac1457cb7b2 100644 --- a/lib/datadog/di/probe_manager.rb +++ b/lib/datadog/di/probe_manager.rb @@ -142,7 +142,7 @@ def remove_other_probes(probe_ids) # TODO is it OK to hook from trace point handler? # TODO the class is now defined, but can hooking still fail? hook_method(probe.type_name, probe.method_name, - rate_limiter: probe.rate_limiter, &instance_method(:probe_executed_callback)) + rate_limiter: probe.rate_limiter, &instance_method(:probe_executed_callback)) # steep:ignore pending_probes.delete(probe.id) break end diff --git a/sig/datadog/di.rbs b/sig/datadog/di.rbs index 716515cd3d5..eb6d76134d5 100644 --- a/sig/datadog/di.rbs +++ b/sig/datadog/di.rbs @@ -1,5 +1,7 @@ module Datadog module DI def self.code_tracker: () -> CodeTracker? + + def self.component: () -> Component? end end diff --git a/sig/datadog/di/component.rbs b/sig/datadog/di/component.rbs new file mode 100644 index 00000000000..27794388c27 --- /dev/null +++ b/sig/datadog/di/component.rbs @@ -0,0 +1,57 @@ +module Datadog + module DI + class Component + @settings: untyped + + @agent_settings: untyped + + @logger: untyped + + @telemetry: untyped + + @redactor: untyped + + @serializer: untyped + + @instrumenter: untyped + + @transport: untyped + + @probe_notifier_worker: untyped + + @probe_notification_builder: untyped + + @probe_manager: untyped + + def self.build: (untyped settings, untyped agent_settings, ?telemetry: untyped?) -> (nil | untyped) + + def self.build!: (untyped settings, untyped agent_settings, ?telemetry: untyped?) -> untyped + def self.environment_supported?: (untyped settings) -> (false | true) + + def initialize: (untyped settings, untyped agent_settings, untyped logger, ?code_tracker: untyped?, ?telemetry: untyped?) -> void + + attr_reader settings: untyped + + attr_reader agent_settings: untyped + + attr_reader logger: untyped + + attr_reader telemetry: untyped + + attr_reader instrumenter: untyped + + attr_reader transport: untyped + + attr_reader probe_notifier_worker: untyped + + attr_reader probe_notification_builder: untyped + + attr_reader probe_manager: untyped + + attr_reader redactor: untyped + + attr_reader serializer: untyped + def shutdown!: (?untyped? replacement) -> untyped + end + end +end diff --git a/sig/datadog/di/probe_manager.rbs b/sig/datadog/di/probe_manager.rbs new file mode 100644 index 00000000000..e4e42db24ba --- /dev/null +++ b/sig/datadog/di/probe_manager.rbs @@ -0,0 +1,64 @@ +module Datadog + module DI + class ProbeManager + @settings: untyped + + @instrumenter: untyped + + @probe_notification_builder: untyped + + @probe_notifier_worker: untyped + + @logger: untyped + + @telemetry: untyped + + @installed_probes: untyped + + @pending_probes: untyped + + @failed_probes: untyped + + @lock: untyped + + @definition_trace_point: untyped + + def initialize: (untyped settings, untyped instrumenter, untyped probe_notification_builder, untyped probe_notifier_worker, untyped logger, ?telemetry: untyped?) -> void + + attr_reader logger: untyped + + attr_reader telemetry: untyped + def close: () -> untyped + + def clear_hooks: () -> untyped + + attr_reader settings: untyped + + attr_reader instrumenter: untyped + + attr_reader probe_notification_builder: untyped + + attr_reader probe_notifier_worker: untyped + + def installed_probes: () -> untyped + + def pending_probes: () -> untyped + def failed_probes: () -> untyped + def add_probe: (untyped probe) -> untyped + + private + def do_add_probe: (untyped probe) -> untyped + + public + def remove_other_probes: (untyped probe_ids) -> untyped + + private + def install_pending_method_probes: (untyped cls) -> untyped + + public + def install_pending_line_probes: (untyped path) -> untyped + def probe_executed_callback: (probe: untyped, **untyped opts) -> untyped + attr_reader definition_trace_point: untyped + end + end +end diff --git a/sig/datadog/di/probe_notification_builder.rbs b/sig/datadog/di/probe_notification_builder.rbs index 812e08ffab5..3a46f3eef53 100644 --- a/sig/datadog/di/probe_notification_builder.rbs +++ b/sig/datadog/di/probe_notification_builder.rbs @@ -16,7 +16,7 @@ module Datadog def build_executed: (Probe probe, ?trace_point: TracePoint, ?rv: untyped?, ?duration: Float, ?caller_locations: Array[untyped], ?args: untyped?, ?kwargs: untyped?, ?serialized_entry_args: untyped?) -> Hash[Symbol,untyped] - def build_snapshot: (Probe probe, ?rv: untyped?, ?snapshot: untyped?, ?duration: Float, ?caller_locations: Array[untyped], ?args: untyped?, ?kwargs: untyped?, ?serialized_entry_args: untyped?) -> Hash[Symbol,untyped] + def build_snapshot: (Probe probe, ?rv: untyped?, ?snapshot: untyped?, ?path: String?, ?duration: Float, ?caller_locations: Array[untyped], ?args: untyped?, ?kwargs: untyped?, ?serialized_entry_args: untyped?) -> Hash[Symbol,untyped] def build_status: (Probe probe, message: untyped, status: untyped) -> Hash[Symbol,untyped] From 998efdc2c297c0a7184ffc5b965419927707af7e Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 10 Nov 2024 22:10:15 -0500 Subject: [PATCH 08/14] test basic method probe --- .../di/integration/instrumentation_spec.rb | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/spec/datadog/di/integration/instrumentation_spec.rb b/spec/datadog/di/integration/instrumentation_spec.rb index 0b299d20d54..f55fdfaa1d5 100644 --- a/spec/datadog/di/integration/instrumentation_spec.rb +++ b/spec/datadog/di/integration/instrumentation_spec.rb @@ -93,6 +93,45 @@ def mutating_method(greeting) end context 'method probe' do + context 'basic probe' do + let(:probe) do + Datadog::DI::Probe.new(id: "1234", type: :log, + type_name: 'InstrumentationSpecTestClass', method_name: 'test_method', + capture_snapshot: false,) + end + + it 'invokes probe' do + expect(component.transport).to receive(:send_request).at_least(:once) + probe_manager.add_probe(probe) + expect(component.probe_notifier_worker).to receive(:add_snapshot).once.and_call_original + expect(InstrumentationSpecTestClass.new.test_method).to eq(42) + component.probe_notifier_worker.flush + end + + def run_test + expect(component.transport).to receive(:send_request).at_least(:once) + probe_manager.add_probe(probe) + payload = nil + expect(component.probe_notifier_worker).to receive(:add_snapshot) do |payload_| + payload = payload_ + end + + yield + + component.probe_notifier_worker.flush + + expect(payload).to be_a(Hash) + snapshot = payload.fetch(:"debugger.snapshot") + expect(snapshot[:captures]).to be nil + end + + it 'assembles expected notification payload which does not include captures' do + run_test do + expect(InstrumentationSpecTestClass.new.test_method).to eq(42) + end + end + end + context 'enriched probe' do let(:probe) do Datadog::DI::Probe.new(id: "1234", type: :log, From b86a3ee735fdedbb1335eebbccd1533e57cec6ec Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 10 Nov 2024 22:19:36 -0500 Subject: [PATCH 09/14] test delayed class definition --- lib/datadog/di/probe_manager.rb | 20 +++++++--- .../di/integration/instrumentation_spec.rb | 40 +++++++++++++++++++ 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/lib/datadog/di/probe_manager.rb b/lib/datadog/di/probe_manager.rb index ac1457cb7b2..5723e1fa3fe 100644 --- a/lib/datadog/di/probe_manager.rb +++ b/lib/datadog/di/probe_manager.rb @@ -139,12 +139,20 @@ def remove_other_probes(probe_ids) if probe.method? # TODO move this stringification elsewhere if probe.type_name == cls.name - # TODO is it OK to hook from trace point handler? - # TODO the class is now defined, but can hooking still fail? - hook_method(probe.type_name, probe.method_name, - rate_limiter: probe.rate_limiter, &instance_method(:probe_executed_callback)) # steep:ignore - pending_probes.delete(probe.id) - break + begin + # TODO is it OK to hook from trace point handler? + # TODO the class is now defined, but can hooking still fail? + instrumenter.hook(probe, &method(:probe_executed_callback)) + pending_probes.delete(probe.id) + break + rescue Error::DITargetNotDefined + # This should not happen... try installing again later? + rescue => exc + raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions + + logger.warn("Error installing probe after class is defined: #{exc.class}: #{exc}") + telemetry&.report(exc, description: "Error installing probe after class is defined") + end end end end diff --git a/spec/datadog/di/integration/instrumentation_spec.rb b/spec/datadog/di/integration/instrumentation_spec.rb index f55fdfaa1d5..ff64d44504d 100644 --- a/spec/datadog/di/integration/instrumentation_spec.rb +++ b/spec/datadog/di/integration/instrumentation_spec.rb @@ -130,6 +130,46 @@ def run_test expect(InstrumentationSpecTestClass.new.test_method).to eq(42) end end + + context 'when method is defined after probe is added to probe manager' do + let(:probe) do + Datadog::DI::Probe.new(id: "1234", type: :log, + type_name: 'InstrumentationDelayedTestClass', method_name: 'test_method', + capture_snapshot: false,) + end + + it 'invokes probe and creates expected snapshot' do + expect(component.transport).to receive(:send_request).at_least(:once) + expect(probe_manager.add_probe(probe)).to be false + + class InstrumentationDelayedTestClass + def test_method + 43 + end + end + + payload = nil + expect(component.probe_notifier_worker).to receive(:add_snapshot) do |payload_| + payload = payload_ + end + + expect(InstrumentationDelayedTestClass.new.test_method).to eq(43) + component.probe_notifier_worker.flush + + snapshot = payload.fetch(:"debugger.snapshot") + expect(snapshot).to match( + id: String, + timestamp: Integer, + evaluationErrors: [], + probe: {id: '1234', version: 0, location: { + method: 'test_method', type: 'InstrumentationDelayedTestClass', + }}, + language: 'ruby', + stack: Array, + captures: nil, + ) + end + end end context 'enriched probe' do From b7e14fd99cd58c613e745929d1acbdddbe411b2b Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 10 Nov 2024 22:27:50 -0500 Subject: [PATCH 10/14] probe manager method documentation --- lib/datadog/di/probe_manager.rb | 35 +++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/lib/datadog/di/probe_manager.rb b/lib/datadog/di/probe_manager.rb index 5723e1fa3fe..d3b16a788f3 100644 --- a/lib/datadog/di/probe_manager.rb +++ b/lib/datadog/di/probe_manager.rb @@ -65,6 +65,14 @@ def clear_hooks # to instrument them every time remote configuration is processed. attr_reader :failed_probes + # Requests to install the specified probe. + # + # If the target of the probe does not exist, assume the relevant + # code is not loaded yet (rather than that it will never be loaded), + # and store the probe in a pending probe list. When classes are + # defined, or files loaded, the probe will be checked against the + # newly defined classes/loaded files, and will be installed if it + # matches. def add_probe(probe) # TODO lock here? @@ -105,6 +113,12 @@ def add_probe(probe) raise end + # Removes probes with ids other than in the specified list. + # + # This method is meant to be invoked from remote config processor. + # Remote config contains the list of currently defined probes; any + # probes not in that list have been removed by user and should be + # de-instrumented from the application. def remove_other_probes(probe_ids) pending_probes.values.each do |probe| unless probe_ids.include?(probe.id) @@ -133,6 +147,10 @@ def remove_other_probes(probe_ids) end end + # Installs pending method probes, if any, for the specified class. + # + # This method is meant to be called from the "end" trace point, + # which is invoked for each class definition. private def install_pending_method_probes(cls) # TODO search more efficiently than linearly pending_probes.each do |probe_id, probe| @@ -158,16 +176,29 @@ def remove_other_probes(probe_ids) end end - def install_pending_line_probes(file) + # Installs pending line probes, if any, for the file of the specified + # absolute path. + # + # This method is meant to be called from the script_compiled trace + # point, which is invoked for each required or loaded file + # (and also for eval'd code, but those invocations are filtered out). + def install_pending_line_probes(path) pending_probes.values.each do |probe| if probe.line? - if probe.file_matches?(file) + if probe.file_matches?(path) add_probe(probe) end end end end + # Entry point invoked from the instrumentation when the specfied probe + # is invoked (that is, either its target method is invoked, or + # execution reached its target file/line). + # + # This method is responsible for queueing probe status to be sent to the + # backend (once per the probe's lifetime) and a snapshot corresponding + # to the current invocation. def probe_executed_callback(probe:, **opts) unless probe.emitting_notified? payload = probe_notification_builder.build_emitting(probe) From a6e12204ab56e8918fa561e68e2310479f724797 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 10 Nov 2024 22:42:43 -0500 Subject: [PATCH 11/14] try locking probe manager --- lib/datadog/di/probe_manager.rb | 140 +++++++++++++++++++------------- 1 file changed, 84 insertions(+), 56 deletions(-) diff --git a/lib/datadog/di/probe_manager.rb b/lib/datadog/di/probe_manager.rb index d3b16a788f3..eeaa5797750 100644 --- a/lib/datadog/di/probe_manager.rb +++ b/lib/datadog/di/probe_manager.rb @@ -24,6 +24,7 @@ def initialize(settings, instrumenter, probe_notification_builder, @installed_probes = {} @pending_probes = {} @failed_probes = {} + @lock = Mutex.new @definition_trace_point = TracePoint.trace(:end) do |tp| install_pending_method_probes(tp.self) @@ -46,24 +47,40 @@ def close end def clear_hooks - pending_probes.clear - installed_probes.each do |probe_id, probe| - instrumenter.unhook(probe) + @lock.synchronize do + @pending_probes.clear + @installed_probes.each do |probe_id, probe| + instrumenter.unhook(probe) + end + @installed_probes.clear end - installed_probes.clear end attr_reader :settings attr_reader :instrumenter attr_reader :probe_notification_builder attr_reader :probe_notifier_worker - attr_reader :installed_probes - attr_reader :pending_probes + + def installed_probes + @lock.synchronize do + @installed_probes + end + end + + def pending_probes + @lock.synchronize do + @pending_probes + end + end # Probes that failed to instrument for reasons other than the target is # not yet loaded are added to this collection, so that we do not try # to instrument them every time remote configuration is processed. - attr_reader :failed_probes + def failed_probes + @lock.synchronize do + @failed_probes + end + end # Requests to install the specified probe. # @@ -74,10 +91,15 @@ def clear_hooks # newly defined classes/loaded files, and will be installed if it # matches. def add_probe(probe) - # TODO lock here? + @lock.synchronize do + do_add_probe(probe) + end + end + # Same as add_probe but without locking. + private def do_add_probe(probe) # Probe failed to install previously, do not try to install it again. - if msg = failed_probes[probe.id] + if msg = @failed_probes[probe.id] # TODO test this path raise Error::ProbePreviouslyFailed, msg end @@ -85,17 +107,17 @@ def add_probe(probe) begin instrumenter.hook(probe, &method(:probe_executed_callback)) - installed_probes[probe.id] = probe + @installed_probes[probe.id] = probe payload = probe_notification_builder.build_installed(probe) probe_notifier_worker.add_status(payload) # The probe would only be in the pending probes list if it was # previously attempted to be installed and the target was not loaded. # Always remove from pending list here because it makes the # API smaller and shouldn't cause any actual problems. - pending_probes.delete(probe.id) + @pending_probes.delete(probe.id) true rescue Error::DITargetNotDefined - pending_probes[probe.id] = probe + @pending_probes[probe.id] = probe false end rescue => exc @@ -108,7 +130,7 @@ def add_probe(probe) # install it again. # TODO add top stack frame to message - failed_probes[probe.id] = "#{exc.class}: #{exc}" + @failed_probes[probe.id] = "#{exc.class}: #{exc}" raise end @@ -120,28 +142,30 @@ def add_probe(probe) # probes not in that list have been removed by user and should be # de-instrumented from the application. def remove_other_probes(probe_ids) - pending_probes.values.each do |probe| - unless probe_ids.include?(probe.id) - pending_probes.delete(probe.id) + @lock.synchronize do + @pending_probes.values.each do |probe| + unless probe_ids.include?(probe.id) + @pending_probes.delete(probe.id) + end end - end - installed_probes.values.each do |probe| - unless probe_ids.include?(probe.id) - begin - instrumenter.unhook(probe) - # Only remove the probe from installed list if it was - # successfully de-instrumented. Active probes do incur overhead - # for the running application, and if the error is ephemeral - # we want to try removing the probe again at the next opportunity. - # - # TODO give up after some time? - installed_probes.delete(probe.id) - rescue => exc - raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions - # Silence all exceptions? - # TODO should we propagate here and rescue upstream? - logger.warn("Error removing probe #{probe.id}: #{exc.class}: #{exc}") - telemetry&.report(exc, description: "Error removing probe #{probe.id}") + @installed_probes.values.each do |probe| + unless probe_ids.include?(probe.id) + begin + instrumenter.unhook(probe) + # Only remove the probe from installed list if it was + # successfully de-instrumented. Active probes do incur overhead + # for the running application, and if the error is ephemeral + # we want to try removing the probe again at the next opportunity. + # + # TODO give up after some time? + @installed_probes.delete(probe.id) + rescue => exc + raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions + # Silence all exceptions? + # TODO should we propagate here and rescue upstream? + logger.warn("Error removing probe #{probe.id}: #{exc.class}: #{exc}") + telemetry&.report(exc, description: "Error removing probe #{probe.id}") + end end end end @@ -152,24 +176,26 @@ def remove_other_probes(probe_ids) # This method is meant to be called from the "end" trace point, # which is invoked for each class definition. private def install_pending_method_probes(cls) - # TODO search more efficiently than linearly - pending_probes.each do |probe_id, probe| - if probe.method? - # TODO move this stringification elsewhere - if probe.type_name == cls.name - begin - # TODO is it OK to hook from trace point handler? - # TODO the class is now defined, but can hooking still fail? - instrumenter.hook(probe, &method(:probe_executed_callback)) - pending_probes.delete(probe.id) - break - rescue Error::DITargetNotDefined - # This should not happen... try installing again later? - rescue => exc - raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions - - logger.warn("Error installing probe after class is defined: #{exc.class}: #{exc}") - telemetry&.report(exc, description: "Error installing probe after class is defined") + @lock.synchronize do + # TODO search more efficiently than linearly + @pending_probes.each do |probe_id, probe| + if probe.method? + # TODO move this stringification elsewhere + if probe.type_name == cls.name + begin + # TODO is it OK to hook from trace point handler? + # TODO the class is now defined, but can hooking still fail? + instrumenter.hook(probe, &method(:probe_executed_callback)) + @pending_probes.delete(probe.id) + break + rescue Error::DITargetNotDefined + # This should not happen... try installing again later? + rescue => exc + raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions + + logger.warn("Error installing probe after class is defined: #{exc.class}: #{exc}") + telemetry&.report(exc, description: "Error installing probe after class is defined") + end end end end @@ -183,10 +209,12 @@ def remove_other_probes(probe_ids) # point, which is invoked for each required or loaded file # (and also for eval'd code, but those invocations are filtered out). def install_pending_line_probes(path) - pending_probes.values.each do |probe| - if probe.line? - if probe.file_matches?(path) - add_probe(probe) + @lock.synchronize do + @pending_probes.values.each do |probe| + if probe.line? + if probe.file_matches?(path) + do_add_probe(probe) + end end end end From 21e62738ad62edf876facbc972593840e2045774 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sun, 10 Nov 2024 22:43:50 -0500 Subject: [PATCH 12/14] locking note --- lib/datadog/di/probe.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/datadog/di/probe.rb b/lib/datadog/di/probe.rb index 5dd5cf88a72..d8ac80aba35 100644 --- a/lib/datadog/di/probe.rb +++ b/lib/datadog/di/probe.rb @@ -165,6 +165,9 @@ def file_matches?(path) # For untargeted line trace points instrumented path will be nil. attr_accessor :instrumented_path + # TODO emitting_notified reads and writes should in theory be locked, + # however since DI is only implemented for MRI in practice the missing + # locking should not cause issues. attr_writer :emitting_notified def emitting_notified? !!@emitting_notified From a52162875ff24311c2d5af8f1874afa9e0a59788 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Mon, 11 Nov 2024 08:03:47 -0500 Subject: [PATCH 13/14] standard --- spec/datadog/di/integration/instrumentation_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/datadog/di/integration/instrumentation_spec.rb b/spec/datadog/di/integration/instrumentation_spec.rb index ff64d44504d..7d3a19c0ed9 100644 --- a/spec/datadog/di/integration/instrumentation_spec.rb +++ b/spec/datadog/di/integration/instrumentation_spec.rb @@ -142,7 +142,7 @@ def run_test expect(component.transport).to receive(:send_request).at_least(:once) expect(probe_manager.add_probe(probe)).to be false - class InstrumentationDelayedTestClass + class InstrumentationDelayedTestClass # rubocop:disable Lint/ConstantDefinitionInBlock def test_method 43 end From 23864437622a6bd6c23ac95fb1804446ecbfb122 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Tue, 12 Nov 2024 10:57:26 -0500 Subject: [PATCH 14/14] try Monitor instead of Mutex for recursive locking --- lib/datadog/di/probe_manager.rb | 75 ++++++++++++++++----------------- 1 file changed, 36 insertions(+), 39 deletions(-) diff --git a/lib/datadog/di/probe_manager.rb b/lib/datadog/di/probe_manager.rb index eeaa5797750..256b3411f6c 100644 --- a/lib/datadog/di/probe_manager.rb +++ b/lib/datadog/di/probe_manager.rb @@ -2,6 +2,8 @@ # rubocop:disable Lint/AssignmentInCondition +require 'monitor' + module Datadog module DI # Stores probes received from remote config (that we can parse, in other @@ -24,7 +26,7 @@ def initialize(settings, instrumenter, probe_notification_builder, @installed_probes = {} @pending_probes = {} @failed_probes = {} - @lock = Mutex.new + @lock = Monitor.new @definition_trace_point = TracePoint.trace(:end) do |tp| install_pending_method_probes(tp.self) @@ -92,47 +94,42 @@ def failed_probes # matches. def add_probe(probe) @lock.synchronize do - do_add_probe(probe) - end - end - - # Same as add_probe but without locking. - private def do_add_probe(probe) - # Probe failed to install previously, do not try to install it again. - if msg = @failed_probes[probe.id] - # TODO test this path - raise Error::ProbePreviouslyFailed, msg - end - - begin - instrumenter.hook(probe, &method(:probe_executed_callback)) + # Probe failed to install previously, do not try to install it again. + if msg = @failed_probes[probe.id] + # TODO test this path + raise Error::ProbePreviouslyFailed, msg + end - @installed_probes[probe.id] = probe - payload = probe_notification_builder.build_installed(probe) - probe_notifier_worker.add_status(payload) - # The probe would only be in the pending probes list if it was - # previously attempted to be installed and the target was not loaded. - # Always remove from pending list here because it makes the - # API smaller and shouldn't cause any actual problems. - @pending_probes.delete(probe.id) - true - rescue Error::DITargetNotDefined - @pending_probes[probe.id] = probe - false - end - rescue => exc - # In "propagate all exceptions" mode we will try to instrument again. - raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions + begin + instrumenter.hook(probe, &method(:probe_executed_callback)) + + @installed_probes[probe.id] = probe + payload = probe_notification_builder.build_installed(probe) + probe_notifier_worker.add_status(payload) + # The probe would only be in the pending probes list if it was + # previously attempted to be installed and the target was not loaded. + # Always remove from pending list here because it makes the + # API smaller and shouldn't cause any actual problems. + @pending_probes.delete(probe.id) + true + rescue Error::DITargetNotDefined + @pending_probes[probe.id] = probe + false + end + rescue => exc + # In "propagate all exceptions" mode we will try to instrument again. + raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions - logger.warn("Error processing probe configuration: #{exc.class}: #{exc}") - telemetry&.report(exc, description: "Error processing probe configuration") - # TODO report probe as failed to agent since we won't attempt to - # install it again. + logger.warn("Error processing probe configuration: #{exc.class}: #{exc}") + telemetry&.report(exc, description: "Error processing probe configuration") + # TODO report probe as failed to agent since we won't attempt to + # install it again. - # TODO add top stack frame to message - @failed_probes[probe.id] = "#{exc.class}: #{exc}" + # TODO add top stack frame to message + @failed_probes[probe.id] = "#{exc.class}: #{exc}" - raise + raise + end end # Removes probes with ids other than in the specified list. @@ -213,7 +210,7 @@ def install_pending_line_probes(path) @pending_probes.values.each do |probe| if probe.line? if probe.file_matches?(path) - do_add_probe(probe) + add_probe(probe) end end end