From 503bb5aa1b101ccb93d3c68e8cae9b93c65d2915 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Fri, 13 Sep 2024 10:57:47 +0200 Subject: [PATCH 1/5] Prevent deadlock with no-op for telemetry logger --- lib/datadog/core/telemetry/logger.rb | 14 ++++++++++++-- spec/datadog/core/telemetry/logger_spec.rb | 5 +++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/datadog/core/telemetry/logger.rb b/lib/datadog/core/telemetry/logger.rb index b18650ebd5e..7bc11882a3e 100644 --- a/lib/datadog/core/telemetry/logger.rb +++ b/lib/datadog/core/telemetry/logger.rb @@ -8,7 +8,7 @@ module Telemetry # Report telemetry logs via delegating to the telemetry component instance via mutex. # # IMPORTANT: Invoking this method during the lifecycle of component initialization will - # cause a non-recoverable deadlock + # be no-op instead. # # For developer using this module: # read: lib/datadog/core/telemetry/logging.rb @@ -25,7 +25,17 @@ def error(description) private def instance - Datadog.send(:components).telemetry + # `allow_initialization: false` to prevent deadlock from components lifecycle + components = Datadog.send(:components, allow_initialization: false) + + if components && components.telemetry + components.telemetry + else + Datadog.logger.error( + 'Fail to send telemetry log before components initialization or within components lifecycle' + ) + nil + end end end end diff --git a/spec/datadog/core/telemetry/logger_spec.rb b/spec/datadog/core/telemetry/logger_spec.rb index 033a2458b5c..4ba5d4763eb 100644 --- a/spec/datadog/core/telemetry/logger_spec.rb +++ b/spec/datadog/core/telemetry/logger_spec.rb @@ -10,6 +10,7 @@ telemetry = instance_double(Datadog::Core::Telemetry::Component) allow(Datadog.send(:components)).to receive(:telemetry).and_return(telemetry) expect(telemetry).to receive(:report).with(exception, level: :error, description: 'Oops...') + expect(Datadog.logger).not_to receive(:debug) expect do described_class.report(exception, level: :error, description: 'Oops...') @@ -22,6 +23,7 @@ telemetry = instance_double(Datadog::Core::Telemetry::Component) allow(Datadog.send(:components)).to receive(:telemetry).and_return(telemetry) expect(telemetry).to receive(:report).with(exception, level: :error, description: nil) + expect(Datadog.logger).not_to receive(:debug) expect do described_class.report(exception) @@ -34,6 +36,7 @@ it do exception = StandardError.new allow(Datadog.send(:components)).to receive(:telemetry).and_return(nil) + expect(Datadog.logger).to receive(:error).with(/Fail to send telemetry log/) expect do described_class.report(exception, level: :error, description: 'Oops...') @@ -48,6 +51,7 @@ telemetry = instance_double(Datadog::Core::Telemetry::Component) allow(Datadog.send(:components)).to receive(:telemetry).and_return(telemetry) expect(telemetry).to receive(:error).with('description') + expect(Datadog.logger).not_to receive(:debug) expect { described_class.error('description') }.not_to raise_error end @@ -56,6 +60,7 @@ context 'when there is no telemetry component configured' do it do allow(Datadog.send(:components)).to receive(:telemetry).and_return(nil) + expect(Datadog.logger).to receive(:error).with(/Fail to send telemetry log/) expect { described_class.error('description') }.not_to raise_error end From cabb511912da28fbd8deb157b7b328461c8919dc Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Fri, 13 Sep 2024 14:43:29 +0200 Subject: [PATCH 2/5] Change to warn --- lib/datadog/core/telemetry/logger.rb | 2 +- spec/datadog/core/telemetry/logger_spec.rb | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/datadog/core/telemetry/logger.rb b/lib/datadog/core/telemetry/logger.rb index 7bc11882a3e..e512cb07e6c 100644 --- a/lib/datadog/core/telemetry/logger.rb +++ b/lib/datadog/core/telemetry/logger.rb @@ -31,7 +31,7 @@ def instance if components && components.telemetry components.telemetry else - Datadog.logger.error( + Datadog.logger.warn( 'Fail to send telemetry log before components initialization or within components lifecycle' ) nil diff --git a/spec/datadog/core/telemetry/logger_spec.rb b/spec/datadog/core/telemetry/logger_spec.rb index 4ba5d4763eb..4446945016e 100644 --- a/spec/datadog/core/telemetry/logger_spec.rb +++ b/spec/datadog/core/telemetry/logger_spec.rb @@ -10,7 +10,7 @@ telemetry = instance_double(Datadog::Core::Telemetry::Component) allow(Datadog.send(:components)).to receive(:telemetry).and_return(telemetry) expect(telemetry).to receive(:report).with(exception, level: :error, description: 'Oops...') - expect(Datadog.logger).not_to receive(:debug) + expect(Datadog.logger).not_to receive(:warn) expect do described_class.report(exception, level: :error, description: 'Oops...') @@ -23,7 +23,7 @@ telemetry = instance_double(Datadog::Core::Telemetry::Component) allow(Datadog.send(:components)).to receive(:telemetry).and_return(telemetry) expect(telemetry).to receive(:report).with(exception, level: :error, description: nil) - expect(Datadog.logger).not_to receive(:debug) + expect(Datadog.logger).not_to receive(:warn) expect do described_class.report(exception) @@ -36,7 +36,7 @@ it do exception = StandardError.new allow(Datadog.send(:components)).to receive(:telemetry).and_return(nil) - expect(Datadog.logger).to receive(:error).with(/Fail to send telemetry log/) + expect(Datadog.logger).to receive(:warn).with(/Fail to send telemetry log/) expect do described_class.report(exception, level: :error, description: 'Oops...') @@ -51,7 +51,7 @@ telemetry = instance_double(Datadog::Core::Telemetry::Component) allow(Datadog.send(:components)).to receive(:telemetry).and_return(telemetry) expect(telemetry).to receive(:error).with('description') - expect(Datadog.logger).not_to receive(:debug) + expect(Datadog.logger).not_to receive(:warn) expect { described_class.error('description') }.not_to raise_error end @@ -60,7 +60,7 @@ context 'when there is no telemetry component configured' do it do allow(Datadog.send(:components)).to receive(:telemetry).and_return(nil) - expect(Datadog.logger).to receive(:error).with(/Fail to send telemetry log/) + expect(Datadog.logger).to receive(:warn).with(/Fail to send telemetry log/) expect { described_class.error('description') }.not_to raise_error end From d5da8b0770c290e9babb98f5c1d5fb3b059a1449 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Fri, 13 Sep 2024 14:49:41 +0200 Subject: [PATCH 3/5] Update comment --- lib/datadog/core/telemetry/logger.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/datadog/core/telemetry/logger.rb b/lib/datadog/core/telemetry/logger.rb index e512cb07e6c..c4f4b169900 100644 --- a/lib/datadog/core/telemetry/logger.rb +++ b/lib/datadog/core/telemetry/logger.rb @@ -25,7 +25,8 @@ def error(description) private def instance - # `allow_initialization: false` to prevent deadlock from components lifecycle + # `allow_initialization: false` would avoid referencing the components via `safely_synchronize` (mutex) + # which could cause deadlock during components initialization. components = Datadog.send(:components, allow_initialization: false) if components && components.telemetry From 19c88eb692da0005e15d904a1e7aeb8fedd7f3db Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Fri, 13 Sep 2024 14:51:56 +0200 Subject: [PATCH 4/5] Fix typo --- lib/datadog/core/telemetry/logger.rb | 2 +- lib/datadog/core/telemetry/logging.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/datadog/core/telemetry/logger.rb b/lib/datadog/core/telemetry/logger.rb index c4f4b169900..d56e4125a17 100644 --- a/lib/datadog/core/telemetry/logger.rb +++ b/lib/datadog/core/telemetry/logger.rb @@ -3,7 +3,7 @@ module Datadog module Core module Telemetry - # === INTRENAL USAGE ONLY === + # === INTERNAL USAGE ONLY === # # Report telemetry logs via delegating to the telemetry component instance via mutex. # diff --git a/lib/datadog/core/telemetry/logging.rb b/lib/datadog/core/telemetry/logging.rb index 2ddf12bd0b6..52044e3f0f0 100644 --- a/lib/datadog/core/telemetry/logging.rb +++ b/lib/datadog/core/telemetry/logging.rb @@ -7,7 +7,7 @@ module Datadog module Core module Telemetry - # === INTRENAL USAGE ONLY === + # === INTERNAL USAGE ONLY === # # Logging interface for sending telemetry logs... so we can fix them. # From f64ae44a6d664fa9fe86bb18d99bc2bdb5879108 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Fri, 13 Sep 2024 15:30:58 +0200 Subject: [PATCH 5/5] Update comment --- lib/datadog/core/telemetry/logger.rb | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/datadog/core/telemetry/logger.rb b/lib/datadog/core/telemetry/logger.rb index d56e4125a17..c03320b354a 100644 --- a/lib/datadog/core/telemetry/logger.rb +++ b/lib/datadog/core/telemetry/logger.rb @@ -25,8 +25,14 @@ def error(description) private def instance - # `allow_initialization: false` would avoid referencing the components via `safely_synchronize` (mutex) - # which could cause deadlock during components initialization. + # Component initialization uses a mutex to avoid having concurrent initialization. + # Trying to access the telemetry component during initialization (specifically: + # from the thread that's actually doing the initialization) would cause a deadlock, + # since accessing the components would try to recursively lock the mutex. + # + # To work around this, we use allow_initialization: false to avoid triggering this issue. + # + # The downside is: this leaves us unable to report telemetry during component initialization. components = Datadog.send(:components, allow_initialization: false) if components && components.telemetry