diff --git a/lib/datadog/core/metrics/client.rb b/lib/datadog/core/metrics/client.rb index cb180d7de2d..cde34bb32db 100644 --- a/lib/datadog/core/metrics/client.rb +++ b/lib/datadog/core/metrics/client.rb @@ -2,6 +2,7 @@ require_relative '../utils/time' require_relative '../utils/only_once' +require_relative '../telemetry/logger' require_relative '../configuration/ext' require_relative 'ext' @@ -100,6 +101,7 @@ def count(stat, value = nil, options = nil, &block) Datadog.logger.error( "Failed to send count stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}" ) + Telemetry::Logger.report(e, description: 'Failed to send count stat') end def distribution(stat, value = nil, options = nil, &block) @@ -113,6 +115,7 @@ def distribution(stat, value = nil, options = nil, &block) Datadog.logger.error( "Failed to send distribution stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}" ) + Telemetry::Logger.report(e, description: 'Failed to send distribution stat') end def increment(stat, options = nil) @@ -125,6 +128,7 @@ def increment(stat, options = nil) Datadog.logger.error( "Failed to send increment stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}" ) + Telemetry::Logger.report(e, description: 'Failed to send increment stat') end def gauge(stat, value = nil, options = nil, &block) @@ -138,6 +142,7 @@ def gauge(stat, value = nil, options = nil, &block) Datadog.logger.error( "Failed to send gauge stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}" ) + Telemetry::Logger.report(e, description: 'Failed to send gauge stat') end def time(stat, options = nil) @@ -153,9 +158,11 @@ def time(stat, options = nil) distribution(stat, ((finished - start) * 1000), options) end rescue StandardError => e + # TODO: Likely to be redundant, since `distribution` handles its own errors. Datadog.logger.error( "Failed to send time stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}" ) + Telemetry::Logger.report(e, description: 'Failed to send time stat') end end diff --git a/lib/datadog/core/remote/component.rb b/lib/datadog/core/remote/component.rb index b468ee9dbdc..3c8047cc9d3 100644 --- a/lib/datadog/core/remote/component.rb +++ b/lib/datadog/core/remote/component.rb @@ -39,6 +39,7 @@ def initialize(settings, capabilities, agent_settings) @client.sync @healthy ||= true rescue Client::SyncError => e + # Transient errors due to network or agent. Logged the error but not via telemetry Datadog.logger.error do "remote worker client sync error: #{e.message} location: #{Array(e.backtrace).first}. skipping sync" end @@ -48,6 +49,7 @@ def initialize(settings, capabilities, agent_settings) # negotiation object stores error logging state that should be reset. negotiation = Negotiation.new(settings, agent_settings) + # Transient errors due to network or agent. Logged the error but not via telemetry Datadog.logger.error do "remote worker error: #{e.class.name} #{e.message} location: #{Array(e.backtrace).first}. "\ 'reseting client state' diff --git a/lib/datadog/core/remote/negotiation.rb b/lib/datadog/core/remote/negotiation.rb index a43eb80a334..d68de4f40b6 100644 --- a/lib/datadog/core/remote/negotiation.rb +++ b/lib/datadog/core/remote/negotiation.rb @@ -20,7 +20,7 @@ def endpoint?(path) if res.internal_error? && network_error?(res.error) unless @logged[:agent_unreachable] - Datadog.logger.error { "agent unreachable: cannot negotiate #{path}" } + Datadog.logger.warn { "agent unreachable: cannot negotiate #{path}" } @logged[:agent_unreachable] = true end @@ -29,7 +29,7 @@ def endpoint?(path) if res.not_found? unless @logged[:no_info_endpoint] - Datadog.logger.error { "agent reachable but has no /info endpoint: cannot negotiate #{path}" } + Datadog.logger.warn { "agent reachable but has no /info endpoint: cannot negotiate #{path}" } @logged[:no_info_endpoint] = true end @@ -38,7 +38,7 @@ def endpoint?(path) unless res.ok? unless @logged[:unexpected_response] - Datadog.logger.error { "agent reachable but unexpected response: cannot negotiate #{path}" } + Datadog.logger.warn { "agent reachable but unexpected response: cannot negotiate #{path}" } @logged[:unexpected_response] = true end @@ -47,7 +47,7 @@ def endpoint?(path) unless res.endpoints.include?(path) unless @logged[:no_config_endpoint] - Datadog.logger.error { "agent reachable but does not report #{path}" } + Datadog.logger.warn { "agent reachable but does not report #{path}" } @logged[:no_config_endpoint] = true end diff --git a/lib/datadog/core/runtime/metrics.rb b/lib/datadog/core/runtime/metrics.rb index c07c75319e4..6c89d5b7dc7 100644 --- a/lib/datadog/core/runtime/metrics.rb +++ b/lib/datadog/core/runtime/metrics.rb @@ -94,7 +94,7 @@ def gc_metrics def try_flush yield rescue StandardError => e - Datadog.logger.error("Error while sending runtime metric. Cause: #{e.class.name} #{e.message}") + Datadog.logger.warn("Error while sending runtime metric. Cause: #{e.class.name} #{e.message}") end def default_metric_options diff --git a/lib/datadog/core/telemetry/event.rb b/lib/datadog/core/telemetry/event.rb index 3808c475893..fabc8014207 100644 --- a/lib/datadog/core/telemetry/event.rb +++ b/lib/datadog/core/telemetry/event.rb @@ -1,5 +1,8 @@ # frozen_string_literal: true +require_relative '../utils/forking' +require_relative '../utils/sequence' + module Datadog module Core module Telemetry diff --git a/lib/datadog/opentelemetry/sdk/propagator.rb b/lib/datadog/opentelemetry/sdk/propagator.rb index f45e4e9d7f4..b711fba2390 100644 --- a/lib/datadog/opentelemetry/sdk/propagator.rb +++ b/lib/datadog/opentelemetry/sdk/propagator.rb @@ -15,6 +15,7 @@ def inject( setter: ::OpenTelemetry::Context::Propagation.text_map_setter ) unless setter == ::OpenTelemetry::Context::Propagation.text_map_setter + # PENDING: Not to report telemetry logs for now Datadog.logger.error( 'Custom setter is not supported. Please inform the `datadog` team at ' \ ' https://github.com/DataDog/dd-trace-rb of your use case so we can best support you. Using the default ' \ @@ -31,6 +32,7 @@ def extract( ) if getter != ::OpenTelemetry::Context::Propagation.text_map_getter && getter != ::OpenTelemetry::Common::Propagation.rack_env_getter + # PENDING: Not to report telemetry logs for now Datadog.logger.error( "Custom getter #{getter} is not supported. Please inform the `datadog` team at " \ ' https://github.com/DataDog/dd-trace-rb of your use case so we can best support you. Using the default ' \ diff --git a/lib/datadog/tracing/distributed/propagation.rb b/lib/datadog/tracing/distributed/propagation.rb index 2c56b441220..9ab993eb56e 100644 --- a/lib/datadog/tracing/distributed/propagation.rb +++ b/lib/datadog/tracing/distributed/propagation.rb @@ -3,6 +3,7 @@ require_relative '../configuration/ext' require_relative '../trace_digest' require_relative '../trace_operation' +require_relative '../../core/telemetry/logger' module Datadog module Tracing @@ -72,6 +73,10 @@ def inject!(digest, data) ::Datadog.logger.error( "Error injecting distributed trace data. Cause: #{e} Location: #{Array(e.backtrace).first}" ) + ::Datadog::Core::Telemetry::Logger.report( + e, + description: "Error injecting distributed trace data with #{propagator.class.name}" + ) end result @@ -127,6 +132,7 @@ def extract(data) ) end rescue => e + # TODO: Not to report Telemetry logs for now ::Datadog.logger.error( "Error extracting distributed trace data. Cause: #{e} Location: #{Array(e.backtrace).first}" ) diff --git a/lib/datadog/tracing/sampling/rate_sampler.rb b/lib/datadog/tracing/sampling/rate_sampler.rb index bc79714bcec..ea581dd70fa 100644 --- a/lib/datadog/tracing/sampling/rate_sampler.rb +++ b/lib/datadog/tracing/sampling/rate_sampler.rb @@ -20,7 +20,7 @@ def initialize(sample_rate = 1.0, decision: nil) super() unless sample_rate >= 0.0 && sample_rate <= 1.0 - Datadog.logger.error('sample rate is not between 0 and 1, falling back to 1') + Datadog.logger.warn('sample rate is not between 0 and 1, falling back to 1') sample_rate = 1.0 end diff --git a/lib/datadog/tracing/sampling/rule.rb b/lib/datadog/tracing/sampling/rule.rb index 9c18bed7fac..6f458ba093f 100644 --- a/lib/datadog/tracing/sampling/rule.rb +++ b/lib/datadog/tracing/sampling/rule.rb @@ -2,6 +2,7 @@ require_relative 'matcher' require_relative 'rate_sampler' +require_relative '../../core/telemetry/logger' module Datadog module Tracing @@ -35,6 +36,7 @@ def match?(trace) Datadog.logger.error( "Matcher failed. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}" ) + Datadog::Core::Telemetry::Logger.report(e, description: 'Matcher failed') nil end diff --git a/lib/datadog/tracing/sampling/rule_sampler.rb b/lib/datadog/tracing/sampling/rule_sampler.rb index 4b7e812e75f..398926233e3 100644 --- a/lib/datadog/tracing/sampling/rule_sampler.rb +++ b/lib/datadog/tracing/sampling/rule_sampler.rb @@ -3,6 +3,7 @@ require_relative 'ext' require_relative '../../core/rate_limiter' require_relative 'rule' +require_relative '../../core/telemetry/logger' module Datadog module Tracing @@ -80,9 +81,10 @@ def self.parse(rules, rate_limit, default_sample_rate) new(parsed_rules, rate_limit: rate_limit, default_sample_rate: default_sample_rate) rescue => e - Datadog.logger.error do + Datadog.logger.warn do "Could not parse trace sampling rules '#{rules}': #{e.class.name} #{e.message} at #{Array(e.backtrace).first}" end + nil end @@ -140,6 +142,8 @@ def sample_trace(trace) Datadog.logger.error( "Rule sampling failed. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}" ) + Datadog::Core::Telemetry::Logger.report(e, description: 'Rule sampling failed') + yield(trace) end diff --git a/lib/datadog/tracing/transport/http/client.rb b/lib/datadog/tracing/transport/http/client.rb index e19491ddabc..ab2c7191df8 100644 --- a/lib/datadog/tracing/transport/http/client.rb +++ b/lib/datadog/tracing/transport/http/client.rb @@ -38,6 +38,7 @@ def send_request(request, &block) if stats.consecutive_errors > 0 Datadog.logger.debug(message) else + # Not to report telemetry logs Datadog.logger.error(message) end diff --git a/lib/datadog/tracing/transport/io/client.rb b/lib/datadog/tracing/transport/io/client.rb index 685c7512190..4288889ec76 100644 --- a/lib/datadog/tracing/transport/io/client.rb +++ b/lib/datadog/tracing/transport/io/client.rb @@ -49,6 +49,7 @@ def send_request(request) if stats.consecutive_errors > 0 Datadog.logger.debug(message) else + # Not to report telemetry logs Datadog.logger.error(message) end diff --git a/lib/datadog/tracing/workers.rb b/lib/datadog/tracing/workers.rb index fdcce6772ba..030986a2204 100644 --- a/lib/datadog/tracing/workers.rb +++ b/lib/datadog/tracing/workers.rb @@ -56,7 +56,7 @@ def callback_traces # ensures that the thread will not die because of an exception. # TODO[manu]: findout the reason and reschedule the send if it's not # a fatal exception - Datadog.logger.error( + Datadog.logger.warn( "Error during traces flush: dropped #{traces.length} items. Cause: #{e} Location: #{Array(e.backtrace).first}" ) end diff --git a/lib/datadog/tracing/workers/trace_writer.rb b/lib/datadog/tracing/workers/trace_writer.rb index fda409e74e5..bbb04ff219a 100644 --- a/lib/datadog/tracing/workers/trace_writer.rb +++ b/lib/datadog/tracing/workers/trace_writer.rb @@ -43,7 +43,7 @@ def write_traces(traces) traces = process_traces(traces) flush_traces(traces) rescue StandardError => e - Datadog.logger.error( + Datadog.logger.warn( "Error while writing traces: dropped #{traces.length} items. Cause: #{e} Location: #{Array(e.backtrace).first}" ) end diff --git a/spec/datadog/core/metrics/client_spec.rb b/spec/datadog/core/metrics/client_spec.rb index 5f6fb218b2c..91af802a322 100644 --- a/spec/datadog/core/metrics/client_spec.rb +++ b/spec/datadog/core/metrics/client_spec.rb @@ -13,9 +13,16 @@ it { is_expected.to have_attributes(statsd: statsd) } - shared_examples_for 'missing value arg' do - it 'logs an error without raising' do - expect(Datadog.logger).to receive(:error) + shared_examples_for 'logs an error without raising' do |action| + it do + expect(Datadog.logger).to receive(:error).with( + /Failed to send #{action} stat/ + ) + expect(Datadog::Core::Telemetry::Logger).to receive(:report).with( + a_kind_of(StandardError), + description: "Failed to send #{action} stat" + ) + expect { subject }.to_not raise_error end end @@ -383,7 +390,7 @@ context 'that does not yield args' do subject(:count) { metrics.count(stat) {} } - it_behaves_like 'missing value arg' + it_behaves_like 'logs an error without raising', 'count' end context 'that yields args' do @@ -433,12 +440,9 @@ end context 'which raises an error' do - before do - expect(statsd).to receive(:count).and_raise(StandardError) - expect(Datadog.logger).to receive(:error) - end + before { allow(statsd).to receive(:count).and_raise(StandardError) } - it { expect { count }.to_not raise_error } + it_behaves_like 'logs an error without raising', 'count' end end end @@ -464,7 +468,7 @@ context 'that does not yield args' do subject(:distribution) { metrics.distribution(stat) {} } - it_behaves_like 'missing value arg' + it_behaves_like 'logs an error without raising', 'distribution' end context 'that yields args' do @@ -514,12 +518,9 @@ end context 'which raises an error' do - before do - expect(statsd).to receive(:distribution).and_raise(StandardError) - expect(Datadog.logger).to receive(:error) - end + before { allow(statsd).to receive(:distribution).and_raise(StandardError) } - it { expect { distribution }.to_not raise_error } + it_behaves_like 'logs an error without raising', 'distribution' end end end @@ -545,7 +546,7 @@ context 'that does not yield args' do subject(:gauge) { metrics.gauge(stat) {} } - it_behaves_like 'missing value arg' + it_behaves_like 'logs an error without raising', 'gauge' end context 'that yields args' do @@ -595,12 +596,9 @@ end context 'which raises an error' do - before do - expect(statsd).to receive(:gauge).and_raise(StandardError) - expect(Datadog.logger).to receive(:error) - end + before { allow(statsd).to receive(:gauge).and_raise(StandardError) } - it { expect { gauge }.to_not raise_error } + it_behaves_like 'logs an error without raising', 'gauge' end end end @@ -622,6 +620,12 @@ context 'when #statsd is a Datadog::Statsd' do context 'and given a block' do + context 'when raise exception' do + subject(:increment) { metrics.increment(stat) { raise } } + + it_behaves_like 'logs an error without raising', 'increment' + end + context 'that yields args' do subject(:increment) { metrics.increment(stat) { stat_options } } @@ -676,12 +680,9 @@ end context 'which raises an error' do - before do - expect(statsd).to receive(:increment).and_raise(StandardError) - expect(Datadog.logger).to receive(:error) - end + before { allow(statsd).to receive(:increment).and_raise(StandardError) } - it { expect { increment }.to_not raise_error } + it_behaves_like 'logs an error without raising', 'increment' end end end @@ -711,7 +712,10 @@ let(:error) { RuntimeError.new } # Expect the given block to raise its errors through - it { expect { time }.to raise_error(error) } + it do + expect(Datadog.logger).not_to receive(:error) + expect { time }.to raise_error(error) + end end end diff --git a/spec/datadog/core/remote/negotiation_spec.rb b/spec/datadog/core/remote/negotiation_spec.rb index e34a0af2d59..71770a6ce51 100644 --- a/spec/datadog/core/remote/negotiation_spec.rb +++ b/spec/datadog/core/remote/negotiation_spec.rb @@ -50,13 +50,13 @@ end it do - expect(Datadog.logger).to_not receive(:error) + expect(Datadog.logger).to_not receive(:warn) expect(endpoint?).to be true end it do - expect(Datadog.logger).to receive(:error).and_return(nil) + expect(Datadog.logger).to receive(:warn) expect(negotiation.endpoint?('/bar')).to be false end @@ -65,7 +65,7 @@ let(:suppress_logging) { { no_config_endpoint: true } } it 'does not log an error' do - expect(Datadog.logger).to_not receive(:error) + expect(Datadog.logger).to_not receive(:warn) expect(negotiation.endpoint?('/bar')).to be false end @@ -77,7 +77,7 @@ let(:response_body) { '404 page not found' } before do - expect(Datadog.logger).to receive(:error).and_return(nil) + expect(Datadog.logger).to receive(:warn) end it { expect(endpoint?).to be false } @@ -95,7 +95,7 @@ let(:response_body) { '400 bad request' } before do - expect(Datadog.logger).to receive(:error).and_return(nil) + expect(Datadog.logger).to receive(:warn) end it { expect(endpoint?).to be false } @@ -113,7 +113,7 @@ let(:response_body) { '500 internal server error' } before do - expect(Datadog.logger).to receive(:error).and_return(nil) + expect(Datadog.logger).to receive(:warn) end it { expect(endpoint?).to be false } @@ -133,7 +133,7 @@ end before do - expect(Datadog.logger).to receive(:error).and_return(nil) + expect(Datadog.logger).to receive(:warn) end it { expect(endpoint?).to be false } @@ -150,7 +150,7 @@ let(:request_exception) { Errno::ECONNREFUSED.new } before do - expect(Datadog.logger).to receive(:error).and_return(nil) + expect(Datadog.logger).to receive(:warn) end it { expect(endpoint?).to be false } diff --git a/spec/datadog/core/runtime/metrics_spec.rb b/spec/datadog/core/runtime/metrics_spec.rb index 13488814f9b..f3480969ef4 100644 --- a/spec/datadog/core/runtime/metrics_spec.rb +++ b/spec/datadog/core/runtime/metrics_spec.rb @@ -98,7 +98,7 @@ end context 'when an error is thrown' do - before { allow(Datadog.logger).to receive(:error) } + before { allow(Datadog.logger).to receive(:warn) } it do allow(metric).to receive(:available?) @@ -106,7 +106,7 @@ flush - expect(Datadog.logger).to have_received(:error) + expect(Datadog.logger).to have_received(:warn) .with(/Error while sending runtime metric./) .at_least(:once) end diff --git a/spec/datadog/tracing/sampling/rate_sampler_spec.rb b/spec/datadog/tracing/sampling/rate_sampler_spec.rb index 5c26c02b47f..993efb1d0b4 100644 --- a/spec/datadog/tracing/sampling/rate_sampler_spec.rb +++ b/spec/datadog/tracing/sampling/rate_sampler_spec.rb @@ -22,6 +22,12 @@ it_behaves_like 'sampler with sample rate', 1.0 do let(:trace) { nil } end + + it do + expect(Datadog.logger).to receive(:warn).with('sample rate is not between 0 and 1, falling back to 1') + + sampler + end end context 'that is 0' do @@ -46,6 +52,12 @@ let(:sample_rate) { 1.5 } it_behaves_like 'sampler with sample rate', 1.0 + + it do + expect(Datadog.logger).to receive(:warn).with('sample rate is not between 0 and 1, falling back to 1') + + sampler + end end end end diff --git a/spec/datadog/tracing/sampling/rule_sampler_spec.rb b/spec/datadog/tracing/sampling/rule_sampler_spec.rb index 43c2ea25d22..e63e2e9d6ca 100644 --- a/spec/datadog/tracing/sampling/rule_sampler_spec.rb +++ b/spec/datadog/tracing/sampling/rule_sampler_spec.rb @@ -173,7 +173,7 @@ let(:rule) { { sample_rate: 'oops' } } it 'does not accept rule with a non-float sample_rate' do - expect(Datadog.logger).to receive(:error) + expect(Datadog.logger).to receive(:warn) is_expected.to be_nil end end @@ -182,7 +182,7 @@ let(:rule) { { name: 'test' } } it 'does not accept rule missing the mandatory sample_rate' do - expect(Datadog.logger).to receive(:error) + expect(Datadog.logger).to receive(:warn) is_expected.to be_nil end @@ -190,7 +190,7 @@ let(:rules) { [{ sample_rate: 0.1 }, { name: 'test' }] } it 'rejects all rules if one is missing the mandatory sample_rate' do - expect(Datadog.logger).to receive(:error) + expect(Datadog.logger).to receive(:warn) is_expected.to be_nil end end @@ -200,7 +200,7 @@ let(:rules) { 'not a json array' } it 'returns nil in case of parsing error' do - expect(Datadog.logger).to receive(:error) + expect(Datadog.logger).to receive(:warn) is_expected.to be_nil end end diff --git a/spec/datadog/tracing/sampling/rule_spec.rb b/spec/datadog/tracing/sampling/rule_spec.rb index 6451c32e51d..305eb4a41ac 100644 --- a/spec/datadog/tracing/sampling/rule_spec.rb +++ b/spec/datadog/tracing/sampling/rule_spec.rb @@ -53,6 +53,8 @@ expect(Datadog.logger).to receive(:error) .with(a_string_including("Matcher failed. Cause: #{error}")) + expect(Datadog::Core::Telemetry::Logger).to receive(:report) + .with(error, description: 'Matcher failed') end it { is_expected.to be nil }