Skip to content

Commit

Permalink
Merge branch 'master' into di-settings-internal
Browse files Browse the repository at this point in the history
  • Loading branch information
p-datadog authored Nov 11, 2024
2 parents bccfc89 + 3a5f21e commit b843351
Show file tree
Hide file tree
Showing 9 changed files with 408 additions and 138 deletions.
5 changes: 4 additions & 1 deletion Matrixfile
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,10 @@
'graphql-2.1' => '❌ 2.5 / ❌ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby',
'graphql-2.0' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby',
'graphql-1.13' => '❌ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby',
}
},
'di:active_record' => {
'rails61-mysql2' => '❌ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ❌ jruby',
},
}.each_with_object({}) do |(tasks, spec_metadata), hash|
# Explode arrays of task names into individual tasks
# e.g. ['rails', 'railsdisableenv'] => {'...'} becomes 'rails7' => {'...'}, 'railsdisableenv7' => {'...'}
Expand Down
8 changes: 8 additions & 0 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,14 @@ namespace :spec do

task appsec: [:'appsec:all']

namespace :di do
  # Dynamic instrumentation specs for the ActiveRecord integration.
  # Arguments given to the rake task are joined with spaces and handed to
  # RSpec as its command-line options.
  desc '' # "Explicitly hiding from `rake -T`"
  RSpec::Core::RakeTask.new(:active_record) do |spec_task, task_args|
    spec_task.rspec_opts = task_args.to_a.join(' ')
    spec_task.pattern = 'spec/datadog/di/contrib/active_record/**/*_spec.rb'
  end
end

namespace :profiling do
task all: [:main, :ractors]

Expand Down
11 changes: 11 additions & 0 deletions lib/datadog/di/contrib/active_record.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# frozen_string_literal: true

# Custom DI serializer for ActiveRecord model instances: instead of walking
# the model's instance variables, serialize a hash holding its attributes and
# pass the model's class along as the `type:` of the serialized value.
Datadog::DI::Serializer.register(condition: ->(candidate) { ActiveRecord::Base === candidate }) do |serializer, value, name:, depth:| # steep:ignore
  # steep thinks all of the arguments are nil here
  # steep:ignore:start
  # One level of depth is given up for the wrapping hash; a missing depth is
  # forwarded as nil.
  remaining_depth = (depth - 1 if depth)
  payload = {attributes: value.attributes}
  serializer.serialize_value(payload, depth: remaining_depth, type: value.class)
  # steep:ignore:end
end
238 changes: 143 additions & 95 deletions lib/datadog/di/serializer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,42 @@ module DI
#
# @api private
class Serializer
def initialize(settings, redactor)
# Third-party library integration / custom serializers.
#
# Dynamic instrumentation has limited payload sizes, and for efficiency
# reasons it is not desirable to transmit data to Datadog that will
# never contain useful information. Additionally, due to depth limits,
# desired data may not even be included in payloads when serialized
# with the default, naive serializer. Therefore, custom objects like
# ActiveRecord model instances may need custom serializers.
#
# CUSTOMER NOTE: The API for defining custom serializers is not yet
# finalized. Please create an issue at
# https://github.com/datadog/dd-trace-rb/issues describing the
# object(s) you wish to serialize so that we can ensure your use case
# will be supported as the library evolves.
#
# Note that the current implementation does not permit defining a
# serializer for a particular class, which is the simplest use case.
# This is because the library itself does not need this functionality
# yet, and it won't help for ActiveRecord models (that derive from
# a common base class but are all of different classes) or for Mongoid
# models (that do not have a common base class at all but include a
# standard Mongoid module).
@@flat_registry = []
# Adds a custom serializer to the registry consulted by #serialize_value.
#
# @param condition [#call, nil] predicate called with the value being
#   serialized; the first registered entry whose condition returns a truthy
#   result is used. Entries registered without a condition are never
#   selected.
# @yield [serializer, value, name:, depth:] block producing the serialized
#   representation of the value
def self.register(condition: nil, &block)
  entry = {condition: condition, proc: block}
  @@flat_registry.push(entry)
end

# @param settings configuration object; the serializer reads its
#   dynamic_instrumentation capture limits
# @param redactor object asked whether a type or identifier must be redacted
# @param telemetry optional component used to report serialization errors;
#   may be nil
def initialize(settings, redactor, telemetry: nil)
  @telemetry = telemetry
  @redactor = redactor
  @settings = settings
end

# Read-only accessors for the collaborators handed to the constructor.
attr_reader :settings, :redactor, :telemetry

# Serializes positional and keyword arguments to a method,
# as obtained by a method probe.
Expand Down Expand Up @@ -86,116 +115,135 @@ def serialize_vars(vars)
# (integers, strings, arrays, hashes).
#
# Respects string length, collection size and traversal depth limits.
def serialize_value(value, name: nil, depth: settings.dynamic_instrumentation.max_capture_depth)
if redactor.redact_type?(value)
return {type: class_name(value.class), notCapturedReason: "redactedType"}
end
def serialize_value(value, name: nil, depth: settings.dynamic_instrumentation.max_capture_depth, type: nil)
cls = type || value.class
begin
if redactor.redact_type?(value)
return {type: class_name(cls), notCapturedReason: "redactedType"}
end

if name && redactor.redact_identifier?(name)
return {type: class_name(value.class), notCapturedReason: "redactedIdent"}
end
if name && redactor.redact_identifier?(name)
return {type: class_name(cls), notCapturedReason: "redactedIdent"}
end

serialized = {type: class_name(value.class)}
case value
when NilClass
serialized.update(isNull: true)
when Integer, Float, TrueClass, FalseClass
serialized.update(value: value.to_s)
when String, Symbol
need_dup = false
value = if String === value
# This is the only place where we duplicate the value, currently.
# All other values are immutable primitives (e.g. numbers).
# However, do not duplicate if the string is frozen, or if
# it is later truncated.
need_dup = !value.frozen?
value
else
value.to_s
@@flat_registry.each do |entry|
if (condition = entry[:condition]) && condition.call(value)
serializer_proc = entry.fetch(:proc)
return serializer_proc.call(self, value, name: nil, depth: depth)
end
end
max = settings.dynamic_instrumentation.max_capture_string_length
if value.length > max
serialized.update(truncated: true, size: value.length)
value = value[0...max]

serialized = {type: class_name(cls)}
case value
when NilClass
serialized.update(isNull: true)
when Integer, Float, TrueClass, FalseClass
serialized.update(value: value.to_s)
when Time
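# Note that DateTime values are not matched by this branch: DateTime
# derives from Date rather than Time, so they are handled by the Date
# branch below and do not need to be explicitly added to the case statement.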
serialized.update(value: value.iso8601)
when Date
serialized.update(value: value.to_s)
when String, Symbol
need_dup = false
end
value = value.dup if need_dup
serialized.update(value: value)
when Array
if depth < 0
serialized.update(notCapturedReason: "depth")
else
max = settings.dynamic_instrumentation.max_capture_collection_size
if max != 0 && value.length > max
serialized.update(notCapturedReason: "collectionSize", size: value.length)
# same steep failure with array slices.
# https://github.com/soutaro/steep/issues/1219
value = value[0...max] || []
value = if String === value
# This is the only place where we duplicate the value, currently.
# All other values are immutable primitives (e.g. numbers).
# However, do not duplicate if the string is frozen, or if
# it is later truncated.
need_dup = !value.frozen?
value
else
value.to_s
end
entries = value.map do |elt|
serialize_value(elt, depth: depth - 1)
max = settings.dynamic_instrumentation.max_capture_string_length
if value.length > max
serialized.update(truncated: true, size: value.length)
value = value[0...max]
need_dup = false
end
serialized.update(elements: entries)
end
when Hash
if depth < 0
serialized.update(notCapturedReason: "depth")
else
max = settings.dynamic_instrumentation.max_capture_collection_size
cur = 0
entries = []
value.each do |k, v|
if max != 0 && cur >= max
value = value.dup if need_dup
serialized.update(value: value)
when Array
if depth < 0
serialized.update(notCapturedReason: "depth")
else
max = settings.dynamic_instrumentation.max_capture_collection_size
if max != 0 && value.length > max
serialized.update(notCapturedReason: "collectionSize", size: value.length)
break
# same steep failure with array slices.
# https://github.com/soutaro/steep/issues/1219
value = value[0...max] || []
end
entries = value.map do |elt|
serialize_value(elt, depth: depth - 1)
end
cur += 1
entries << [serialize_value(k, depth: depth - 1), serialize_value(v, name: k, depth: depth - 1)]
serialized.update(elements: entries)
end
when Hash
if depth < 0
serialized.update(notCapturedReason: "depth")
else
max = settings.dynamic_instrumentation.max_capture_collection_size
cur = 0
entries = []
value.each do |k, v|
if max != 0 && cur >= max
serialized.update(notCapturedReason: "collectionSize", size: value.length)
break
end
cur += 1
entries << [serialize_value(k, depth: depth - 1), serialize_value(v, name: k, depth: depth - 1)]
end
serialized.update(entries: entries)
end
serialized.update(entries: entries)
end
else
if depth < 0
serialized.update(notCapturedReason: "depth")
else
fields = {}
max = settings.dynamic_instrumentation.max_capture_attribute_count
cur = 0
if depth < 0
serialized.update(notCapturedReason: "depth")
else
fields = {}
max = settings.dynamic_instrumentation.max_capture_attribute_count
cur = 0

# MRI and JRuby 9.4.5+ preserve instance variable definition
# order when calling #instance_variables. Previous JRuby versions
# did not preserve order and returned the variables in arbitrary
# order.
#
# The arbitrary order is problematic because 1) when there are
# fewer instance variables than capture limit, the order in which
# the variables are shown in UI will change from one capture to
# the next and generally will be arbitrary to the user, and
# 2) when there are more instance variables than capture limit,
# *which* variables are captured will also change meaning user
# looking at the UI may have "new" instance variables appear and
# existing ones disappear as they are looking at multiple captures.
#
# For consistency, we should have some kind of stable order of
# instance variables on all supported Ruby runtimes, so that the UI
# stays consistent. Given that initial implementation of Ruby DI
# does not support JRuby, we don't handle JRuby's lack of ordering
# of #instance_variables here, but if JRuby is supported in the
# future this may need to be addressed.
ivars = value.instance_variables
# MRI and JRuby 9.4.5+ preserve instance variable definition
# order when calling #instance_variables. Previous JRuby versions
# did not preserve order and returned the variables in arbitrary
# order.
#
# The arbitrary order is problematic because 1) when there are
# fewer instance variables than capture limit, the order in which
# the variables are shown in UI will change from one capture to
# the next and generally will be arbitrary to the user, and
# 2) when there are more instance variables than capture limit,
# *which* variables are captured will also change meaning user
# looking at the UI may have "new" instance variables appear and
# existing ones disappear as they are looking at multiple captures.
#
# For consistency, we should have some kind of stable order of
# instance variables on all supported Ruby runtimes, so that the UI
# stays consistent. Given that initial implementation of Ruby DI
# does not support JRuby, we don't handle JRuby's lack of ordering
# of #instance_variables here, but if JRuby is supported in the
# future this may need to be addressed.
ivars = value.instance_variables

ivars.each do |ivar|
if cur >= max
serialized.update(notCapturedReason: "fieldCount", fields: fields)
break
ivars.each do |ivar|
if cur >= max
serialized.update(notCapturedReason: "fieldCount", fields: fields)
break
end
cur += 1
fields[ivar] = serialize_value(value.instance_variable_get(ivar), name: ivar, depth: depth - 1)
end
cur += 1
fields[ivar] = serialize_value(value.instance_variable_get(ivar), name: ivar, depth: depth - 1)
serialized.update(fields: fields)
end
serialized.update(fields: fields)
end
serialized
rescue => exc
telemetry&.report(exc, description: "Error serializing")
{type: class_name(cls), notSerializedReason: exc.to_s}
end
serialized
end

private
Expand Down
Empty file.
9 changes: 8 additions & 1 deletion sig/datadog/di/serializer.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,23 @@ module Datadog
@settings: untyped

@redactor: untyped

@telemetry: Core::Telemetry::Component

def initialize: (untyped settings, untyped redactor) -> void
def initialize: (untyped settings, untyped redactor, ?telemetry: Core::Telemetry::Component) -> void

attr_reader settings: Datadog::Core::Configuration::Settings

attr_reader redactor: Datadog::DI::Redactor

attr_reader telemetry: Core::Telemetry::Component

def serialize_args: (untyped args, untyped kwargs) -> untyped
def serialize_vars: (untyped vars) -> untyped
# Serializes a single value. +type+ optionally overrides the class reported
# for the value; when omitted (or nil) the value's own class is used.
def serialize_value: (untyped value, ?name: String, ?depth: Integer, ?type: Class?) -> untyped

# Registers a custom serializer. The block receives the serializer and the
# value as positional arguments, and +name+ / +depth+ as keyword arguments;
# +name+ may be nil. +condition+ defaults to nil.
def self.register: (?condition: Proc?) {
(Serializer serializer, untyped value, name: Symbol?, depth: Integer) -> untyped } -> void

private
def class_name: (untyped cls) -> untyped
Expand Down
Loading

0 comments on commit b843351

Please sign in to comment.