Skip to content

Commit

Permalink
Dynamic Instrumentation Redactor component
Browse files Browse the repository at this point in the history
This component determines whether a variable, attribute or hash element
should be redacted based on the identifier name/key name and the
type of the value.

Unit tests are included.

The Redactor component technically depends on DI settings but
they are mocked out in the unit tests.
  • Loading branch information
p committed Sep 10, 2024
1 parent a5881cb commit a635cda
Show file tree
Hide file tree
Showing 3 changed files with 299 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ AllCops:
- 'spec/**/**/interesting_backtrace_helper.rb' # This file needs quite a few bizarre code patterns by design
- 'vendor/bundle/**/*'
- 'spec/datadog/tracing/contrib/grpc/support/gen/**/*.rb' # Skip protoc autogenerated code
- lib/datadog/di/**/*
- spec/datadog/di/**/*
NewCops: disable # Don't allow new cops to be enabled implicitly.
SuggestExtensions: false # Stop pushing suggestions constantly.

Expand Down
174 changes: 174 additions & 0 deletions lib/datadog/di/redactor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
# frozen_string_literal: true

require "set"

module Datadog
module DI
# Provides logic to identify sensitive information in snapshots captured
# by dynamic instrumentation.
#
# Redaction can be performed based on identifier or attribute name,
# or class name of said identifier or attribute. Redaction does not take
# into account variable values.
#
# There is a built-in list of identifier names which will be subject to
# redaction. Additional names can be provided by the user via the
# settings.dynamic_instrumentation.redacted_identifiers setting or
# the DD_DYNAMIC_INSTRUMENTATION_REDACTED_IDENTIFIERS environment
# variable. Currently no class names are subject to redaction by default;
# class names can be provided via the
# settings.dynamic_instrumentation.redacted_type_names setting or
# DD_DYNAMIC_INSTRUMENTATION_REDACTED_TYPES environment variable.
#
# Redacted identifiers must match exactly to an attribute name, a key
# in a hash or a variable name. Redacted types can either be matched
# exactly or, if the name is suffixed with an asterisk (*), any class
# whose name begins with the specified prefix will be subject to redaction.
#
# This class does not perform redaction itself (i.e., value replacement
# with a placeholder). This replacement is performed by Serializer.
#
# @api private
class Redactor
  # Identifier names that are redacted out of the box. Entries are passed
  # through #normalize together with the user-supplied names when the
  # lookup set is built.
  #
  # Copied from dd-trace-py
  DEFAULT_REDACTED_IDENTIFIERS = [
    "2fa",
    "accesstoken",
    "aiohttpsession",
    "apikey",
    "apisecret",
    "apisignature",
    "appkey",
    "applicationkey",
    "auth",
    "authorization",
    "authtoken",
    "ccnumber",
    "certificatepin",
    "cipher",
    "clientid",
    "clientsecret",
    "connectionstring",
    "connectsid",
    "cookie",
    "credentials",
    "creditcard",
    "csrf",
    "csrftoken",
    "cvv",
    "databaseurl",
    "dburl",
    "encryptionkey",
    "encryptionkeyid",
    "env",
    "geolocation",
    "gpgkey",
    "ipaddress",
    "jti",
    "jwt",
    "licensekey",
    "masterkey",
    "mysqlpwd",
    "nonce",
    "oauth",
    "oauthtoken",
    "otp",
    "passhash",
    "passwd",
    "password",
    "passwordb",
    "pemfile",
    "pgpkey",
    "phpsessid",
    "pin",
    "pincode",
    "pkcs8",
    "privatekey",
    "publickey",
    "pwd",
    "recaptchakey",
    "refreshtoken",
    "routingnumber",
    "salt",
    "secret",
    "secretkey",
    "secrettoken",
    "securityanswer",
    "securitycode",
    "securityquestion",
    "serviceaccountcredentials",
    "session",
    "sessionid",
    "sessionkey",
    "setcookie",
    "signature",
    "signaturekey",
    "sshkey",
    "ssn",
    "symfony",
    "token",
    "transactionid",
    "twiliotoken",
    "usersession",
    "voterid",
    "xapikey",
    "xauthtoken",
    "xcsrftoken",
    "xforwardedfor",
    "xrealip",
    "xsrf",
    "xsrftoken",
  ].freeze

  # @param settings [Object] object responding to +dynamic_instrumentation+,
  #   whose return value in turn responds to +redacted_identifiers+ and
  #   +redacted_type_names+ (both are enumerated as lists of strings).
  def initialize(settings)
    @settings = settings
  end

  attr_reader :settings

  # Tells whether a variable name, attribute name or hash key is sensitive.
  #
  # The name is normalized (see #normalize) before it is compared against
  # the built-in and user-supplied identifier lists.
  #
  # @param name [String, Symbol] identifier to check
  # @return [Boolean] true if the value stored under +name+ should be redacted
  def redact_identifier?(name)
    redacted_identifiers.include?(normalize(name))
  end

  # Tells whether +value+ should be redacted because of its class.
  #
  # Only the class name of +value+ is examined, never the value itself.
  #
  # @param value [Object] any object
  # @return [Boolean] true if the class name of +value+ matches one of the
  #   configured redacted type names
  def redact_type?(value)
    cls_name = value.class.name
    # Classes can be nameless (anonymous); do not attempt to redact in that
    # case. The type name list is not even consulted then.
    return false unless cls_name

    redacted_type_names_regexp.match?(cls_name)
  end

  private

  # Normalized built-in plus user-supplied identifier names.
  # Built on first use and memoized for the lifetime of this instance.
  def redacted_identifiers
    @redacted_identifiers ||= begin
      names = DEFAULT_REDACTED_IDENTIFIERS + settings.dynamic_instrumentation.redacted_identifiers
      Set.new(names.map { |name| normalize(name) })
    end
  end

  # Single regular expression matching every configured redacted type name.
  # Built on first use and memoized for the lifetime of this instance.
  #
  # Each name is matched literally against the entire class name. A name
  # ending in an asterisk matches any class name that starts with the part
  # before the asterisk.
  def redacted_type_names_regexp
    @redacted_type_names_regexp ||= begin
      alternatives = settings.dynamic_instrumentation.redacted_type_names.map do |name|
        if name.end_with?("*")
          # Drop the trailing asterisk and allow anything after the prefix.
          Regexp.escape(name[0..-2]) + ".*"
        else
          Regexp.escape(name)
        end
      end
      # \A and \z anchor the alternation to the whole class name.
      Regexp.new("\\A(?:#{alternatives.join("|")})\\z")
    end
  end

  # Canonical form used for identifier comparison: surrounding whitespace
  # is stripped, the name is lowercased, and the characters - _ $ @ are
  # removed.
  #
  # Input can be a string or a symbol.
  def normalize(str)
    str.to_s.strip.downcase.gsub(/[-_$@]/, "")
  end
end
end
end
123 changes: 123 additions & 0 deletions spec/datadog/di/redactor_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
require "datadog/di/redactor"

# Top-level fixture classes. Only their names matter: the examples pass
# instances of them to Redactor#redact_type?.

# Its exact name appears in the redacted type names list.
class DIRedactorSpecSensitiveType
end

# Its name equals the prefix of the "DIRedactorSpecWildCard*" entry.
class DIRedactorSpecWildCard
end

# Its name starts with the "DIRedactorSpecWildCard" prefix and continues past it.
class DIRedactorSpecWildCardClass
end

# Its name is a truncation of the wildcard prefix.
class DIRedactorSpecWildCa
end

# Its name contains "WildCard" but does not start with the wildcard prefix.
class DIRedactorSpecPrefixWildCard
end

# Namespaced fixture classes, used to check that matching is done against
# the fully qualified class name.
module DIRedactorSpec
  # Only the bare "SensitiveType" (without namespace) is in the redacted list.
  class SensitiveType
  end

  # Not listed in the redacted type names.
  class NotSensitiveType
  end

  # Covered by the "DIRedactorSpec::WildCard*" entry.
  class WildCardSensitiveType
  end

  # Listed by its fully qualified name.
  class ExactMatch
  end
end

RSpec.describe Datadog::DI::Redactor do
  # The redactor only reads settings.dynamic_instrumentation, so the
  # settings tree is replaced with doubles.
  let(:settings) do
    double("settings").tap do |settings|
      allow(settings).to receive(:dynamic_instrumentation).and_return(di_settings)
    end
  end

  let(:di_settings) do
    double("di settings").tap do |settings|
      allow(settings).to receive(:enabled).and_return(true)
      allow(settings).to receive(:propagate_all_exceptions).and_return(false)
      allow(settings).to receive(:redacted_identifiers).and_return(redacted_identifiers)
    end
  end

  # User-configured identifier names; contexts override this as needed.
  let(:redacted_identifiers) { [] }

  let(:redactor) do
    Datadog::DI::Redactor.new(settings)
  end

  describe "#redact_identifier?" do
    # Each entry: [context label, identifier passed in, expected result].
    cases = [
      ["lowercase", "password", true],
      ["uppercase", "PASSWORD", true],
      ["with removed punctuation", "pass_word", true],
      ["with removed hyphen", "pass-word", true],
      ["with instance variable sigil", "@password", true],
      ["with global variable sigil", "$password", true],
      ["with surrounding whitespace", " password ", true],
      ["symbol", :password, true],
      ["with non-removed punctuation", "pass/word", false],
      ["not a sensitive name", "username", false],
    ]

    cases.each do |label, identifier, redact|
      context label do
        let(:identifier) { identifier }

        it do
          expect(redactor.redact_identifier?(identifier)).to be redact
        end
      end
    end

    context "with user-supplied identifiers" do
      let(:redacted_identifiers) { ["Custom-Secret"] }

      it "normalizes the configured names before matching" do
        expect(redactor.redact_identifier?("custom_secret")).to be true
      end

      it "still redacts the built-in identifiers" do
        expect(redactor.redact_identifier?("password")).to be true
      end
    end
  end

  describe "#redact_type?" do
    let(:redacted_type_names) do
      %w[
        DIRedactorSpecSensitiveType
        DIRedactorSpecWildCard*
        DIRedactorSpec::ExactMatch
        DIRedactorSpec::WildCard*
        SensitiveType
      ]
    end

    # Defines one context per entry of +cases+; each entry is
    # [context label, value passed in, expected result].
    def self.define_cases(cases)
      cases.each do |label, value, redact|
        context label do
          let(:value) { value }

          it do
            expect(redactor.redact_type?(value)).to be redact
          end
        end
      end
    end

    context "redacted type list is checked" do
      before do
        expect(di_settings).to receive(:redacted_type_names).and_return(redacted_type_names)
      end

      cases = [
        ["redacted", DIRedactorSpecSensitiveType.new, true],
        ["not redacted", /123/, false],
        ["primitive type", nil, false],
        ["wild card type whose name is the same as prefix", DIRedactorSpecWildCard.new, true],
        ["wild card type", DIRedactorSpecWildCardClass.new, true],
        ["wild card does not match from beginning", DIRedactorSpecPrefixWildCard.new, false],
        ["partial wild card prefix match", DIRedactorSpecWildCa.new, false],
        ["class object", String, false],
        ["anonymous class object", Class.new, false],
        ["namespaced class - exact match", DIRedactorSpec::ExactMatch.new, true],
        ["namespaced class - wildcard - matched", DIRedactorSpec::WildCardSensitiveType.new, true],
        ["namespaced class - tail component match only", DIRedactorSpec::SensitiveType.new, false],
      ]

      define_cases(cases)
    end

    # A value whose class has no name is rejected before the type name
    # list is ever read from the settings.
    context "redacted type list is not checked" do
      before do
        expect(di_settings).not_to receive(:redacted_type_names)
      end

      cases = [
        ["instance of anonymous class", Class.new.new, false],
      ]

      define_cases(cases)
    end
  end
end

0 comments on commit a635cda

Please sign in to comment.