From a635cdaf21a05a5dff88a173593c013f66471e06 Mon Sep 17 00:00:00 2001
From: Oleg Pudeyev
Date: Tue, 10 Sep 2024 12:11:54 -0400
Subject: [PATCH] Dynamic Instrumentation Redactor component

This component determines whether a variable, attribute or hash element
should be redacted based on the identifier name/key name and the type
of the value.

Unit tests are included.

The Redactor component technically depends on DI settings but they are
mocked out in the unit tests.
---
 .rubocop.yml                     |   2 +
 lib/datadog/di/redactor.rb       | 191 +++++++++++++++++++++++++++++++
 spec/datadog/di/redactor_spec.rb | 123 ++++++++++++++++++++
 3 files changed, 316 insertions(+)
 create mode 100644 lib/datadog/di/redactor.rb
 create mode 100644 spec/datadog/di/redactor_spec.rb

diff --git a/.rubocop.yml b/.rubocop.yml
index 0cde8f175ae..fe48739255b 100644
--- a/.rubocop.yml
+++ b/.rubocop.yml
@@ -24,6 +24,8 @@ AllCops:
     - 'spec/**/**/interesting_backtrace_helper.rb' # This file needs quite a few bizarre code patterns by design
     - 'vendor/bundle/**/*'
     - 'spec/datadog/tracing/contrib/grpc/support/gen/**/*.rb' # Skip protoc autogenerated code
+    - lib/datadog/di/**/*
+    - spec/datadog/di/**/*
   NewCops: disable # Don't allow new cops to be enabled implicitly.
   SuggestExtensions: false # Stop pushing suggestions constantly.
 
diff --git a/lib/datadog/di/redactor.rb b/lib/datadog/di/redactor.rb
new file mode 100644
index 00000000000..d154f6bfc00
--- /dev/null
+++ b/lib/datadog/di/redactor.rb
@@ -0,0 +1,191 @@
+# frozen_string_literal: true
+
+require "set"
+
+module Datadog
+  module DI
+    # Provides logic to identify sensitive information in snapshots captured
+    # by dynamic instrumentation.
+    #
+    # Redaction can be performed based on identifier or attribute name,
+    # or class name of said identifier or attribute. Redaction does not take
+    # into account variable values.
+    #
+    # There is a built-in list of identifier names which will be subject to
+    # redaction. Additional names can be provided by the user via the
+    # settings.dynamic_instrumentation.redacted_identifiers setting or
+    # the DD_DYNAMIC_INSTRUMENTATION_REDACTED_IDENTIFIERS environment
+    # variable. Currently no class names are subject to redaction by default;
+    # class names can be provided via the
+    # settings.dynamic_instrumentation.redacted_type_names setting or
+    # DD_DYNAMIC_INSTRUMENTATION_REDACTED_TYPES environment variable.
+    #
+    # Identifiers are normalized before comparison (surrounding whitespace,
+    # letter case and the characters -, _, $ and @ are ignored); after
+    # normalization a redacted identifier must match exactly an attribute
+    # name, a key in a hash or a variable name. Redacted types can either be
+    # matched exactly or, if the name is suffixed with an asterisk (*), any
+    # class whose name starts with the specified prefix will be subject to
+    # redaction.
+    #
+    # This class does not perform redaction itself (i.e., value replacement
+    # with a placeholder). This replacement is performed by Serializer.
+    #
+    # @api private
+    class Redactor
+      # @param settings [Object] settings object; must respond to
+      #   +dynamic_instrumentation+, whose result provides
+      #   +redacted_identifiers+ and +redacted_type_names+.
+      def initialize(settings)
+        @settings = settings
+      end
+
+      attr_reader :settings
+
+      # Returns whether the value stored under the given identifier
+      # (variable name, attribute name or hash key) should be redacted.
+      #
+      # @param name [String, Symbol] identifier to check
+      # @return [Boolean]
+      def redact_identifier?(name)
+        redacted_identifiers.include?(normalize(name))
+      end
+
+      # Returns whether the value should be redacted based on the name of
+      # its class.
+      #
+      # @param value [Object] value whose class name is checked
+      # @return [Boolean]
+      def redact_type?(value)
+        # Classes can be nameless, do not attempt to redact in that case.
+        cls_name = value.class.name
+        return false unless cls_name
+
+        redacted_type_names_regexp.match?(cls_name)
+      end
+
+      private
+
+      # Normalized set of built-in and user-provided identifier names,
+      # built on first use and memoized.
+      def redacted_identifiers
+        @redacted_identifiers ||= begin
+          names = DEFAULT_REDACTED_IDENTIFIERS + settings.dynamic_instrumentation.redacted_identifiers
+          Set.new(names.map { |name| normalize(name) })
+        end
+      end
+
+      # Anchored regular expression matching any of the configured type
+      # names, built on first use and memoized.
+      def redacted_type_names_regexp
+        @redacted_type_names_regexp ||= begin
+          patterns = settings.dynamic_instrumentation.redacted_type_names.map do |name|
+            if name.end_with?("*")
+              # Trailing asterisk is a wildcard: match any class name
+              # that starts with the part before the asterisk.
+              "#{Regexp.escape(name[0..-2])}.*"
+            else
+              Regexp.escape(name)
+            end
+          end
+          Regexp.new("\\A(?:#{patterns.join("|")})\\z")
+        end
+      end
+
+      # Copied from dd-trace-py
+      DEFAULT_REDACTED_IDENTIFIERS = [
+        "2fa",
+        "accesstoken",
+        "aiohttpsession",
+        "apikey",
+        "apisecret",
+        "apisignature",
+        "appkey",
+        "applicationkey",
+        "auth",
+        "authorization",
+        "authtoken",
+        "ccnumber",
+        "certificatepin",
+        "cipher",
+        "clientid",
+        "clientsecret",
+        "connectionstring",
+        "connectsid",
+        "cookie",
+        "credentials",
+        "creditcard",
+        "csrf",
+        "csrftoken",
+        "cvv",
+        "databaseurl",
+        "dburl",
+        "encryptionkey",
+        "encryptionkeyid",
+        "env",
+        "geolocation",
+        "gpgkey",
+        "ipaddress",
+        "jti",
+        "jwt",
+        "licensekey",
+        "masterkey",
+        "mysqlpwd",
+        "nonce",
+        "oauth",
+        "oauthtoken",
+        "otp",
+        "passhash",
+        "passwd",
+        "password",
+        "passwordb",
+        "pemfile",
+        "pgpkey",
+        "phpsessid",
+        "pin",
+        "pincode",
+        "pkcs8",
+        "privatekey",
+        "publickey",
+        "pwd",
+        "recaptchakey",
+        "refreshtoken",
+        "routingnumber",
+        "salt",
+        "secret",
+        "secretkey",
+        "secrettoken",
+        "securityanswer",
+        "securitycode",
+        "securityquestion",
+        "serviceaccountcredentials",
+        "session",
+        "sessionid",
+        "sessionkey",
+        "setcookie",
+        "signature",
+        "signaturekey",
+        "sshkey",
+        "ssn",
+        "symfony",
+        "token",
+        "transactionid",
+        "twiliotoken",
+        "usersession",
+        "voterid",
+        "xapikey",
+        "xauthtoken",
+        "xcsrftoken",
+        "xforwardedfor",
+        "xrealip",
+        "xsrf",
+        "xsrftoken",
+      ].freeze
+
+      # Input can be a string or a symbol.
+      def normalize(str)
+        str.to_s.strip.downcase.delete("-_$@")
+      end
+    end
+  end
+end
diff --git a/spec/datadog/di/redactor_spec.rb b/spec/datadog/di/redactor_spec.rb
new file mode 100644
index 00000000000..af3de0f6153
--- /dev/null
+++ b/spec/datadog/di/redactor_spec.rb
@@ -0,0 +1,123 @@
+require "datadog/di/redactor"
+
+class DIRedactorSpecSensitiveType; end
+
+class DIRedactorSpecWildCard; end
+
+class DIRedactorSpecWildCardClass; end
+
+class DIRedactorSpecWildCa; end
+
+class DIRedactorSpecPrefixWildCard; end
+
+module DIRedactorSpec
+  class SensitiveType; end
+
+  class NotSensitiveType; end
+
+  class WildCardSensitiveType; end
+
+  class ExactMatch; end
+end
+
+RSpec.describe Datadog::DI::Redactor do
+  let(:settings) do
+    double("settings").tap do |settings|
+      allow(settings).to receive(:dynamic_instrumentation).and_return(di_settings)
+    end
+  end
+
+  let(:di_settings) do
+    double("di settings").tap do |settings|
+      allow(settings).to receive(:enabled).and_return(true)
+      allow(settings).to receive(:propagate_all_exceptions).and_return(false)
+      allow(settings).to receive(:redacted_identifiers).and_return([])
+    end
+  end
+
+  let(:redactor) do
+    Datadog::DI::Redactor.new(settings)
+  end
+
+  describe "#redact_identifier?" do
+    cases = [
+      ["lowercase", "password", true],
+      ["uppercase", "PASSWORD", true],
+      ["with removed punctiation", "pass_word", true],
+      ["with non-removed punctuation", "pass/word", false],
+    ]
+
+    cases.each do |(label, identifier_, redact_)|
+      identifier, redact = identifier_, redact_
+
+      context label do
+        let(:identifier) { identifier }
+
+        it do
+          expect(redactor.redact_identifier?(identifier)).to be redact
+        end
+      end
+    end
+  end
+
+  describe "#redact_type?" do
+    let(:redacted_type_names) {
+      %w[
+        DIRedactorSpecSensitiveType
+        DIRedactorSpecWildCard*
+        DIRedactorSpec::ExactMatch
+        DIRedactorSpec::WildCard*
+        SensitiveType
+      ]
+    }
+
+    def self.define_cases(cases)
+      cases.each do |(label, value_, redact_)|
+        value, redact = value_, redact_
+
+        context label do
+          let(:value) { value }
+
+          it do
+            expect(redactor.redact_type?(value)).to be redact
+          end
+        end
+      end
+    end
+
+    context "redacted type list is checked" do
+      before do
+        expect(di_settings).to receive(:redacted_type_names).and_return(redacted_type_names)
+      end
+
+      cases = [
+        ["redacted", DIRedactorSpecSensitiveType.new, true],
+        ["not redacted", /123/, false],
+        ["primitive type", nil, false],
+        ["wild card type whose name is the same as prefix", DIRedactorSpecWildCard.new, true],
+        ["wild card type", DIRedactorSpecWildCardClass.new, true],
+        ["wild card does not match from beginning", DIRedactorSpecPrefixWildCard.new, false],
+        ["partial wild card prefix match", DIRedactorSpecWildCa.new, false],
+        ["class object", String, false],
+        ["anonymous class object", Class.new, false],
+        ["namespaced class - exact match", DIRedactorSpec::ExactMatch.new, true],
+        ["namespaced class - wildcard - matched", DIRedactorSpec::WildCardSensitiveType.new, true],
+        ["namespaced class - tail component match only", DIRedactorSpec::SensitiveType.new, false],
+      ]
+
+      define_cases(cases)
+    end
+
+    context "redacted type list is not checked" do
+      before do
+        expect(di_settings).not_to receive(:redacted_type_names)
+      end
+
+      cases = [
+        ["instance of anonymous class", Class.new.new, false],
+      ]
+
+      define_cases(cases)
+    end
+  end
+end