From c1883db7111fac52ecee0c9309ee954e4578ff3f Mon Sep 17 00:00:00 2001 From: Dan Webb Date: Mon, 9 Feb 2026 21:34:39 +0000 Subject: [PATCH] feat: scrub sensitive data from OpenTelemetry traces [OTEL-014] - Add Otel::SpanSanitizer to redact PII (emails, dates, IPs) from span attributes - Add Otel::SpanSanitizingProcessor registered in both test and production configs - Change PG instrumentation db_statement from :include to :obfuscate - Sensitive key detection for authorization, cookie, password, name, email, DOB fields - 34 new tests covering sanitizer, processor, and integration Closes: med-tracker-ry5 --- config/initializers/opentelemetry.rb | 13 +- lib/otel/span_sanitizer.rb | 60 +++++++ lib/otel/span_sanitizing_processor.rb | 32 ++++ .../lib/otel/sensitive_data_scrubbing_spec.rb | 43 +++++ spec/lib/otel/span_sanitizer_spec.rb | 149 ++++++++++++++++++ .../otel/span_sanitizing_processor_spec.rb | 80 ++++++++++ 6 files changed, 372 insertions(+), 5 deletions(-) create mode 100644 lib/otel/span_sanitizer.rb create mode 100644 lib/otel/span_sanitizing_processor.rb create mode 100644 spec/lib/otel/sensitive_data_scrubbing_spec.rb create mode 100644 spec/lib/otel/span_sanitizer_spec.rb create mode 100644 spec/lib/otel/span_sanitizing_processor_spec.rb diff --git a/config/initializers/opentelemetry.rb b/config/initializers/opentelemetry.rb index 6e7a54b9..42b10f0f 100644 --- a/config/initializers/opentelemetry.rb +++ b/config/initializers/opentelemetry.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'opentelemetry-api' +require 'otel/span_sanitizing_processor' OpenTelemetry.logger = Logger.new($stdout, level: Rails.env.test? ? Logger::WARN : Logger::ERROR) @@ -20,11 +21,13 @@ record_frontend_span: true }, 'OpenTelemetry::Instrumentation::PG' => { - db_statement: :include, + db_statement: :obfuscate, peer_service: 'postgresql' } ) + c.add_span_processor(Otel::SpanSanitizingProcessor.new) + c.resource = OpenTelemetry::SDK::Resources::Resource.create( 'service.name' => 'medtracker-test', 'service.namespace' => 'medtracker', @@ -41,9 +44,7 @@ otlp_headers = ENV.fetch('OTEL_EXPORTER_OTLP_HEADERS', nil) otlp_timeout = ENV.fetch('OTEL_EXPORTER_OTLP_TIMEOUT', '10').to_i - if otlp_endpoint.present? - Rails.logger.info "[OpenTelemetry] Configuring OTLP exporter: #{otlp_endpoint}" - end + Rails.logger.info "[OpenTelemetry] Configuring OTLP exporter: #{otlp_endpoint}" if otlp_endpoint.present? OpenTelemetry::SDK.configure do |c| c.service_name = 'medtracker' @@ -74,7 +75,7 @@ record_frontend_span: true }, 'OpenTelemetry::Instrumentation::PG' => { - db_statement: :include, + db_statement: :obfuscate, peer_service: 'postgresql' }, 'OpenTelemetry::Instrumentation::Net::HTTP' => { @@ -82,6 +83,8 @@ } ) + c.add_span_processor(Otel::SpanSanitizingProcessor.new) + c.resource = OpenTelemetry::SDK::Resources::Resource.create( 'service.name' => 'medtracker', 'service.namespace' => 'medtracker', diff --git a/lib/otel/span_sanitizer.rb b/lib/otel/span_sanitizer.rb new file mode 100644 index 00000000..dbf3455a --- /dev/null +++ b/lib/otel/span_sanitizer.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +module Otel + class SpanSanitizer + EMAIL_PATTERN = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/ + DATE_ONLY_PATTERN = /\b\d{4}-\d{2}-\d{2}\b(?!T)/ + DATE_DMY_PATTERN = %r{\b\d{2}/\d{2}/\d{4}\b} + IP_PATTERN = /\b(?:\d{1,3}\.){3}\d{1,3}\b/ + + SENSITIVE_KEY_PATTERNS = [ + /authorization/i, + /cookie/i, + /password/i, + /secret/i, + /token/i, + /\bdate_of_birth\b/i, + /\bdob\b/i + ].freeze + + PII_KEY_PATTERNS = [ + /(? e + Rails.logger.warn "[OpenTelemetry] SpanSanitizingProcessor error: #{e.message}" + end + + def on_finish(_span); end + + def force_flush(timeout: nil) # rubocop:disable Lint/UnusedMethodArgument + OpenTelemetry::SDK::Trace::Export::SUCCESS + end + + def shutdown(timeout: nil) # rubocop:disable Lint/UnusedMethodArgument + OpenTelemetry::SDK::Trace::Export::SUCCESS + end + end +end diff --git a/spec/lib/otel/sensitive_data_scrubbing_spec.rb b/spec/lib/otel/sensitive_data_scrubbing_spec.rb new file mode 100644 index 00000000..8d41e8ad --- /dev/null +++ b/spec/lib/otel/sensitive_data_scrubbing_spec.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe 'OTEL-014: Sensitive data scrubbed from traces' do # rubocop:disable RSpec/DescribeClass + describe 'PG instrumentation configuration' do + it 'obfuscates SQL statements instead of including raw SQL' do + pg_instrumentation = OpenTelemetry::Instrumentation::PG::Instrumentation.instance + expect(pg_instrumentation).to be_installed + + config = pg_instrumentation.instance_variable_get(:@config) + expect(config[:db_statement]).to eq(:obfuscate) + end + end + + describe 'Rack instrumentation configuration' do + it 'does not record request headers that may contain sensitive data' do + rack_instrumentation = OpenTelemetry::Instrumentation::Rack::Instrumentation.instance + expect(rack_instrumentation).to be_installed + + config = rack_instrumentation.instance_variable_get(:@config) + allowed_request_headers = config[:allowed_request_headers] || [] + sensitive_headers = %w[authorization cookie set-cookie x-forwarded-for] + + sensitive_headers.each do |header| + msg = "Rack instrumentation should not record sensitive header: #{header}" + expect(allowed_request_headers.map(&:downcase)).not_to include(header), msg + end + end + end + + describe 'SpanSanitizer processor' do + it 'is registered as a span processor' do + processors = OpenTelemetry.tracer_provider.instance_variable_get(:@span_processors) + sanitizer_present = processors.any? do |p| + p.is_a?(Otel::SpanSanitizingProcessor) + end + + expect(sanitizer_present).to be(true), + 'SpanSanitizingProcessor should be registered as a span processor' + end + end +end diff --git a/spec/lib/otel/span_sanitizer_spec.rb b/spec/lib/otel/span_sanitizer_spec.rb new file mode 100644 index 00000000..0be67d97 --- /dev/null +++ b/spec/lib/otel/span_sanitizer_spec.rb @@ -0,0 +1,149 @@ +# frozen_string_literal: true + +require 'rails_helper' +require 'otel/span_sanitizer' + +RSpec.describe Otel::SpanSanitizer do + subject(:sanitizer) { described_class.new } + + describe '#sanitize_value' do + it 'redacts email addresses' do + expect(sanitizer.sanitize_value('user@example.com')).to eq('[EMAIL REDACTED]') + end + + it 'redacts emails embedded in longer strings' do + result = sanitizer.sanitize_value('Contact john.doe@example.com for details') + expect(result).to eq('Contact [EMAIL REDACTED] for details') + end + + it 'redacts multiple emails in one string' do + result = sanitizer.sanitize_value('from alice@test.com to bob@test.com') + expect(result).not_to include('alice@test.com') + expect(result).not_to include('bob@test.com') + end + + it 'redacts date-of-birth patterns (YYYY-MM-DD)' do + expect(sanitizer.sanitize_value('1990-05-15')).to eq('[DATE REDACTED]') + end + + it 'redacts date-of-birth patterns (DD/MM/YYYY)' do + expect(sanitizer.sanitize_value('15/05/1990')).to eq('[DATE REDACTED]') + end + + it 'redacts IP addresses' do + expect(sanitizer.sanitize_value('192.168.1.100')).to eq('[IP REDACTED]') + end + + it 'redacts IP addresses embedded in strings' do + result = sanitizer.sanitize_value('Client IP: 10.0.0.1 connected') + expect(result).to eq('Client IP: [IP REDACTED] connected') + end + + it 'preserves non-sensitive strings' do + expect(sanitizer.sanitize_value('medication_take.create')).to eq('medication_take.create') + end + + it 'preserves numeric IDs' do + expect(sanitizer.sanitize_value('12345')).to eq('12345') + end + + it 'preserves ISO 8601 timestamps with time component' do + expect(sanitizer.sanitize_value('2025-01-15T10:30:00Z')).to eq('2025-01-15T10:30:00Z') + end + + it 'returns non-string values unchanged' do + expect(sanitizer.sanitize_value(42)).to eq(42) + expect(sanitizer.sanitize_value(true)).to be(true) + expect(sanitizer.sanitize_value(nil)).to be_nil + end + end + + describe '#sanitize_attributes' do + it 'redacts values for sensitive attribute keys' do + attrs = { + 'http.request.header.authorization' => 'Bearer secret-token', + 'http.request.header.cookie' => 'session=abc123', + 'model.name' => 'MedicationTake' + } + + result = sanitizer.sanitize_attributes(attrs) + + expect(result['http.request.header.authorization']).to eq('[REDACTED]') + expect(result['http.request.header.cookie']).to eq('[REDACTED]') + expect(result['model.name']).to eq('MedicationTake') + end + + it 'redacts PII patterns in attribute values even for non-sensitive keys' do + attrs = { + 'event.detail' => 'born on 1990-05-15 in London', + 'log.message' => 'Contact user@example.com now', + 'model.id' => '42' + } + + result = sanitizer.sanitize_attributes(attrs) + + expect(result['event.detail']).to eq('born on [DATE REDACTED] in London') + expect(result['log.message']).to eq('Contact [EMAIL REDACTED] now') + expect(result['model.id']).to eq('42') + end + + it 'redacts values for keys containing name, email, or password' do + attrs = { + 'person.name' => 'John Doe', + 'user.email_address' => 'john@example.com', + 'db.password' => 'secret' + } + + result = sanitizer.sanitize_attributes(attrs) + + expect(result['person.name']).to eq('[REDACTED]') + expect(result['user.email_address']).to eq('[REDACTED]') + expect(result['db.password']).to eq('[REDACTED]') + end + + it 'does not modify the original hash' do + attrs = { 'user.email' => 'test@example.com' } + sanitizer.sanitize_attributes(attrs) + expect(attrs['user.email']).to eq('test@example.com') + end + end + + describe '.sensitive_key?' do + it 'identifies authorization headers as sensitive' do + expect(described_class.sensitive_key?('http.request.header.authorization')).to be(true) + end + + it 'identifies cookie headers as sensitive' do + expect(described_class.sensitive_key?('http.request.header.cookie')).to be(true) + end + + it 'identifies name fields as sensitive' do + expect(described_class.sensitive_key?('person.name')).to be(true) + end + + it 'identifies email fields as sensitive' do + expect(described_class.sensitive_key?('user.email_address')).to be(true) + end + + it 'identifies password fields as sensitive' do + expect(described_class.sensitive_key?('db.password')).to be(true) + expect(described_class.sensitive_key?('password_digest')).to be(true) + end + + it 'identifies date_of_birth fields as sensitive' do + expect(described_class.sensitive_key?('person.date_of_birth')).to be(true) + end + + it 'does not flag model.name as sensitive' do + expect(described_class.sensitive_key?('model.name')).to be(false) + end + + it 'does not flag model.id as sensitive' do + expect(described_class.sensitive_key?('model.id')).to be(false) + end + + it 'does not flag model.operation as sensitive' do + expect(described_class.sensitive_key?('model.operation')).to be(false) + end + end +end diff --git a/spec/lib/otel/span_sanitizing_processor_spec.rb b/spec/lib/otel/span_sanitizing_processor_spec.rb new file mode 100644 index 00000000..038f2e59 --- /dev/null +++ b/spec/lib/otel/span_sanitizing_processor_spec.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +require 'rails_helper' +require 'otel/span_sanitizing_processor' + +RSpec.describe Otel::SpanSanitizingProcessor do + subject(:processor) { described_class.new } + + let(:tracer) do + OpenTelemetry.tracer_provider.tracer('test-tracer', '1.0.0') + end + + let(:exporter) { OpenTelemetry::SDK::Trace::Export::InMemorySpanExporter.new } + let(:simple_processor) { OpenTelemetry::SDK::Trace::Export::SimpleSpanProcessor.new(exporter) } + + before do + OpenTelemetry.tracer_provider.add_span_processor(simple_processor) + end + + after do + OpenTelemetry.tracer_provider.force_flush + exporter.reset + end + + describe '#on_start' do + it 'sanitizes email addresses in span attributes' do + tracer.in_span('test.operation', attributes: { 'user.email' => 'john@example.com' }) do |span| + processor.on_start(span, OpenTelemetry::Context.current) + expect(span.attributes['user.email']).to eq('[REDACTED]') + end + end + + it 'sanitizes sensitive header attributes' do + attrs = { 'http.request.header.authorization' => 'Bearer secret-token-123' } + tracer.in_span('http.request', attributes: attrs) do |span| + processor.on_start(span, OpenTelemetry::Context.current) + expect(span.attributes['http.request.header.authorization']).to eq('[REDACTED]') + end + end + + it 'preserves non-sensitive attributes' do + attrs = { + 'model.name' => 'MedicationTake', + 'model.id' => '42', + 'model.operation' => 'create' + } + tracer.in_span('medication_take.create', attributes: attrs) do |span| + processor.on_start(span, OpenTelemetry::Context.current) + expect(span.attributes['model.name']).to eq('MedicationTake') + expect(span.attributes['model.id']).to eq('42') + expect(span.attributes['model.operation']).to eq('create') + end + end + + it 'redacts IP addresses in attribute values' do + tracer.in_span('test', attributes: { 'client.address' => '192.168.1.100' }) do |span| + processor.on_start(span, OpenTelemetry::Context.current) + expect(span.attributes['client.address']).to eq('[IP REDACTED]') + end + end + end + + describe '#on_finish' do + it 'responds to on_finish' do + expect(processor).to respond_to(:on_finish) + end + end + + describe '#force_flush' do + it 'returns success' do + expect(processor.force_flush).to eq(OpenTelemetry::SDK::Trace::Export::SUCCESS) + end + end + + describe '#shutdown' do + it 'returns success' do + expect(processor.shutdown).to eq(OpenTelemetry::SDK::Trace::Export::SUCCESS) + end + end +end