Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions config/initializers/opentelemetry.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# frozen_string_literal: true

require 'opentelemetry-api'
require 'otel/span_sanitizing_processor'

# Send OpenTelemetry's own log output to stdout: WARN and above in the test
# environment, ERROR and above in every other environment.
OpenTelemetry.logger = Logger.new($stdout, level: Rails.env.test? ? Logger::WARN : Logger::ERROR)

Expand All @@ -20,11 +21,13 @@
record_frontend_span: true
},
'OpenTelemetry::Instrumentation::PG' => {
db_statement: :include,
db_statement: :obfuscate,
peer_service: 'postgresql'
}
)

c.add_span_processor(Otel::SpanSanitizingProcessor.new)

c.resource = OpenTelemetry::SDK::Resources::Resource.create(
'service.name' => 'medtracker-test',
'service.namespace' => 'medtracker',
Expand All @@ -41,9 +44,7 @@
# Optional OTLP headers; nil when the environment variable is not set.
otlp_headers = ENV.fetch('OTEL_EXPORTER_OTLP_HEADERS', nil)
# Exporter timeout as an Integer, defaulting to 10 when the variable is not set.
# NOTE(review): the unit is not visible here - presumably seconds; confirm against the exporter.
otlp_timeout = ENV.fetch('OTEL_EXPORTER_OTLP_TIMEOUT', '10').to_i

# Log the endpoint only when one is configured (multi-line form).
if otlp_endpoint.present?
Rails.logger.info "[OpenTelemetry] Configuring OTLP exporter: #{otlp_endpoint}"
end
# Single-line (modifier) form of the same conditional log statement.
Rails.logger.info "[OpenTelemetry] Configuring OTLP exporter: #{otlp_endpoint}" if otlp_endpoint.present?

OpenTelemetry::SDK.configure do |c|
c.service_name = 'medtracker'
Expand Down Expand Up @@ -74,14 +75,16 @@
record_frontend_span: true
},
'OpenTelemetry::Instrumentation::PG' => {
db_statement: :include,
db_statement: :obfuscate,
peer_service: 'postgresql'
},
'OpenTelemetry::Instrumentation::Net::HTTP' => {
untraced_hosts: ['127.0.0.1', 'localhost']
}
)

c.add_span_processor(Otel::SpanSanitizingProcessor.new)

c.resource = OpenTelemetry::SDK::Resources::Resource.create(
'service.name' => 'medtracker',
'service.namespace' => 'medtracker',
Expand Down
60 changes: 60 additions & 0 deletions lib/otel/span_sanitizer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# frozen_string_literal: true

module Otel
  # Redacts sensitive data from span attributes.
  #
  # Two independent mechanisms are applied:
  #
  # * Key based - when an attribute key matches one of the sensitive/PII key
  #   patterns, the whole value is replaced with "[REDACTED]".
  # * Value based - for every other attribute, e-mail addresses, IP-shaped
  #   tokens and date-only values found inside string values are replaced with
  #   a typed placeholder.
  class SpanSanitizer
    # E-mail addresses anywhere inside a string.
    EMAIL_PATTERN = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/
    # YYYY-MM-DD dates that are not directly followed by a "T" time component.
    DATE_ONLY_PATTERN = /\b\d{4}-\d{2}-\d{2}\b(?!T)/
    # DD/MM/YYYY style dates.
    DATE_DMY_PATTERN = %r{\b\d{2}/\d{2}/\d{4}\b}
    # Dotted-quad shaped tokens; octet ranges are not validated.
    IP_PATTERN = /\b(?:\d{1,3}\.){3}\d{1,3}\b/

    # Keys whose values are always replaced wholesale.
    SENSITIVE_KEY_PATTERNS = [
      /authorization/i,
      /cookie/i,
      /password/i,
      /secret/i,
      /token/i,
      /\bdate_of_birth\b/i,
      /\bdob\b/i
    ].freeze

    # Keys that name a person or an e-mail address.
    #
    # The lookbehind on its own does not exempt "model.name": at the point
    # where ".name" starts, the preceding text is "model", not "model.". The
    # explicit allow-list below is what keeps the model.* keys readable.
    PII_KEY_PATTERNS = [
      /(?<!\bmodel\.)(?:^|\.)name$/i,
      /(?<!\bmodel\.)(?:^|\.)email/i
    ].freeze

    # Keys that are never treated as sensitive, even though a pattern above
    # would otherwise match them.
    ALLOWED_KEYS = %w[model.name model.operation model.id].freeze

    # Redacts PII patterns inside a single attribute value.
    #
    # Strings are scanned for e-mails, IP-shaped tokens and dates. Arrays are
    # sanitized element by element so that string-array attributes cannot
    # carry PII through untouched. Any other value is returned unchanged.
    #
    # @param value [Object] the attribute value
    # @return [Object] a new sanitized String/Array, or +value+ itself
    def sanitize_value(value)
      case value
      when String
        # gsub returns a new string, so the caller's value is never mutated.
        without_email = value.gsub(EMAIL_PATTERN, '[EMAIL REDACTED]')
        redact_date_only(without_email.gsub(IP_PATTERN, '[IP REDACTED]'))
      when Array
        value.map { |element| sanitize_value(element) }
      else
        value
      end
    end

    # Builds a sanitized copy of an attribute hash; the input is not modified.
    #
    # @param attrs [Hash] attribute key => value pairs
    # @return [Hash] a new hash with the same keys and sanitized values
    def sanitize_attributes(attrs)
      attrs.each_with_object({}) do |(key, value), sanitized|
        sanitized[key] = if self.class.sensitive_key?(key)
                           '[REDACTED]'
                         else
                           sanitize_value(value)
                         end
      end
    end

    # Tells whether the value stored under +key+ must be replaced wholesale.
    #
    # The key is normalised with +to_s+ so Symbol keys are treated exactly
    # like their String form and a nil key is simply "not sensitive".
    #
    # @param key [String, Symbol, nil] the attribute key
    # @return [Boolean]
    def self.sensitive_key?(key)
      name = key.to_s
      return false if ALLOWED_KEYS.include?(name)

      SENSITIVE_KEY_PATTERNS.any? { |pattern| name.match?(pattern) } ||
        PII_KEY_PATTERNS.any? { |pattern| name.match?(pattern) }
    end

    private

    # Replaces DD/MM/YYYY dates first, then date-only YYYY-MM-DD values.
    def redact_date_only(value)
      result = value.gsub(DATE_DMY_PATTERN, '[DATE REDACTED]')
      result.gsub(DATE_ONLY_PATTERN, '[DATE REDACTED]')
    end
  end
end
32 changes: 32 additions & 0 deletions lib/otel/span_sanitizing_processor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# frozen_string_literal: true

require_relative 'span_sanitizer'

module Otel
  # Span processor that runs a span's attributes through SpanSanitizer when the
  # span starts and writes back every value the sanitizer changed.
  #
  # NOTE(review): only the attributes already present at on_start are examined
  # here. Whether attributes added later in the span's life are covered cannot
  # be told from this file - confirm against the SDK's span lifecycle.
  class SpanSanitizingProcessor
    def initialize
      @sanitizer = SpanSanitizer.new
    end

    # Sanitizes the attributes of a freshly started span in place.
    # Spans without an attributes reader, or with no attributes, are ignored.
    # Any StandardError is logged as a warning instead of being raised.
    def on_start(span, _parent_context)
      current = span.attributes if span.respond_to?(:attributes)
      return unless current

      @sanitizer.sanitize_attributes(current).each do |key, cleaned|
        # Skip attributes the sanitizer left untouched.
        next if cleaned == span.attributes[key]

        span.set_attribute(key, cleaned)
      end
    rescue StandardError => e
      Rails.logger.warn "[OpenTelemetry] SpanSanitizingProcessor error: #{e.message}"
    end

    # Nothing to do when a span finishes.
    def on_finish(_span)
      nil
    end

    # This processor buffers nothing, so flushing always reports success.
    def force_flush(timeout: nil) # rubocop:disable Lint/UnusedMethodArgument
      OpenTelemetry::SDK::Trace::Export::SUCCESS
    end

    # This processor holds no resources, so shutdown always reports success.
    def shutdown(timeout: nil) # rubocop:disable Lint/UnusedMethodArgument
      OpenTelemetry::SDK::Trace::Export::SUCCESS
    end
  end
end
43 changes: 43 additions & 0 deletions spec/lib/otel/sensitive_data_scrubbing_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# frozen_string_literal: true

require 'rails_helper'

# Configuration-level checks for trace scrubbing: the PG instrumentation
# obfuscates SQL, the Rack instrumentation does not allow-list sensitive
# request headers, and the sanitizing span processor is registered.
RSpec.describe 'OTEL-014: Sensitive data scrubbed from traces' do # rubocop:disable RSpec/DescribeClass
describe 'PG instrumentation configuration' do
it 'obfuscates SQL statements instead of including raw SQL' do
pg_instrumentation = OpenTelemetry::Instrumentation::PG::Instrumentation.instance
expect(pg_instrumentation).to be_installed

# Reads the instrumentation's @config instance variable directly.
config = pg_instrumentation.instance_variable_get(:@config)
expect(config[:db_statement]).to eq(:obfuscate)
end
end

describe 'Rack instrumentation configuration' do
it 'does not record request headers that may contain sensitive data' do
rack_instrumentation = OpenTelemetry::Instrumentation::Rack::Instrumentation.instance
expect(rack_instrumentation).to be_installed

config = rack_instrumentation.instance_variable_get(:@config)
# Treat a missing allowed_request_headers setting as an empty allow-list.
allowed_request_headers = config[:allowed_request_headers] || []
sensitive_headers = %w[authorization cookie set-cookie x-forwarded-for]

# Compare case-insensitively by downcasing the configured header names.
sensitive_headers.each do |header|
msg = "Rack instrumentation should not record sensitive header: #{header}"
expect(allowed_request_headers.map(&:downcase)).not_to include(header), msg
end
end
end

describe 'SpanSanitizer processor' do
it 'is registered as a span processor' do
# Inspects the tracer provider's @span_processors instance variable directly.
processors = OpenTelemetry.tracer_provider.instance_variable_get(:@span_processors)
sanitizer_present = processors.any? do |p|
p.is_a?(Otel::SpanSanitizingProcessor)
end

expect(sanitizer_present).to be(true),
'SpanSanitizingProcessor should be registered as a span processor'
end
end
end
149 changes: 149 additions & 0 deletions spec/lib/otel/span_sanitizer_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# frozen_string_literal: true

require 'rails_helper'
require 'otel/span_sanitizer'

RSpec.describe Otel::SpanSanitizer do
subject(:sanitizer) { described_class.new }

# Value-level redaction: e-mails, dates and IP addresses inside strings are
# replaced with typed placeholders; everything else passes through unchanged.
describe '#sanitize_value' do
it 'redacts email addresses' do
expect(sanitizer.sanitize_value('user@example.com')).to eq('[EMAIL REDACTED]')
end

it 'redacts emails embedded in longer strings' do
result = sanitizer.sanitize_value('Contact john.doe@example.com for details')
expect(result).to eq('Contact [EMAIL REDACTED] for details')
end

it 'redacts multiple emails in one string' do
result = sanitizer.sanitize_value('from alice@test.com to bob@test.com')
expect(result).not_to include('alice@test.com')
expect(result).not_to include('bob@test.com')
end

it 'redacts date-of-birth patterns (YYYY-MM-DD)' do
expect(sanitizer.sanitize_value('1990-05-15')).to eq('[DATE REDACTED]')
end

it 'redacts date-of-birth patterns (DD/MM/YYYY)' do
expect(sanitizer.sanitize_value('15/05/1990')).to eq('[DATE REDACTED]')
end

it 'redacts IP addresses' do
expect(sanitizer.sanitize_value('192.168.1.100')).to eq('[IP REDACTED]')
end

it 'redacts IP addresses embedded in strings' do
result = sanitizer.sanitize_value('Client IP: 10.0.0.1 connected')
expect(result).to eq('Client IP: [IP REDACTED] connected')
end

it 'preserves non-sensitive strings' do
expect(sanitizer.sanitize_value('medication_take.create')).to eq('medication_take.create')
end

it 'preserves numeric IDs' do
expect(sanitizer.sanitize_value('12345')).to eq('12345')
end

# A date immediately followed by a "T" time component must stay intact.
it 'preserves ISO 8601 timestamps with time component' do
expect(sanitizer.sanitize_value('2025-01-15T10:30:00Z')).to eq('2025-01-15T10:30:00Z')
end

it 'returns non-string values unchanged' do
expect(sanitizer.sanitize_value(42)).to eq(42)
expect(sanitizer.sanitize_value(true)).to be(true)
expect(sanitizer.sanitize_value(nil)).to be_nil
end
end

# Hash-level sanitizing: sensitive keys have their whole value replaced, other
# keys have PII patterns redacted inside the value, and the input hash is left
# untouched.
describe '#sanitize_attributes' do
it 'redacts values for sensitive attribute keys' do
attrs = {
'http.request.header.authorization' => 'Bearer secret-token',
'http.request.header.cookie' => 'session=abc123',
'model.name' => 'MedicationTake'
}

result = sanitizer.sanitize_attributes(attrs)

expect(result['http.request.header.authorization']).to eq('[REDACTED]')
expect(result['http.request.header.cookie']).to eq('[REDACTED]')
# model.name is expected to stay readable.
expect(result['model.name']).to eq('MedicationTake')
end

it 'redacts PII patterns in attribute values even for non-sensitive keys' do
attrs = {
'event.detail' => 'born on 1990-05-15 in London',
'log.message' => 'Contact user@example.com now',
'model.id' => '42'
}

result = sanitizer.sanitize_attributes(attrs)

expect(result['event.detail']).to eq('born on [DATE REDACTED] in London')
expect(result['log.message']).to eq('Contact [EMAIL REDACTED] now')
expect(result['model.id']).to eq('42')
end

it 'redacts values for keys containing name, email, or password' do
attrs = {
'person.name' => 'John Doe',
'user.email_address' => 'john@example.com',
'db.password' => 'secret'
}

result = sanitizer.sanitize_attributes(attrs)

expect(result['person.name']).to eq('[REDACTED]')
expect(result['user.email_address']).to eq('[REDACTED]')
expect(result['db.password']).to eq('[REDACTED]')
end

# The result is a separate hash; the caller's hash keeps its original values.
it 'does not modify the original hash' do
attrs = { 'user.email' => 'test@example.com' }
sanitizer.sanitize_attributes(attrs)
expect(attrs['user.email']).to eq('test@example.com')
end
end

# Key classification: which attribute keys cause whole-value redaction and
# which model.* keys are explicitly exempt.
describe '.sensitive_key?' do
it 'identifies authorization headers as sensitive' do
expect(described_class.sensitive_key?('http.request.header.authorization')).to be(true)
end

it 'identifies cookie headers as sensitive' do
expect(described_class.sensitive_key?('http.request.header.cookie')).to be(true)
end

it 'identifies name fields as sensitive' do
expect(described_class.sensitive_key?('person.name')).to be(true)
end

it 'identifies email fields as sensitive' do
expect(described_class.sensitive_key?('user.email_address')).to be(true)
end

# Covers both a dotted key and a bare key that merely contains "password".
it 'identifies password fields as sensitive' do
expect(described_class.sensitive_key?('db.password')).to be(true)
expect(described_class.sensitive_key?('password_digest')).to be(true)
end

it 'identifies date_of_birth fields as sensitive' do
expect(described_class.sensitive_key?('person.date_of_birth')).to be(true)
end

it 'does not flag model.name as sensitive' do
expect(described_class.sensitive_key?('model.name')).to be(false)
end

it 'does not flag model.id as sensitive' do
expect(described_class.sensitive_key?('model.id')).to be(false)
end

it 'does not flag model.operation as sensitive' do
expect(described_class.sensitive_key?('model.operation')).to be(false)
end
end
end
Loading
Loading