From 1adeff04eb33031ecc01730090e4f144b7a803ca Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Thu, 8 Feb 2024 01:08:21 -0800 Subject: [PATCH] strip special characters out of header names. Excel likes to leave odd Unicode items, including the Unicode BOM, lying around. This causes havoc. By stopping it right from the start we should prevent saving invisible characters to raw_metadata and other places they get stuck --- app/models/bulkrax/csv_entry.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/models/bulkrax/csv_entry.rb b/app/models/bulkrax/csv_entry.rb index a106a74b..2d65e37a 100644 --- a/app/models/bulkrax/csv_entry.rb +++ b/app/models/bulkrax/csv_entry.rb @@ -16,11 +16,12 @@ def self.fields_from_data(data) class_attribute(:csv_read_data_options, default: {}) # there's a risk that this reads the whole file into memory and could cause a memory leak + # we strip any special characters out of the headers. looking at you Excel def self.read_data(path) raise StandardError, 'CSV path empty' if path.blank? options = { headers: true, - header_converters: ->(h) { h.to_s.strip.to_sym }, + header_converters: ->(h) { h.to_s.gsub(/[^\w\d -]+/, '').strip.to_sym }, encoding: 'utf-8' }.merge(csv_read_data_options)