From 2856fac7d1d9d803aba94821d69856aa3ffec6b4 Mon Sep 17 00:00:00 2001 From: Kirk Wang Date: Wed, 6 Mar 2024 17:44:12 -0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=81=20Add=20a=20more=20nauced=20approa?= =?UTF-8?q?ch=20to=20required=20elements?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior to this commit we were blanket checking CSV recores for the presence of title and resource_type. The problem with this approach was that all models were being checked for these fields while not all models required these fields, such as the Collection and FileSet models. In this commit we leverage the exisiting `Bulkrax::CsvParserDecorator` to check each model's actual required fields according to their respective forms (minus the FileSet which do not have required fields) and add logic to check any custom parser mappings according to the bulkrax initializer and also any numbered fields. The error message was also changed to indicate which model was missing what required field(s). Ref: - https://github.com/scientist-softserv/palni-palci/issues/436 --- app/parsers/bulkrax/csv_parser_decorator.rb | 75 ++++-- .../bulkrax/csv_parser_decorator_spec.rb | 219 ++++++++++++++++++ 2 files changed, 269 insertions(+), 25 deletions(-) create mode 100644 spec/parsers/bulkrax/csv_parser_decorator_spec.rb diff --git a/app/parsers/bulkrax/csv_parser_decorator.rb b/app/parsers/bulkrax/csv_parser_decorator.rb index 70501a969..b1464c5da 100644 --- a/app/parsers/bulkrax/csv_parser_decorator.rb +++ b/app/parsers/bulkrax/csv_parser_decorator.rb @@ -1,41 +1,66 @@ # frozen_string_literal: true -# OVERRIDE BULKRAX 3.1.2 to include oer specific methods +# OVERRIDE BULKRAX 5.3.0 to include oer specific methods and model level required fields checking module Bulkrax module CsvParserDecorator include OerCsvParser - # @return [Array] - def required_elements - if Bulkrax.fill_in_blank_source_identifiers - %w[title resource_type] - else - ['title', 'resource_type', source_identifier] - end - end - - # remove required_elements?, missing_elements, and valid_import? if bulkrax - # has been upgraded to version 5.2 or higher - def required_elements?(record) - missing_elements(record).blank? - end - - def missing_elements(record) - keys = keys_without_numbers(record.reject { |_, v| v.blank? }.keys.compact.uniq.map(&:to_s)) - return if keys.blank? - - required_elements.map(&:to_s) - keys.map(&:to_s) - end def valid_import? - compressed_record = records.map(&:to_a).flatten(1).partition { |_, v| !v }.flatten(1).to_h - error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(compressed_record).join(', ')}" - raise StandardError, error_alert unless required_elements?(compressed_record) + missing_fields_by_model = records.each_with_object({}) do |record, hash| + record.transform_keys!(&:downcase).transform_keys!(&:to_sym) + missing_fields = missing_fields_for(record) + hash[record[:model]] = missing_fields if missing_fields.present? + end + + raise_error_for_missing_fields(missing_fields_by_model) if missing_fields_by_model.keys.present? file_paths.is_a?(Array) rescue StandardError => e status_info(e) false end + + private + + def missing_fields_for(record) + required_fields = determine_required_fields_for(record[:model]) + required_fields.select do |field| + # checks the field itself + # any parser_mappings fields terms from `config/initializers/bulkrax.rb` + # or any keys that has sequential numbers like creator_1 + (record[field] || + mapped_from(field).map { |f| record[f] }.first || + handle_keys_with_numbers(field, record)).blank? + end + end + + def determine_required_fields_for(model) + # TODO: Revisit when Valkyrized as we can use Hyrax::ModelRegistry + case model + when 'Collection' then Hyrax::Forms::CollectionForm.required_fields + when 'FileSet' then [] + else "Hyrax::#{model}Form".constantize.required_fields + end + end + + def mapped_from(field) + Bulkrax.config.field_mappings[self.class.to_s][field.to_s]&.fetch(:from, [])&.map(&:to_sym) + end + + def handle_keys_with_numbers(_field, record) + keys_with_numbers = record.keys.select { |k| k.to_s.match(/(.+)_\d+/) } + keys_with_numbers.each do |key| + return record[key] + end + end + + def raise_error_for_missing_fields(missing_fields_by_model) + error_alert = missing_fields_by_model.keys.map do |model| + "#{model} missing: #{missing_fields_by_model[model].join(', ')}" + end.join('; ') + # example alert: 'Collection missing: title; GenericWork missing: title, creator' + raise StandardError, error_alert + end end end diff --git a/spec/parsers/bulkrax/csv_parser_decorator_spec.rb b/spec/parsers/bulkrax/csv_parser_decorator_spec.rb new file mode 100644 index 000000000..990daa5f0 --- /dev/null +++ b/spec/parsers/bulkrax/csv_parser_decorator_spec.rb @@ -0,0 +1,219 @@ +# frozen_string_literal: true + +RSpec.describe Bulkrax::CsvParserDecorator, type: :decorator do + subject(:csv_parser) do + Bulkrax::CsvParser.new( + instance_double( + 'Bulkrax::Importer', parser_klass: "Bulkrax::CsvParser", + only_updates: false, + file?: false, + parser_fields: {}, + status_info: double('Bulkrax::Status'), + metadata_only?: true + ) + ) + end + + # Pardon the verbosity but I wanted to emphasize which fields are currently required for each model + let(:generic_work_record) do + { + model: 'GenericWork', + title: 'Generic Work Title', + creator: 'Smith, John', + keyword: 'keyword', + rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/', + resource_type: 'Article', + date_created: nil, + audience: nil, + education_level: nil, + learning_resource_type: nil, + discipline: nil, + degree_name: nil, + degree_level: nil, + degree_discipline: nil, + degree_grantor: nil + } + end + let(:image_record) do + { + model: 'Image', + title: 'Image Title', + creator: 'Smith, John', + keyword: 'keyword', + rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/', + resource_type: 'Article', + date_created: nil, + audience: nil, + education_level: nil, + learning_resource_type: nil, + discipline: nil, + degree_name: nil, + degree_level: nil, + degree_discipline: nil, + degree_grantor: nil + } + end + let(:oer_record) do + { + model: 'Oer', + title: 'Oer Title', + creator: 'Smith, John', + keyword: nil, + rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/', + resource_type: 'Article', + date_created: '2018-01-01', + audience: 'Student', + education_level: 'community college / lower division', + learning_resource_type: 'Activity/lab', + discipline: 'Languages - Spanish', + degree_name: nil, + degree_level: nil, + degree_discipline: nil, + degree_grantor: nil + } + end + let(:etd_record) do + { + model: 'Etd', + title: 'Etd Title', + creator: 'Smith, John', + keyword: 'keyword', + rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/', + resource_type: 'Article', + date_created: '2018-01-01', + audience: nil, + education_level: nil, + learning_resource_type: nil, + discipline: nil, + degree_name: 'Bachelor of Science', + degree_level: 'Bachelors', + degree_discipline: 'Languages - Spanish', + degree_grantor: 'University of California, Los Angeles' + } + end + let(:cdl_record) do + { + model: 'Cdl', + title: 'Cdl Title', + creator: 'Smith, John', + keyword: 'keyword', + rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/', + resource_type: 'Article', + date_created: nil, + audience: nil, + education_level: nil, + learning_resource_type: nil, + discipline: nil, + degree_name: nil, + degree_level: nil, + degree_discipline: nil, + degree_grantor: nil + } + end + let(:collection_record) do + { + model: 'Collection', + title: 'Collection Title', + creator: nil, + keyword: nil, + rights_statement: nil, + resource_type: nil, + date_created: nil, + audience: nil, + education_level: nil, + learning_resource_type: nil, + discipline: nil, + degree_name: nil, + degree_level: nil, + degree_discipline: nil, + degree_grantor: nil + } + end + let(:file_set_record) do + { + model: 'FileSet', + title: nil, + creator: nil, + keyword: nil, + rights_statement: nil, + resource_type: nil, + date_created: nil, + audience: nil, + education_level: nil, + learning_resource_type: nil, + discipline: nil, + degree_name: nil, + degree_level: nil, + degree_discipline: nil, + degree_grantor: nil + } + end + let(:records) do + [generic_work_record, image_record, oer_record, etd_record, cdl_record, collection_record, file_set_record] + end + + before do + allow(subject).to receive(:records).and_return(records) + end + + describe '#valid_import?' do + context 'when all required fields are present' do + it 'returns true' do + expect(subject.valid_import?).to be true + end + end + + context 'when required fields are missing' do + let(:generic_work_record_with_no_title_and_no_creator) { generic_work_record.merge(title: nil, creator: nil) } + let(:collection_record_with_no_title) { collection_record.merge(title: nil) } + let(:records) { [collection_record_with_no_title, generic_work_record_with_no_title_and_no_creator] } + + it 'returns false' do + expect(subject.valid_import?).to be false + end + + it 'rescues the error and calls status_info' do + expect(subject).to receive(:status_info) do |e| + expect(e).to be_a(StandardError) + expect(e.message).to eq('Collection missing: title; GenericWork missing: title, creator') + end + + subject.valid_import? + end + end + + context 'when the csv headers are in any case (capitalized or lowercased)' do + let(:generic_work_record_with_capitalized_headers) do + generic_work_record.transform_keys(&:to_s) + .transform_keys(&:capitalize) + end + let(:records) { [generic_work_record_with_capitalized_headers] } + + it 'still returns true' do + expect(subject.valid_import?).to be true + end + end + + context 'when the csv header is the parser_mappings value' do + let(:generic_work_record_with_type_instead_of_resource_type) do + generic_work_record.transform_keys! { |key| key == :resource_type ? :type : key } + end + let(:records) { [generic_work_record_with_type_instead_of_resource_type] } + + it 'still returns true' do + expect(subject.valid_import?).to be true + end + end + + context 'when the csv header is a sequential number' do + let(:generic_work_record_with_sequential_fields) do + generic_work_record.transform_keys! { |key| key == :resource_type ? :type_1 : key } + end + let(:records) { [generic_work_record_with_sequential_fields] } + + it 'still returns true' do + expect(subject.valid_import?).to be true + end + end + end +end