Skip to content

Commit

Permalink
🎁 Add a more nauced approach to required elements
Browse files Browse the repository at this point in the history
Prior to this commit we were blanket checking CSV recores for the
presence of title and resource_type.  The problem with this approach was
that all models were being checked for these fields while not all models
required these fields, such as the Collection and FileSet models.

In this commit we leverage the exisiting `Bulkrax::CsvParserDecorator`
to check each model's actual required fields according to their
respective forms (minus the FileSet which do not have required fields)
and add logic to check any custom parser mappings according to the
bulkrax initializer and also any numbered fields.  The error message was
also changed to indicate which model was missing what required field(s).

Ref:
  - #436
  • Loading branch information
Kirk Wang committed Mar 7, 2024
1 parent 09bf754 commit 2856fac
Show file tree
Hide file tree
Showing 2 changed files with 269 additions and 25 deletions.
75 changes: 50 additions & 25 deletions app/parsers/bulkrax/csv_parser_decorator.rb
Original file line number Diff line number Diff line change
@@ -1,41 +1,66 @@
# frozen_string_literal: true

# OVERRIDE BULKRAX 3.1.2 to include oer specific methods
# OVERRIDE BULKRAX 5.3.0 to include oer specific methods and model level required fields checking
module Bulkrax
module CsvParserDecorator
include OerCsvParser
# @return [Array<String>]
def required_elements
if Bulkrax.fill_in_blank_source_identifiers
%w[title resource_type]
else
['title', 'resource_type', source_identifier]
end
end

# remove required_elements?, missing_elements, and valid_import? if bulkrax
# has been upgraded to version 5.2 or higher
def required_elements?(record)
missing_elements(record).blank?
end

def missing_elements(record)
keys = keys_without_numbers(record.reject { |_, v| v.blank? }.keys.compact.uniq.map(&:to_s))
return if keys.blank?

required_elements.map(&:to_s) - keys.map(&:to_s)
end

def valid_import?
compressed_record = records.map(&:to_a).flatten(1).partition { |_, v| !v }.flatten(1).to_h
error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(compressed_record).join(', ')}"
raise StandardError, error_alert unless required_elements?(compressed_record)
missing_fields_by_model = records.each_with_object({}) do |record, hash|
record.transform_keys!(&:downcase).transform_keys!(&:to_sym)
missing_fields = missing_fields_for(record)
hash[record[:model]] = missing_fields if missing_fields.present?
end

raise_error_for_missing_fields(missing_fields_by_model) if missing_fields_by_model.keys.present?

file_paths.is_a?(Array)
rescue StandardError => e
status_info(e)
false
end

private

def missing_fields_for(record)
required_fields = determine_required_fields_for(record[:model])
required_fields.select do |field|
# checks the field itself
# any parser_mappings fields terms from `config/initializers/bulkrax.rb`
# or any keys that has sequential numbers like creator_1
(record[field] ||
mapped_from(field).map { |f| record[f] }.first ||
handle_keys_with_numbers(field, record)).blank?
end
end

def determine_required_fields_for(model)
# TODO: Revisit when Valkyrized as we can use Hyrax::ModelRegistry
case model
when 'Collection' then Hyrax::Forms::CollectionForm.required_fields
when 'FileSet' then []
else "Hyrax::#{model}Form".constantize.required_fields
end
end

def mapped_from(field)
Bulkrax.config.field_mappings[self.class.to_s][field.to_s]&.fetch(:from, [])&.map(&:to_sym)
end

def handle_keys_with_numbers(_field, record)
keys_with_numbers = record.keys.select { |k| k.to_s.match(/(.+)_\d+/) }
keys_with_numbers.each do |key|
return record[key]
end
end

def raise_error_for_missing_fields(missing_fields_by_model)
error_alert = missing_fields_by_model.keys.map do |model|
"#{model} missing: #{missing_fields_by_model[model].join(', ')}"
end.join('; ')
# example alert: 'Collection missing: title; GenericWork missing: title, creator'
raise StandardError, error_alert
end
end
end

Expand Down
219 changes: 219 additions & 0 deletions spec/parsers/bulkrax/csv_parser_decorator_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
# frozen_string_literal: true

RSpec.describe Bulkrax::CsvParserDecorator, type: :decorator do
subject(:csv_parser) do
Bulkrax::CsvParser.new(
instance_double(
'Bulkrax::Importer', parser_klass: "Bulkrax::CsvParser",
only_updates: false,
file?: false,
parser_fields: {},
status_info: double('Bulkrax::Status'),
metadata_only?: true
)
)
end

# Pardon the verbosity but I wanted to emphasize which fields are currently required for each model
let(:generic_work_record) do
{
model: 'GenericWork',
title: 'Generic Work Title',
creator: 'Smith, John',
keyword: 'keyword',
rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/',
resource_type: 'Article',
date_created: nil,
audience: nil,
education_level: nil,
learning_resource_type: nil,
discipline: nil,
degree_name: nil,
degree_level: nil,
degree_discipline: nil,
degree_grantor: nil
}
end
let(:image_record) do
{
model: 'Image',
title: 'Image Title',
creator: 'Smith, John',
keyword: 'keyword',
rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/',
resource_type: 'Article',
date_created: nil,
audience: nil,
education_level: nil,
learning_resource_type: nil,
discipline: nil,
degree_name: nil,
degree_level: nil,
degree_discipline: nil,
degree_grantor: nil
}
end
let(:oer_record) do
{
model: 'Oer',
title: 'Oer Title',
creator: 'Smith, John',
keyword: nil,
rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/',
resource_type: 'Article',
date_created: '2018-01-01',
audience: 'Student',
education_level: 'community college / lower division',
learning_resource_type: 'Activity/lab',
discipline: 'Languages - Spanish',
degree_name: nil,
degree_level: nil,
degree_discipline: nil,
degree_grantor: nil
}
end
let(:etd_record) do
{
model: 'Etd',
title: 'Etd Title',
creator: 'Smith, John',
keyword: 'keyword',
rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/',
resource_type: 'Article',
date_created: '2018-01-01',
audience: nil,
education_level: nil,
learning_resource_type: nil,
discipline: nil,
degree_name: 'Bachelor of Science',
degree_level: 'Bachelors',
degree_discipline: 'Languages - Spanish',
degree_grantor: 'University of California, Los Angeles'
}
end
let(:cdl_record) do
{
model: 'Cdl',
title: 'Cdl Title',
creator: 'Smith, John',
keyword: 'keyword',
rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/',
resource_type: 'Article',
date_created: nil,
audience: nil,
education_level: nil,
learning_resource_type: nil,
discipline: nil,
degree_name: nil,
degree_level: nil,
degree_discipline: nil,
degree_grantor: nil
}
end
let(:collection_record) do
{
model: 'Collection',
title: 'Collection Title',
creator: nil,
keyword: nil,
rights_statement: nil,
resource_type: nil,
date_created: nil,
audience: nil,
education_level: nil,
learning_resource_type: nil,
discipline: nil,
degree_name: nil,
degree_level: nil,
degree_discipline: nil,
degree_grantor: nil
}
end
let(:file_set_record) do
{
model: 'FileSet',
title: nil,
creator: nil,
keyword: nil,
rights_statement: nil,
resource_type: nil,
date_created: nil,
audience: nil,
education_level: nil,
learning_resource_type: nil,
discipline: nil,
degree_name: nil,
degree_level: nil,
degree_discipline: nil,
degree_grantor: nil
}
end
let(:records) do
[generic_work_record, image_record, oer_record, etd_record, cdl_record, collection_record, file_set_record]
end

before do
allow(subject).to receive(:records).and_return(records)
end

describe '#valid_import?' do
context 'when all required fields are present' do
it 'returns true' do
expect(subject.valid_import?).to be true
end
end

context 'when required fields are missing' do
let(:generic_work_record_with_no_title_and_no_creator) { generic_work_record.merge(title: nil, creator: nil) }
let(:collection_record_with_no_title) { collection_record.merge(title: nil) }
let(:records) { [collection_record_with_no_title, generic_work_record_with_no_title_and_no_creator] }

it 'returns false' do
expect(subject.valid_import?).to be false
end

it 'rescues the error and calls status_info' do
expect(subject).to receive(:status_info) do |e|
expect(e).to be_a(StandardError)
expect(e.message).to eq('Collection missing: title; GenericWork missing: title, creator')
end

subject.valid_import?
end
end

context 'when the csv headers are in any case (capitalized or lowercased)' do
let(:generic_work_record_with_capitalized_headers) do
generic_work_record.transform_keys(&:to_s)
.transform_keys(&:capitalize)
end
let(:records) { [generic_work_record_with_capitalized_headers] }

it 'still returns true' do
expect(subject.valid_import?).to be true
end
end

context 'when the csv header is the parser_mappings value' do
let(:generic_work_record_with_type_instead_of_resource_type) do
generic_work_record.transform_keys! { |key| key == :resource_type ? :type : key }
end
let(:records) { [generic_work_record_with_type_instead_of_resource_type] }

it 'still returns true' do
expect(subject.valid_import?).to be true
end
end

context 'when the csv header is a sequential number' do
let(:generic_work_record_with_sequential_fields) do
generic_work_record.transform_keys! { |key| key == :resource_type ? :type_1 : key }
end
let(:records) { [generic_work_record_with_sequential_fields] }

it 'still returns true' do
expect(subject.valid_import?).to be true
end
end
end
end

0 comments on commit 2856fac

Please sign in to comment.