Skip to content

Commit

Permalink
Merge pull request #984 from scientist-softserv/i436-bulkrax-required…
Browse files Browse the repository at this point in the history
…-fields

🎁 Add a more nauced approach to required elements
  • Loading branch information
kirkkwang authored Mar 7, 2024
2 parents 09bf754 + 2856fac commit 2852b89
Show file tree
Hide file tree
Showing 2 changed files with 269 additions and 25 deletions.
75 changes: 50 additions & 25 deletions app/parsers/bulkrax/csv_parser_decorator.rb
Original file line number Diff line number Diff line change
@@ -1,41 +1,66 @@
# frozen_string_literal: true

# OVERRIDE BULKRAX 3.1.2 to include oer specific methods
# OVERRIDE BULKRAX 5.3.0 to include oer specific methods and model level required fields checking
module Bulkrax
module CsvParserDecorator
include OerCsvParser
# @return [Array<String>]
def required_elements
if Bulkrax.fill_in_blank_source_identifiers
%w[title resource_type]
else
['title', 'resource_type', source_identifier]
end
end

# remove required_elements?, missing_elements, and valid_import? if bulkrax
# has been upgraded to version 5.2 or higher
def required_elements?(record)
missing_elements(record).blank?
end

def missing_elements(record)
keys = keys_without_numbers(record.reject { |_, v| v.blank? }.keys.compact.uniq.map(&:to_s))
return if keys.blank?

required_elements.map(&:to_s) - keys.map(&:to_s)
end

def valid_import?
compressed_record = records.map(&:to_a).flatten(1).partition { |_, v| !v }.flatten(1).to_h
error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(compressed_record).join(', ')}"
raise StandardError, error_alert unless required_elements?(compressed_record)
missing_fields_by_model = records.each_with_object({}) do |record, hash|
record.transform_keys!(&:downcase).transform_keys!(&:to_sym)
missing_fields = missing_fields_for(record)
hash[record[:model]] = missing_fields if missing_fields.present?
end

raise_error_for_missing_fields(missing_fields_by_model) if missing_fields_by_model.keys.present?

file_paths.is_a?(Array)
rescue StandardError => e
status_info(e)
false
end

private

def missing_fields_for(record)
required_fields = determine_required_fields_for(record[:model])
required_fields.select do |field|
# checks the field itself
# any parser_mappings fields terms from `config/initializers/bulkrax.rb`
# or any keys that has sequential numbers like creator_1
(record[field] ||
mapped_from(field).map { |f| record[f] }.first ||
handle_keys_with_numbers(field, record)).blank?
end
end

def determine_required_fields_for(model)
# TODO: Revisit when Valkyrized as we can use Hyrax::ModelRegistry
case model
when 'Collection' then Hyrax::Forms::CollectionForm.required_fields
when 'FileSet' then []
else "Hyrax::#{model}Form".constantize.required_fields
end
end

def mapped_from(field)
Bulkrax.config.field_mappings[self.class.to_s][field.to_s]&.fetch(:from, [])&.map(&:to_sym)
end

def handle_keys_with_numbers(_field, record)
keys_with_numbers = record.keys.select { |k| k.to_s.match(/(.+)_\d+/) }
keys_with_numbers.each do |key|
return record[key]
end
end

def raise_error_for_missing_fields(missing_fields_by_model)
error_alert = missing_fields_by_model.keys.map do |model|
"#{model} missing: #{missing_fields_by_model[model].join(', ')}"
end.join('; ')
# example alert: 'Collection missing: title; GenericWork missing: title, creator'
raise StandardError, error_alert
end
end
end

Expand Down
219 changes: 219 additions & 0 deletions spec/parsers/bulkrax/csv_parser_decorator_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
# frozen_string_literal: true

RSpec.describe Bulkrax::CsvParserDecorator, type: :decorator do
subject(:csv_parser) do
Bulkrax::CsvParser.new(
instance_double(
'Bulkrax::Importer', parser_klass: "Bulkrax::CsvParser",
only_updates: false,
file?: false,
parser_fields: {},
status_info: double('Bulkrax::Status'),
metadata_only?: true
)
)
end

# Pardon the verbosity but I wanted to emphasize which fields are currently required for each model
let(:generic_work_record) do
{
model: 'GenericWork',
title: 'Generic Work Title',
creator: 'Smith, John',
keyword: 'keyword',
rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/',
resource_type: 'Article',
date_created: nil,
audience: nil,
education_level: nil,
learning_resource_type: nil,
discipline: nil,
degree_name: nil,
degree_level: nil,
degree_discipline: nil,
degree_grantor: nil
}
end
let(:image_record) do
{
model: 'Image',
title: 'Image Title',
creator: 'Smith, John',
keyword: 'keyword',
rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/',
resource_type: 'Article',
date_created: nil,
audience: nil,
education_level: nil,
learning_resource_type: nil,
discipline: nil,
degree_name: nil,
degree_level: nil,
degree_discipline: nil,
degree_grantor: nil
}
end
let(:oer_record) do
{
model: 'Oer',
title: 'Oer Title',
creator: 'Smith, John',
keyword: nil,
rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/',
resource_type: 'Article',
date_created: '2018-01-01',
audience: 'Student',
education_level: 'community college / lower division',
learning_resource_type: 'Activity/lab',
discipline: 'Languages - Spanish',
degree_name: nil,
degree_level: nil,
degree_discipline: nil,
degree_grantor: nil
}
end
let(:etd_record) do
{
model: 'Etd',
title: 'Etd Title',
creator: 'Smith, John',
keyword: 'keyword',
rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/',
resource_type: 'Article',
date_created: '2018-01-01',
audience: nil,
education_level: nil,
learning_resource_type: nil,
discipline: nil,
degree_name: 'Bachelor of Science',
degree_level: 'Bachelors',
degree_discipline: 'Languages - Spanish',
degree_grantor: 'University of California, Los Angeles'
}
end
let(:cdl_record) do
{
model: 'Cdl',
title: 'Cdl Title',
creator: 'Smith, John',
keyword: 'keyword',
rights_statement: 'http://rightsstatements.org/vocab/InC/1.0/',
resource_type: 'Article',
date_created: nil,
audience: nil,
education_level: nil,
learning_resource_type: nil,
discipline: nil,
degree_name: nil,
degree_level: nil,
degree_discipline: nil,
degree_grantor: nil
}
end
let(:collection_record) do
{
model: 'Collection',
title: 'Collection Title',
creator: nil,
keyword: nil,
rights_statement: nil,
resource_type: nil,
date_created: nil,
audience: nil,
education_level: nil,
learning_resource_type: nil,
discipline: nil,
degree_name: nil,
degree_level: nil,
degree_discipline: nil,
degree_grantor: nil
}
end
let(:file_set_record) do
{
model: 'FileSet',
title: nil,
creator: nil,
keyword: nil,
rights_statement: nil,
resource_type: nil,
date_created: nil,
audience: nil,
education_level: nil,
learning_resource_type: nil,
discipline: nil,
degree_name: nil,
degree_level: nil,
degree_discipline: nil,
degree_grantor: nil
}
end
let(:records) do
[generic_work_record, image_record, oer_record, etd_record, cdl_record, collection_record, file_set_record]
end

before do
allow(subject).to receive(:records).and_return(records)
end

describe '#valid_import?' do
context 'when all required fields are present' do
it 'returns true' do
expect(subject.valid_import?).to be true
end
end

context 'when required fields are missing' do
let(:generic_work_record_with_no_title_and_no_creator) { generic_work_record.merge(title: nil, creator: nil) }
let(:collection_record_with_no_title) { collection_record.merge(title: nil) }
let(:records) { [collection_record_with_no_title, generic_work_record_with_no_title_and_no_creator] }

it 'returns false' do
expect(subject.valid_import?).to be false
end

it 'rescues the error and calls status_info' do
expect(subject).to receive(:status_info) do |e|
expect(e).to be_a(StandardError)
expect(e.message).to eq('Collection missing: title; GenericWork missing: title, creator')
end

subject.valid_import?
end
end

context 'when the csv headers are in any case (capitalized or lowercased)' do
let(:generic_work_record_with_capitalized_headers) do
generic_work_record.transform_keys(&:to_s)
.transform_keys(&:capitalize)
end
let(:records) { [generic_work_record_with_capitalized_headers] }

it 'still returns true' do
expect(subject.valid_import?).to be true
end
end

context 'when the csv header is the parser_mappings value' do
let(:generic_work_record_with_type_instead_of_resource_type) do
generic_work_record.transform_keys! { |key| key == :resource_type ? :type : key }
end
let(:records) { [generic_work_record_with_type_instead_of_resource_type] }

it 'still returns true' do
expect(subject.valid_import?).to be true
end
end

context 'when the csv header is a sequential number' do
let(:generic_work_record_with_sequential_fields) do
generic_work_record.transform_keys! { |key| key == :resource_type ? :type_1 : key }
end
let(:records) { [generic_work_record_with_sequential_fields] }

it 'still returns true' do
expect(subject.valid_import?).to be true
end
end
end
end

0 comments on commit 2852b89

Please sign in to comment.