Skip to content

Commit

Permalink
HYC-1968 - Reindexing error handling (#1142)
Browse files Browse the repository at this point in the history
* Handle Ldp::HttpError and continue with indexing

* Return nil immediately for vocab service lookups when blank keys are provided, rather than spamming logs with warnings
  • Loading branch information
bbpennel authored Feb 3, 2025
1 parent 551a37b commit 29c7e26
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 3 deletions.
8 changes: 6 additions & 2 deletions app/services/departments_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,30 @@ def self.select_all_options

# The permanent identifier for the term, stored in Fedora. This identifier should not be changed.
def self.identifier(term)
return nil if term.blank?
authority.all.reject { |item| item['active'] == false }.find { |department| department['label'] == term }['id']
rescue StandardError
Rails.logger.warn "DepartmentsService: cannot find identifier for '#{id}'"
nil
end

# The full term associated with the identifier. This is currently used in the display of People objects
def self.term(id)
return nil if id.blank?
authority.find(id).fetch('term')
rescue StandardError
Rails.logger.warn "DepartmentsService: cannot find '#{id}'"
Rails.logger.warn "DepartmentsService: cannot find term for '#{id}'"
nil
end

# The short version of the term associated with the identifier. These short terms were initially populated with the same
# values as the identifiers, but unlike the identifiers, these values *can* be changed without negative effects.
# This values is indexed to solr for department faceting.
def self.short_label(id)
return nil if id.blank?
authority.find(id).fetch('short_label')
rescue KeyError
Rails.logger.warn "DepartmentsService: cannot find '#{id}'"
Rails.logger.warn "DepartmentsService: cannot find short_label for '#{id}'"
nil
end

Expand Down
4 changes: 3 additions & 1 deletion app/services/languages_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ def self.select_all_options
end

def self.label(id)
return nil if id.blank?
authority.find(Array.wrap(id).first).fetch('term')
rescue StandardError
Rails.logger.debug "LanguagesService: cannot find '#{id}'"
Expand All @@ -18,9 +19,10 @@ def self.label(id)
end

def self.iso639_1(id)
return nil if id.blank?
authority.find(id).fetch('iso639_1')
rescue KeyError
Rails.logger.warn "DepartmentsService: cannot find '#{id}'"
Rails.logger.warn "LanguageService: cannot find '#{id}'"
nil
end

Expand Down
4 changes: 4 additions & 0 deletions app/services/tasks/solr_migration_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ def reindex_list(id_list_file, clean_index)
rescue Ldp::Gone => e
puts "Object with id #{id} is gone, skipping"
Rails.logger.warn "Object with id #{id} is gone, skipping"
rescue Ldp::HttpError => e
puts "An error occurred when retrieving object with id #{id} from fedora, skipping"
Rails.logger.error "An error occurred when retrieving object with id #{id} from fedora, skipping"
Rails.logger.error [e.class.to_s, e.message, *e.backtrace].join($RS)
rescue ActiveFedora::ObjectNotFoundError => e
Rails.logger.warn "Object with id #{id} was not found, skipping: #{e.message}"
rescue JSON::GeneratorError => e
Expand Down
12 changes: 12 additions & 0 deletions spec/services/departments_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,20 @@
it 'resolves for ids of inactive terms' do
expect(service.term('example')).to eq('Some College; Example Department')
end

it 'returns nil for blank input' do
expect(service.term('')).to be_nil
end
end

describe '#identifier' do
it 'resolves for labels of active terms' do
expect(service.identifier('History')).to eq('history')
end

it 'returns nil for blank input' do
expect(service.identifier('')).to be_nil
end
end

describe '#short_label' do
Expand All @@ -42,6 +50,10 @@
expect(service.short_label('example')).to eq('Example Department')
end

it 'returns nil for blank input' do
expect(service.short_label('')).to be_nil
end

it 'logs but does not raise error for non-existent terms' do
allow(Rails.logger).to receive(:warn)
expect(service.short_label('not-a-department')).to be_nil
Expand Down
14 changes: 14 additions & 0 deletions spec/services/languages_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,19 @@
it 'resolves for ids of terms' do
expect(service.label('http://id.loc.gov/vocabulary/iso639-2/eng')).to eq('English')
end

it 'returns nil for blank input' do
expect(service.label('')).to be_nil
end
end

describe '#iso639_1' do
it 'resolves for ids of terms' do
expect(service.iso639_1('http://id.loc.gov/vocabulary/iso639-2/eng')).to eq('en')
end

it 'returns nil for blank input' do
expect(service.iso639_1('')).to be_nil
end
end
end
41 changes: 41 additions & 0 deletions spec/services/tasks/solr_migration_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,47 @@
expect(Rails.logger).to have_received(:error).with('Execution interrupted by unexpected error')
end

it 'skips over a deleted object' do
initial_records = ActiveFedora::SolrService.get('*:*', rows: 100, sort: 'id ASC')
expect(initial_records['response']['numFound']).to eq TOTAL_SOLR_COUNT

list_path = service.list_object_ids(output_dir, nil)

# Empty out solr so we can repopulate it
Blacklight.default_index.connection.delete_by_query('*:*')
Blacklight.default_index.connection.commit

# Delete work1 to simulate a deleted object
work1.delete
allow(Rails.logger).to receive(:warn)

service.reindex(list_path, false)
reindexed_records = ActiveFedora::SolrService.get('*:*', rows: 100, sort: 'id ASC')
expect(reindexed_records['response']['numFound']).to eq TOTAL_SOLR_COUNT - 1
expect(Rails.logger).to have_received(:warn).with("Object with id #{work1.id} is gone, skipping")
end

it 'skips over ldp http error' do
initial_records = ActiveFedora::SolrService.get('*:*', rows: 100, sort: 'id ASC')
expect(initial_records['response']['numFound']).to eq TOTAL_SOLR_COUNT

list_path = service.list_object_ids(output_dir, nil)

# Empty out solr so we can repopulate it
Blacklight.default_index.connection.delete_by_query('*:*')
Blacklight.default_index.connection.commit

# Delete work1 to simulate a deleted object
allow(ActiveFedora::Base).to receive(:find).and_call_original
allow(ActiveFedora::Base).to receive(:find).with(work1.id).and_raise(Ldp::HttpError)
allow(Rails.logger).to receive(:error)

service.reindex(list_path, false)
reindexed_records = ActiveFedora::SolrService.get('*:*', rows: 100, sort: 'id ASC')
expect(reindexed_records['response']['numFound']).to eq TOTAL_SOLR_COUNT - 1
expect(Rails.logger).to have_received(:error).with("An error occurred when retrieving object with id #{work1.id} from fedora, skipping")
end

def expect_results_match(results1, results2)
docs1 = results1['response']['docs']
docs2 = results2['response']['docs']
Expand Down

0 comments on commit 29c7e26

Please sign in to comment.