diff --git a/lib/sdr_events.rb b/lib/sdr_events.rb
index d75a790c6..d25c00cc2 100644
--- a/lib/sdr_events.rb
+++ b/lib/sdr_events.rb
@@ -40,6 +40,34 @@ def report_indexing_errored(druid, target:, message:, context: nil)
       create_event(druid:, target:, type: 'indexing_errored', data: { message:, context: }.compact)
     end
 
+    # Take a SolrBetterJsonWriter::Batch and report successful adds/deletes
+    def report_indexing_batch_success(batch, target:)
+      batch.actions.each do |action, druid, _data|
+        next unless druid
+
+        case action
+        when :delete
+          report_indexing_deleted(druid, target:)
+        when :add
+          report_indexing_success(druid, target:)
+        end
+      end
+    end
+
+    # Take a SolrBetterJsonWriter::Batch and report failed adds/deletes
+    def report_indexing_batch_errored(batch, target:, exception:)
+      batch.actions.each do |action, druid, _data|
+        next unless druid
+
+        case action
+        when :delete
+          report_indexing_errored(druid, target:, message: 'delete failed', context: exception)
+        when :add
+          report_indexing_errored(druid, target:, message: 'add failed', context: exception)
+        end
+      end
+    end
+
     private
 
     # Generic event creation; prefer more specific methods
diff --git a/lib/traject/config/geo_config.rb b/lib/traject/config/geo_config.rb
index 2b3dfc52d..c03626a53 100644
--- a/lib/traject/config/geo_config.rb
+++ b/lib/traject/config/geo_config.rb
@@ -170,7 +170,6 @@ def geoserver_url(record)
 
     # delete records from the index
     context.output_hash['id'] = ["stanford-#{druid}"]
-    SdrEvents.report_indexing_deleted(druid, target: settings['purl_fetcher.target'])
     context.skip!("Delete: #{druid}")
   end
 
@@ -465,12 +464,11 @@ def geoserver_url(record)
   end
 end
 
-each_record do |record, context|
+each_record do |_record, context|
   t0 = context.clipboard[:benchmark_start_time]
   t1 = Time.now
 
   logger.debug('geo_config.rb') { "Processed #{context.output_hash['id']} (#{t1 - t0}s)" }
-  SdrEvents.report_indexing_success(record.druid, target: settings['purl_fetcher.target'])
 end
 
 # rubocop:disable Metrics/MethodLength
diff --git a/lib/traject/writers/solr_better_json_writer.rb b/lib/traject/writers/solr_better_json_writer.rb
index a2566a9b9..78b408360 100644
--- a/lib/traject/writers/solr_better_json_writer.rb
+++ b/lib/traject/writers/solr_better_json_writer.rb
@@ -68,6 +68,7 @@ def send_batch(contexts)
       end
     else
       @retry_count = 0
+      SdrEvents.report_indexing_batch_success(batch, target: @settings['purl_fetcher.target'])
     end
   end
 
@@ -100,7 +101,10 @@ def send_single(context)
         "#{self.class.name}: Exceeded maximum number of skipped records (#{@max_skipped}): aborting"
       end
 
+      SdrEvents.report_indexing_batch_errored(batch, target: @settings['purl_fetcher.target'], exception: msg)
       return false
+    else
+      SdrEvents.report_indexing_batch_success(batch, target: @settings['purl_fetcher.target'])
     end
 
     true
@@ -124,27 +128,28 @@ def each(&)
       @contexts.each(&)
     end
 
-    # Array of [action, data] pairs, where action is :add or :delete
-    # and data is either the doc id or the full doc hash
+    # Array of [action, druid, data] triples, where action is :add or :delete
+    # and data is either the doc id or the full doc hash. Druid is nil for
+    # non-SDR content.
     def actions
-      @contexts.map do |c|
-        if c.skip?
-          id = Array(c.output_hash['id']).first
-          [:delete, id] if id
+      @actions ||= @contexts.map do |context|
+        if context.skip?
+          id = Array(context.output_hash['id']).first
+          [:delete, context.source_record&.druid, id] if id
         else
-          [:add, c.output_hash]
+          [:add, context.source_record&.druid, context.output_hash]
         end
       end.compact
     end
 
     # Make a JSON string for sending to solr /update API
     def generate_json
-      actions.map do |action, data|
+      actions.map do |action, _druid, data|
         case action
         when :delete
-          "delete: #{JSON.generate(data)}"
+          "\"delete\":#{JSON.generate(data)}"
         when :add
-          "add: #{JSON.generate(doc: data)}"
+          "\"add\":#{JSON.generate(doc: data)}"
         end
       end.join(",\n").prepend('{').concat('}')
     end
diff --git a/spec/integration/geo_config_spec.rb b/spec/integration/geo_config_spec.rb
index 45276237e..8bdfc304e 100644
--- a/spec/integration/geo_config_spec.rb
+++ b/spec/integration/geo_config_spec.rb
@@ -312,22 +312,6 @@ def stub_mods_request(druid, body)
       )
     end
 
-    context 'when indexing is successful' do
-      it 'creates an indexing success event' do
-        expect(result).to be_a Hash
-        expect(SdrEvents).to have_received(:report_indexing_success).with(druid, target: 'Earthworks')
-      end
-    end
-
-    context 'when the item was deleted' do
-      let(:record) { { id: "druid:#{druid}", delete: true } }
-
-      it 'creates an indexing delete event' do
-        expect(result).to be_nil
-        expect(SdrEvents).to have_received(:report_indexing_deleted).with(druid, target: 'Earthworks')
-      end
-    end
-
     context 'when the item has no public XML' do
       before { allow(record).to receive(:public_xml).and_return(nil) }
 
diff --git a/spec/lib/traject/writers/solr_better_json_writer_spec.rb b/spec/lib/traject/writers/solr_better_json_writer_spec.rb
index 02015a0c5..913662911 100644
--- a/spec/lib/traject/writers/solr_better_json_writer_spec.rb
+++ b/spec/lib/traject/writers/solr_better_json_writer_spec.rb
@@ -3,7 +3,16 @@
 require 'spec_helper'
 
 describe Traject::SolrBetterJsonWriter do
-  subject(:writer) { described_class.new('solr_json_writer.http_client' => http_client, 'solr.update_url' => 'http://localhost/solr', 'solr_better_json_writer.debounce_timeout' => 1) }
+  subject(:writer) do
+    described_class.new(
+      'solr_json_writer.http_client' => http_client,
+      'solr.update_url' => 'http://localhost/solr',
+      'solr_better_json_writer.debounce_timeout' => 1,
+      'purl_fetcher.target' => 'Searchworks',
+      'solr_writer.max_skipped' => 100
+    )
+  end
+
   let(:http_client) { double(post: double(status: 200)) }
   let(:doc) { Traject::Indexer::Context.new.tap { |doc| doc.output_hash['id'] = [1] } }
   let(:skipped_doc) do
@@ -39,26 +48,108 @@
   describe '#send_batch' do
     it 'adds documents to solr' do
       writer.send_batch([doc])
-      expect(http_client).to have_received(:post).with('http://localhost/solr', '{add: {"doc":{"id":[1]}}}',
-                                                       'Content-type' => 'application/json')
+      expect(http_client).to have_received(:post).with(
+        'http://localhost/solr',
+        '{"add":{"doc":{"id":[1]}}}',
+        'Content-type' => 'application/json'
+      )
     end
 
     it 'deletes documents from solr' do
       writer.send_batch([skipped_doc])
-      expect(http_client).to have_received(:post).with('http://localhost/solr', '{delete: 2}',
-                                                       'Content-type' => 'application/json')
+      expect(http_client).to have_received(:post).with(
+        'http://localhost/solr',
+        '{"delete":2}',
+        'Content-type' => 'application/json'
+      )
     end
 
     it 'skips writing documents that have no id' do
       writer.send_batch([bad_doc])
-      expect(http_client).to have_received(:post).with('http://localhost/solr', '{}',
-                                                       'Content-type' => 'application/json')
+      expect(http_client).to have_received(:post).with(
+        'http://localhost/solr',
+        '{}',
+        'Content-type' => 'application/json'
+      )
     end
 
     it 'sends the request as a batch' do
       writer.send_batch([doc, skipped_doc])
-      expect(http_client).to have_received(:post).with('http://localhost/solr',
-                                                       "{add: {\"doc\":{\"id\":[1]}},\ndelete: 2}", 'Content-type' => 'application/json')
+      expect(http_client).to have_received(:post).with(
+        'http://localhost/solr',
+        "{\"add\":{\"doc\":{\"id\":[1]}},\n\"delete\":2}",
+        'Content-type' => 'application/json'
+      )
+    end
+  end
+
+  describe 'event reporting' do
+    let(:sdr_docs) do
+      [
+        Traject::Indexer::Context.new.tap do |doc|
+          doc.output_hash['id'] = [1]
+          doc.source_record = PublicXmlRecord.new('bb112zx3193')
+        end,
+        Traject::Indexer::Context.new.tap do |doc|
+          doc.output_hash['id'] = [2]
+          doc.source_record = PublicXmlRecord.new('py305sy7961')
+        end
+      ]
+    end
+
+    before do
+      allow(Settings.sdr_events).to receive(:enabled).and_return(true)
+      allow(SdrEvents).to receive_messages(
+        report_indexing_deleted: true,
+        report_indexing_success: true,
+        report_indexing_errored: true
+      )
+    end
+
+    context 'when SDR events are disabled' do
+      before do
+        allow(Settings.sdr_events).to receive(:enabled).and_return(false)
+        allow(Dor::Event::Client).to receive(:create)
+      end
+
+      it 'does not report any events' do
+        writer.send_batch(sdr_docs)
+        expect(Dor::Event::Client).not_to have_received(:create)
+      end
+    end
+
+    context 'when all docs index successfully' do
+      it 'reports docs that are successfully added' do
+        writer.send_batch(sdr_docs)
+        expect(SdrEvents).to have_received(:report_indexing_success).with('bb112zx3193', target: 'Searchworks')
+        expect(SdrEvents).to have_received(:report_indexing_success).with('py305sy7961', target: 'Searchworks')
+      end
+
+      it 'reports docs that are successfully deleted' do
+        skipped_doc.source_record = PublicXmlRecord.new('bj057dg6517')
+        writer.send_batch([skipped_doc])
+        expect(SdrEvents).to have_received(:report_indexing_deleted).with('bj057dg6517', target: 'Searchworks')
+      end
+    end
+
+    context 'when some docs fail to index' do
+      let(:http_client) { double(post: double(status: 400, body: 'Malformed data')) }
+
+      it 'reports docs that fail to index' do
+        writer.send_batch(sdr_docs)
+        expect(SdrEvents).to have_received(:report_indexing_errored).with(
+          'bb112zx3193',
+          target: 'Searchworks',
+          message: 'add failed',
+          context: 'Solr error response: 400: Malformed data'
+        )
+        expect(SdrEvents).to have_received(:report_indexing_errored).with(
+          'py305sy7961',
+          target: 'Searchworks',
+          message: 'add failed',
+          context: 'Solr error response: 400: Malformed data'
+        )
+      end
+    end
     end
   end
 end
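
Reviewer note: a minimal usage sketch of how the pieces in this patch fit together. It is illustrative only, not part of the diff: it assumes Batch.new takes the array of Traject contexts (as send_batch does) and that source_record responds to #druid the way PublicXmlRecord does; the druids and target name are made up.

# Build one context that indexes and one that is marked for deletion.
add_ctx = Traject::Indexer::Context.new.tap do |ctx|
  ctx.output_hash['id'] = ['stanford-bb112zx3193']
  ctx.source_record = PublicXmlRecord.new('bb112zx3193')
end

delete_ctx = Traject::Indexer::Context.new.tap do |ctx|
  ctx.output_hash['id'] = ['stanford-py305sy7961']
  ctx.source_record = PublicXmlRecord.new('py305sy7961')
  ctx.skip!('Delete: py305sy7961') # skipped contexts become deletes
end

batch = Traject::SolrBetterJsonWriter::Batch.new([add_ctx, delete_ctx])

# The new [action, druid, data] triples carry the druid alongside the
# Solr payload, so event reporting no longer lives in the traject config.
batch.actions
# => [[:add, "bb112zx3193", { "id" => ["stanford-bb112zx3193"] }],
#     [:delete, "py305sy7961", "stanford-py305sy7961"]]

# generate_json now emits quoted keys, i.e. a valid JSON update body.
batch.generate_json
# => "{\"add\":{\"doc\":{\"id\":[\"stanford-bb112zx3193\"]}},\n\"delete\":\"stanford-py305sy7961\"}"

# On a 2xx Solr response the writer reports one event per druid; contexts
# without a druid (non-SDR content) are skipped by the `next unless druid` guard.
SdrEvents.report_indexing_batch_success(batch, target: 'Searchworks')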