Shift some indexing reporting to the SolrBetterJsonWriter
Closes #1321
thatbudakguy committed Feb 29, 2024
1 parent c3bcde5 commit f48aac7
Showing 5 changed files with 144 additions and 38 deletions.
28 changes: 28 additions & 0 deletions lib/sdr_events.rb
@@ -40,6 +40,34 @@ def report_indexing_errored(druid, target:, message:, context: nil)
     create_event(druid:, target:, type: 'indexing_errored', data: { message:, context: }.compact)
   end
 
+  # Take a SolrBetterJsonWriter::Batch and report successful adds/deletes
+  def report_indexing_batch_success(batch, target:)
+    batch.actions.each do |action, druid, _data|
+      next unless druid
+
+      case action
+      when :delete
+        report_indexing_deleted(druid, target:)
+      when :add
+        report_indexing_success(druid, target:)
+      end
+    end
+  end
+
+  # Take a SolrBetterJsonWriter::Batch and report failed adds/deletes
+  def report_indexing_batch_errored(batch, target:, exception:)
+    batch.actions.each do |action, druid, _data|
+      next unless druid
+
+      case action
+      when :delete
+        report_indexing_errored(druid, target:, message: 'delete failed', context: exception)
+      when :add
+        report_indexing_errored(druid, target:, message: 'add failed', context: exception)
+      end
+    end
+  end
+
   private
 
   # Generic event creation; prefer more specific methods
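To illustrate the intended call pattern, here is a minimal sketch of driving the two new helpers by hand; the Struct stand-in for SolrBetterJsonWriter::Batch and the druid are hypothetical, but the [action, druid, data] triple shape matches the methods above.

# Hypothetical stand-in for SolrBetterJsonWriter::Batch: anything that
# responds to #actions and returns [action, druid, data] triples works.
batch = Struct.new(:actions).new([
  [:add, 'bb112zx3193', { 'id' => ['stanford-bb112zx3193'] }], # SDR item
  [:delete, nil, 'non-sdr-id']                                 # nil druid: skipped
])

SdrEvents.report_indexing_batch_success(batch, target: 'Searchworks')
# Reports indexing_success for bb112zx3193; the nil-druid row is ignored
# by the `next unless druid` guard.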
4 changes: 1 addition & 3 deletions lib/traject/config/geo_config.rb
@@ -170,7 +170,6 @@ def geoserver_url(record)
   # delete records from the index
   context.output_hash['id'] = ["stanford-#{druid}"]
 
-  SdrEvents.report_indexing_deleted(druid, target: settings['purl_fetcher.target'])
   context.skip!("Delete: #{druid}")
 end
 
@@ -465,12 +464,11 @@ def geoserver_url(record)
     end
   end
 
-each_record do |record, context|
+each_record do |_record, context|
   t0 = context.clipboard[:benchmark_start_time]
   t1 = Time.now
 
   logger.debug('geo_config.rb') { "Processed #{context.output_hash['id']} (#{t1 - t0}s)" }
-  SdrEvents.report_indexing_success(record.druid, target: settings['purl_fetcher.target'])
 end
 
 # rubocop:disable Metrics/MethodLength
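With these calls removed, the indexing config no longer claims success or deletion at mapping time; the writer reports those events only after Solr actually responds to the batch (see lib/traject/writers/solr_better_json_writer.rb below).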
25 changes: 15 additions & 10 deletions lib/traject/writers/solr_better_json_writer.rb
@@ -68,6 +68,7 @@ def send_batch(contexts)
       end
     else
       @retry_count = 0
+      SdrEvents.report_indexing_batch_success(batch, target: @settings['purl_fetcher.target'])
     end
   end
 
@@ -100,7 +101,10 @@ def send_single(context)
"#{self.class.name}: Exceeded maximum number of skipped records (#{@max_skipped}): aborting"
end

SdrEvents.report_indexing_batch_errored(batch, target: @settings['purl_fetcher.target'], exception: msg)
return false
else
SdrEvents.report_indexing_batch_success(batch, target: @settings['purl_fetcher.target'])
end

true
@@ -124,27 +128,28 @@ def each(&)
       @contexts.each(&)
     end
 
-    # Array of [action, data] pairs, where action is :add or :delete
-    # and data is either the doc id or the full doc hash
+    # Array of [action, druid, data] triples, where action is :add or :delete
+    # and data is either the doc id or the full doc hash. Druid is empty for
+    # non-SDR content.
     def actions
-      @contexts.map do |c|
-        if c.skip?
-          id = Array(c.output_hash['id']).first
-          [:delete, id] if id
+      @actions ||= @contexts.map do |context|
+        if context.skip?
+          id = Array(context.output_hash['id']).first
+          [:delete, context.source_record&.druid, id] if id
         else
-          [:add, c.output_hash]
+          [:add, context.source_record&.druid, context.output_hash]
         end
       end.compact
     end
 
     # Make a JSON string for sending to solr /update API
     def generate_json
-      actions.map do |action, data|
+      actions.map do |action, _druid, data|
         case action
         when :delete
-          "delete: #{JSON.generate(data)}"
+          "\"delete\":#{JSON.generate(data)}"
         when :add
-          "add: #{JSON.generate(doc: data)}"
+          "\"add\":#{JSON.generate(doc: data)}"
         end
       end.join(",\n").prepend('{').concat('}')
     end
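For reference, a standalone sketch of the payload generate_json now builds (druids and ids made up). The quoted "add"/"delete" keys replace the previous unquoted ones, so the batch body is valid JSON for Solr's /update handler:

require 'json'

# Actions in the new [action, druid, data] shape; ids are made up
actions = [
  [:add, 'bb112zx3193', { 'id' => ['stanford-bb112zx3193'] }],
  [:delete, 'py305sy7961', 'stanford-py305sy7961']
]

# Mirrors Batch#generate_json above
body = actions.map do |action, _druid, data|
  case action
  when :delete then "\"delete\":#{JSON.generate(data)}"
  when :add then "\"add\":#{JSON.generate(doc: data)}"
  end
end.join(",\n").prepend('{').concat('}')

puts body
# {"add":{"doc":{"id":["stanford-bb112zx3193"]}},
# "delete":"stanford-py305sy7961"}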
16 changes: 0 additions & 16 deletions spec/integration/geo_config_spec.rb
@@ -312,22 +312,6 @@ def stub_mods_request(druid, body)
       )
     end
 
-    context 'when indexing is successful' do
-      it 'creates an indexing success event' do
-        expect(result).to be_a Hash
-        expect(SdrEvents).to have_received(:report_indexing_success).with(druid, target: 'Earthworks')
-      end
-    end
-
-    context 'when the item was deleted' do
-      let(:record) { { id: "druid:#{druid}", delete: true } }
-
-      it 'creates an indexing delete event' do
-        expect(result).to be_nil
-        expect(SdrEvents).to have_received(:report_indexing_deleted).with(druid, target: 'Earthworks')
-      end
-    end
-
     context 'when the item has no public XML' do
       before { allow(record).to receive(:public_xml).and_return(nil) }
 
109 changes: 100 additions & 9 deletions spec/lib/traject/writers/solr_better_json_writer_spec.rb
@@ -3,7 +3,16 @@
 require 'spec_helper'
 
 describe Traject::SolrBetterJsonWriter do
-  subject(:writer) { described_class.new('solr_json_writer.http_client' => http_client, 'solr.update_url' => 'http://localhost/solr', 'solr_better_json_writer.debounce_timeout' => 1) }
+  subject(:writer) do
+    described_class.new(
+      'solr_json_writer.http_client' => http_client,
+      'solr.update_url' => 'http://localhost/solr',
+      'solr_better_json_writer.debounce_timeout' => 1,
+      'purl_fetcher.target' => 'Searchworks',
+      'solr_writer.max_skipped' => 100
+    )
+  end
+
   let(:http_client) { double(post: double(status: 200)) }
   let(:doc) { Traject::Indexer::Context.new.tap { |doc| doc.output_hash['id'] = [1] } }
   let(:skipped_doc) do
@@ -39,26 +48,108 @@
   describe '#send_batch' do
     it 'adds documents to solr' do
       writer.send_batch([doc])
-      expect(http_client).to have_received(:post).with('http://localhost/solr', '{add: {"doc":{"id":[1]}}}',
-                                                       'Content-type' => 'application/json')
+      expect(http_client).to have_received(:post).with(
+        'http://localhost/solr',
+        '{"add":{"doc":{"id":[1]}}}',
+        'Content-type' => 'application/json'
+      )
     end
 
     it 'deletes documents from solr' do
       writer.send_batch([skipped_doc])
-      expect(http_client).to have_received(:post).with('http://localhost/solr', '{delete: 2}',
-                                                       'Content-type' => 'application/json')
+      expect(http_client).to have_received(:post).with(
+        'http://localhost/solr',
+        '{"delete":2}',
+        'Content-type' => 'application/json'
+      )
     end
 
     it 'skips writing documents that have no id' do
       writer.send_batch([bad_doc])
-      expect(http_client).to have_received(:post).with('http://localhost/solr', '{}',
-                                                       'Content-type' => 'application/json')
+      expect(http_client).to have_received(:post).with(
+        'http://localhost/solr',
+        '{}',
+        'Content-type' => 'application/json'
+      )
     end
 
     it 'sends the request as a batch' do
       writer.send_batch([doc, skipped_doc])
-      expect(http_client).to have_received(:post).with('http://localhost/solr',
-                                                       "{add: {\"doc\":{\"id\":[1]}},\ndelete: 2}", 'Content-type' => 'application/json')
+      expect(http_client).to have_received(:post).with(
+        'http://localhost/solr',
+        "{\"add\":{\"doc\":{\"id\":[1]}},\n\"delete\":2}",
+        'Content-type' => 'application/json'
+      )
     end
   end
+
+  describe 'event reporting' do
+    let(:sdr_docs) do
+      [
+        Traject::Indexer::Context.new.tap do |doc|
+          doc.output_hash['id'] = [1]
+          doc.source_record = PublicXmlRecord.new('bb112zx3193')
+        end,
+        Traject::Indexer::Context.new.tap do |doc|
+          doc.output_hash['id'] = [2]
+          doc.source_record = PublicXmlRecord.new('py305sy7961')
+        end
+      ]
+    end
+
+    before do
+      allow(Settings.sdr_events).to receive(:enabled).and_return(true)
+      allow(SdrEvents).to receive_messages(
+        report_indexing_deleted: true,
+        report_indexing_success: true,
+        report_indexing_errored: true
+      )
+    end
+
+    context 'when SDR events are disabled' do
+      before do
+        allow(Settings.sdr_events).to receive(:enabled).and_return(false)
+        allow(Dor::Event::Client).to receive(:create)
+      end
+
+      it 'does not report any events' do
+        writer.send_batch(sdr_docs)
+        expect(Dor::Event::Client).not_to have_received(:create)
+      end
+    end
+
+    context 'when all docs index successfully' do
+      it 'reports docs that are successfully added' do
+        writer.send_batch(sdr_docs)
+        expect(SdrEvents).to have_received(:report_indexing_success).with('bb112zx3193', target: 'Searchworks')
+        expect(SdrEvents).to have_received(:report_indexing_success).with('py305sy7961', target: 'Searchworks')
+      end
+
+      it 'reports docs that are successfully deleted' do
+        skipped_doc.source_record = PublicXmlRecord.new('bj057dg6517')
+        writer.send_batch([skipped_doc])
+        expect(SdrEvents).to have_received(:report_indexing_deleted).with('bj057dg6517', target: 'Searchworks')
+      end
+    end
+
+    context 'when some docs fail to index' do
+      let(:http_client) { double(post: double(status: 400, body: 'Malformed data')) }
+
+      it 'reports docs that fail to index' do
+        writer.send_batch(sdr_docs)
+        expect(SdrEvents).to have_received(:report_indexing_errored).with(
+          'bb112zx3193',
+          target: 'Searchworks',
+          message: 'add failed',
+          context: 'Solr error response: 400: Malformed data'
+        )
+        expect(SdrEvents).to have_received(:report_indexing_errored).with(
+          'py305sy7961',
+          target: 'Searchworks',
+          message: 'add failed',
+          context: 'Solr error response: 400: Malformed data'
+        )
+      end
+    end
+  end
 end
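Assuming a standard RSpec setup, the new examples can be run on their own with:

bundle exec rspec spec/lib/traject/writers/solr_better_json_writer_spec.rb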
