Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add catch-some fields for key bib data #382

Merged
merged 1 commit into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 84 additions & 2 deletions lib/traject/config/folio_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,25 @@ def holdings(record, context)
extract_marc("440anpv:490av:800#{A_X}:810#{A_X}:811#{A_X}:830#{A_X}", alternate_script: :only)
to_field 'series_exact_search', extract_marc('830a', alternate_script: false)

# 245 subfields a and c, indexed as a pair of search fields: the first reads the
# field itself (alternate_script: false), the second reads only the linked
# alternate-script data (alternate_script: :only).
to_field 'author_title_245ac_search', extract_marc('245ac', alternate_script: false)
to_field 'vern_author_title_245ac_search', extract_marc('245ac', alternate_script: :only)

to_field 'author_title_1xx_search' do |record, accumulator|
  # Pairs the first extracted 1xx heading (100abcq / 110abn / 111aeq) with a
  # title: the first 240 value when there is one, otherwise the first 245 $a.
  # Nothing is emitted when no 1xx heading was extracted.
  heading_extractor = Traject::MarcExtractor.cached('100abcq:110abn:111aeq', alternate_script: false)
  uniform_title_extractor = Traject::MarcExtractor.cached('240' + ALPHABET, alternate_script: false)

  heading = trim_punctuation_when_preceded_by_two_word_characters_or_some_other_stuff(
    heading_extractor.extract(record).first
  )
  title = trim_punctuation_when_preceded_by_two_word_characters_or_some_other_stuff(
    uniform_title_extractor.extract(record).first
  )
  # Fall back to the 245 title only when the 240 produced nothing.
  title ||= Traject::MarcExtractor.cached('245aa', alternate_script: false).extract(record).first

  next unless heading

  parts = [heading, title].compact.reject(&:empty?)
  accumulator << parts.map(&:strip).join(' ')
end

to_field 'vern_author_title_1xx_search' do |record, accumulator|
  # Alternate-script counterpart of the 1xx + title pairing: every extractor is
  # run with alternate_script: :only. The title is the first 240 value when
  # there is one, otherwise the first 245 $a. Nothing is emitted when no 1xx
  # heading was extracted.
  heading_extractor = Traject::MarcExtractor.cached('100abcq:110abn:111aeq', alternate_script: :only)
  uniform_title_extractor = Traject::MarcExtractor.cached('240' + ALPHABET, alternate_script: :only)

  heading = trim_punctuation_when_preceded_by_two_word_characters_or_some_other_stuff(
    heading_extractor.extract(record).first
  )
  title = trim_punctuation_when_preceded_by_two_word_characters_or_some_other_stuff(
    uniform_title_extractor.extract(record).first
  )
  # Fall back to the 245 title only when the 240 produced nothing.
  title ||= Traject::MarcExtractor.cached('245aa', alternate_script: :only).extract(record).first

  next unless heading

  parts = [heading, title].compact.reject(&:empty?)
  accumulator << parts.map(&:strip).join(' ')
end

# # Author Title Search Fields
to_field 'author_title_search' do |record, accumulator|
onexx = trim_punctuation_when_preceded_by_two_word_characters_or_some_other_stuff(Traject::MarcExtractor.cached(
Expand All @@ -380,7 +399,7 @@ def holdings(record, context)
twoxx ||= Traject::MarcExtractor.cached('245aa', alternate_script: false).extract(record).first if record['245']
twoxx ||= 'null'

accumulator << [onexx, twoxx].compact.reject(&:empty?).map(&:strip).join(' ') if onexx
accumulator << [onexx, twoxx].compact.reject(&:empty?).map(&:strip).join(' ') if onexx && twoxx
end

to_field 'author_title_search' do |record, accumulator|
Expand All @@ -389,7 +408,6 @@ def holdings(record, context)
).extract(record).first

twoxx = Traject::MarcExtractor.cached('240' + ALPHABET, alternate_script: :only).extract(record).first
twoxx ||= Traject::MarcExtractor.cached('245aa', alternate_script: :only).extract(record).first
accumulator << [onexx, twoxx].compact.reject(&:empty?).map(&:strip).join(' ') if onexx && twoxx
end

Expand All @@ -413,6 +431,21 @@ def holdings(record, context)
end
end

to_field 'author_title_search' do |record, accumulator|
  # Adds a "1xx heading + 245 $a" value. The 1xx spec lists every lowercase
  # subfield except $e for 100/110 and except $i for 111. A value is emitted
  # only when both the heading and the 245 title are present.
  heading_spec = '100abcdfghijklmnopqrstuvwxyz:110abcdfghijklmnopqrstuvwxyz:111abcdefghjklmnopqrstuvwxyz'
  raw_heading = Traject::MarcExtractor.cached(heading_spec, alternate_script: false).extract(record).first
  heading = trim_punctuation_when_preceded_by_two_word_characters_or_some_other_stuff(raw_heading)

  # Only consult the 245 extractor when the record actually has a 245.
  title = record['245'] ? Traject::MarcExtractor.cached('245aa', alternate_script: false).extract(record).first : nil

  next unless heading && title

  parts = [heading, title].compact.reject(&:empty?)
  accumulator << parts.map(&:strip).join(' ')
end

to_field 'author_title_search' do |record, accumulator|
  # Alternate-script "1xx heading + 245 $a" value: both extractors run with
  # alternate_script: :only, and the heading is used as extracted (no
  # punctuation trimming). Emitted only when both parts are present.
  heading_spec = '100abcdfghijklmnopqrstuvwxyz:110abcdfghijklmnopqrstuvwxyz:111abcdefghjklmnopqrstuvwxyz'
  heading = Traject::MarcExtractor.cached(heading_spec, alternate_script: :only).extract(record).first
  title = Traject::MarcExtractor.cached('245aa', alternate_script: :only).extract(record).first

  next unless heading && title

  parts = [heading, title].compact.reject(&:empty?)
  accumulator << parts.map(&:strip).join(' ')
end

# # Author Search Fields
# # IFF relevancy of author search needs improvement, unstemmed flavors for author search
# # (keep using stemmed version for everything search to match stemmed query)
Expand Down Expand Up @@ -2575,6 +2608,55 @@ def accumulate_summary_struct_fields(matching_fields, tag, label, marc, accumula
end.flatten.uniq)
end

to_field 'bib_search' do |record, accumulator, context|
  # Builds one space-joined string from the subfield values of the author,
  # title, series and publisher fields listed below, followed by whatever is
  # already in the output hash under 'format_main_ssim'. Subfields whose code
  # is in Constants::EXCLUDE_FIELDS are left out.
  bib_tags = %w[
    100 110 111 130 210 222 242 243 245 246 247 260 264 440 490 700 710 711 800 810 811
  ]

  pieces = []
  record.each do |field|
    next unless bib_tags.include?(field.tag)

    kept_subfields = field.subfields.reject { |subfield| Constants::EXCLUDE_FIELDS.include?(subfield.code) }
    # A field reduced to nothing by the exclusion list contributes no text.
    next if kept_subfields.empty?

    pieces << kept_subfields.map(&:value).join(' ')
  end

  pieces.concat(Array(context.output_hash['format_main_ssim']))

  accumulator << pieces.join(' ') if pieces.any?
end

to_field 'vern_bib_search' do |record, accumulator|
  # Builds one space-joined string from 880 fields whose $6 value starts with
  # one of the author, title, series or publisher tags listed below (the part
  # of $6 before the first '-'). Subfields whose code is in
  # Constants::EXCLUDE_FIELDS are left out.
  bib_tags = %w[
    100 110 111 130 210 222 242 243 245 246 247 260 264 440 490 700 710 711 800 810 811
  ]

  pieces = []
  record.each do |field|
    next unless field.tag == '880'

    linkage = field['6']
    # Skip 880s with no $6, a $6 without a '-', or a $6 that names some other tag.
    next unless !linkage.nil? && linkage.include?('-') && bib_tags.include?(linkage.split('-')[0])

    kept_subfields = field.subfields.reject { |subfield| Constants::EXCLUDE_FIELDS.include?(subfield.code) }
    next if kept_subfields.empty?

    pieces << kept_subfields.map(&:value).join(' ')
  end

  accumulator << pieces.join(' ') if pieces.any?
end

## FOLIO specific fields

## QUESTIONS / ISSUES
Expand Down
2 changes: 1 addition & 1 deletion spec/integration/compare_against_solrmarc_docs_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
title_variant_display summary_display pub_display]
end
let(:ignored_fields) do
%w[all_search created last_updated format _version_ author_sort callnum_facet_hsim marcbib_xml marcxml mhld_display fund_facet building_facet collection] + copy_fields
%w[all_search created last_updated format _version_ author_sort author_title_search callnum_facet_hsim marcbib_xml marcxml mhld_display fund_facet building_facet collection] + copy_fields
end
let(:pending_fields) { %w[reverse_shelfkey shelfkey preferred_barcode item_display date_cataloged access_facet] }
subject(:result) { indexer.map_record(folio_record).transform_values { |v| v.sort } }
Expand Down
8 changes: 8 additions & 0 deletions spec/lib/traject/config/all_search_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,12 @@
# all_search should include 033a
end
end
describe 'bib_search' do
  let(:field) { 'bib_search' }

  it do
    # The 'allfields1' record must have a bib_search value matching each of
    # these patterns; every pattern is checked with its own expectation.
    [
      /Dharma Kumar/,
      /allfields should skip 00x/,
      %r{Journal/Periodical}
    ].each do |pattern|
      expect(select_by_id('allfields1')[field]).to include(pattern)
    end
  end
end
end
17 changes: 13 additions & 4 deletions spec/lib/traject/config/author_title_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,18 @@
# rubocop:disable Layout/LineLength
describe 'maps search field values from 100, 110, 111 with data from the 240 or 245' do
it 'maps the right data' do
expect(select_by_id('100240')[field]).to eq ['100a 100b 100c 100d 100f 100g 100j 100k 100l 100n 100p 100q 100t 100u 240a 240d 240f 240g 240h 240k 240l 240m 240n 240o 240p 240r 240s']
expect(select_by_id('110240')[field]).to eq ['110a 110b 110c 110d 110f 110g 110k 110l 110n 110p 110t 110u 240a 240d 240f 240g 240h 240k 240l 240m 240n 240o 240p 240r 240s']
expect(select_by_id('111240')[field]).to eq ['111a 111c 111d 111e 111f 111g 111j 111k 111l 111n 111p 111q 111t 111u 240a 240d 240f 240g 240h 240k 240l 240m 240n 240o 240p 240r 240s']
expect(select_by_id('100240')[field]).to eq [
'100a 100b 100c 100d 100f 100g 100j 100k 100l 100n 100p 100q 100t 100u 240a 240d 240f 240g 240h 240k 240l 240m 240n 240o 240p 240r 240s',
'100a 100b 100c 100d 100f 100g 100j 100k 100l 100n 100p 100q 100t 100u 100 all subfields + 240 all subfields'
]
expect(select_by_id('110240')[field]).to eq [
'110a 110b 110c 110d 110f 110g 110k 110l 110n 110p 110t 110u 240a 240d 240f 240g 240h 240k 240l 240m 240n 240o 240p 240r 240s',
'110a 110b 110c 110d 110f 110g 110k 110l 110n 110p 110t 110u 110 all subfields + 240 all subfields'
]
expect(select_by_id('111240')[field]).to eq [
'111a 111c 111d 111e 111f 111g 111j 111k 111l 111n 111p 111q 111t 111u 240a 240d 240f 240g 240h 240k 240l 240m 240n 240o 240p 240r 240s',
'111a 111c 111d 111e 111f 111g 111j 111k 111l 111n 111p 111q 111t 111u 111 all subfields + 240 all subfields'
]

expect(select_by_id('100no240')[field]).to eq ['100a 100b 100c 100d 100f 100g 100j 100k 100l 100n 100p 100q 100t 100u 245a']
expect(select_by_id('110no240')[field]).to eq ['110a 110b 110c 110d 110f 110g 110k 110l 110n 110p 110t 110u 245a']
Expand Down Expand Up @@ -62,7 +71,7 @@

it 'does something with a minimal 100 field to link to 880' do
expect(select_by_id('vern100no240')[field]).to eq ['100a 245a']
expect(select_by_id('vern100plain240')[field]).to eq ['100a 240a 240d 240f 240g 240h 240k 240l 240m 240n 240o 240p 240r 240s']
expect(select_by_id('vern100plain240')[field]).to eq ['100a 240a 240d 240f 240g 240h 240k 240l 240m 240n 240o 240p 240r 240s', '100a 245a']

expect(select_by_id('vern110vern240')[field]).to include 'vern110a vern110b vern110c vern110d vern110f vern110g vern110k vern110l vern110n vern110p vern110t vern110u vern240a vern240d vern240f vern240g vern240h vern240k vern240l vern240m vern240n vern240o vern240p vern240r vern240s'
expect(select_by_id('vern110vern245')[field]).to include 'vern110a vern110b vern110c vern110d vern110f vern110g vern110k vern110l vern110n vern110p vern110t vern110u vern245a'
Expand Down
Loading