From baa7353ee97414d1a28b4d44a24e9f747e3c9f4a Mon Sep 17 00:00:00 2001 From: Steve Taylor Date: Wed, 14 Feb 2024 16:33:48 -0800 Subject: [PATCH] Remove GSB url filtering --- lib/folio/eresource_holdings_builder.rb | 3 +-- lib/marc_links.rb | 1 - lib/traject/config/folio_config.rb | 4 ++-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/folio/eresource_holdings_builder.rb b/lib/folio/eresource_holdings_builder.rb index 5f0809473..4979655c0 100644 --- a/lib/folio/eresource_holdings_builder.rb +++ b/lib/folio/eresource_holdings_builder.rb @@ -37,8 +37,7 @@ def build def fulltext_links electronic_location_fields.select do |field| - MarcLinks::Processor.new(field).link_is_fulltext? && - field.subfields.none? { |sf| sf.code == 'u' && MarcLinks::GSB_URL_REGEX.match?(sf.value) } + MarcLinks::Processor.new(field).link_is_fulltext? end end diff --git a/lib/marc_links.rb b/lib/marc_links.rb index 7460eeccc..d71301d07 100644 --- a/lib/marc_links.rb +++ b/lib/marc_links.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true module MarcLinks - GSB_URL_REGEX = %r{^https?://www.gsb.stanford.edu/jacksonlibrary/services/} PROXY_URL_REGEX = /stanford\.idm\.oclc\.org/ SFX_URL_REGEX = Regexp.union(%r{^https?://library.stanford.edu/sfx\?.+}, %r{^https?://caslon.stanford.edu:3210/sfxlcl3\?.+}) diff --git a/lib/traject/config/folio_config.rb b/lib/traject/config/folio_config.rb index d00bb7a42..944a754b5 100644 --- a/lib/traject/config/folio_config.rb +++ b/lib/traject/config/folio_config.rb @@ -913,7 +913,7 @@ def holdings(record, context) # # # URL Fields -# get full text urls from 856, then reject gsb forms +# get full text urls from 856 # get all 956 subfield u containing fulltext urls that aren't SFX to_field 'url_fulltext' do |record, accumulator| Traject::MarcExtractor.new('856u:956u', alternate_script: false).collect_matching_lines(record) do |field, spec, extractor| @@ -922,7 +922,7 @@ def holdings(record, context) accumulator.concat extractor.collect_subfields(field, spec) end - accumulator.reject! { |v| v.blank? || MarcLinks::GSB_URL_REGEX.match?(v) } + accumulator.reject! { |v| v.blank? } end # returns the URLs for supplementary information (rather than fulltext)