diff --git a/lib/iiif_print/split_pdfs/base_splitter.rb b/lib/iiif_print/split_pdfs/base_splitter.rb index 42524149..4b859d8d 100644 --- a/lib/iiif_print/split_pdfs/base_splitter.rb +++ b/lib/iiif_print/split_pdfs/base_splitter.rb @@ -21,6 +21,8 @@ class BaseSplitter # # @note We're including the ** args to provide method conformity; other services require # additional information (such as the FileSet) + # + # @see IiifPrint::SplitPdfs::DerivativeRodeoSplitter def self.call(path, **) new(path).to_a end diff --git a/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb b/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb index e019873d..1f9b9fd9 100644 --- a/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +++ b/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb @@ -14,12 +14,21 @@ class DerivativeRodeoSplitter # @param filename [String] the local path to the PDFDerivativeServicele # @param file_set [FileSet] file set containing the PDF file to split # - # @return [Array] paths to images split from each page of PDF file + # @return [Array] paths to images split from each page of PDF file + # + # @see IiifPrint::SplitPdfs::BaseSplitter def self.call(filename, file_set:) new(filename, file_set: file_set).split_files end - def initialize(filename, file_set:, output_tmp_dir: Dir.tmpdir) + ## + # @param filename [String] path to the original file. Note that we use {#filename} to + # derivate {#input_uri} + # @param file_set [FileSet] the container for the original file and its derivatives. + # + # @param output_tmp_dir [String] where we will be writing things. In using `Dir.mktmpdir` + # we're creating a sudirectory on `Dir.tmpdir` + def initialize(filename, file_set:, output_tmp_dir: Dir.mktmpdir) @filename = filename @file_set = file_set @@ -68,7 +77,6 @@ def initialize(filename, file_set:, output_tmp_dir: Dir.tmpdir) # @return [String] # # @see https://github.com/scientist-softserv/space_stone-serverless/blob/7f46dd5b218381739cd1c771183f95408a4e0752/awslambda/handler.rb#L58-L63 - # rubocop:disable Metrics/AbcSize # rubocop:disable Metrics/MethodLength def preprocessed_location_template return @preprocessed_location_template if defined?(@preprocessed_location_template) @@ -83,21 +91,7 @@ def preprocessed_location_template message = "#{self.class}##{__method__} did not find #{derivative_rodeo_candidate.inspect} to exist. " \ "Moving on to check the #{file_set.class}#import_url of #{file_set.import_url.inspect}" Rails.logger.warn(message) - # If the DerivativeRodeo doesn't know about the adapter for the import, this will raise - # an error. - # - # Since the file was not pre-processed, we're likely now going to be downloading that - # file and running all of the derivatives locally. - if rodeo_conformant_uri_exists?(file_set.import_url) - message = "#{self.class}##{__method__} found #{file_set.class}#import_url of #{file_set.import_url.inspect} to exist. " \ - "Perhaps there was a problem in SpaceStone downloading the file? Things should be okay." - Rails.logger.info(message) - file_set.import_url - else - message = "#{self.class}##{__method__} expected #{file_set.import_url.inspect} as specified " \ - "by #{file_set.class}#import_url to exist at remote location, but it did not." - raise MissingFileError, message - end + handle_original_file_not_in_derivative_rodeo else message = "#{self.class}##{__method__} could not find an existing file at #{derivative_rodeo_candidate} " \ "nor a remote_url for #{file_set.class} ID=#{file_set.id}. Returning `nil' as we have no possible preprocess. " \ @@ -106,9 +100,36 @@ def preprocessed_location_template nil end end - # rubocop:enable Metrics/AbcSize # rubocop:enable Metrics/MethodLength + ## + # @api private + # + # When the file does not exist in the pre-processed location (e.g. "SpaceStone") we need to + # ensure that we have something locally. We copy the {FileSet#import_url} to the {#input_uri} + # location. + # + # @return [String] should be the {#input_uri} + # @raise [DerivativeRodeo::Errors::FileMissingError] when the input_uri does not exist + def handle_original_file_not_in_derivative_rodeo + # A quick short-circuit. Don't attempt to copy. Likely already covered by the DerivativeRodeo::Generators::CopyGenerator + return input_uri if rodeo_conformant_uri_exists?(input_uri) + + message = "#{self.class}##{__method__} found #{file_set.class}#import_url of #{file_set.import_url.inspect} to exist. " \ + "Perhaps there was a problem in SpaceStone downloading the file? " \ + "Regardless, we'll use DerivativeRodeo::Generators::CopyGenerator to ensure #{input_uri.inspect} exists. " \ + "However, we'll almost certainly be generating child pages locally." + Rails.logger.info(message) + + # This ensures that we have a copy of the file_set.import_uri at the input_uri location; + # we likely have this. + DerivativeRodeo::Generators::CopyGenerator.new( + input_uris: [file_set.import_url], + output_location_template: input_uri + ).generated_uris.first + end + # private :handle_original_file_not_in_derivative_rodeo + def rodeo_conformant_uri_exists?(uri) DerivativeRodeo::StorageLocations::BaseLocation.from_uri(uri).exist? end diff --git a/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb b/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb index 5822bf5b..548044cc 100644 --- a/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb +++ b/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb @@ -18,7 +18,7 @@ it { is_expected.to respond_to(:call) } end - subject(:instance) { described_class.new(filename, file_set: file_set) } + subject(:instance) { described_class.new(filename, file_set: file_set, output_tmp_dir: Dir.tmpdir) } let(:generator) { double(DerivativeRodeo::Generators::PdfSplitGenerator, generated_files: []) } before do @@ -46,22 +46,25 @@ end end - context 'when the s3 file does not exist in the rodeo and the file sets import url exists' do + context 'when the s3 file does not exist in the rodeo and we have the local file' do it 'is the import_url' do + expect_any_instance_of(DerivativeRodeo::Generators::CopyGenerator).not_to receive(:generated_uris) file_set.import_url = import_url expect(instance).to receive(:rodeo_conformant_uri_exists?).with(derivative_rodeo_preprocessed_file).and_return(false) - expect(instance).to receive(:rodeo_conformant_uri_exists?).with(file_set.import_url).and_return(true) - expect(subject).to eq(file_set.import_url) + expect(instance).to receive(:rodeo_conformant_uri_exists?).with(instance.input_uri).and_return(true) + expect(subject).to eq(instance.input_uri) end end - context 'when the s3 file does not exist and the given import url does NOT exist' do - it 'will raise a IiifPrint::MissingFileError' do + context 'when the s3 file does not exist and we do not have the input URI nor the given import url does NOT exist' do + let(:generator) { double(DerivativeRodeo::Generators::CopyGenerator, generated_uris: ["file:///generated/uri"]) } + it 'will invoke the DerivativeRodeo::Generators::CopyGenerator to bring the file locally' do + allow(DerivativeRodeo::Generators::CopyGenerator).to receive(:new).and_return(generator) file_set.import_url = import_url expect(instance).to receive(:rodeo_conformant_uri_exists?).with(derivative_rodeo_preprocessed_file).and_return(false) - expect(instance).to receive(:rodeo_conformant_uri_exists?).with(file_set.import_url).and_return(false) + expect(instance).to receive(:rodeo_conformant_uri_exists?).with(instance.input_uri).and_return(false) - expect { subject }.to raise_error(IiifPrint::MissingFileError) + expect(subject).to eq(generator.generated_uris.first) end end