From 3fc7a2ea9e6f0d814a7488b405d7473ec2e479f1 Mon Sep 17 00:00:00 2001 From: Jeremy Friesen Date: Wed, 29 Nov 2023 10:07:28 -0500 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=81=20Introduce=20.named=5Fderivatives?= =?UTF-8?q?=5Fand=5Fgenerators=5Ffilter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior to this commit, IIIF Print assumed that every file of a given mime-type would use all of the same generators. However, that is not necessarily the case. With this commit: - Updated documentation based on a read of the generated Yardoc - Added `DerivativeRodeoService.named_derivatives_and_generators_filter` - Added a `clone` of attributes The clone is in place to help ensure that as we apply the filter we don't accidentally delete the application's configuration for mime category and expected derivatives. For example, let's say I have the following nested hash: ```ruby nested_hash = { pdf: { thumbnail: "DerivativeRodeo::Generators::ThumbnailGenerator" }, image: { thumbnail: "DerivativeRodeo::Generators::ThumbnailGenerator", json: "DerivativeRodeo::Generators::WordCoordinatesGenerator", xml: "DerivativeRodeo::Generators::AltoGenerator", txt: "DerivativeRodeo::Generators::PlainTextGenerator" } } ``` If I then call the following: ```ruby nested_hash.fetch(:pdf).delete_if { |key, value| key == :thumbnail } ``` Then look at `nested_hash`, I will see the following: ```ruby pp nested_hash {:pdf=>{}, :image=> {:thumbnail=>"DerivativeRodeo::Generators::ThumbnailGenerator", :json=>"DerivativeRodeo::Generators::WordCoordinatesGenerator", :xml=>"DerivativeRodeo::Generators::AltoGenerator", :txt=>"DerivativeRodeo::Generators::PlainTextGenerator"}} ``` Why? Because we haven't changed objects. It's possible that Rails's class_attribute will do deep clones of hashes, but with this clone behavior we remove that possibility of a problem. 
Related to: - https://github.com/scientist-softserv/adventist-dl/pull/684 - https://github.com/scientist-softserv/adventist-dl/issues/676 --- .../iiif_print/derivative_rodeo_service.rb | 72 ++++++++++++++----- lib/iiif_print/errors.rb | 6 ++ .../split_pdfs/derivative_rodeo_splitter.rb | 2 +- 3 files changed, 63 insertions(+), 17 deletions(-) diff --git a/app/services/iiif_print/derivative_rodeo_service.rb b/app/services/iiif_print/derivative_rodeo_service.rb index 093982ff..ad7da9d2 100644 --- a/app/services/iiif_print/derivative_rodeo_service.rb +++ b/app/services/iiif_print/derivative_rodeo_service.rb @@ -18,25 +18,31 @@ class DerivativeRodeoService ## # @!group Class Attributes # - # @attr parent_work_identifier_property_name [String] the property we use to identify the unique - # identifier of the parent work as it went through the SpaceStone pre-process. + # @!attribute parent_work_identifier_property_name [r|w] + # @return [String] the property we use to identify the unique identifier of the parent work as + # it went through the SpaceStone pre-process. # - # TODO: The default of :aark_id is a quick hack for adventist. By exposing a configuration - # value, my hope is that this becomes easier to configure. + # @todo The default of :aark_id is a quick hack for adventist. By exposing a configuration + # value, my hope is that this becomes easier to configure. + # @api public class_attribute :parent_work_identifier_property_name, default: 'aark_id' ## - # @attr preprocessed_location_adapter_name [String] The name of a derivative rodeo storage location; - # this will must be a registered with the DerivativeRodeo::StorageLocations::BaseLocation. + # @!attribute preprocessed_location_adapter_name [r|w] + # @return [String] The name of a derivative rodeo storage location; this must be + # registered with the DerivativeRodeo::StorageLocations::BaseLocation. 
+ # @api public class_attribute :preprocessed_location_adapter_name, default: 's3' ## - # @attr named_derivatives_and_generators_by_type [Hash] the named - # derivative and it's associated generator. The "name" is important for Hyrax or IIIF - # Print implementations. The generator is one that exists in the DerivativeRodeo. + # @!attribute named_derivatives_and_generators_by_type [r|w] + # @return [Hash] the named derivative and its associated generator. + # The "name" is important for Hyrax or IIIF Print implementations. The generator is + # one that exists in the DerivativeRodeo. # - # TODO: Could be nice to have a registry for the DerivativeRodeo::Generators; but that's a - # tomorrow wish. + # @todo Could be nice to have a registry for the DerivativeRodeo::Generators; but that's a + # tomorrow wish. + # @api public class_attribute(:named_derivatives_and_generators_by_type, default: { pdf: { thumbnail: "DerivativeRodeo::Generators::ThumbnailGenerator" @@ -48,18 +54,46 @@ class DerivativeRodeoService txt: "DerivativeRodeo::Generators::PlainTextGenerator" } }) + + ## + # @!attribute named_derivatives_and_generators_filter [r|w] + # @return [#call] with three named parameters: :file_set, :filename, :named_derivatives_and_generators + # + # - :file_set is a {FileSet} + # - :filename is a String + # - :named_derivatives_and_generators is an entry from + # {.named_derivatives_and_generators_by_type} as pulled from + # {#named_derivatives_and_generators} + # + # The lambda is responsible for filtering any named generators that should or should not + # be run. 
It should return a data structure similar to the provided + # :named_derivatives_and_generators + # + # @see .named_derivatives_and_generators_by_type + # @see #named_derivatives_and_generators + # @api public + # rubocop:disable Lint/UnusedBlockArgument + class_attribute(:named_derivatives_and_generators_filter, + default: ->(file_set:, filename:, named_derivatives_and_generators:) { named_derivatives_and_generators }) + + # rubocop:enable Lint/UnusedBlockArgument # @!endgroup Class Attributes ## + ## # @see .named_derivatives_and_generators_by_type + # + # @return [Hash