Skip to content

Commit f83cc25

Browse files
authored
Add PDF page limit for IiifPrint splitter, PdfPage as pdf_split_child_model for BookContribution (#545)
1 parent 90202b7 commit f83cc25

File tree

2 files changed

+42
-1
lines changed

2 files changed

+42
-1
lines changed

app/models/book_contribution.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ class BookContribution < ActiveFedora::Base
1414
# Adds behaviors for DataCite DOIs via hyrax-doi plugin.
1515
include Hyrax::DOI::DataCiteDOIBehavior
1616
include IiifPrint.model_configuration(
17-
pdf_split_child_model: self
17+
pdf_split_child_model: PdfPage
1818
)
1919

2020
self.indexer = BookContributionIndexer

config/initializers/iiif_print.rb

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
# Override IiifPrint::Configuration to allow a config item to limit splitting PDFs by page count (IiifPrint 1.0.0 8fdf56e)
2+
IiifPrint::Configuration.class_eval do
3+
attr_writer :split_pdf_page_limit
4+
# rubocop:disable Metrics/MethodLength
5+
# @api private
6+
# @note Page-count threshold used when deciding whether a PDF is split into child works; defaults to 100.
7+
# @todo To move this to an `@api public` state, we need to consider what a proper configuration looks like.
8+
def split_pdf_page_limit
  # Reuse the configured limit when one has been set (any truthy value).
  return @split_pdf_page_limit if @split_pdf_page_limit

  # Nothing configured yet: remember and return the default of 100 pages.
  @split_pdf_page_limit = 100
end
11+
end
12+
113
IiifPrint.config do |config|
214
# NOTE: WorkTypes and models are used synonymously here.
315
# Add models to be excluded from search so the user
@@ -54,6 +66,9 @@
5466
add_info: {},
5567
collection: {}
5668
}
69+
70+
config.split_pdf_page_limit = 100
71+
5772
end
5873

5974
# Override Hyrax::WorkShowPresenter.authorized_item_ids to disallow "Pdf Page" work type from showing as members
@@ -71,3 +86,29 @@ def authorized_item_ids
7186
end
7287
end
7388

89+
# Override IiifPrint::SplitPdfs::ChildWorkCreationFromPdfService (IiifPrint 1.0.0 8fdf56e)
90+
# IiifPrint rendering does not do well when there are many pages
91+
# So enforce a page limit over which IiifPrint will not split a PDF
92+
# into childworks with images for each page
93+
# Duplicate pagecount from IiifPrint::SplitPdfs::BaseSplitter
94+
IiifPrint::SplitPdfs::ChildWorkCreationFromPdfService.class_eval do
95+
96+
# Matches the "Pages:" line of `pdfinfo` output; capture group 1 holds the page count.
# NOTE(review): a constant assigned inside a `class_eval do ... end` block is scoped
# lexically, so this presumably defines a top-level constant rather than one on the
# service class -- confirm that is acceptable.
PAGE_COUNT_REGEXP = %r{^Pages: +(\d+)$}.freeze
97+
98+
# Count the pages of a PDF by running `pdfinfo` and reading its "Pages:" line.
#
# When the count cannot be determined (pdfinfo cannot be executed, fails, or
# prints no "Pages:" line) the result is one more than
# IiifPrint.config.split_pdf_page_limit, i.e. a value over the limit, so the
# file is not selected for splitting.
#
# @param pdfpath [String, #to_s] filesystem path of the PDF to inspect
# @return [Integer] page count reported by pdfinfo, or limit + 1 when unknown
def self.pagecount(pdfpath)
  fallback = IiifPrint.config.split_pdf_page_limit + 1

  # Pass the command as an argument vector so no shell is involved: paths with
  # spaces or shell metacharacters are handed to pdfinfo verbatim. capture3
  # also drains stderr, so a chatty pdfinfo cannot block on a full pipe.
  stdout, _stderr, _status = Open3.capture3('pdfinfo', pdfpath.to_s)

  match = PAGE_COUNT_REGEXP.match(stdout)
  match ? match[1].to_i : fallback
rescue SystemCallError
  # pdfinfo could not be started (e.g. it is not installed): treat the page
  # count as unknown rather than aborting the whole selection.
  fallback
end
109+
110+
# Select the paths that should be split into per-page child works.
#
# A path is kept when it ends in ".pdf" or ".PDF" and its page count does not
# exceed IiifPrint.config.split_pdf_page_limit. A PDF with exactly the limit
# number of pages is still split; only PDFs over the limit are skipped, which
# matches the limit + 1 "unknown" value returned by pagecount.
#
# @param paths [Array<String>] candidate file paths
# @return [Array<String>] the PDF paths at or under the page limit
def self.pdfs_only_for(paths)
  limit = IiifPrint.config.split_pdf_page_limit
  # The extension test comes first so pagecount (which shells out to pdfinfo)
  # only runs for PDF paths.
  paths.select { |path| path.end_with?('.pdf', '.PDF') && pagecount(path) <= limit }
end
113+
end
114+

0 commit comments

Comments
 (0)