From ae7d2a0bc3d68c7bdf5581f92376e3516fdb4a37 Mon Sep 17 00:00:00 2001 From: Ethan Bishop Date: Thu, 5 Dec 2024 14:31:43 +0000 Subject: [PATCH] Add build step with necessary patches for pdf2htmlEX - Patches ensure the build works, has the correct version number, and enables use of libopenjp2-7 to add support for JPEG-2000 images - The build itself pulls in the upstream tag directly for the version we are targeting --- CHANGELOG.md | 2 + README.md | 2 +- src/Pdf2Html/Dockerfile | 40 ++++++++++++++++++- .../pdf2htmlEX/patches/CMakeLists.patch | 8 ++++ .../pdf2htmlEX/patches/buildPoppler.patch | 9 +++++ .../pdf2htmlEX/patches/getBuildToolsApt.patch | 7 ++++ .../patches/getDevLibrariesApt.patch | 5 +++ .../pdf2htmlEX/patches/versionEnvs.patch | 9 +++++ 8 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 src/Pdf2Html/pdf2htmlEX/patches/CMakeLists.patch create mode 100644 src/Pdf2Html/pdf2htmlEX/patches/buildPoppler.patch create mode 100644 src/Pdf2Html/pdf2htmlEX/patches/getBuildToolsApt.patch create mode 100644 src/Pdf2Html/pdf2htmlEX/patches/getDevLibrariesApt.patch create mode 100644 src/Pdf2Html/pdf2htmlEX/patches/versionEnvs.patch diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d42cb3..7d939f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ ## develop * Increase `font-size-multiplier` to increase text rendering fidelity and get rid of sporadic empty spaces at the end of numbers. +* Patch and build `pdf2htmlEX` as part of this build process, linking `libopenjp2` alongside `libjpeg` for JPEG-2000 support. + * All patches are in this source tree, and are applied directly to the source of the upstream tag during build. ## 0.1.0 diff --git a/README.md b/README.md index 5035a12..ce73989 100644 --- a/README.md +++ b/README.md @@ -6,4 +6,4 @@ This project is a lightweight HTTP(S) interface to the [pdf2htmlex library](http Since pdf2htmlex is licensed under the GPL, this project is too (see the LICENSE.TXT file). 
-As you can see from the build process, pdf2htmlEX itself is not modified by this project and as such we have not repeated pdf2htmlEX's source code here; you can find it via the link above. \ No newline at end of file +As you can see from the build process, pdf2htmlEX itself is patched using the patches within this project (see [src/Pdf2Html/pdf2htmlEX/patches](src/Pdf2Html/pdf2htmlEX/patches)), which are applied to a clone of the upstream project tag we are targeting. As such we have not repeated pdf2htmlEX's source code here; you can find it via the link above. diff --git a/src/Pdf2Html/Dockerfile b/src/Pdf2Html/Dockerfile index 7052fff..5e46381 100644 --- a/src/Pdf2Html/Dockerfile +++ b/src/Pdf2Html/Dockerfile @@ -1,4 +1,40 @@ # syntax=docker/dockerfile:1 +FROM mcr.microsoft.com/dotnet/sdk:7.0 AS build-pdf2htmlex + +# Produces a patched pdf2htmlEX that links libopenjp2-7 (OpenJPEG) alongside libjpeg to get JPEG 2000 support. + +ENV PDF2HTMLEX_BRANCH=cfl1 +ENV UNATTENDED="--assume-yes" +ENV MAKE_PARALLEL="-j 4" +ENV PDF2HTMLEX_PREFIX=/usr/local +ENV DEBIAN_FRONTEND=noninteractive + +WORKDIR /source +RUN apt update && apt install -y git patch sudo +RUN git clone --depth=1 --branch v0.18.8.rc1 https://github.com/pdf2htmlEX/pdf2htmlEX +WORKDIR /source/pdf2htmlEX + +COPY ./pdf2htmlEX/patches ./patches +RUN patch ./buildScripts/versionEnvs ./patches/versionEnvs.patch +RUN patch ./buildScripts/buildPoppler ./patches/buildPoppler.patch +RUN patch ./buildScripts/getBuildToolsApt ./patches/getBuildToolsApt.patch +RUN patch ./buildScripts/getDevLibrariesApt ./patches/getDevLibrariesApt.patch +RUN patch ./pdf2htmlEX/CMakeLists.txt ./patches/CMakeLists.patch + +RUN ./buildScripts/versionEnvs +RUN ./buildScripts/reportEnvs +RUN ./buildScripts/getBuildToolsApt +RUN ./buildScripts/getDevLibrariesApt +RUN ./buildScripts/getPoppler +RUN ./buildScripts/buildPoppler +RUN ./buildScripts/getFontforge +RUN ./buildScripts/buildFontforge +RUN ./buildScripts/buildPdf2htmlEX +RUN ./buildScripts/installPdf2htmlEX +RUN git config 
user.name "CoreFiling" +RUN git config user.email "opensource@corefiling.com" +RUN ./buildScripts/createDebianPackage + FROM mcr.microsoft.com/dotnet/sdk:7.0 AS build WORKDIR /source COPY ./ . @@ -8,8 +44,8 @@ FROM mcr.microsoft.com/dotnet/aspnet:7.0 RUN apt update && apt install -y wget RUN wget http://archive.ubuntu.com/ubuntu/pool/main/libj/libjpeg-turbo/libjpeg-turbo8_2.0.3-0ubuntu1_amd64.deb RUN apt install -y ./libjpeg-turbo8_2.0.3-0ubuntu1_amd64.deb -RUN wget https://github.com/pdf2htmlEX/pdf2htmlEX/releases/download/v0.18.8.rc1/pdf2htmlEX-0.18.8.rc1-master-20200630-Ubuntu-bionic-x86_64.deb -RUN apt install -y ./pdf2htmlEX-0.18.8.rc1-master-20200630-Ubuntu-bionic-x86_64.deb +COPY --from=build-pdf2htmlex /source/pdf2htmlEX/imageBuild/*.deb /pdf2htmlEX/ +RUN apt install -y libjpeg62 libopenjp2-7 /pdf2htmlEX/pdf2htmlEX-0.18.8.rc1-cfl1-*-x86_64.deb WORKDIR /app COPY --from=build /app ./ diff --git a/src/Pdf2Html/pdf2htmlEX/patches/CMakeLists.patch b/src/Pdf2Html/pdf2htmlEX/patches/CMakeLists.patch new file mode 100644 index 0000000..c292227 --- /dev/null +++ b/src/Pdf2Html/pdf2htmlEX/patches/CMakeLists.patch @@ -0,0 +1,8 @@ +@@ -97,6 +97,7 @@ + ${LIB_INTL_LIBRARIES} + ${CAIRO_LIBRARIES} + -ljpeg ++ -lopenjp2 + -lpng + -lfontconfig + -lfreetype diff --git a/src/Pdf2Html/pdf2htmlEX/patches/buildPoppler.patch b/src/Pdf2Html/pdf2htmlEX/patches/buildPoppler.patch new file mode 100644 index 0000000..e9c212d --- /dev/null +++ b/src/Pdf2Html/pdf2htmlEX/patches/buildPoppler.patch @@ -0,0 +1,9 @@ +@@ -27,7 +27,7 @@ + -DENABLE_GOBJECT_INTROSPECTION=OFF \ + -DENABLE_GTK_DOC=OFF \ + -DENABLE_QT5=OFF \ +- -DENABLE_LIBOPENJPEG="none" \ ++ -DENABLE_LIBOPENJPEG="openjpeg2" \ + -DENABLE_CMS="none" \ + -DENABLE_DCTDECODER="libjpeg" \ + -DENABLE_LIBCURL=OFF \ diff --git a/src/Pdf2Html/pdf2htmlEX/patches/getBuildToolsApt.patch b/src/Pdf2Html/pdf2htmlEX/patches/getBuildToolsApt.patch new file mode 100644 index 0000000..68ecaf8 --- /dev/null +++ 
b/src/Pdf2Html/pdf2htmlEX/patches/getBuildToolsApt.patch @@ -0,0 +1,7 @@ +@@ -30,6 +30,6 @@ sudo apt-get $UNATTENDED install \ + dpkg \ + dpkg-dev \ + gettext \ +- openjdk-8-jre-headless \ ++ openjdk-11-jre-headless \ + jq diff --git a/src/Pdf2Html/pdf2htmlEX/patches/getDevLibrariesApt.patch b/src/Pdf2Html/pdf2htmlEX/patches/getDevLibrariesApt.patch new file mode 100644 index 0000000..387af0d --- /dev/null +++ b/src/Pdf2Html/pdf2htmlEX/patches/getDevLibrariesApt.patch @@ -0,0 +1,5 @@ +@@ -21,3 +21,4 @@ + libpng-dev \ + libjpeg-dev \ + libxml2-dev \ ++ libopenjp2-7-dev \ diff --git a/src/Pdf2Html/pdf2htmlEX/patches/versionEnvs.patch b/src/Pdf2Html/pdf2htmlEX/patches/versionEnvs.patch new file mode 100644 index 0000000..65ffd76 --- /dev/null +++ b/src/Pdf2Html/pdf2htmlEX/patches/versionEnvs.patch @@ -0,0 +1,9 @@ +@@ -6,7 +6,7 @@ + # see: https://poppler.freedesktop.org/releases.html + # current working: 0.89.0 + +-export PDF2HTMLEX_VERSION=0.18.8.rc2 ++export PDF2HTMLEX_VERSION=0.18.8.rc1 + + export POPPLER_VERSION=poppler-0.89.0 + #export POPPLER_VERSION=poppler-0.88.0