Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
* Switch base images to Ubuntu Noble (24.04 LTS).
* Patch and build `pdf2htmlEX` as part of this build process to use `libopenjp` instead of `libjpeg` for JPEG-2000 support.
* All patches are in this source tree, and are applied directly to the source of the upstream tag during build.
* Patch issue with non-breaking spaces in `pdf2htmlEX`.
* Convert complex SVG images to bitmaps.

## 0.1.0

Expand Down
2 changes: 1 addition & 1 deletion src/Pdf2Html/Controllers/RootController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public async Task<ActionResult> Post()
private async Task<(bool Success, ICollection<string> logs)> ConvertAsync(string inputFile, string outputFile)
{
using var p = new Process();
const string conversionOptions = "--embed-javascript=0 --process-outline=0 --printing=0 --bg-format=svg --decompose-ligature 1 --tounicode 1";
const string conversionOptions = "--embed-javascript=0 --process-outline=0 --printing=0 --bg-format=svg --svg-node-count-limit=100 --decompose-ligature 1 --tounicode 1";
p.StartInfo = new ProcessStartInfo
{
FileName = "pdf2htmlEX",
Expand Down
1 change: 1 addition & 0 deletions src/Pdf2Html/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ RUN patch ./buildScripts/versionEnvs ./patches/versionEnvs.patch
RUN patch ./buildScripts/buildPoppler ./patches/buildPoppler.patch
RUN patch ./buildScripts/getBuildToolsApt ./patches/getBuildToolsApt.patch
RUN patch ./buildScripts/getDevLibrariesApt ./patches/getDevLibrariesApt.patch
RUN patch ./pdf2htmlEX/src/util/unicode.h ./patches/unicode.h.patch
RUN patch ./pdf2htmlEX/CMakeLists.txt ./patches/CMakeLists.patch

RUN ./buildScripts/versionEnvs
Expand Down
19 changes: 19 additions & 0 deletions src/Pdf2Html/pdf2htmlEX/patches/unicode.h.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
@@ -39,9 +39,6 @@ namespace pdf2htmlEX {
* moz:
* p2h: [------------------] [-] [-] [-----------------]
*
- * Note: 0xA0 (no-break space) affects word-spacing; and if "white-space:pre" is specified,
- * \n and \r can break line, \t can shift text, so they are considered illegal.
- *
* Resources (retrieved at 2015-03-16)
* * webkit
* * Avoid querying the font cache for the zero-width space glyph ( https://bugs.webkit.org/show_bug.cgi?id=90673 )
@@ -58,7 +55,7 @@ namespace pdf2htmlEX {
*/
inline bool is_illegal_unicode(Unicode c)
{
- return (c < 0x20) || (c >= 0x7F && c <= 0xA0) || (c == 0xAD)
+ return (c < 0x20) || (c >= 0x7F && c < 0xA0) || (c == 0xAD)
|| (c >= 0x300 && c <= 0x36f) // DCRH Combining diacriticals
|| (c >= 0x1ab0 && c <= 0x1aff) // DCRH Combining diacriticals
|| (c >= 0x1dc0 && c <= 0x1dff) // DCRH Combining diacriticals
4 changes: 2 additions & 2 deletions tests/E2E.Tests/Resources/CS_cheat_sheet.html
Git LFS file not shown
Loading