From 51fadf7435a8445272a5159202f6ecdb25d26f5c Mon Sep 17 00:00:00 2001 From: Maxence Lange Date: Wed, 13 Feb 2019 08:47:13 -0100 Subject: [PATCH] adding settings to enable/disable pdf ocr --- js/admin.elements.js | 3 +++ js/admin.settings.js | 4 +++- lib/Service/ConfigService.php | 4 +++- lib/Service/TesseractService.php | 4 ++++ templates/settings.admin.php | 12 ++++++++++++ 5 files changed, 25 insertions(+), 2 deletions(-) diff --git a/js/admin.elements.js b/js/admin.elements.js index 9969895..aebfb1e 100644 --- a/js/admin.elements.js +++ b/js/admin.elements.js @@ -34,16 +34,19 @@ var fts_tesseract_elements = { tesseract_ocr: null, tesseract_psm: null, tesseract_lang: null, + tesseract_pdf: null, init: function () { fts_tesseract_elements.tesseract_div = $('#files_ocr-tesseract'); fts_tesseract_elements.tesseract_psm = $('#tesseract_psm'); fts_tesseract_elements.tesseract_lang = $('#tesseract_lang'); fts_tesseract_elements.tesseract_ocr = $('#tesseract_ocr'); + fts_tesseract_elements.tesseract_pdf = $('#tesseract_pdf'); fts_tesseract_elements.tesseract_ocr.on('change', fts_tesseract_elements.updateSettings); fts_tesseract_elements.tesseract_psm.on('change', fts_tesseract_elements.updateSettings); fts_tesseract_elements.tesseract_lang.on('change', fts_tesseract_elements.updateSettings); + fts_tesseract_elements.tesseract_pdf.on('change', fts_tesseract_elements.updateSettings); }, diff --git a/js/admin.settings.js b/js/admin.settings.js index d590817..911fe88 100644 --- a/js/admin.settings.js +++ b/js/admin.settings.js @@ -49,6 +49,7 @@ var fts_tesseract_settings = { fts_tesseract_elements.tesseract_ocr.prop('checked', (result.tesseract_enabled === '1')); fts_tesseract_elements.tesseract_psm.val(result.tesseract_psm); fts_tesseract_elements.tesseract_lang.val(result.tesseract_lang); + fts_tesseract_elements.tesseract_pdf.prop('checked', (result.tesseract_pdf === '1')); fts_admin_settings.tagSettingsAsSaved(fts_tesseract_elements.tesseract_div); @@ -69,7 +70,8 @@ var fts_tesseract_settings = { var data = { tesseract_enabled: (fts_tesseract_elements.tesseract_ocr.is(':checked')) ? 1 : 0, tesseract_psm: fts_tesseract_elements.tesseract_psm.val(), - tesseract_lang: fts_tesseract_elements.tesseract_lang.val() + tesseract_lang: fts_tesseract_elements.tesseract_lang.val(), + tesseract_pdf: (fts_tesseract_elements.tesseract_pdf.is(':checked')) ? 1 : 0 }; $.ajax({ diff --git a/lib/Service/ConfigService.php b/lib/Service/ConfigService.php index b706a4d..c55fd08 100644 --- a/lib/Service/ConfigService.php +++ b/lib/Service/ConfigService.php @@ -46,11 +46,13 @@ class ConfigService { const TESSERACT_ENABLED = 'tesseract_enabled'; const TESSERACT_PSM = 'tesseract_psm'; const TESSERACT_LANG = 'tesseract_lang'; + const TESSERACT_PDF = 'tesseract_pdf'; public $defaults = [ self::TESSERACT_ENABLED => '0', self::TESSERACT_PSM => '4', - self::TESSERACT_LANG => 'eng' + self::TESSERACT_LANG => 'eng', + self::TESSERACT_PDF => '0' ]; diff --git a/lib/Service/TesseractService.php b/lib/Service/TesseractService.php index 16383b7..bf026ea 100644 --- a/lib/Service/TesseractService.php +++ b/lib/Service/TesseractService.php @@ -210,6 +210,10 @@ private function ocrPdf(AFilesDocument $document, File $file): bool { return false; } + if ($this->configService->getAppValue(ConfigService::TESSERACT_PDF) !== '1') { + return true; + } + try { $path = $this->getAbsolutePath($file); $pdf = new Pdf($path); diff --git a/templates/settings.admin.php b/templates/settings.admin.php index 8ec74f7..2f1717f 100644 --- a/templates/settings.admin.php +++ b/templates/settings.admin.php @@ -75,6 +75,18 @@ + +
+
+ PDF +
+ enable the OCR of PDF (heavy on resource) +
+
+ +
+
+