diff --git a/CHANGELOG.md b/CHANGELOG.md index ad7d03a..b1df1f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ # Changelog +### 20.0.0 + +- compat nc20 +- cleaning code + + ### 1.4.2 - compat nc19 diff --git a/Makefile b/Makefile index 82d4355..b880280 100644 --- a/Makefile +++ b/Makefile @@ -8,9 +8,9 @@ sign_dir=$(build_dir)/sign package_name=$(app_name) cert_dir=$(HOME)/.nextcloud/certificates github_account=daita -branch=master +branch=stable20 codecov_token_dir=$(HOME)/.nextcloud/codecov_token -version+=1.4.2 +version+=20.0.0 all: appstore diff --git a/appinfo/app.php b/appinfo/app.php deleted file mode 100644 index acbc3a3..0000000 --- a/appinfo/app.php +++ /dev/null @@ -1,47 +0,0 @@ - - * @copyright 2018 - * @license GNU AGPL version 3 or any later version - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - */ - - -namespace OCA\Files_FullTextSearch_Tesseract\AppInfo; - - -use OCP\AppFramework\QueryException; - - -require_once __DIR__ . 
'/autoload.php'; - - -try { - $app = new Application(); - $app->registerFilesExtension(); -} catch (QueryException $e) { -} - - - diff --git a/appinfo/autoload.php b/appinfo/autoload.php deleted file mode 100644 index f01d106..0000000 --- a/appinfo/autoload.php +++ /dev/null @@ -1,39 +0,0 @@ - - * @copyright 2018 - * @license GNU AGPL version 3 or any later version - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - */ - - -namespace OCA\Files_FullTextSearch_Tesseract\AppInfo; - - -$composerDir = __DIR__ . '/../vendor/'; - -if (is_dir($composerDir) && file_exists($composerDir . 'autoload.php')) { - require_once $composerDir . 'autoload.php'; -} - diff --git a/appinfo/info.xml b/appinfo/info.xml index de7634e..b3d8c16 100644 --- a/appinfo/info.xml +++ b/appinfo/info.xml @@ -14,7 +14,7 @@ which is a wrapper for the command line program [Tesseract OCR](https://github.c Tesseract must be installed locally, and configured. ]]> - 1.4.2 + 20.0.0 agpl Maxence Lange Files_FullTextSearch_Tesseract @@ -27,7 +27,7 @@ Tesseract must be installed locally, and configured. 
https://github.com/daita/files_fulltextsearch_tesseract.git https://raw.githubusercontent.com/nextcloud/fulltextsearch/master/screenshots/0.3.0.png - + diff --git a/composer.json b/composer.json index 48a4574..a783860 100644 --- a/composer.json +++ b/composer.json @@ -3,14 +3,25 @@ "description": "Files - Fulltextsearch - Tesseract OCR", "minimum-stability": "stable", "license": "agpl", + "config": { + "optimize-autoloader": true, + "classmap-authoritative": true, + "autoloader-suffix": "Files_FullTextSearch_Tesseract" + }, "authors": [ { "name": "Maxence Lange", "email": "maxence@artificial-owl.com" } ], + "autoload": { + "psr-4": { + "OCA\\Files_FullTextSearch_Tesseract\\": "lib/" + } + }, "require": { - "thiagoalessio/tesseract_ocr": "2.4.0", - "spatie/pdf-to-image": "1.8.1" + "daita/my-small-php-tools": "dev-master", + "thiagoalessio/tesseract_ocr": "2.9.5", + "spatie/pdf-to-image": "2.1.0" } } diff --git a/composer.lock b/composer.lock index f9e5f7f..0196bc8 100644 --- a/composer.lock +++ b/composer.lock @@ -4,8 +4,49 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "fca498052306e6cd5c7ca896c0da12d4", + "content-hash": "6fe8bcf7bc80f5ae88c7cd494e55cba7", "packages": [ + { + "name": "daita/my-small-php-tools", + "version": "dev-master", + "source": { + "type": "git", + "url": "https://github.com/daita/my-small-php-tools.git", + "reference": "844b10da6abf50044ae9c40120cb5b927352bd95" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/daita/my-small-php-tools/zipball/844b10da6abf50044ae9c40120cb5b927352bd95", + "reference": "844b10da6abf50044ae9c40120cb5b927352bd95", + "shasum": "" + }, + "require": { + "php": "^7.0" + }, + "default-branch": true, + "type": "library", + "autoload": { + "psr-4": { + "daita\\MySmallPhpTools\\": "lib/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + 
"AGPL-3.0-or-later" + ], + "authors": [ + { + "name": "Maxence Lange", + "email": "maxence@artificial-owl.com" + } + ], + "description": "My small PHP Tools", + "support": { + "issues": "https://github.com/daita/my-small-php-tools/issues", + "source": "https://github.com/daita/my-small-php-tools/tree/master" + }, + "time": "2020-12-02T14:37:02+00:00" + }, { "name": "spatie/pdf-to-image", "version": "1.8.1", @@ -54,28 +95,31 @@ "pdf-to-image", "spatie" ], + "support": { + "issues": "https://github.com/spatie/pdf-to-image/issues", + "source": "https://github.com/spatie/pdf-to-image/tree/master" + }, "time": "2018-07-02T09:30:32+00:00" }, { "name": "thiagoalessio/tesseract_ocr", - "version": "2.4.0", + "version": "2.9.5", "source": { "type": "git", "url": "https://github.com/thiagoalessio/tesseract-ocr-for-php.git", - "reference": "a8b7a9ba7919e683b9915668f3d0d67ebba6d266" + "reference": "82cfd7879d6158be3098714f1d5c860f344a0e29" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/thiagoalessio/tesseract-ocr-for-php/zipball/a8b7a9ba7919e683b9915668f3d0d67ebba6d266", - "reference": "a8b7a9ba7919e683b9915668f3d0d67ebba6d266", + "url": "https://api.github.com/repos/thiagoalessio/tesseract-ocr-for-php/zipball/82cfd7879d6158be3098714f1d5c860f344a0e29", + "reference": "82cfd7879d6158be3098714f1d5c860f344a0e29", "shasum": "" }, "require": { - "php": "^5.6 || ^7.0" + "php": "^5.3 || ^7.0 || ^8.0" }, "require-dev": { - "codacy/coverage": "dev-master", - "phpunit/php-code-coverage": "^4.0.8" + "phpunit/php-code-coverage": "^2.2.4 || ^9.0.0" }, "type": "library", "autoload": { @@ -99,16 +143,23 @@ "Tesseract", "text recognition" ], - "time": "2018-05-11T14:22:47+00:00" + "support": { + "irc": "irc://irc.freenode.net/tesseract-ocr-for-php", + "issues": "https://github.com/thiagoalessio/tesseract-ocr-for-php/issues", + "source": "https://github.com/thiagoalessio/tesseract-ocr-for-php" + }, + "time": "2020-12-07T18:48:58+00:00" } ], "packages-dev": [], "aliases": [], 
"minimum-stability": "stable", - "stability-flags": [], + "stability-flags": { + "daita/my-small-php-tools": 20 + }, "prefer-stable": false, "prefer-lowest": false, "platform": [], "platform-dev": [], - "plugin-api-version": "1.1.0" + "plugin-api-version": "2.0.0" } diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php index b1ae292..dea5b18 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -31,19 +31,26 @@ namespace OCA\Files_FullTextSearch_Tesseract\AppInfo; +use OCA\Files_FullTextSearch_Tesseract\Listeners\GenericListener; use OCA\Files_FullTextSearch_Tesseract\Service\ConfigService; use OCA\Files_FullTextSearch_Tesseract\Service\TesseractService; use OCP\AppFramework\App; +use OCP\AppFramework\Bootstrap\IBootContext; +use OCP\AppFramework\Bootstrap\IBootstrap; +use OCP\AppFramework\Bootstrap\IRegistrationContext; use OCP\AppFramework\QueryException; use OCP\EventDispatcher\GenericEvent; +require_once __DIR__ . '/../../vendor/autoload.php'; + + /** * Class Application * * @package OCA\Files_FullTextSearch_Tesseract\AppInfo */ -class Application extends App { +class Application extends App implements IBootstrap { const APP_NAME = 'files_fulltextsearch_tesseract'; @@ -69,28 +76,17 @@ public function __construct(array $params = []) { /** - * + * @param IRegistrationContext $context + */ + public function register(IRegistrationContext $context): void { + $context->registerEventListener(GenericEvent::class, GenericListener::class); + } + + + /** + * @param IBootContext $context */ - public function registerFilesExtension() { - $eventDispatcher = \OC::$server->getEventDispatcher(); - $eventDispatcher->addListener( - '\OCA\Files_FullTextSearch::onGetConfig', - function(GenericEvent $e) { - $this->configService->onGetConfig($e); - } - ); - $eventDispatcher->addListener( - '\OCA\Files_FullTextSearch::onFileIndexing', - function(GenericEvent $e) { - $this->tesseractService->onFileIndexing($e); - } - ); - 
$eventDispatcher->addListener( - '\OCA\Files_FullTextSearch::onSearchRequest', - function(GenericEvent $e) { - $this->tesseractService->onSearchRequest($e); - } - ); + public function boot(IBootContext $context): void { } } diff --git a/lib/Controller/SettingsController.php b/lib/Controller/SettingsController.php index ecad95f..2204e57 100644 --- a/lib/Controller/SettingsController.php +++ b/lib/Controller/SettingsController.php @@ -33,7 +33,6 @@ use OCA\Files_FullTextSearch_Tesseract\AppInfo\Application; use OCA\Files_FullTextSearch_Tesseract\Service\ConfigService; -use OCA\Files_FullTextSearch_Tesseract\Service\MiscService; use OCP\AppFramework\Controller; use OCP\AppFramework\Http; use OCP\AppFramework\Http\DataResponse; @@ -51,23 +50,16 @@ class SettingsController extends Controller { /** @var ConfigService */ private $configService; - /** @var MiscService */ - private $miscService; - /** * SettingsController constructor. * * @param IRequest $request * @param ConfigService $configService - * @param MiscService $miscService */ - public function __construct( - IRequest $request, ConfigService $configService, MiscService $miscService - ) { + public function __construct(IRequest $request, ConfigService $configService) { parent::__construct(Application::APP_NAME, $request); $this->configService = $configService; - $this->miscService = $miscService; } diff --git a/lib/Listeners/GenericListener.php b/lib/Listeners/GenericListener.php new file mode 100644 index 0000000..6b63ae0 --- /dev/null +++ b/lib/Listeners/GenericListener.php @@ -0,0 +1,94 @@ + + * @copyright 2020 + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see https://www.gnu.org/licenses/. + * + */ + + +namespace OCA\Files_FullTextSearch_Tesseract\Listeners; + + +use OCA\Files_FullTextSearch_Tesseract\Service\ConfigService; +use OCA\Files_FullTextSearch_Tesseract\Service\TesseractService; +use OCP\EventDispatcher\Event; +use OCP\EventDispatcher\GenericEvent; +use OCP\EventDispatcher\IEventListener; + + +/** + * Class GenericListener + * + * @package OCA\Files_FullTextSearch_Tesseract\Listeners + */ +class GenericListener implements IEventListener { + + + /** @var ConfigService */ + private $configService; + + /** @var TesseractService */ + private $tesseractService; + + + public function __construct(ConfigService $configService, TesseractService $tesseractService) { + $this->configService = $configService; + $this->tesseractService = $tesseractService; + } + + + /** + * @param Event $event + */ + public function handle(Event $event): void { + if (!($event instanceof GenericEvent)) { + return; + } + + $subject = $event->getSubject(); + if (substr($subject, 0, 21) !== 'Files_FullTextSearch.') { + return; + } + + $action = substr($subject, 21); + + switch ($action) { + case 'onGetConfig': + $this->configService->onGetConfig($event); + break; + + case 'onFileIndexing': + $this->tesseractService->onFileIndexing($event); + break; + + case 'onSearchRequest': + $this->tesseractService->onSearchRequest($event); + break; + } + + } + +} diff --git a/lib/Service/ConfigService.php b/lib/Service/ConfigService.php index 9e826dc..821017d 100644 --- a/lib/Service/ConfigService.php +++ b/lib/Service/ConfigService.php @@ -62,24 +62,14 @@ class ConfigService { /** @var IConfig */ private $config; - /** @var string */ - 
private $userId; - - /** @var MiscService */ - private $miscService; - /** * ConfigService constructor. * * @param IConfig $config - * @param string $userId - * @param MiscService $miscService */ - public function __construct(IConfig $config, $userId, MiscService $miscService) { + public function __construct(IConfig $config) { $this->config = $config; - $this->userId = $userId; - $this->miscService = $miscService; } diff --git a/lib/Service/MiscService.php b/lib/Service/MiscService.php deleted file mode 100644 index 8a638d8..0000000 --- a/lib/Service/MiscService.php +++ /dev/null @@ -1,73 +0,0 @@ - - * @copyright 2018 - * @license GNU AGPL version 3 or any later version - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - */ - - -namespace OCA\Files_FullTextSearch_Tesseract\Service; - - -use OCA\Files_FullTextSearch_Tesseract\AppInfo\Application; -use OCP\ILogger; - - -/** - * Class MiscService - * - * @package OCA\Files_FullTextSearch_Tesseract\Service - */ -class MiscService { - - /** @var ILogger */ - private $logger; - - - /** - * MiscService constructor. 
- * - * @param ILogger $logger - */ - public function __construct(ILogger $logger) { - $this->logger = $logger; - } - - - /** - * @param string $message - * @param int $level - */ - public function log(string $message, int $level = 2) { - $data = array( - 'app' => Application::APP_NAME, - 'level' => $level - ); - - $this->logger->log($level, $message, $data); - } - -} - diff --git a/lib/Service/TesseractService.php b/lib/Service/TesseractService.php index 5ce89a6..def6012 100644 --- a/lib/Service/TesseractService.php +++ b/lib/Service/TesseractService.php @@ -31,6 +31,7 @@ namespace OCA\Files_FullTextSearch_Tesseract\Service; +use daita\MySmallPhpTools\Traits\Nextcloud\nc20\TNC20Logger; use Exception; use OC\Files\View; use OCP\EventDispatcher\GenericEvent; @@ -43,6 +44,7 @@ use Spatie\PdfToImage\Exceptions\PageDoesNotExist; use Spatie\PdfToImage\Pdf; use thiagoalessio\TesseractOCR\TesseractOCR; +use thiagoalessio\TesseractOCR\TesseractOcrException; use Throwable; @@ -54,22 +56,22 @@ class TesseractService { + use TNC20Logger; + + /** @var ConfigService */ private $configService; - /** @var MiscService */ - private $miscService; - /** * TesseractService constructor. * * @param ConfigService $configService - * @param MiscService $miscService */ - public function __construct(ConfigService $configService, MiscService $miscService) { + public function __construct(ConfigService $configService) { $this->configService = $configService; - $this->miscService = $miscService; + + $this->setup('app', 'files_fulltextsearch_tesseract'); } @@ -147,10 +149,14 @@ private function extractContentUsingTesseractOCR(AFilesDocument &$document, File return; } - $this->miscService->log( - 'extracting content using TesseractOCR for #' . $document->getId() . ' - ' - . $document->getPath() . '; extension: ' - . 
$extension, 0 + $this->debug( + 'extracting content using TesseractOCR', + [ + 'documentId' => $document->getId(), + 'path' => $document->getPath(), + 'mime' => $document->getMimetype(), + 'extension' => $extension + ] ); // TODO: How to set options so that the index can be reset if admin settings are changed @@ -178,11 +184,10 @@ private function extractContentUsingTesseractOCR(AFilesDocument &$document, File * @throws NotFoundException */ private function ocrFile(File $file): string { - try { $path = $this->getAbsolutePath($file); } catch (Exception $e) { - $this->miscService->log('Exception while trying to obtain absolute path: ' . $e->getMessage(), 1); + $this->exception($e, self::$NOTICE); throw new NotFoundException(); } @@ -196,21 +201,25 @@ private function ocrFile(File $file): string { * @return string */ private function ocrFileFromPath(string $path): string { - $this->miscService->log('generating the TesseractOCR wrapper for ' . $path, 0); + $this->debug('generating the TesseractOCR wrapper', ['path' => $path]); $ocr = new TesseractOCR($path); $ocr->psm($this->configService->getAppValue(ConfigService::TESSERACT_PSM)); $lang = explode(',', $this->configService->getAppValue(ConfigService::TESSERACT_LANG)); call_user_func_array([$ocr, 'lang'], array_map('trim', $lang)); - $this->miscService->log('running the OCR command: ' . 
$ocr->command, 0); + $this->debug('running the OCR command', ['command' => $ocr->command]); if ($this->configService->getLogLevel() > 0) { $ocr->command .= ' 2> /dev/null'; } - $result = $ocr->run(); - - $this->miscService->log('OCR command ran smoothly', 0); + try { + $result = $ocr->run(); + $this->debug('OCR command ran smoothly'); + } catch (Exception $e) { + $this->exception($e, self::$NOTICE, ['path' => $path, 'cmd' => $ocr->command, 'lang' => $lang]); + $result = ''; + } return $result; } @@ -232,41 +241,38 @@ private function ocrPdf(AFilesDocument $document, File $file): bool { return true; } - $this->miscService->log("looks like we're working on a PDF file", 0); + $this->debug('looks like we\'re working on a PDF file'); try { $path = $this->getAbsolutePath($file); - $this->miscService->log('Absolute path: ' . $path, 0); + $this->debug('Absolute path', ['path' => $path]); $pdf = new Pdf($path); } catch (Exception $e) { - $this->miscService->log(get_class($e) . ' while ocr pdf: ' . $e->getMessage(), 1); + $this->exception($e, self::$NOTICE, ['document' => $document]); throw new NotFoundException(); } $content = ''; $pages = $pdf->getNumberOfPages(); - $this->miscService->log('PDF contains ' . $pages . ' page(s)', 0); + $this->debug('PDF contains ' . $pages . ' page(s)'); $limit = (int)$this->configService->getAppValue(ConfigService::TESSERACT_PDF_LIMIT); $pages = ($limit > 0 && $pages > $limit) ? $limit : $pages; - $this->miscService->log('App will now ocr ' . $pages . ' page(s)', 0); + $this->debug('App will now ocr ' . $pages . ' page(s)'); for ($i = 1; $i <= $pages; $i++) { - $this->miscService->log('Creating a temp image file for page #' . $i, 0); + $this->debug('Creating a temp image file for page #' . $i); $tmpFile = tmpfile(); $tmpPath = stream_get_meta_data($tmpFile)['uri']; - $this->miscService->log('temp image file: ' . $tmpPath . ' for page #' . $i, 0); + $this->debug('temp image file: ' . $tmpPath . ' for page #' . 
$i); try { - $this->miscService->log('opening the PDF at the page #' . $i, 0); + $this->debug('opening the PDF at the page #' . $i); $pdf->setPage($i); - $this->miscService->log( - 'saving the current page as image in ' . $tmpPath - . ' before OCRing the generated temp file', 0 - ); + $this->debug('saving the current page as image', ['tmpPath' => $tmpPath]); $pdf->saveImage($tmpPath); $content .= $this->ocrFileFromPath($tmpPath); @@ -276,9 +282,7 @@ private function ocrPdf(AFilesDocument $document, File $file): bool { fclose($tmpFile); } - $this->miscService->log( - 'content of the PDF was fully extracted, saving the data into the IndexDocument', 0 - ); + $this->debug('Saving the data into the IndexDocument'); $document->addPart('ocr', $content); return true; diff --git a/lib/Settings/Admin.php b/lib/Settings/Admin.php index d9c2daa..d0d5df2 100644 --- a/lib/Settings/Admin.php +++ b/lib/Settings/Admin.php @@ -34,7 +34,6 @@ use Exception; use OCA\Files_FullTextSearch_Tesseract\AppInfo\Application; use OCA\Files_FullTextSearch_Tesseract\Service\ConfigService; -use OCA\Files_FullTextSearch_Tesseract\Service\MiscService; use OCP\AppFramework\Http\TemplateResponse; use OCP\IL10N; use OCP\IURLGenerator; @@ -58,24 +57,16 @@ class Admin implements ISettings { /** @var ConfigService */ private $configService; - /** @var MiscService */ - private $miscService; - /** * @param IL10N $l10n * @param IURLGenerator $urlGenerator * @param ConfigService $configService - * @param MiscService $miscService */ - public function __construct( - IL10N $l10n, IURLGenerator $urlGenerator, ConfigService $configService, - MiscService $miscService - ) { + public function __construct(IL10N $l10n, IURLGenerator $urlGenerator, ConfigService $configService) { $this->l10n = $l10n; $this->urlGenerator = $urlGenerator; $this->configService = $configService; - $this->miscService = $miscService; }