Skip to content

Commit

Permalink
return accepted mime type
Browse files Browse the repository at this point in the history
  • Loading branch information
ArtificialOwl committed May 25, 2018
1 parent b3cf8d7 commit a5d4ee6
Showing 1 changed file with 49 additions and 3 deletions.
52 changes: 49 additions & 3 deletions lib/Service/TesseractService.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@

use Exception;
use OC\Files\View;
use OCA\Files_FullTextSearch\Exceptions\FileNotFoundException;
use OCP\AppFramework\IAppContainer;
use OCP\Files\File;
use OCP\Files\NotFoundException;
use thiagoalessio\TesseractOCR\TesseractOCR;

class TesseractService {
Expand All @@ -58,18 +58,64 @@ public function __construct(
}


/**
 * Decide whether a file is accepted, based on its mime type and, for
 * generic binary content, on its extension.
 *
 * The mime type is accepted when it begins with one of the listed image
 * types; this is a prefix match, so a value carrying extra trailing text
 * after the type still matches. When the mime type is exactly
 * 'application/octet-stream', the decision is delegated to
 * parsedExtension(). Anything else is rejected.
 *
 * @param string $mimeType mime type of the file
 * @param string $extension file extension, only consulted for
 *                          'application/octet-stream'
 *
 * @return bool true when the file is accepted, false otherwise
 */
public function parsedMimeType($mimeType, $extension) {
	$acceptedPrefixes = ['image/png', 'image/jpeg', 'image/tiff', 'image/vnd.djvu'];

	foreach ($acceptedPrefixes as $prefix) {
		// Position 0 means $mimeType starts with $prefix.
		if (strpos($mimeType, $prefix) !== 0) {
			continue;
		}

		return true;
	}

	// The mime type says nothing useful here, so let the extension decide.
	return ($mimeType === 'application/octet-stream')
		? $this->parsedExtension($extension)
		: false;
}


/**
 * Check whether a file extension is in the list of accepted extensions.
 *
 * The list is empty for now ('djvu' is left commented out), so every
 * extension is currently rejected.
 *
 * @param string $extension extension to look up, compared verbatim
 *
 * @return bool true when the extension is in the list, false otherwise
 */
private function parsedExtension($extension) {
	$ocrExtensions = [
		// 'djvu'
	];

	// Strict mode: without it in_array() compares loosely, so a non-string
	// or numeric-looking value could match an entry it is not identical to.
	return in_array($extension, $ocrExtensions, true);
}


/**
* @param $file
*
* @return string
* @throws FileNotFoundException
* @throws NotFoundException
*/
public function ocrFile(File $file) {

try {
$path = $this->getAbsolutePath($file);
} catch (Exception $e) {
throw new FileNotFoundException('file not found');
throw new NotFoundException('file not found');
}

$ocr = new TesseractOCR($path);
Expand Down

0 comments on commit a5d4ee6

Please sign in to comment.