diff --git a/jochre_core/pom.xml b/jochre_core/pom.xml index 2b408db..9bbe9ac 100644 --- a/jochre_core/pom.xml +++ b/jochre_core/pom.xml @@ -6,7 +6,7 @@ com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3 ../jochre_parent diff --git a/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java b/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java index 8e49615..620f7b5 100644 --- a/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java +++ b/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java @@ -128,6 +128,8 @@ import com.typesafe.config.Config; import com.typesafe.config.ConfigFactory; +import javax.imageio.ImageIO; + /** * Class encapsulating the various top-level Jochre commands and command-line * interface. @@ -1091,10 +1093,10 @@ public void doCommandAnalyse(File sourceFile, MostLikelyWordChooser wordChooser, || sourceFile.getName().toLowerCase().endsWith(".jpg") || sourceFile.getName().toLowerCase().endsWith(".jpeg") || sourceFile.getName().toLowerCase().endsWith(".gif") || sourceFile.getName().toLowerCase().endsWith(".tif") || sourceFile.getName().toLowerCase().endsWith(".tiff")) { - ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator); + ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(sourceFile, documentGenerator); extractor.extractDocument(); } else if (sourceFile.isDirectory()) { - ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator); + ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(sourceFile, documentGenerator); extractor.extractDocument(); } else { throw new RuntimeException("Unrecognised file extension"); @@ -1337,7 +1339,7 @@ public void doCommandSegment(String filename, String userFriendlyName, File outp pdfDocumentProcessor.process(); } else if (filename.toLowerCase().endsWith(".png") || filename.toLowerCase().endsWith(".jpg") || filename.toLowerCase().endsWith(".jpeg") || filename.toLowerCase().endsWith(".gif")) { - ImageDocumentExtractor extractor = new ImageDocumentExtractor(file, jochreDocumentGenerator); + ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(file, jochreDocumentGenerator); extractor.extractDocument(); } else { throw new RuntimeException("Unrecognised file extension"); @@ -1534,13 +1536,18 @@ public void imageFileToAlto4(File sourceFile, Writer writer) throws IOException } public void imageInputStreamToAlto4(InputStream inputStream, String fileName, Writer writer) throws IOException { + BufferedImage image = ImageIO.read(inputStream); + this.imageToAlto4(image, fileName, writer); + } + + public void imageToAlto4(BufferedImage image, String fileName, Writer writer) throws IOException { final Set myPages = new HashSet<>(); MostLikelyWordChooser wordChooser = new MostLikelyWordChooser(jochreSession); List documentObservers = new ArrayList<>(); AltoXMLExporter altoXMLExporter = new AltoXMLExporter(writer, 4); documentObservers.add(altoXMLExporter); JochreDocumentGenerator documentGenerator = this.getDocumentGenerator(fileName, wordChooser, myPages, documentObservers, new ArrayList<>()); - InputStreamDocumentExtractor documentExtractor = new InputStreamDocumentExtractor(inputStream, fileName, documentGenerator); + ImageDocumentExtractor documentExtractor = new ImageDocumentExtractor(image, fileName, documentGenerator); documentExtractor.extractDocument(); } } diff --git a/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java index 36c5872..644b07c 100644 --- a/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java +++ b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java @@ -18,22 +18,18 @@ ////////////////////////////////////////////////////////////////////////////// package com.joliciel.jochre.doc; -import java.awt.image.BufferedImage; -import java.io.File; -import java.io.FilenameFilter; - -import javax.imageio.ImageIO; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import com.joliciel.talismane.utils.Monitorable; import com.joliciel.talismane.utils.MultiTaskProgressMonitor; import com.joliciel.talismane.utils.ProgressMonitor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.imageio.ImageIO; +import java.awt.image.BufferedImage; +import java.io.InputStream; /** - * An interface for extracting a JochreDocument from an image File (jpeg, gif or - * png). + * An interface for extracting a JochreDocument from an image input stream. * * @author Assaf Urieli * @@ -42,12 +38,14 @@ public class ImageDocumentExtractor implements Monitorable, Runnable { private static final Logger LOG = LoggerFactory.getLogger(ImageDocumentExtractor.class); private final SourceFileProcessor documentProcessor; private MultiTaskProgressMonitor currentMonitor; - private final File imageFile; + private final BufferedImage image; + private final String fileName; private int pageNumber = 1; - public ImageDocumentExtractor(File imageFile, SourceFileProcessor documentProcessor) { + public ImageDocumentExtractor(BufferedImage image, String fileName, SourceFileProcessor documentProcessor) { this.documentProcessor = documentProcessor; - this.imageFile = imageFile; + this.image = image; + this.fileName = fileName; } @Override @@ -56,46 +54,31 @@ public void run() { } public JochreDocument extractDocument() { - LOG.debug("ImageDocumentExtractorImpl.extractDocument"); + LOG.debug("InputStreamDocumentExtractor.extractDocument"); try { - File[] files = new File[1]; - - if (imageFile.isDirectory()) { - files = imageFile.listFiles(new FilenameFilter() { - - @Override - public boolean accept(File dir, String name) { - return (name.toLowerCase().endsWith(".png") || name.toLowerCase().endsWith(".jpg") || name.toLowerCase().endsWith(".jpeg") - || name.toLowerCase().endsWith(".gif") || name.toLowerCase().endsWith(".tif") || name.toLowerCase().endsWith(".tiff")); - } - }); - } else { - files[0] = imageFile; - } + JochreDocument doc = this.documentProcessor.onDocumentStart(); - doc.setTotalPageCount(files.length); + doc.setTotalPageCount(1); int currentPageNumber = this.pageNumber; - for (File file : files) { - JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++); - BufferedImage image = ImageIO.read(file); - String imageName = file.getName(); + JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++); - if (currentMonitor != null && documentProcessor instanceof Monitorable) { - ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask(); - double percentAllotted = (1 / (double) (files.length)); - currentMonitor.startTask(monitor, percentAllotted); - } + String imageName = this.fileName; - documentProcessor.onImageFound(page, image, imageName, 0); - if (currentMonitor != null && documentProcessor instanceof Monitorable) { - currentMonitor.endTask(); - } + if (currentMonitor != null && documentProcessor instanceof Monitorable) { + ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask(); + currentMonitor.startTask(monitor, 1.0); + } - this.documentProcessor.onPageComplete(page); + documentProcessor.onImageFound(page, image, imageName, 0); + if (currentMonitor != null && documentProcessor instanceof Monitorable) { + currentMonitor.endTask(); } + + this.documentProcessor.onPageComplete(page); + this.documentProcessor.onDocumentComplete(doc); this.documentProcessor.onAnalysisComplete(); @@ -109,7 +92,7 @@ public boolean accept(File dir, String name) { LOG.error("Exception while processing document", e); throw new RuntimeException(e); } finally { - LOG.debug("Exit ImageDocumentExtractorImpl.extractDocument"); + LOG.debug("Exit InputStreamDocumentExtractor.extractDocument"); } } diff --git a/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageFileDocumentExtractor.java b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageFileDocumentExtractor.java new file mode 100644 index 0000000..b78e7e5 --- /dev/null +++ b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageFileDocumentExtractor.java @@ -0,0 +1,134 @@ +/////////////////////////////////////////////////////////////////////////////// +//Copyright (C) 2012 Assaf Urieli +// +//This file is part of Jochre. +// +//Jochre is free software: you can redistribute it and/or modify +//it under the terms of the GNU Affero General Public License as published by +//the Free Software Foundation, either version 3 of the License, or +//(at your option) any later version. +// +//Jochre is distributed in the hope that it will be useful, +//but WITHOUT ANY WARRANTY; without even the implied warranty of +//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//GNU Affero General Public License for more details. +// +//You should have received a copy of the GNU Affero General Public License +//along with Jochre. If not, see . +////////////////////////////////////////////////////////////////////////////// +package com.joliciel.jochre.doc; + +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.FilenameFilter; + +import javax.imageio.ImageIO; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.joliciel.talismane.utils.Monitorable; +import com.joliciel.talismane.utils.MultiTaskProgressMonitor; +import com.joliciel.talismane.utils.ProgressMonitor; + +/** + * An interface for extracting a JochreDocument from an image File (jpeg, gif or + * png). + * + * @author Assaf Urieli + * + */ +public class ImageFileDocumentExtractor implements Monitorable, Runnable { + private static final Logger LOG = LoggerFactory.getLogger(ImageFileDocumentExtractor.class); + private final SourceFileProcessor documentProcessor; + private MultiTaskProgressMonitor currentMonitor; + private final File imageFile; + private int pageNumber = 1; + + public ImageFileDocumentExtractor(File imageFile, SourceFileProcessor documentProcessor) { + this.documentProcessor = documentProcessor; + this.imageFile = imageFile; + } + + @Override + public void run() { + this.extractDocument(); + } + + public JochreDocument extractDocument() { + LOG.debug("ImageDocumentExtractorImpl.extractDocument"); + try { + File[] files = new File[1]; + + if (imageFile.isDirectory()) { + files = imageFile.listFiles(new FilenameFilter() { + + @Override + public boolean accept(File dir, String name) { + return (name.toLowerCase().endsWith(".png") || name.toLowerCase().endsWith(".jpg") || name.toLowerCase().endsWith(".jpeg") + || name.toLowerCase().endsWith(".gif") || name.toLowerCase().endsWith(".tif") || name.toLowerCase().endsWith(".tiff")); + } + }); + } else { + files[0] = imageFile; + } + + JochreDocument doc = this.documentProcessor.onDocumentStart(); + doc.setTotalPageCount(files.length); + + int currentPageNumber = this.pageNumber; + for (File file : files) { + JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++); + + BufferedImage image = ImageIO.read(file); + String imageName = file.getName(); + + if (currentMonitor != null && documentProcessor instanceof Monitorable) { + ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask(); + double percentAllotted = (1 / (double) (files.length)); + currentMonitor.startTask(monitor, percentAllotted); + } + + documentProcessor.onImageFound(page, image, imageName, 0); + if (currentMonitor != null && documentProcessor instanceof Monitorable) { + currentMonitor.endTask(); + } + + this.documentProcessor.onPageComplete(page); + } + this.documentProcessor.onDocumentComplete(doc); + this.documentProcessor.onAnalysisComplete(); + + if (currentMonitor != null) + currentMonitor.setFinished(true); + return doc; + } catch (Exception e) { + LOG.debug("Exception occurred. Have monitor? " + currentMonitor); + if (currentMonitor != null) + currentMonitor.setException(e); + LOG.error("Exception while processing document", e); + throw new RuntimeException(e); + } finally { + LOG.debug("Exit ImageDocumentExtractorImpl.extractDocument"); + } + } + + @Override + public ProgressMonitor monitorTask() { + currentMonitor = new MultiTaskProgressMonitor(); + + return currentMonitor; + } + + /** + * The page number to assign to this image. + */ + public int getPageNumber() { + return pageNumber; + } + + public void setPageNumber(int pageNumber) { + this.pageNumber = pageNumber; + } + +} diff --git a/jochre_distribution/pom.xml b/jochre_distribution/pom.xml index 45305d3..0d4c7c4 100644 --- a/jochre_distribution/pom.xml +++ b/jochre_distribution/pom.xml @@ -6,7 +6,7 @@ com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3 ../jochre_parent diff --git a/jochre_parent/pom.xml b/jochre_parent/pom.xml index e829b6f..8725ccf 100644 --- a/jochre_parent/pom.xml +++ b/jochre_parent/pom.xml @@ -4,7 +4,7 @@ 4.0.0 com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3 pom Jochre Java Optical CHaracter Recognition diff --git a/jochre_search/pom.xml b/jochre_search/pom.xml index 5bc08eb..ef9848f 100644 --- a/jochre_search/pom.xml +++ b/jochre_search/pom.xml @@ -6,7 +6,7 @@ com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3 ../jochre_parent jar diff --git a/jochre_search_webapp/pom.xml b/jochre_search_webapp/pom.xml index b1ef207..22615a1 100644 --- a/jochre_search_webapp/pom.xml +++ b/jochre_search_webapp/pom.xml @@ -5,7 +5,7 @@ com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3 ../jochre_parent war diff --git a/jochre_utils/pom.xml b/jochre_utils/pom.xml index f15f201..0be3bfd 100644 --- a/jochre_utils/pom.xml +++ b/jochre_utils/pom.xml @@ -5,7 +5,7 @@ com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3 ../jochre_parent diff --git a/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java b/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java index c5034fc..60a388b 100644 --- a/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java +++ b/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java @@ -82,7 +82,7 @@ public static BufferedImage toBlackAndWhite(BufferedImage greyImage, int thresho public static BufferedImage deepCopy(BufferedImage bi) { ColorModel cm = bi.getColorModel(); boolean isAlphaPremultiplied = cm.isAlphaPremultiplied(); - WritableRaster raster = bi.copyData(null); + WritableRaster raster = bi.copyData(bi.getRaster().createCompatibleWritableRaster()); return new BufferedImage(cm, raster, isAlphaPremultiplied, null); } diff --git a/jochre_web/pom.xml b/jochre_web/pom.xml index 7e62190..e94c4dc 100644 --- a/jochre_web/pom.xml +++ b/jochre_web/pom.xml @@ -5,7 +5,7 @@ com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3 ../jochre_parent war diff --git a/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java b/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java index e9916d9..af1b7e6 100644 --- a/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java +++ b/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java @@ -45,7 +45,7 @@ import com.joliciel.jochre.JochreSession; import com.joliciel.jochre.doc.DocumentDao; import com.joliciel.jochre.doc.DocumentObserver; -import com.joliciel.jochre.doc.ImageDocumentExtractor; +import com.joliciel.jochre.doc.ImageFileDocumentExtractor; import com.joliciel.jochre.doc.JochreDocument; import com.joliciel.jochre.doc.JochreDocumentGenerator; import com.joliciel.jochre.doc.JochrePage; @@ -306,7 +306,7 @@ public void doAfterCompose(Window window) throws Exception { progressTimer.setRunning(true); } else if (lowerCaseFileName.endsWith(".png") || lowerCaseFileName.endsWith(".jpg") || lowerCaseFileName.endsWith(".jpeg") || lowerCaseFileName.endsWith(".gif")) { - ImageDocumentExtractor extractor = new ImageDocumentExtractor(currentFile, documentGenerator); + ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(currentFile, documentGenerator); if (startPage >= 0) extractor.setPageNumber(startPage); this.progressMonitor = extractor.monitorTask(); diff --git a/jochre_yiddish/pom.xml b/jochre_yiddish/pom.xml index 635cc91..e206ad8 100644 --- a/jochre_yiddish/pom.xml +++ b/jochre_yiddish/pom.xml @@ -6,7 +6,7 @@ com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3 ../jochre_parent jar