diff --git a/jochre_core/pom.xml b/jochre_core/pom.xml index 2b408db..8384a89 100644 --- a/jochre_core/pom.xml +++ b/jochre_core/pom.xml @@ -6,7 +6,7 @@ com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3-SNAPSHOT ../jochre_parent diff --git a/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java b/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java index 8e49615..620f7b5 100644 --- a/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java +++ b/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java @@ -128,6 +128,8 @@ import com.typesafe.config.Config; import com.typesafe.config.ConfigFactory; +import javax.imageio.ImageIO; + /** * Class encapsulating the various top-level Jochre commands and command-line * interface. @@ -1091,10 +1093,10 @@ public void doCommandAnalyse(File sourceFile, MostLikelyWordChooser wordChooser, || sourceFile.getName().toLowerCase().endsWith(".jpg") || sourceFile.getName().toLowerCase().endsWith(".jpeg") || sourceFile.getName().toLowerCase().endsWith(".gif") || sourceFile.getName().toLowerCase().endsWith(".tif") || sourceFile.getName().toLowerCase().endsWith(".tiff")) { - ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator); + ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(sourceFile, documentGenerator); extractor.extractDocument(); } else if (sourceFile.isDirectory()) { - ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator); + ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(sourceFile, documentGenerator); extractor.extractDocument(); } else { throw new RuntimeException("Unrecognised file extension"); @@ -1337,7 +1339,7 @@ public void doCommandSegment(String filename, String userFriendlyName, File outp pdfDocumentProcessor.process(); } else if (filename.toLowerCase().endsWith(".png") || filename.toLowerCase().endsWith(".jpg") || filename.toLowerCase().endsWith(".jpeg") || filename.toLowerCase().endsWith(".gif")) { - ImageDocumentExtractor extractor = new ImageDocumentExtractor(file, jochreDocumentGenerator); + ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(file, jochreDocumentGenerator); extractor.extractDocument(); } else { throw new RuntimeException("Unrecognised file extension"); @@ -1534,13 +1536,18 @@ public void imageFileToAlto4(File sourceFile, Writer writer) throws IOException } public void imageInputStreamToAlto4(InputStream inputStream, String fileName, Writer writer) throws IOException { + BufferedImage image = ImageIO.read(inputStream); + this.imageToAlto4(image, fileName, writer); + } + + public void imageToAlto4(BufferedImage image, String fileName, Writer writer) throws IOException { final Set myPages = new HashSet<>(); MostLikelyWordChooser wordChooser = new MostLikelyWordChooser(jochreSession); List documentObservers = new ArrayList<>(); AltoXMLExporter altoXMLExporter = new AltoXMLExporter(writer, 4); documentObservers.add(altoXMLExporter); JochreDocumentGenerator documentGenerator = this.getDocumentGenerator(fileName, wordChooser, myPages, documentObservers, new ArrayList<>()); - InputStreamDocumentExtractor documentExtractor = new InputStreamDocumentExtractor(inputStream, fileName, documentGenerator); + ImageDocumentExtractor documentExtractor = new ImageDocumentExtractor(image, fileName, documentGenerator); documentExtractor.extractDocument(); } } diff --git a/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java index 36c5872..644b07c 100644 --- a/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java +++ b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java @@ -18,22 +18,18 @@ ////////////////////////////////////////////////////////////////////////////// package com.joliciel.jochre.doc; -import java.awt.image.BufferedImage; -import java.io.File; -import java.io.FilenameFilter; - -import javax.imageio.ImageIO; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import com.joliciel.talismane.utils.Monitorable; import com.joliciel.talismane.utils.MultiTaskProgressMonitor; import com.joliciel.talismane.utils.ProgressMonitor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.imageio.ImageIO; +import java.awt.image.BufferedImage; +import java.io.InputStream; /** - * An interface for extracting a JochreDocument from an image File (jpeg, gif or - * png). + * An interface for extracting a JochreDocument from an image input stream. * * @author Assaf Urieli * @@ -42,12 +38,14 @@ public class ImageDocumentExtractor implements Monitorable, Runnable { private static final Logger LOG = LoggerFactory.getLogger(ImageDocumentExtractor.class); private final SourceFileProcessor documentProcessor; private MultiTaskProgressMonitor currentMonitor; - private final File imageFile; + private final BufferedImage image; + private final String fileName; private int pageNumber = 1; - public ImageDocumentExtractor(File imageFile, SourceFileProcessor documentProcessor) { + public ImageDocumentExtractor(BufferedImage image, String fileName, SourceFileProcessor documentProcessor) { this.documentProcessor = documentProcessor; - this.imageFile = imageFile; + this.image = image; + this.fileName = fileName; } @Override @@ -56,46 +54,31 @@ public void run() { } public JochreDocument extractDocument() { - LOG.debug("ImageDocumentExtractorImpl.extractDocument"); + LOG.debug("InputStreamDocumentExtractor.extractDocument"); try { - File[] files = new File[1]; - - if (imageFile.isDirectory()) { - files = imageFile.listFiles(new FilenameFilter() { - - @Override - public boolean accept(File dir, String name) { - return (name.toLowerCase().endsWith(".png") || name.toLowerCase().endsWith(".jpg") || name.toLowerCase().endsWith(".jpeg") - || name.toLowerCase().endsWith(".gif") || name.toLowerCase().endsWith(".tif") || name.toLowerCase().endsWith(".tiff")); - } - }); - } else { - files[0] = imageFile; - } + JochreDocument doc = this.documentProcessor.onDocumentStart(); - doc.setTotalPageCount(files.length); + doc.setTotalPageCount(1); int currentPageNumber = this.pageNumber; - for (File file : files) { - JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++); - BufferedImage image = ImageIO.read(file); - String imageName = file.getName(); + JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++); - if (currentMonitor != null && documentProcessor instanceof Monitorable) { - ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask(); - double percentAllotted = (1 / (double) (files.length)); - currentMonitor.startTask(monitor, percentAllotted); - } + String imageName = this.fileName; - documentProcessor.onImageFound(page, image, imageName, 0); - if (currentMonitor != null && documentProcessor instanceof Monitorable) { - currentMonitor.endTask(); - } + if (currentMonitor != null && documentProcessor instanceof Monitorable) { + ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask(); + currentMonitor.startTask(monitor, 1.0); + } - this.documentProcessor.onPageComplete(page); + documentProcessor.onImageFound(page, image, imageName, 0); + if (currentMonitor != null && documentProcessor instanceof Monitorable) { + currentMonitor.endTask(); } + + this.documentProcessor.onPageComplete(page); + this.documentProcessor.onDocumentComplete(doc); this.documentProcessor.onAnalysisComplete(); @@ -109,7 +92,7 @@ public boolean accept(File dir, String name) { LOG.error("Exception while processing document", e); throw new RuntimeException(e); } finally { - LOG.debug("Exit ImageDocumentExtractorImpl.extractDocument"); + LOG.debug("Exit InputStreamDocumentExtractor.extractDocument"); } } diff --git a/jochre_core/src/main/java/com/joliciel/jochre/doc/InputStreamDocumentExtractor.java b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageFileDocumentExtractor.java similarity index 57% rename from jochre_core/src/main/java/com/joliciel/jochre/doc/InputStreamDocumentExtractor.java rename to jochre_core/src/main/java/com/joliciel/jochre/doc/ImageFileDocumentExtractor.java index 0c94bb5..b78e7e5 100644 --- a/jochre_core/src/main/java/com/joliciel/jochre/doc/InputStreamDocumentExtractor.java +++ b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageFileDocumentExtractor.java @@ -18,36 +18,36 @@ ////////////////////////////////////////////////////////////////////////////// package com.joliciel.jochre.doc; -import com.joliciel.talismane.utils.Monitorable; -import com.joliciel.talismane.utils.MultiTaskProgressMonitor; -import com.joliciel.talismane.utils.ProgressMonitor; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.File; import java.io.FilenameFilter; -import java.io.InputStream; + +import javax.imageio.ImageIO; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.joliciel.talismane.utils.Monitorable; +import com.joliciel.talismane.utils.MultiTaskProgressMonitor; +import com.joliciel.talismane.utils.ProgressMonitor; /** - * An interface for extracting a JochreDocument from an image input stream. + * An interface for extracting a JochreDocument from an image File (jpeg, gif or + * png). * * @author Assaf Urieli * */ -public class InputStreamDocumentExtractor implements Monitorable, Runnable { - private static final Logger LOG = LoggerFactory.getLogger(InputStreamDocumentExtractor.class); +public class ImageFileDocumentExtractor implements Monitorable, Runnable { + private static final Logger LOG = LoggerFactory.getLogger(ImageFileDocumentExtractor.class); private final SourceFileProcessor documentProcessor; private MultiTaskProgressMonitor currentMonitor; - private final InputStream imageInputStream; - private final String fileName; + private final File imageFile; private int pageNumber = 1; - public InputStreamDocumentExtractor(InputStream imageInputStream, String fileName, SourceFileProcessor documentProcessor) { + public ImageFileDocumentExtractor(File imageFile, SourceFileProcessor documentProcessor) { this.documentProcessor = documentProcessor; - this.imageInputStream = imageInputStream; - this.fileName = fileName; + this.imageFile = imageFile; } @Override @@ -56,32 +56,46 @@ public void run() { } public JochreDocument extractDocument() { - LOG.debug("InputStreamDocumentExtractor.extractDocument"); + LOG.debug("ImageDocumentExtractorImpl.extractDocument"); try { - + File[] files = new File[1]; + + if (imageFile.isDirectory()) { + files = imageFile.listFiles(new FilenameFilter() { + + @Override + public boolean accept(File dir, String name) { + return (name.toLowerCase().endsWith(".png") || name.toLowerCase().endsWith(".jpg") || name.toLowerCase().endsWith(".jpeg") + || name.toLowerCase().endsWith(".gif") || name.toLowerCase().endsWith(".tif") || name.toLowerCase().endsWith(".tiff")); + } + }); + } else { + files[0] = imageFile; + } JochreDocument doc = this.documentProcessor.onDocumentStart(); - doc.setTotalPageCount(1); + doc.setTotalPageCount(files.length); int currentPageNumber = this.pageNumber; + for (File file : files) { + JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++); - JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++); + BufferedImage image = ImageIO.read(file); + String imageName = file.getName(); - BufferedImage image = ImageIO.read(this.imageInputStream); - String imageName = this.fileName; + if (currentMonitor != null && documentProcessor instanceof Monitorable) { + ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask(); + double percentAllotted = (1 / (double) (files.length)); + currentMonitor.startTask(monitor, percentAllotted); + } - if (currentMonitor != null && documentProcessor instanceof Monitorable) { - ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask(); - currentMonitor.startTask(monitor, 1.0); - } + documentProcessor.onImageFound(page, image, imageName, 0); + if (currentMonitor != null && documentProcessor instanceof Monitorable) { + currentMonitor.endTask(); + } - documentProcessor.onImageFound(page, image, imageName, 0); - if (currentMonitor != null && documentProcessor instanceof Monitorable) { - currentMonitor.endTask(); + this.documentProcessor.onPageComplete(page); } - - this.documentProcessor.onPageComplete(page); - this.documentProcessor.onDocumentComplete(doc); this.documentProcessor.onAnalysisComplete(); @@ -95,7 +109,7 @@ public JochreDocument extractDocument() { LOG.error("Exception while processing document", e); throw new RuntimeException(e); } finally { - LOG.debug("Exit InputStreamDocumentExtractor.extractDocument"); + LOG.debug("Exit ImageDocumentExtractorImpl.extractDocument"); } } diff --git a/jochre_distribution/pom.xml b/jochre_distribution/pom.xml index 45305d3..de56164 100644 --- a/jochre_distribution/pom.xml +++ b/jochre_distribution/pom.xml @@ -6,7 +6,7 @@ com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3-SNAPSHOT ../jochre_parent diff --git a/jochre_parent/pom.xml b/jochre_parent/pom.xml index e829b6f..3168583 100644 --- a/jochre_parent/pom.xml +++ b/jochre_parent/pom.xml @@ -4,7 +4,7 @@ 4.0.0 com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3-SNAPSHOT pom Jochre Java Optical CHaracter Recognition diff --git a/jochre_search/pom.xml b/jochre_search/pom.xml index 5bc08eb..fc72512 100644 --- a/jochre_search/pom.xml +++ b/jochre_search/pom.xml @@ -6,7 +6,7 @@ com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3-SNAPSHOT ../jochre_parent jar diff --git a/jochre_search_webapp/pom.xml b/jochre_search_webapp/pom.xml index b1ef207..51920ef 100644 --- a/jochre_search_webapp/pom.xml +++ b/jochre_search_webapp/pom.xml @@ -5,7 +5,7 @@ com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3-SNAPSHOT ../jochre_parent war diff --git a/jochre_utils/pom.xml b/jochre_utils/pom.xml index f15f201..7372d06 100644 --- a/jochre_utils/pom.xml +++ b/jochre_utils/pom.xml @@ -5,7 +5,7 @@ com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3-SNAPSHOT ../jochre_parent diff --git a/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java b/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java index c5034fc..60a388b 100644 --- a/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java +++ b/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java @@ -82,7 +82,7 @@ public static BufferedImage toBlackAndWhite(BufferedImage greyImage, int thresho public static BufferedImage deepCopy(BufferedImage bi) { ColorModel cm = bi.getColorModel(); boolean isAlphaPremultiplied = cm.isAlphaPremultiplied(); - WritableRaster raster = bi.copyData(null); + WritableRaster raster = bi.copyData(bi.getRaster().createCompatibleWritableRaster()); return new BufferedImage(cm, raster, isAlphaPremultiplied, null); } diff --git a/jochre_web/pom.xml b/jochre_web/pom.xml index 7e62190..a624f51 100644 --- a/jochre_web/pom.xml +++ b/jochre_web/pom.xml @@ -5,7 +5,7 @@ com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3-SNAPSHOT ../jochre_parent war diff --git a/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java b/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java index e9916d9..af1b7e6 100644 --- a/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java +++ b/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java @@ -45,7 +45,7 @@ import com.joliciel.jochre.JochreSession; import com.joliciel.jochre.doc.DocumentDao; import com.joliciel.jochre.doc.DocumentObserver; -import com.joliciel.jochre.doc.ImageDocumentExtractor; +import com.joliciel.jochre.doc.ImageFileDocumentExtractor; import com.joliciel.jochre.doc.JochreDocument; import com.joliciel.jochre.doc.JochreDocumentGenerator; import com.joliciel.jochre.doc.JochrePage; @@ -306,7 +306,7 @@ public void doAfterCompose(Window window) throws Exception { progressTimer.setRunning(true); } else if (lowerCaseFileName.endsWith(".png") || lowerCaseFileName.endsWith(".jpg") || lowerCaseFileName.endsWith(".jpeg") || lowerCaseFileName.endsWith(".gif")) { - ImageDocumentExtractor extractor = new ImageDocumentExtractor(currentFile, documentGenerator); + ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(currentFile, documentGenerator); if (startPage >= 0) extractor.setPageNumber(startPage); this.progressMonitor = extractor.monitorTask(); diff --git a/jochre_yiddish/pom.xml b/jochre_yiddish/pom.xml index 635cc91..0f53af8 100644 --- a/jochre_yiddish/pom.xml +++ b/jochre_yiddish/pom.xml @@ -6,7 +6,7 @@ com.joliciel.jochre jochre-parent - 2.6.2 + 2.6.3-SNAPSHOT ../jochre_parent jar