diff --git a/jochre_core/pom.xml b/jochre_core/pom.xml
index 2b408db..9bbe9ac 100644
--- a/jochre_core/pom.xml
+++ b/jochre_core/pom.xml
@@ -6,7 +6,7 @@
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3
../jochre_parent
diff --git a/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java b/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java
index 8e49615..620f7b5 100644
--- a/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java
+++ b/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java
@@ -128,6 +128,8 @@
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
+import javax.imageio.ImageIO;
+
/**
* Class encapsulating the various top-level Jochre commands and command-line
* interface.
@@ -1091,10 +1093,10 @@ public void doCommandAnalyse(File sourceFile, MostLikelyWordChooser wordChooser,
|| sourceFile.getName().toLowerCase().endsWith(".jpg") || sourceFile.getName().toLowerCase().endsWith(".jpeg")
|| sourceFile.getName().toLowerCase().endsWith(".gif") || sourceFile.getName().toLowerCase().endsWith(".tif")
|| sourceFile.getName().toLowerCase().endsWith(".tiff")) {
- ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator);
+ ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(sourceFile, documentGenerator);
extractor.extractDocument();
} else if (sourceFile.isDirectory()) {
- ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator);
+ ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(sourceFile, documentGenerator);
extractor.extractDocument();
} else {
throw new RuntimeException("Unrecognised file extension");
@@ -1337,7 +1339,7 @@ public void doCommandSegment(String filename, String userFriendlyName, File outp
pdfDocumentProcessor.process();
} else if (filename.toLowerCase().endsWith(".png") || filename.toLowerCase().endsWith(".jpg")
|| filename.toLowerCase().endsWith(".jpeg") || filename.toLowerCase().endsWith(".gif")) {
- ImageDocumentExtractor extractor = new ImageDocumentExtractor(file, jochreDocumentGenerator);
+ ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(file, jochreDocumentGenerator);
extractor.extractDocument();
} else {
throw new RuntimeException("Unrecognised file extension");
@@ -1534,13 +1536,37 @@ public void imageFileToAlto4(File sourceFile, Writer writer) throws IOException
}
+ /**
+ * Decodes an image from a stream, analyses it and writes the result as Alto 4 XML.
+ *
+ * @param inputStream stream containing the encoded image; it is read but not closed here
+ * @param fileName name passed on to the analysis and used in the error message
+ * @param writer destination for the Alto XML output
+ * @throws IOException if reading fails, or if no registered ImageIO reader can decode the stream
+ */
public void imageInputStreamToAlto4(InputStream inputStream, String fileName, Writer writer) throws IOException {
+ BufferedImage image = ImageIO.read(inputStream);
+ if (image == null) {
+ // ImageIO.read returns null (rather than throwing) when no reader recognises the data.
+ throw new IOException("Unsupported or unreadable image format: " + fileName);
+ }
+ this.imageToAlto4(image, fileName, writer);
+ }
+
+ /**
+ * Analyses an already-decoded image and writes the result to the writer as Alto 4 XML.
+ *
+ * @param image the page image to analyse
+ * @param fileName name passed to the document generator and extractor for this image
+ * @param writer destination for the Alto XML output
+ */
+ public void imageToAlto4(BufferedImage image, String fileName, Writer writer) throws IOException {
final Set myPages = new HashSet<>();
MostLikelyWordChooser wordChooser = new MostLikelyWordChooser(jochreSession);
List documentObservers = new ArrayList<>();
AltoXMLExporter altoXMLExporter = new AltoXMLExporter(writer, 4);
documentObservers.add(altoXMLExporter);
JochreDocumentGenerator documentGenerator = this.getDocumentGenerator(fileName, wordChooser, myPages, documentObservers, new ArrayList<>());
- InputStreamDocumentExtractor documentExtractor = new InputStreamDocumentExtractor(inputStream, fileName, documentGenerator);
+ ImageDocumentExtractor documentExtractor = new ImageDocumentExtractor(image, fileName, documentGenerator);
documentExtractor.extractDocument();
}
}
diff --git a/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java
index 36c5872..644b07c 100644
--- a/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java
+++ b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java
@@ -18,22 +18,18 @@
//////////////////////////////////////////////////////////////////////////////
package com.joliciel.jochre.doc;
-import java.awt.image.BufferedImage;
-import java.io.File;
-import java.io.FilenameFilter;
-
-import javax.imageio.ImageIO;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import com.joliciel.talismane.utils.Monitorable;
import com.joliciel.talismane.utils.MultiTaskProgressMonitor;
import com.joliciel.talismane.utils.ProgressMonitor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.imageio.ImageIO;
+import java.awt.image.BufferedImage;
+import java.io.InputStream;
/**
- * An interface for extracting a JochreDocument from an image File (jpeg, gif or
- * png).
+ * Extracts a single-page JochreDocument from an already-decoded image (a BufferedImage).
*
* @author Assaf Urieli
*
@@ -42,12 +38,14 @@ public class ImageDocumentExtractor implements Monitorable, Runnable {
private static final Logger LOG = LoggerFactory.getLogger(ImageDocumentExtractor.class);
private final SourceFileProcessor documentProcessor;
private MultiTaskProgressMonitor currentMonitor;
- private final File imageFile;
+ private final BufferedImage image;
+ private final String fileName;
private int pageNumber = 1;
- public ImageDocumentExtractor(File imageFile, SourceFileProcessor documentProcessor) {
+ public ImageDocumentExtractor(BufferedImage image, String fileName, SourceFileProcessor documentProcessor) {
this.documentProcessor = documentProcessor;
- this.imageFile = imageFile;
+ this.image = image; // the single page image handed to onImageFound; stored without a null check
+ this.fileName = fileName; // passed to onImageFound as the image name
}
@Override
@@ -56,46 +54,31 @@ public void run() {
}
public JochreDocument extractDocument() {
- LOG.debug("ImageDocumentExtractorImpl.extractDocument");
+ LOG.debug("InputStreamDocumentExtractor.extractDocument");
try {
- File[] files = new File[1];
-
- if (imageFile.isDirectory()) {
- files = imageFile.listFiles(new FilenameFilter() {
-
- @Override
- public boolean accept(File dir, String name) {
- return (name.toLowerCase().endsWith(".png") || name.toLowerCase().endsWith(".jpg") || name.toLowerCase().endsWith(".jpeg")
- || name.toLowerCase().endsWith(".gif") || name.toLowerCase().endsWith(".tif") || name.toLowerCase().endsWith(".tiff"));
- }
- });
- } else {
- files[0] = imageFile;
- }
+
JochreDocument doc = this.documentProcessor.onDocumentStart();
- doc.setTotalPageCount(files.length);
+ doc.setTotalPageCount(1);
int currentPageNumber = this.pageNumber;
- for (File file : files) {
- JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++);
- BufferedImage image = ImageIO.read(file);
- String imageName = file.getName();
+ JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++);
- if (currentMonitor != null && documentProcessor instanceof Monitorable) {
- ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask();
- double percentAllotted = (1 / (double) (files.length));
- currentMonitor.startTask(monitor, percentAllotted);
- }
+ String imageName = this.fileName;
- documentProcessor.onImageFound(page, image, imageName, 0);
- if (currentMonitor != null && documentProcessor instanceof Monitorable) {
- currentMonitor.endTask();
- }
+ if (currentMonitor != null && documentProcessor instanceof Monitorable) {
+ ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask();
+ currentMonitor.startTask(monitor, 1.0);
+ }
- this.documentProcessor.onPageComplete(page);
+ documentProcessor.onImageFound(page, image, imageName, 0);
+ if (currentMonitor != null && documentProcessor instanceof Monitorable) {
+ currentMonitor.endTask();
}
+
+ this.documentProcessor.onPageComplete(page);
+
this.documentProcessor.onDocumentComplete(doc);
this.documentProcessor.onAnalysisComplete();
@@ -109,7 +92,7 @@ public boolean accept(File dir, String name) {
LOG.error("Exception while processing document", e);
throw new RuntimeException(e);
} finally {
- LOG.debug("Exit ImageDocumentExtractorImpl.extractDocument");
+ LOG.debug("Exit InputStreamDocumentExtractor.extractDocument");
}
}
diff --git a/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageFileDocumentExtractor.java b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageFileDocumentExtractor.java
new file mode 100644
index 0000000..b78e7e5
--- /dev/null
+++ b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageFileDocumentExtractor.java
@@ -0,0 +1,172 @@
+///////////////////////////////////////////////////////////////////////////////
+//Copyright (C) 2012 Assaf Urieli
+//
+//This file is part of Jochre.
+//
+//Jochre is free software: you can redistribute it and/or modify
+//it under the terms of the GNU Affero General Public License as published by
+//the Free Software Foundation, either version 3 of the License, or
+//(at your option) any later version.
+//
+//Jochre is distributed in the hope that it will be useful,
+//but WITHOUT ANY WARRANTY; without even the implied warranty of
+//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//GNU Affero General Public License for more details.
+//
+//You should have received a copy of the GNU Affero General Public License
+//along with Jochre. If not, see <http://www.gnu.org/licenses/>.
+//////////////////////////////////////////////////////////////////////////////
+package com.joliciel.jochre.doc;
+
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Locale;
+
+import javax.imageio.ImageIO;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.joliciel.talismane.utils.Monitorable;
+import com.joliciel.talismane.utils.MultiTaskProgressMonitor;
+import com.joliciel.talismane.utils.ProgressMonitor;
+
+/**
+ * Extracts a JochreDocument from an image file (png, jpeg, gif or tiff), or
+ * from a directory of such files, creating one page per image.
+ *
+ * @author Assaf Urieli
+ *
+ */
+public class ImageFileDocumentExtractor implements Monitorable, Runnable {
+  private static final Logger LOG = LoggerFactory.getLogger(ImageFileDocumentExtractor.class);
+
+  /** Accepts file names ending in a supported image extension, ignoring case. */
+  private static final FilenameFilter IMAGE_FILTER = new FilenameFilter() {
+    @Override
+    public boolean accept(File dir, String name) {
+      // Locale.ROOT keeps the match stable under locales with special casing rules (e.g. Turkish dotless i).
+      String lowerName = name.toLowerCase(Locale.ROOT);
+      return lowerName.endsWith(".png") || lowerName.endsWith(".jpg") || lowerName.endsWith(".jpeg")
+          || lowerName.endsWith(".gif") || lowerName.endsWith(".tif") || lowerName.endsWith(".tiff");
+    }
+  };
+
+  private final SourceFileProcessor documentProcessor;
+  private MultiTaskProgressMonitor currentMonitor;
+  private final File imageFile;
+  private int pageNumber = 1;
+
+  /**
+   * @param imageFile a single image file, or a directory containing image files
+   * @param documentProcessor receives the document, page and image events
+   */
+  public ImageFileDocumentExtractor(File imageFile, SourceFileProcessor documentProcessor) {
+    this.documentProcessor = documentProcessor;
+    this.imageFile = imageFile;
+  }
+
+  @Override
+  public void run() {
+    this.extractDocument();
+  }
+
+  /**
+   * Passes each image to the document processor as one page, numbering pages
+   * consecutively from {@link #getPageNumber()}.
+   *
+   * @return the document returned by the processor's onDocumentStart
+   * @throws RuntimeException wrapping any failure; the failure is also set on the current monitor, if any
+   */
+  public JochreDocument extractDocument() {
+    LOG.debug("ImageFileDocumentExtractor.extractDocument");
+    try {
+      File[] files = this.listImageFiles();
+
+      JochreDocument doc = this.documentProcessor.onDocumentStart();
+      doc.setTotalPageCount(files.length);
+
+      int currentPageNumber = this.pageNumber;
+      for (File file : files) {
+        JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++);
+
+        BufferedImage image = ImageIO.read(file);
+        if (image == null) {
+          // ImageIO.read returns null, rather than throwing, when no reader can decode the file.
+          throw new IOException("Unsupported or unreadable image file: " + file.getPath());
+        }
+        String imageName = file.getName();
+
+        if (currentMonitor != null && documentProcessor instanceof Monitorable) {
+          ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask();
+          double percentAllotted = (1 / (double) (files.length));
+          currentMonitor.startTask(monitor, percentAllotted);
+        }
+
+        documentProcessor.onImageFound(page, image, imageName, 0);
+        if (currentMonitor != null && documentProcessor instanceof Monitorable) {
+          currentMonitor.endTask();
+        }
+
+        this.documentProcessor.onPageComplete(page);
+      }
+      this.documentProcessor.onDocumentComplete(doc);
+      this.documentProcessor.onAnalysisComplete();
+
+      if (currentMonitor != null) {
+        currentMonitor.setFinished(true);
+      }
+      return doc;
+    } catch (Exception e) {
+      LOG.debug("Exception occurred. Have monitor? {}", currentMonitor);
+      if (currentMonitor != null) {
+        currentMonitor.setException(e);
+      }
+      LOG.error("Exception while processing document", e);
+      throw new RuntimeException(e);
+    } finally {
+      LOG.debug("Exit ImageFileDocumentExtractor.extractDocument");
+    }
+  }
+
+  /**
+   * Resolves the images to process: the file itself, or the supported images in the directory, sorted by path.
+   *
+   * @throws IOException if the directory cannot be listed
+   */
+  private File[] listImageFiles() throws IOException {
+    if (!imageFile.isDirectory()) {
+      return new File[] { imageFile };
+    }
+    File[] files = imageFile.listFiles(IMAGE_FILTER);
+    if (files == null) {
+      // listFiles returns null on an I/O error (e.g. the directory is not readable).
+      throw new IOException("Unable to list directory: " + imageFile.getPath());
+    }
+    // listFiles guarantees no ordering, so sort to give pages a deterministic order.
+    Arrays.sort(files);
+    return files;
+  }
+
+  @Override
+  public ProgressMonitor monitorTask() {
+    currentMonitor = new MultiTaskProgressMonitor();
+
+    return currentMonitor;
+  }
+
+  /**
+   * The page number to assign to the first image; later images are numbered consecutively.
+   */
+  public int getPageNumber() {
+    return pageNumber;
+  }
+
+  public void setPageNumber(int pageNumber) {
+    this.pageNumber = pageNumber;
+  }
+
+}
diff --git a/jochre_distribution/pom.xml b/jochre_distribution/pom.xml
index 45305d3..0d4c7c4 100644
--- a/jochre_distribution/pom.xml
+++ b/jochre_distribution/pom.xml
@@ -6,7 +6,7 @@
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3
../jochre_parent
diff --git a/jochre_parent/pom.xml b/jochre_parent/pom.xml
index e829b6f..8725ccf 100644
--- a/jochre_parent/pom.xml
+++ b/jochre_parent/pom.xml
@@ -4,7 +4,7 @@
4.0.0
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3
pom
Jochre
Java Optical CHaracter Recognition
diff --git a/jochre_search/pom.xml b/jochre_search/pom.xml
index 5bc08eb..ef9848f 100644
--- a/jochre_search/pom.xml
+++ b/jochre_search/pom.xml
@@ -6,7 +6,7 @@
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3
../jochre_parent
jar
diff --git a/jochre_search_webapp/pom.xml b/jochre_search_webapp/pom.xml
index b1ef207..22615a1 100644
--- a/jochre_search_webapp/pom.xml
+++ b/jochre_search_webapp/pom.xml
@@ -5,7 +5,7 @@
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3
../jochre_parent
war
diff --git a/jochre_utils/pom.xml b/jochre_utils/pom.xml
index f15f201..0be3bfd 100644
--- a/jochre_utils/pom.xml
+++ b/jochre_utils/pom.xml
@@ -5,7 +5,7 @@
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3
../jochre_parent
diff --git a/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java b/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java
index c5034fc..60a388b 100644
--- a/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java
+++ b/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java
@@ -82,7 +82,7 @@ public static BufferedImage toBlackAndWhite(BufferedImage greyImage, int thresho
public static BufferedImage deepCopy(BufferedImage bi) {
ColorModel cm = bi.getColorModel();
boolean isAlphaPremultiplied = cm.isAlphaPremultiplied();
- WritableRaster raster = bi.copyData(null);
+ WritableRaster raster = bi.copyData(bi.getRaster().createCompatibleWritableRaster()); // copies the pixels into a newly created compatible raster; NOTE(review): presumably so images from getSubimage (non-zero raster origin) can be copied — confirm
return new BufferedImage(cm, raster, isAlphaPremultiplied, null);
}
diff --git a/jochre_web/pom.xml b/jochre_web/pom.xml
index 7e62190..e94c4dc 100644
--- a/jochre_web/pom.xml
+++ b/jochre_web/pom.xml
@@ -5,7 +5,7 @@
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3
../jochre_parent
war
diff --git a/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java b/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java
index e9916d9..af1b7e6 100644
--- a/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java
+++ b/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java
@@ -45,7 +45,7 @@
import com.joliciel.jochre.JochreSession;
import com.joliciel.jochre.doc.DocumentDao;
import com.joliciel.jochre.doc.DocumentObserver;
-import com.joliciel.jochre.doc.ImageDocumentExtractor;
+import com.joliciel.jochre.doc.ImageFileDocumentExtractor;
import com.joliciel.jochre.doc.JochreDocument;
import com.joliciel.jochre.doc.JochreDocumentGenerator;
import com.joliciel.jochre.doc.JochrePage;
@@ -306,7 +306,7 @@ public void doAfterCompose(Window window) throws Exception {
progressTimer.setRunning(true);
} else if (lowerCaseFileName.endsWith(".png") || lowerCaseFileName.endsWith(".jpg")
|| lowerCaseFileName.endsWith(".jpeg") || lowerCaseFileName.endsWith(".gif")) {
- ImageDocumentExtractor extractor = new ImageDocumentExtractor(currentFile, documentGenerator);
+ ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(currentFile, documentGenerator);
if (startPage >= 0)
extractor.setPageNumber(startPage);
this.progressMonitor = extractor.monitorTask();
diff --git a/jochre_yiddish/pom.xml b/jochre_yiddish/pom.xml
index 635cc91..e206ad8 100644
--- a/jochre_yiddish/pom.xml
+++ b/jochre_yiddish/pom.xml
@@ -6,7 +6,7 @@
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3
../jochre_parent
jar