diff --git a/jochre_core/pom.xml b/jochre_core/pom.xml
index 2b408db..8384a89 100644
--- a/jochre_core/pom.xml
+++ b/jochre_core/pom.xml
@@ -6,7 +6,7 @@
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3-SNAPSHOT
../jochre_parent
diff --git a/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java b/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java
index 8e49615..620f7b5 100644
--- a/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java
+++ b/jochre_core/src/main/java/com/joliciel/jochre/Jochre.java
@@ -128,6 +128,8 @@
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
+import javax.imageio.ImageIO;
+
/**
* Class encapsulating the various top-level Jochre commands and command-line
* interface.
@@ -1091,10 +1093,10 @@ public void doCommandAnalyse(File sourceFile, MostLikelyWordChooser wordChooser,
|| sourceFile.getName().toLowerCase().endsWith(".jpg") || sourceFile.getName().toLowerCase().endsWith(".jpeg")
|| sourceFile.getName().toLowerCase().endsWith(".gif") || sourceFile.getName().toLowerCase().endsWith(".tif")
|| sourceFile.getName().toLowerCase().endsWith(".tiff")) {
- ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator);
+ ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(sourceFile, documentGenerator);
extractor.extractDocument();
} else if (sourceFile.isDirectory()) {
- ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator);
+ ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(sourceFile, documentGenerator);
extractor.extractDocument();
} else {
throw new RuntimeException("Unrecognised file extension");
@@ -1337,7 +1339,7 @@ public void doCommandSegment(String filename, String userFriendlyName, File outp
pdfDocumentProcessor.process();
} else if (filename.toLowerCase().endsWith(".png") || filename.toLowerCase().endsWith(".jpg")
|| filename.toLowerCase().endsWith(".jpeg") || filename.toLowerCase().endsWith(".gif")) {
- ImageDocumentExtractor extractor = new ImageDocumentExtractor(file, jochreDocumentGenerator);
+ ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(file, jochreDocumentGenerator);
extractor.extractDocument();
} else {
throw new RuntimeException("Unrecognised file extension");
@@ -1534,13 +1536,27 @@ public void imageFileToAlto4(File sourceFile, Writer writer) throws IOException
}
+ /**
+ * Decodes an image from the given stream and hands it to {@link #imageToAlto4}.
+ *
+ * @throws IOException if reading fails or the stream cannot be decoded as an image
+ */
public void imageInputStreamToAlto4(InputStream inputStream, String fileName, Writer writer) throws IOException {
+ // ImageIO.read returns null, rather than throwing, when no registered ImageReader can decode the stream.
+ BufferedImage image = ImageIO.read(inputStream);
+ if (image == null) {
+ throw new IOException("Could not decode an image from the input stream for " + fileName);
+ }
+ this.imageToAlto4(image, fileName, writer);
+ }
+
+ public void imageToAlto4(BufferedImage image, String fileName, Writer writer) throws IOException {
final Set myPages = new HashSet<>();
MostLikelyWordChooser wordChooser = new MostLikelyWordChooser(jochreSession);
List documentObservers = new ArrayList<>();
AltoXMLExporter altoXMLExporter = new AltoXMLExporter(writer, 4);
documentObservers.add(altoXMLExporter);
JochreDocumentGenerator documentGenerator = this.getDocumentGenerator(fileName, wordChooser, myPages, documentObservers, new ArrayList<>());
- InputStreamDocumentExtractor documentExtractor = new InputStreamDocumentExtractor(inputStream, fileName, documentGenerator);
+ ImageDocumentExtractor documentExtractor = new ImageDocumentExtractor(image, fileName, documentGenerator);
documentExtractor.extractDocument();
}
}
diff --git a/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java
index 36c5872..644b07c 100644
--- a/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java
+++ b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageDocumentExtractor.java
@@ -18,22 +18,18 @@
//////////////////////////////////////////////////////////////////////////////
package com.joliciel.jochre.doc;
-import java.awt.image.BufferedImage;
-import java.io.File;
-import java.io.FilenameFilter;
-
-import javax.imageio.ImageIO;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import com.joliciel.talismane.utils.Monitorable;
import com.joliciel.talismane.utils.MultiTaskProgressMonitor;
import com.joliciel.talismane.utils.ProgressMonitor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.imageio.ImageIO;
+import java.awt.image.BufferedImage;
+import java.io.InputStream;
/**
- * An interface for extracting a JochreDocument from an image File (jpeg, gif or
- * png).
+ * Extracts a single-page JochreDocument from an already-decoded image (a BufferedImage).
*
* @author Assaf Urieli
*
@@ -42,12 +38,14 @@ public class ImageDocumentExtractor implements Monitorable, Runnable {
private static final Logger LOG = LoggerFactory.getLogger(ImageDocumentExtractor.class);
private final SourceFileProcessor documentProcessor;
private MultiTaskProgressMonitor currentMonitor;
- private final File imageFile;
+ private final BufferedImage image;
+ private final String fileName;
private int pageNumber = 1;
- public ImageDocumentExtractor(File imageFile, SourceFileProcessor documentProcessor) {
+ public ImageDocumentExtractor(BufferedImage image, String fileName, SourceFileProcessor documentProcessor) {
this.documentProcessor = documentProcessor;
- this.imageFile = imageFile;
+ this.image = image;
+ this.fileName = fileName;
}
@Override
@@ -56,46 +54,31 @@ public void run() {
}
public JochreDocument extractDocument() {
- LOG.debug("ImageDocumentExtractorImpl.extractDocument");
+ LOG.debug("ImageDocumentExtractor.extractDocument");
try {
- File[] files = new File[1];
-
- if (imageFile.isDirectory()) {
- files = imageFile.listFiles(new FilenameFilter() {
-
- @Override
- public boolean accept(File dir, String name) {
- return (name.toLowerCase().endsWith(".png") || name.toLowerCase().endsWith(".jpg") || name.toLowerCase().endsWith(".jpeg")
- || name.toLowerCase().endsWith(".gif") || name.toLowerCase().endsWith(".tif") || name.toLowerCase().endsWith(".tiff"));
- }
- });
- } else {
- files[0] = imageFile;
- }
+
JochreDocument doc = this.documentProcessor.onDocumentStart();
- doc.setTotalPageCount(files.length);
+ doc.setTotalPageCount(1);
int currentPageNumber = this.pageNumber;
- for (File file : files) {
- JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++);
- BufferedImage image = ImageIO.read(file);
- String imageName = file.getName();
+ JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++);
- if (currentMonitor != null && documentProcessor instanceof Monitorable) {
- ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask();
- double percentAllotted = (1 / (double) (files.length));
- currentMonitor.startTask(monitor, percentAllotted);
- }
+ String imageName = this.fileName;
- documentProcessor.onImageFound(page, image, imageName, 0);
- if (currentMonitor != null && documentProcessor instanceof Monitorable) {
- currentMonitor.endTask();
- }
+ if (currentMonitor != null && documentProcessor instanceof Monitorable) {
+ ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask();
+ currentMonitor.startTask(monitor, 1.0);
+ }
- this.documentProcessor.onPageComplete(page);
+ documentProcessor.onImageFound(page, image, imageName, 0);
+ if (currentMonitor != null && documentProcessor instanceof Monitorable) {
+ currentMonitor.endTask();
}
+
+ this.documentProcessor.onPageComplete(page);
+
this.documentProcessor.onDocumentComplete(doc);
this.documentProcessor.onAnalysisComplete();
@@ -109,7 +92,7 @@ public boolean accept(File dir, String name) {
LOG.error("Exception while processing document", e);
throw new RuntimeException(e);
} finally {
- LOG.debug("Exit ImageDocumentExtractorImpl.extractDocument");
+ LOG.debug("Exit ImageDocumentExtractor.extractDocument");
}
}
diff --git a/jochre_core/src/main/java/com/joliciel/jochre/doc/InputStreamDocumentExtractor.java b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageFileDocumentExtractor.java
similarity index 57%
rename from jochre_core/src/main/java/com/joliciel/jochre/doc/InputStreamDocumentExtractor.java
rename to jochre_core/src/main/java/com/joliciel/jochre/doc/ImageFileDocumentExtractor.java
index 0c94bb5..b78e7e5 100644
--- a/jochre_core/src/main/java/com/joliciel/jochre/doc/InputStreamDocumentExtractor.java
+++ b/jochre_core/src/main/java/com/joliciel/jochre/doc/ImageFileDocumentExtractor.java
@@ -18,36 +18,36 @@
//////////////////////////////////////////////////////////////////////////////
package com.joliciel.jochre.doc;
-import com.joliciel.talismane.utils.Monitorable;
-import com.joliciel.talismane.utils.MultiTaskProgressMonitor;
-import com.joliciel.talismane.utils.ProgressMonitor;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FilenameFilter;
-import java.io.InputStream;
+
+import javax.imageio.ImageIO;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.joliciel.talismane.utils.Monitorable;
+import com.joliciel.talismane.utils.MultiTaskProgressMonitor;
+import com.joliciel.talismane.utils.ProgressMonitor;
/**
- * An interface for extracting a JochreDocument from an image input stream.
+ * Extracts a JochreDocument from an image File (png, jpeg, gif or tiff), or from
+ * a directory of such files, producing one page per image.
*
* @author Assaf Urieli
*
*/
-public class InputStreamDocumentExtractor implements Monitorable, Runnable {
- private static final Logger LOG = LoggerFactory.getLogger(InputStreamDocumentExtractor.class);
+public class ImageFileDocumentExtractor implements Monitorable, Runnable {
+ private static final Logger LOG = LoggerFactory.getLogger(ImageFileDocumentExtractor.class);
private final SourceFileProcessor documentProcessor;
private MultiTaskProgressMonitor currentMonitor;
- private final InputStream imageInputStream;
- private final String fileName;
+ private final File imageFile;
private int pageNumber = 1;
- public InputStreamDocumentExtractor(InputStream imageInputStream, String fileName, SourceFileProcessor documentProcessor) {
+ public ImageFileDocumentExtractor(File imageFile, SourceFileProcessor documentProcessor) {
this.documentProcessor = documentProcessor;
- this.imageInputStream = imageInputStream;
- this.fileName = fileName;
+ this.imageFile = imageFile;
}
@Override
@@ -56,32 +56,46 @@ public void run() {
}
public JochreDocument extractDocument() {
- LOG.debug("InputStreamDocumentExtractor.extractDocument");
+ LOG.debug("ImageFileDocumentExtractor.extractDocument");
try {
-
+ File[] files = new File[1];
+
+ if (imageFile.isDirectory()) {
+ files = imageFile.listFiles(new FilenameFilter() {
+
+ @Override
+ public boolean accept(File dir, String name) {
+ return (name.toLowerCase().endsWith(".png") || name.toLowerCase().endsWith(".jpg") || name.toLowerCase().endsWith(".jpeg")
+ || name.toLowerCase().endsWith(".gif") || name.toLowerCase().endsWith(".tif") || name.toLowerCase().endsWith(".tiff"));
+ }
+ });
+ } else {
+ files[0] = imageFile;
+ }
JochreDocument doc = this.documentProcessor.onDocumentStart();
- doc.setTotalPageCount(1);
+ doc.setTotalPageCount(files.length);
int currentPageNumber = this.pageNumber;
+ for (File file : files) {
+ JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++);
- JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++);
+ BufferedImage image = ImageIO.read(file);
+ String imageName = file.getName();
- BufferedImage image = ImageIO.read(this.imageInputStream);
- String imageName = this.fileName;
+ if (currentMonitor != null && documentProcessor instanceof Monitorable) {
+ ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask();
+ double percentAllotted = (1 / (double) (files.length));
+ currentMonitor.startTask(monitor, percentAllotted);
+ }
- if (currentMonitor != null && documentProcessor instanceof Monitorable) {
- ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask();
- currentMonitor.startTask(monitor, 1.0);
- }
+ documentProcessor.onImageFound(page, image, imageName, 0);
+ if (currentMonitor != null && documentProcessor instanceof Monitorable) {
+ currentMonitor.endTask();
+ }
- documentProcessor.onImageFound(page, image, imageName, 0);
- if (currentMonitor != null && documentProcessor instanceof Monitorable) {
- currentMonitor.endTask();
+ this.documentProcessor.onPageComplete(page);
}
-
- this.documentProcessor.onPageComplete(page);
-
this.documentProcessor.onDocumentComplete(doc);
this.documentProcessor.onAnalysisComplete();
@@ -95,7 +109,7 @@ public JochreDocument extractDocument() {
LOG.error("Exception while processing document", e);
throw new RuntimeException(e);
} finally {
- LOG.debug("Exit InputStreamDocumentExtractor.extractDocument");
+ LOG.debug("Exit ImageFileDocumentExtractor.extractDocument");
}
}
diff --git a/jochre_distribution/pom.xml b/jochre_distribution/pom.xml
index 45305d3..de56164 100644
--- a/jochre_distribution/pom.xml
+++ b/jochre_distribution/pom.xml
@@ -6,7 +6,7 @@
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3-SNAPSHOT
../jochre_parent
diff --git a/jochre_parent/pom.xml b/jochre_parent/pom.xml
index e829b6f..3168583 100644
--- a/jochre_parent/pom.xml
+++ b/jochre_parent/pom.xml
@@ -4,7 +4,7 @@
4.0.0
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3-SNAPSHOT
pom
Jochre
Java Optical CHaracter Recognition
diff --git a/jochre_search/pom.xml b/jochre_search/pom.xml
index 5bc08eb..fc72512 100644
--- a/jochre_search/pom.xml
+++ b/jochre_search/pom.xml
@@ -6,7 +6,7 @@
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3-SNAPSHOT
../jochre_parent
jar
diff --git a/jochre_search_webapp/pom.xml b/jochre_search_webapp/pom.xml
index b1ef207..51920ef 100644
--- a/jochre_search_webapp/pom.xml
+++ b/jochre_search_webapp/pom.xml
@@ -5,7 +5,7 @@
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3-SNAPSHOT
../jochre_parent
war
diff --git a/jochre_utils/pom.xml b/jochre_utils/pom.xml
index f15f201..7372d06 100644
--- a/jochre_utils/pom.xml
+++ b/jochre_utils/pom.xml
@@ -5,7 +5,7 @@
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3-SNAPSHOT
../jochre_parent
diff --git a/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java b/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java
index c5034fc..60a388b 100644
--- a/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java
+++ b/jochre_utils/src/main/java/com/joliciel/jochre/utils/graphics/ImageUtils.java
@@ -82,7 +82,7 @@ public static BufferedImage toBlackAndWhite(BufferedImage greyImage, int thresho
public static BufferedImage deepCopy(BufferedImage bi) {
ColorModel cm = bi.getColorModel();
boolean isAlphaPremultiplied = cm.isAlphaPremultiplied();
- WritableRaster raster = bi.copyData(null);
+ WritableRaster raster = bi.copyData(bi.getRaster().createCompatibleWritableRaster());
return new BufferedImage(cm, raster, isAlphaPremultiplied, null);
}
diff --git a/jochre_web/pom.xml b/jochre_web/pom.xml
index 7e62190..a624f51 100644
--- a/jochre_web/pom.xml
+++ b/jochre_web/pom.xml
@@ -5,7 +5,7 @@
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3-SNAPSHOT
../jochre_parent
war
diff --git a/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java b/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java
index e9916d9..af1b7e6 100644
--- a/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java
+++ b/jochre_web/src/main/java/com/joliciel/jochre/web/TextController.java
@@ -45,7 +45,7 @@
import com.joliciel.jochre.JochreSession;
import com.joliciel.jochre.doc.DocumentDao;
import com.joliciel.jochre.doc.DocumentObserver;
-import com.joliciel.jochre.doc.ImageDocumentExtractor;
+import com.joliciel.jochre.doc.ImageFileDocumentExtractor;
import com.joliciel.jochre.doc.JochreDocument;
import com.joliciel.jochre.doc.JochreDocumentGenerator;
import com.joliciel.jochre.doc.JochrePage;
@@ -306,7 +306,7 @@ public void doAfterCompose(Window window) throws Exception {
progressTimer.setRunning(true);
} else if (lowerCaseFileName.endsWith(".png") || lowerCaseFileName.endsWith(".jpg")
|| lowerCaseFileName.endsWith(".jpeg") || lowerCaseFileName.endsWith(".gif")) {
- ImageDocumentExtractor extractor = new ImageDocumentExtractor(currentFile, documentGenerator);
+ ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(currentFile, documentGenerator);
if (startPage >= 0)
extractor.setPageNumber(startPage);
this.progressMonitor = extractor.monitorTask();
diff --git a/jochre_yiddish/pom.xml b/jochre_yiddish/pom.xml
index 635cc91..0f53af8 100644
--- a/jochre_yiddish/pom.xml
+++ b/jochre_yiddish/pom.xml
@@ -6,7 +6,7 @@
com.joliciel.jochre
jochre-parent
- 2.6.2
+ 2.6.3-SNAPSHOT
../jochre_parent
jar