Skip to content
This repository has been archived by the owner on Jul 26, 2024. It is now read-only.

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
# Conflicts:
#	jochre_core/pom.xml
#	jochre_core/src/main/java/com/joliciel/jochre/Jochre.java
#	jochre_distribution/pom.xml
#	jochre_parent/pom.xml
#	jochre_search/pom.xml
#	jochre_search_webapp/pom.xml
#	jochre_utils/pom.xml
#	jochre_web/pom.xml
#	jochre_yiddish/pom.xml
  • Loading branch information
urieli committed Sep 13, 2023
2 parents ace9a69 + 37186af commit beb165b
Show file tree
Hide file tree
Showing 13 changed files with 184 additions and 60 deletions.
2 changes: 1 addition & 1 deletion jochre_core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>com.joliciel.jochre</groupId>
<artifactId>jochre-parent</artifactId>
<version>2.6.2</version>
<version>2.6.3</version>
<relativePath>../jochre_parent</relativePath>
</parent>
<build>
Expand Down
15 changes: 11 additions & 4 deletions jochre_core/src/main/java/com/joliciel/jochre/Jochre.java
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

import javax.imageio.ImageIO;

/**
* Class encapsulating the various top-level Jochre commands and command-line
* interface.
Expand Down Expand Up @@ -1091,10 +1093,10 @@ public void doCommandAnalyse(File sourceFile, MostLikelyWordChooser wordChooser,
|| sourceFile.getName().toLowerCase().endsWith(".jpg") || sourceFile.getName().toLowerCase().endsWith(".jpeg")
|| sourceFile.getName().toLowerCase().endsWith(".gif") || sourceFile.getName().toLowerCase().endsWith(".tif")
|| sourceFile.getName().toLowerCase().endsWith(".tiff")) {
ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator);
ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(sourceFile, documentGenerator);
extractor.extractDocument();
} else if (sourceFile.isDirectory()) {
ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator);
ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(sourceFile, documentGenerator);
extractor.extractDocument();
} else {
throw new RuntimeException("Unrecognised file extension");
Expand Down Expand Up @@ -1337,7 +1339,7 @@ public void doCommandSegment(String filename, String userFriendlyName, File outp
pdfDocumentProcessor.process();
} else if (filename.toLowerCase().endsWith(".png") || filename.toLowerCase().endsWith(".jpg")
|| filename.toLowerCase().endsWith(".jpeg") || filename.toLowerCase().endsWith(".gif")) {
ImageDocumentExtractor extractor = new ImageDocumentExtractor(file, jochreDocumentGenerator);
ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(file, jochreDocumentGenerator);
extractor.extractDocument();
} else {
throw new RuntimeException("Unrecognised file extension");
Expand Down Expand Up @@ -1534,13 +1536,18 @@ public void imageFileToAlto4(File sourceFile, Writer writer) throws IOException
}

/**
 * Decodes an image from the given stream and hands it to
 * {@link #imageToAlto4(BufferedImage, String, Writer)}.
 * <p>
 * The stream is read through {@code ImageIO.read}, which does not close it;
 * closing remains the caller's responsibility.
 *
 * @param inputStream
 *          stream holding the encoded image
 * @param fileName
 *          name passed on to the analysis, also used in the error message
 * @param writer
 *          writer passed on to {@code imageToAlto4}
 * @throws IOException
 *           if the stream cannot be read, or if its content cannot be decoded
 *           as an image by any registered ImageIO reader
 */
public void imageInputStreamToAlto4(InputStream inputStream, String fileName, Writer writer) throws IOException {
  BufferedImage image = ImageIO.read(inputStream);
  if (image == null) {
    // ImageIO.read signals "no reader recognised this data" by returning null
    // instead of throwing; fail here with a clear message rather than letting
    // a null image travel into the analysis.
    throw new IOException("Unable to decode image: " + fileName);
  }
  this.imageToAlto4(image, fileName, writer);
}

/**
 * Runs the analysis on a single in-memory image, with an
 * {@link AltoXMLExporter} built on the given writer registered as the only
 * document observer.
 *
 * @param image
 *          the image to analyse
 * @param fileName
 *          name given to the document generator and to the extractor
 * @param writer
 *          writer handed to the {@code AltoXMLExporter}
 * @throws IOException
 *           declared for callers. NOTE(review): no statement visible here
 *           throws it directly - presumably it comes from the exporter or the
 *           document generator; confirm.
 */
public void imageToAlto4(BufferedImage image, String fileName, Writer writer) throws IOException {
  // Empty page set. NOTE(review): presumably means "no page restriction" -
  // confirm against getDocumentGenerator.
  final Set<Integer> myPages = new HashSet<>();
  MostLikelyWordChooser wordChooser = new MostLikelyWordChooser(jochreSession);

  List<DocumentObserver> documentObservers = new ArrayList<>();
  // NOTE(review): the second argument looks like the Alto schema version - confirm.
  AltoXMLExporter altoXMLExporter = new AltoXMLExporter(writer, 4);
  documentObservers.add(altoXMLExporter);

  JochreDocumentGenerator documentGenerator = this.getDocumentGenerator(fileName, wordChooser, myPages, documentObservers, new ArrayList<>());

  // The image is already decoded, so the BufferedImage-based extractor is used;
  // this method has no input stream to hand to a stream-based extractor.
  ImageDocumentExtractor documentExtractor = new ImageDocumentExtractor(image, fileName, documentGenerator);
  documentExtractor.extractDocument();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,18 @@
//////////////////////////////////////////////////////////////////////////////
package com.joliciel.jochre.doc;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FilenameFilter;

import javax.imageio.ImageIO;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.joliciel.talismane.utils.Monitorable;
import com.joliciel.talismane.utils.MultiTaskProgressMonitor;
import com.joliciel.talismane.utils.ProgressMonitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.InputStream;

/**
* An interface for extracting a JochreDocument from an image File (jpeg, gif or
* png).
* A class for extracting a JochreDocument from a single in-memory image (a BufferedImage).
*
* @author Assaf Urieli
*
Expand All @@ -42,12 +38,14 @@ public class ImageDocumentExtractor implements Monitorable, Runnable {
private static final Logger LOG = LoggerFactory.getLogger(ImageDocumentExtractor.class);
private final SourceFileProcessor documentProcessor;
private MultiTaskProgressMonitor currentMonitor;
private final File imageFile;
private final BufferedImage image;
private final String fileName;
private int pageNumber = 1;

public ImageDocumentExtractor(File imageFile, SourceFileProcessor documentProcessor) {
public ImageDocumentExtractor(BufferedImage image, String fileName, SourceFileProcessor documentProcessor) {
this.documentProcessor = documentProcessor;
this.imageFile = imageFile;
this.image = image;
this.fileName = fileName;
}

@Override
Expand All @@ -56,46 +54,31 @@ public void run() {
}

public JochreDocument extractDocument() {
LOG.debug("ImageDocumentExtractorImpl.extractDocument");
LOG.debug("InputStreamDocumentExtractor.extractDocument");
try {
File[] files = new File[1];

if (imageFile.isDirectory()) {
files = imageFile.listFiles(new FilenameFilter() {

@Override
public boolean accept(File dir, String name) {
return (name.toLowerCase().endsWith(".png") || name.toLowerCase().endsWith(".jpg") || name.toLowerCase().endsWith(".jpeg")
|| name.toLowerCase().endsWith(".gif") || name.toLowerCase().endsWith(".tif") || name.toLowerCase().endsWith(".tiff"));
}
});
} else {
files[0] = imageFile;
}


JochreDocument doc = this.documentProcessor.onDocumentStart();
doc.setTotalPageCount(files.length);
doc.setTotalPageCount(1);

int currentPageNumber = this.pageNumber;
for (File file : files) {
JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++);

BufferedImage image = ImageIO.read(file);
String imageName = file.getName();
JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++);

if (currentMonitor != null && documentProcessor instanceof Monitorable) {
ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask();
double percentAllotted = (1 / (double) (files.length));
currentMonitor.startTask(monitor, percentAllotted);
}
String imageName = this.fileName;

documentProcessor.onImageFound(page, image, imageName, 0);
if (currentMonitor != null && documentProcessor instanceof Monitorable) {
currentMonitor.endTask();
}
if (currentMonitor != null && documentProcessor instanceof Monitorable) {
ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask();
currentMonitor.startTask(monitor, 1.0);
}

this.documentProcessor.onPageComplete(page);
documentProcessor.onImageFound(page, image, imageName, 0);
if (currentMonitor != null && documentProcessor instanceof Monitorable) {
currentMonitor.endTask();
}

this.documentProcessor.onPageComplete(page);

this.documentProcessor.onDocumentComplete(doc);
this.documentProcessor.onAnalysisComplete();

Expand All @@ -109,7 +92,7 @@ public boolean accept(File dir, String name) {
LOG.error("Exception while processing document", e);
throw new RuntimeException(e);
} finally {
LOG.debug("Exit ImageDocumentExtractorImpl.extractDocument");
LOG.debug("Exit InputStreamDocumentExtractor.extractDocument");
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
///////////////////////////////////////////////////////////////////////////////
//Copyright (C) 2012 Assaf Urieli
//
//This file is part of Jochre.
//
//Jochre is free software: you can redistribute it and/or modify
//it under the terms of the GNU Affero General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//
//Jochre is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU Affero General Public License for more details.
//
//You should have received a copy of the GNU Affero General Public License
//along with Jochre. If not, see <http://www.gnu.org/licenses/>.
//////////////////////////////////////////////////////////////////////////////
package com.joliciel.jochre.doc;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FilenameFilter;
import java.util.Arrays;
import java.util.Locale;

import javax.imageio.ImageIO;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.joliciel.talismane.utils.Monitorable;
import com.joliciel.talismane.utils.MultiTaskProgressMonitor;
import com.joliciel.talismane.utils.ProgressMonitor;

/**
 * Extracts a JochreDocument from a single image file, or from a directory, in
 * which case each file whose name ends in .png, .jpg, .jpeg, .gif, .tif or
 * .tiff (ignoring case) becomes one page of the document.
 *
 * @author Assaf Urieli
 *
 */
public class ImageFileDocumentExtractor implements Monitorable, Runnable {
  private static final Logger LOG = LoggerFactory.getLogger(ImageFileDocumentExtractor.class);

  /** Lower-case file name endings accepted when scanning a directory. */
  private static final String[] IMAGE_EXTENSIONS = { ".png", ".jpg", ".jpeg", ".gif", ".tif", ".tiff" };

  private final SourceFileProcessor documentProcessor;
  private MultiTaskProgressMonitor currentMonitor;
  private final File imageFile;
  private int pageNumber = 1;

  /**
   * @param imageFile
   *          an image file, or a directory containing image files
   * @param documentProcessor
   *          receives the document, page and image events raised during
   *          extraction
   */
  public ImageFileDocumentExtractor(File imageFile, SourceFileProcessor documentProcessor) {
    this.documentProcessor = documentProcessor;
    this.imageFile = imageFile;
  }

  /**
   * Runs {@link #extractDocument()} and discards the returned document.
   */
  @Override
  public void run() {
    this.extractDocument();
  }

  /**
   * Reads every image and feeds it to the document processor, one page per
   * image, numbering pages consecutively from {@link #getPageNumber()}.
   *
   * @return the document returned by the processor's {@code onDocumentStart}
   * @throws RuntimeException
   *           wrapping any exception raised while listing, reading or
   *           processing; the exception is also set on the current monitor,
   *           if there is one
   */
  public JochreDocument extractDocument() {
    LOG.debug("ImageFileDocumentExtractor.extractDocument");
    try {
      File[] files = this.listImageFiles();

      JochreDocument doc = this.documentProcessor.onDocumentStart();
      doc.setTotalPageCount(files.length);

      int currentPageNumber = this.pageNumber;
      for (File file : files) {
        JochrePage page = this.documentProcessor.onPageStart(currentPageNumber++);

        BufferedImage image = ImageIO.read(file);
        if (image == null) {
          // ImageIO.read returns null when no registered reader can decode
          // the file; stop here instead of passing a null image on.
          throw new IllegalArgumentException("Unable to decode image file: " + file.getPath());
        }
        String imageName = file.getName();

        boolean monitored = currentMonitor != null && documentProcessor instanceof Monitorable;
        if (monitored) {
          ProgressMonitor monitor = ((Monitorable) documentProcessor).monitorTask();
          // every image gets an equal share of the overall progress
          double percentAllotted = 1 / (double) files.length;
          currentMonitor.startTask(monitor, percentAllotted);
        }

        documentProcessor.onImageFound(page, image, imageName, 0);
        if (monitored) {
          currentMonitor.endTask();
        }

        this.documentProcessor.onPageComplete(page);
      }
      this.documentProcessor.onDocumentComplete(doc);
      this.documentProcessor.onAnalysisComplete();

      if (currentMonitor != null) {
        currentMonitor.setFinished(true);
      }
      return doc;
    } catch (Exception e) {
      LOG.debug("Exception occurred. Have monitor? {}", currentMonitor);
      if (currentMonitor != null) {
        currentMonitor.setException(e);
      }
      LOG.error("Exception while processing document", e);
      throw new RuntimeException(e);
    } finally {
      LOG.debug("Exit ImageFileDocumentExtractor.extractDocument");
    }
  }

  /**
   * Returns the files to process: the image file itself, or, for a directory,
   * its image files sorted by path.
   */
  private File[] listImageFiles() {
    if (!imageFile.isDirectory()) {
      return new File[] { imageFile };
    }

    File[] files = imageFile.listFiles(new FilenameFilter() {
      @Override
      public boolean accept(File dir, String name) {
        // Locale.ROOT keeps the match independent of the default locale
        // (e.g. Turkish, where "I" does not lower-case to "i").
        String lowerCaseName = name.toLowerCase(Locale.ROOT);
        for (String extension : IMAGE_EXTENSIONS) {
          if (lowerCaseName.endsWith(extension)) {
            return true;
          }
        }
        return false;
      }
    });
    if (files == null) {
      // listFiles returns null on an I/O error or when the directory
      // disappeared after the isDirectory() check
      throw new IllegalStateException("Unable to list directory: " + imageFile.getPath());
    }

    // listFiles gives no ordering guarantee; sort so that page numbers are
    // assigned reproducibly
    Arrays.sort(files);
    return files;
  }

  /**
   * Creates a fresh progress monitor, replacing any previous one, and returns
   * it.
   */
  @Override
  public ProgressMonitor monitorTask() {
    currentMonitor = new MultiTaskProgressMonitor();

    return currentMonitor;
  }

  /**
   * The page number to assign to the first image; subsequent images are
   * numbered consecutively. Defaults to 1.
   */
  public int getPageNumber() {
    return pageNumber;
  }

  public void setPageNumber(int pageNumber) {
    this.pageNumber = pageNumber;
  }

}
2 changes: 1 addition & 1 deletion jochre_distribution/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>com.joliciel.jochre</groupId>
<artifactId>jochre-parent</artifactId>
<version>2.6.2</version>
<version>2.6.3</version>
<relativePath>../jochre_parent</relativePath>
</parent>
<build>
Expand Down
2 changes: 1 addition & 1 deletion jochre_parent/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.joliciel.jochre</groupId>
<artifactId>jochre-parent</artifactId>
<version>2.6.2</version>
<version>2.6.3</version>
<packaging>pom</packaging>
<name>Jochre</name>
<description>Java Optical CHaracter Recognition</description>
Expand Down
2 changes: 1 addition & 1 deletion jochre_search/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>com.joliciel.jochre</groupId>
<artifactId>jochre-parent</artifactId>
<version>2.6.2</version>
<version>2.6.3</version>
<relativePath>../jochre_parent</relativePath>
</parent>
<packaging>jar</packaging>
Expand Down
2 changes: 1 addition & 1 deletion jochre_search_webapp/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>com.joliciel.jochre</groupId>
<artifactId>jochre-parent</artifactId>
<version>2.6.2</version>
<version>2.6.3</version>
<relativePath>../jochre_parent</relativePath>
</parent>
<packaging>war</packaging>
Expand Down
2 changes: 1 addition & 1 deletion jochre_utils/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>com.joliciel.jochre</groupId>
<artifactId>jochre-parent</artifactId>
<version>2.6.2</version>
<version>2.6.3</version>
<relativePath>../jochre_parent</relativePath>
</parent>
<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ public static BufferedImage toBlackAndWhite(BufferedImage greyImage, int thresho
/**
 * Returns a copy of the given image that has its own pixel data, so that
 * changes to the copy do not show up in the original.
 *
 * @param bi
 *          the image to copy
 * @return a new image with the same colour model and the same pixel values
 */
public static BufferedImage deepCopy(BufferedImage bi) {
  ColorModel cm = bi.getColorModel();
  boolean isAlphaPremultiplied = cm.isAlphaPremultiplied();
  // Copy into a fresh raster of the image's own size anchored at (0,0).
  // Letting copyData allocate the raster (copyData(null)) yields, for images
  // obtained through getSubimage, a raster with a non-zero origin, which the
  // BufferedImage constructor below rejects.
  WritableRaster raster = bi.copyData(bi.getRaster().createCompatibleWritableRaster());
  return new BufferedImage(cm, raster, isAlphaPremultiplied, null);
}

Expand Down
2 changes: 1 addition & 1 deletion jochre_web/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>com.joliciel.jochre</groupId>
<artifactId>jochre-parent</artifactId>
<version>2.6.2</version>
<version>2.6.3</version>
<relativePath>../jochre_parent</relativePath>
</parent>
<packaging>war</packaging>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
import com.joliciel.jochre.JochreSession;
import com.joliciel.jochre.doc.DocumentDao;
import com.joliciel.jochre.doc.DocumentObserver;
import com.joliciel.jochre.doc.ImageDocumentExtractor;
import com.joliciel.jochre.doc.ImageFileDocumentExtractor;
import com.joliciel.jochre.doc.JochreDocument;
import com.joliciel.jochre.doc.JochreDocumentGenerator;
import com.joliciel.jochre.doc.JochrePage;
Expand Down Expand Up @@ -306,7 +306,7 @@ public void doAfterCompose(Window window) throws Exception {
progressTimer.setRunning(true);
} else if (lowerCaseFileName.endsWith(".png") || lowerCaseFileName.endsWith(".jpg")
|| lowerCaseFileName.endsWith(".jpeg") || lowerCaseFileName.endsWith(".gif")) {
ImageDocumentExtractor extractor = new ImageDocumentExtractor(currentFile, documentGenerator);
ImageFileDocumentExtractor extractor = new ImageFileDocumentExtractor(currentFile, documentGenerator);
if (startPage >= 0)
extractor.setPageNumber(startPage);
this.progressMonitor = extractor.monitorTask();
Expand Down
Loading

0 comments on commit beb165b

Please sign in to comment.