Skip to content

Commit

Permalink
issue #550: PDF invalid characters
Browse files (browse the repository at this point in the history)
  • Loading branch information
leiblix committed Aug 4, 2017
1 parent a095143 commit 53e27ba
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
2 changes: 1 addition & 1 deletion common/build.gradle
Original file line number | Diff line number | Diff line change
Expand Up @@ -46,7 +46,7 @@ dependencies {
compile name:"djvuframe-0.8.09"
compile name:"javadjvu-0.8.09"
compile "commons-configuration:commons-configuration:1.6"
- compile "org.apache.commons:commons-lang3:3.2.1"
+ compile "org.apache.commons:commons-lang3:3.5"

compile "com.yourmediashelf.fedora.client:fedora-client-core:0.7"
compile "com.yourmediashelf.fedora.client:fedora-client-messaging:0.7"
Expand Down
9 changes: 7 additions & 2 deletions indexer/src/cz/incad/kramerius/indexer/ExtendedFields.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,6 +9,7 @@
import cz.incad.kramerius.security.impl.criteria.mw.DatesParser;
import cz.incad.kramerius.utils.DCUtils;
import cz.incad.kramerius.utils.conf.KConfiguration;
+ import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.pdfparser.PDFParser;
Expand All @@ -25,6 +26,10 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
+ import java.nio.charset.Charset;
+ import java.nio.charset.CharsetDecoder;
+ import java.nio.charset.CharsetEncoder;
+ import java.nio.charset.CodingErrorAction;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
Expand Down Expand Up @@ -115,7 +120,7 @@ public void setPDFDocument(String pid) throws Exception {

File pdfImg = File.createTempFile(pid,null);
pdfImg.deleteOnExit();
- java.nio.file.Files.copy(is,pdfImg.toPath(),java.nio.file.StandardCopyOption.REPLACE_EXISTING);
+ FileUtils.copyInputStreamToFile(is, pdfImg);


if (KConfiguration.getInstance().getConfiguration().getBoolean("convert.pdf.loadNonSeq", false)){
Expand Down Expand Up @@ -159,7 +164,7 @@ private String getPDFPage(int page) throws Exception {
stripper.setEndPage(page);
}

- return StringEscapeUtils.escapeXml(stripper.getText(pdDoc));
+ return StringEscapeUtils.escapeXml10(stripper.getText(pdDoc));
} catch (Exception ex) {
return "";
}
Expand Down

0 comments on commit 53e27ba

Please sign in to comment.