diff --git a/jpylyzer-plugin/pom.xml b/jpylyzer-plugin/pom.xml
new file mode 100644
index 0000000..42a2e03
--- /dev/null
+++ b/jpylyzer-plugin/pom.xml
@@ -0,0 +1,15 @@
+ + + + verapdf-library-samples + org.verapdf + 1.0-SNAPSHOT + + 4.0.0 + + jpylyzer-plugin + + +
\ No newline at end of file
diff --git a/jpylyzer-plugin/src/main/java/org/verapdf/JpylyzerConfig.java b/jpylyzer-plugin/src/main/java/org/verapdf/JpylyzerConfig.java
new file mode 100644
index 0000000..167b164
--- /dev/null
+++ b/jpylyzer-plugin/src/main/java/org/verapdf/JpylyzerConfig.java
@@ -0,0 +1,165 @@
+/**
+ *
+ */
+package org.verapdf;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBException;
+import javax.xml.bind.Marshaller;
+import javax.xml.bind.Unmarshaller;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.util.Objects;
+
+/**
+ * Immutable settings of the Jpylyzer plugin, read from and written to XML via JAXB.
+ * A {@code null} path means "not configured".
+ *
+ * @author Maksim Bezrukov
+ */
+@XmlRootElement(namespace = "http://www.verapdf.org/JpylyzerConfig", name = "jpylyzerConfig")
+final class JpylyzerConfig {
+
+    @XmlElement
+    private final String cliPath;
+    @XmlElement
+    private final String outFolder;
+    @XmlElement
+    private final boolean isVerbose;
+
+    /**
+     * No-arg constructor for JAXB unmarshalling. It starts from the same values as
+     * {@link #defaultInstance()}, so an element that is absent from the XML reads
+     * as "not configured" instead of as an empty path.
+     */
+    private JpylyzerConfig() {
+        this(null, null, false);
+    }
+
+    /**
+     * The parameter order must stay the same as in {@link #fromValues}: both paths
+     * are plain strings, so the compiler cannot catch a swap.
+     */
+    private JpylyzerConfig(String cliPath, String outFolder, boolean isVerbose) {
+        this.cliPath = cliPath;
+        this.outFolder = outFolder;
+        this.isVerbose = isVerbose;
+    }
+
+    /** @return the configured output folder, or {@code null} if none is set */
+    public String getOutFolder() {
+        return outFolder;
+    }
+
+    /** @return the value of the verbose flag */
+    public boolean isVerbose() {
+        return isVerbose;
+    }
+
+    /** @return the configured path of the jpylyzer executable, or {@code null} if none is set */
+    public String getCliPath() {
+        return cliPath;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        JpylyzerConfig other = (JpylyzerConfig) o;
+        return isVerbose == other.isVerbose
+                && Objects.equals(cliPath, other.cliPath)
+                && Objects.equals(outFolder, other.outFolder);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(cliPath, outFolder, isVerbose);
+    }
+
+    /** @return a configuration with no paths set and the verbose flag switched off */
+    static JpylyzerConfig defaultInstance() {
+        return new JpylyzerConfig(null, null, false);
+    }
+
+    /**
+     * @param cliPath   path of the jpylyzer executable, may be {@code null}
+     * @param outFolder folder for the jpylyzer output, may be {@code null}
+     * @param isVerbose value of the verbose flag
+     * @return a configuration holding exactly the given values
+     */
+    static JpylyzerConfig fromValues(final String cliPath, final String outFolder, final boolean isVerbose) {
+        return new JpylyzerConfig(cliPath, outFolder, isVerbose);
+    }
+
+    /**
+     * Marshals a configuration to an XML string.
+     *
+     * @param toConvert the configuration to marshal
+     * @param prettyXml whether to format the output; {@code null} counts as {@code false}
+     * @return the XML text
+     * @throws JAXBException if marshalling fails
+     * @throws IOException   if the in-memory writer cannot be closed
+     */
+    static String toXml(final JpylyzerConfig toConvert, Boolean prettyXml)
+            throws JAXBException, IOException {
+        try (StringWriter writer = new StringWriter()) {
+            toXml(toConvert, writer, prettyXml);
+            return writer.toString();
+        }
+    }
+
+    /** Unmarshals a configuration from XML text. */
+    static JpylyzerConfig fromXml(final String toConvert)
+            throws JAXBException {
+        try (StringReader reader = new StringReader(toConvert)) {
+            return fromXml(reader);
+        }
+    }
+
+    /** Marshals {@code toConvert} to {@code stream}; this method does not close the stream. */
+    static void toXml(final JpylyzerConfig toConvert,
+                      final OutputStream stream, Boolean prettyXml) throws JAXBException {
+        getMarshaller(prettyXml).marshal(toConvert, stream);
+    }
+
+    /** Unmarshals a configuration from {@code toConvert}; this method does not close the stream. */
+    static JpylyzerConfig fromXml(final InputStream toConvert)
+            throws JAXBException {
+        return (JpylyzerConfig) getUnmarshaller().unmarshal(toConvert);
+    }
+
+    /** Marshals {@code toConvert} to {@code writer}; this method does not close the writer. */
+    static void toXml(final JpylyzerConfig toConvert, final Writer writer,
+                      Boolean prettyXml) throws JAXBException {
+        getMarshaller(prettyXml).marshal(toConvert, writer);
+    }
+
+    /** Unmarshals a configuration from {@code toConvert}; this method does not close the reader. */
+    static JpylyzerConfig fromXml(final Reader toConvert)
+            throws JAXBException {
+        return (JpylyzerConfig) getUnmarshaller().unmarshal(toConvert);
+    }
+
+    private static Unmarshaller getUnmarshaller() throws JAXBException {
+        return JAXBContext.newInstance(JpylyzerConfig.class).createUnmarshaller();
+    }
+
+    private static Marshaller getMarshaller(Boolean setPretty)
+            throws JAXBException {
+        Marshaller marshaller = JAXBContext.newInstance(JpylyzerConfig.class).createMarshaller();
+        // Boolean.TRUE.equals() maps a null flag to "not formatted" instead of handing null to JAXB.
+        marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE.equals(setPretty));
+        return marshaller;
+    }
+}
diff --git a/jpylyzer-plugin/src/main/java/org/verapdf/JpylyzerExtractor.java b/jpylyzer-plugin/src/main/java/org/verapdf/JpylyzerExtractor.java
new file mode 100644
index 0000000..fd2bc5e
--- /dev/null
+++ b/jpylyzer-plugin/src/main/java/org/verapdf/JpylyzerExtractor.java
@@ -0,0 +1,243 @@
+package org.verapdf;
+
+import org.verapdf.core.FeatureParsingException;
+import org.verapdf.features.AbstractImageFeaturesExtractor;
+import org.verapdf.features.ImageFeaturesData;
+import org.verapdf.features.tools.FeatureTreeNode;
+import org.w3c.dom.Document;
+import org.xml.sax.SAXException;
+
+import javax.xml.bind.JAXBException;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPathExpression;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Image features extractor that runs the external jpylyzer tool on images with a
+ * {@code JPXDecode} filter and reports the path of jpylyzer's XML output together
+ * with the value of its {@code isValidJP2} element.
+ *
+ * @author Maksim Bezrukov
+ */
+public class JpylyzerExtractor extends AbstractImageFeaturesExtractor {
+
+    private static final String ERROR_NODE = "error";
+
+    /**
+     * Runs jpylyzer on the image stream and collects the result as feature nodes.
+     * I/O problems and interruption are reported as {@code error} nodes, not thrown.
+     *
+     * @param imageFeaturesData the image whose filters and stream are inspected
+     * @return the collected nodes, or {@code null} if the image has no
+     *         {@code JPXDecode} filter
+     * @throws IllegalStateException if a feature node cannot be built
+     */
+    @Override
+    public List<FeatureTreeNode> getImageFeatures(ImageFeaturesData imageFeaturesData) {
+        if (!hasJpxFilter(imageFeaturesData)) {
+            // null, not an empty list: kept as is because callers outside this file may rely on it.
+            return null;
+        }
+        List<FeatureTreeNode> result = new ArrayList<>();
+        try {
+            try {
+                JpylyzerConfig config = getConfig(result);
+                File temp = generateTempFile(imageFeaturesData.getStream(), "jpx");
+                try {
+                    exec(result, config, temp);
+                } finally {
+                    // The copy is only needed while jpylyzer runs. If delete() fails,
+                    // the deleteOnExit() registered in generateTempFile still applies.
+                    temp.delete();
+                }
+            } catch (InterruptedException e) {
+                // Restore the interrupt flag so the calling thread can still observe it.
+                Thread.currentThread().interrupt();
+                addNode(result, ERROR_NODE, "Error in execution. Error message: " + e.getMessage());
+            } catch (IOException e) {
+                addNode(result, ERROR_NODE, "Error in execution. Error message: " + e.getMessage());
+            }
+        } catch (FeatureParsingException e) {
+            throw new IllegalStateException(e);
+        }
+        return result;
+    }
+
+    /** Tells whether any filter of the image is named {@code JPXDecode}. */
+    private static boolean hasJpxFilter(ImageFeaturesData imageFeaturesData) {
+        for (ImageFeaturesData.Filter filter : imageFeaturesData.getFilters()) {
+            if ("JPXDecode".equals(filter.getName())) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /** Appends a root node with the given name and value to {@code nodes}. */
+    private static void addNode(List<FeatureTreeNode> nodes, String name, String value)
+            throws FeatureParsingException {
+        FeatureTreeNode node = FeatureTreeNode.createRootNode(name);
+        node.setValue(value);
+        nodes.add(node);
+    }
+
+    /** Writes {@code stream} to a new file in the plugin's temp folder. */
+    private File generateTempFile(byte[] stream, String name) throws IOException {
+        // File.createTempFile rejects prefixes shorter than three characters.
+        String prefix = (name == null || name.length() < 3) ? "jpx" : name;
+        File temp = File.createTempFile(prefix, "", getTempFolder());
+        temp.deleteOnExit();
+        // try-with-resources closes the file even when write() fails.
+        try (OutputStream out = new FileOutputStream(temp)) {
+            out.write(stream);
+        }
+        return temp;
+    }
+
+    /**
+     * Runs jpylyzer on {@code temp} and appends the {@code resultPath} and
+     * {@code isValidJP2} nodes (or {@code error} nodes) to {@code nodes}.
+     */
+    private void exec(List<FeatureTreeNode> nodes, JpylyzerConfig config, File temp)
+            throws InterruptedException, FeatureParsingException, IOException {
+        String scriptPath = getScriptPath(config);
+        if (scriptPath == null) {
+            addNode(nodes, ERROR_NODE, "Can not obtain jpylyzer script or binary");
+            return;
+        }
+        List<String> command = new ArrayList<>();
+        command.add(scriptPath);
+        if (config.isVerbose()) {
+            command.add("--verbose");
+        }
+        command.add(temp.getCanonicalPath());
+
+        // Resolve the output file before starting jpylyzer, so that a failure here
+        // cannot leave a child process running.
+        File out = getOutFile(config, nodes);
+        ProcessBuilder builder = new ProcessBuilder(command);
+        // stdout goes straight to the report file. stderr is inherited instead of
+        // being left in an unread pipe, which could block jpylyzer once it fills up.
+        builder.redirectOutput(out);
+        builder.redirectError(ProcessBuilder.Redirect.INHERIT);
+        Process pr = builder.start();
+        try {
+            pr.getOutputStream().close(); // nothing is ever sent to jpylyzer's stdin
+            pr.waitFor();
+        } finally {
+            pr.destroy(); // stops the child if the wait was interrupted
+        }
+        addNode(nodes, "resultPath", out.getCanonicalPath());
+
+        try {
+            addNode(nodes, "isValidJP2", getXMLNodeValue("//jpylyzer/isValidJP2", out));
+        } catch (ParserConfigurationException | SAXException | XPathExpressionException e) {
+            addNode(nodes, ERROR_NODE, "Error in obtaining validation result. Error message: " + e.getMessage());
+        }
+    }
+
+    /** Evaluates {@code xPath} against the XML file and returns the result as a string. */
+    private static String getXMLNodeValue(String xPath, File xml)
+            throws ParserConfigurationException, IOException, SAXException, XPathExpressionException {
+        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+        // The report describes an image taken from the file being processed, so it is
+        // treated as untrusted: DTDs (and with them external entities) are refused.
+        factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
+        factory.setExpandEntityReferences(false);
+        DocumentBuilder builder = factory.newDocumentBuilder();
+        Document document = builder.parse(xml);
+        XPathExpression xp = XPathFactory.newInstance().newXPath().compile(xPath);
+        return xp.evaluate(document);
+    }
+
+    /**
+     * Creates the file that receives jpylyzer's output: in the configured out folder
+     * if it is a directory, otherwise in the plugin's temp folder. A configured path
+     * that is not a directory additionally adds an {@code error} node.
+     */
+    private static File getOutFile(JpylyzerConfig config, List<FeatureTreeNode> nodes)
+            throws FeatureParsingException, IOException {
+        String configured = config.getOutFolder();
+        if (configured != null) {
+            File outFolder = new File(configured);
+            if (outFolder.isDirectory()) {
+                return getOutFileInFolder(outFolder);
+            }
+            addNode(nodes, ERROR_NODE, "Config file contains out folder path but it doesn't link a directory.");
+        }
+        return getOutFileInFolder(getTempFolder());
+    }
+
+    /** Returns the plugin's folder under {@code java.io.tmpdir}, creating it if needed. */
+    private static File getTempFolder() throws IOException {
+        File tempFolder = new File(System.getProperty("java.io.tmpdir"), "veraPDFJpylyzerPluginTemp");
+        // The last isDirectory() covers another thread or process creating the folder concurrently.
+        if (!tempFolder.isDirectory() && !tempFolder.mkdirs() && !tempFolder.isDirectory()) {
+            throw new IOException("Can not create temp folder " + tempFolder.getAbsolutePath());
+        }
+        tempFolder.deleteOnExit();
+        return tempFolder;
+    }
+
+    private static File getOutFileInFolder(File folder) throws IOException {
+        File out = File.createTempFile("veraPDF_Jpylyzer_Plugin_out", ".xml", folder);
+        out.deleteOnExit();
+        return out;
+    }
+
+    /**
+     * Loads {@code config.xml} from the plugin folder. Falls back to the default
+     * configuration when the file is missing or unreadable; a file that cannot be
+     * parsed adds an {@code error} node and also yields the default.
+     */
+    private JpylyzerConfig getConfig(List<FeatureTreeNode> nodes) throws FeatureParsingException {
+        JpylyzerConfig config = JpylyzerConfig.defaultInstance();
+        File conf = getConfigFile();
+        if (conf.isFile() && conf.canRead()) {
+            // try-with-resources so the config stream is always closed.
+            try (InputStream in = new FileInputStream(conf)) {
+                config = JpylyzerConfig.fromXml(in);
+            } catch (JAXBException | IOException e) {
+                addNode(nodes, ERROR_NODE, "Config file contains wrong syntax. Error message: " + e.getMessage());
+            }
+        }
+        return config;
+    }
+
+    private File getConfigFile() {
+        return new File(getFolderPath().toFile(), "config.xml");
+    }
+
+    @Override
+    public String getID() {
+        return "3ee4e6b3-af6b-4510-8b95-1af29fc81629";
+    }
+
+    @Override
+    public String getDescription() {
+        return "Extracts features of the Image using Jpylyzer";
+    }
+
+    /**
+     * Returns the configured jpylyzer path, or the script expected under the plugin
+     * folder when none is configured; {@code null} if that path is not a regular file.
+     */
+    private String getScriptPath(JpylyzerConfig config) {
+        String cliPath = config.getCliPath();
+        if (cliPath == null) {
+            cliPath = getFolderPath().toString() + "/jpylyzer-master/jpylyzer/jpylyzer.py";
+        }
+        return new File(cliPath).isFile() ? cliPath : null;
+    }
+}
diff --git a/pom.xml b/pom.xml
index da94c37..97d90ff 100644
--- a/pom.xml
+++ b/pom.xml
@@ -29,6 +29,7 @@ imageSample-plugin metsMetadata-plugin mediaconch + jpylyzer-plugin
@@ -46,7 +47,7 @@ org.verapdf core - [0.13.0,0.14.0) + [0.15.0,0.16.0)