From c53209b5cb3fb38629a101cf394cbab480431806 Mon Sep 17 00:00:00 2001 From: vlahoda Date: Wed, 13 Aug 2014 22:32:35 +0200 Subject: [PATCH] Import updateExisting parameter implemented --- .../main/java/res/configuration.properties | 3 + .../resources/res/configuration.properties | 4 ++ .../main/java/res/configuration.properties | 4 ++ .../resources/res/configuration.properties | 4 ++ .../src/main/java/org/kramerius/Import.java | 58 ++++++++++++++----- .../kramerius/imports/ParametrizedImport.java | 15 ++--- .../ParametrizedImportInputTemplate.java | 40 +++++-------- .../kramerius/replications/SecondPhase.java | 2 +- .../imports/input/parametrizedimport.stg | 27 +++++---- search/src/java/labels.properties | 1 + search/src/java/labels_cs.properties | 1 + 11 files changed, 101 insertions(+), 58 deletions(-) diff --git a/import-cmdtool/src/main/java/res/configuration.properties b/import-cmdtool/src/main/java/res/configuration.properties index 1b0f794d65..a580f8e0ff 100644 --- a/import-cmdtool/src/main/java/res/configuration.properties +++ b/import-cmdtool/src/main/java/res/configuration.properties @@ -124,6 +124,9 @@ ingest.startIndexer=true #when true, automatically sort relations in merged RELS-EXT datastreams (after incremental import of document parts) ingest.sortRelations=true +#when true, existing foxml objects with the same PID are replaced with the newly imported; when false, existing objects are preserved - only RELS-EXT RDF relations from the newly imported objects are added to the existing ones +ingest.updateExisting=false + # connection to fedora repository (replication target) ingest.url=${fedoraHost} ingest.user=${fedoraUser} diff --git a/import-cmdtool/src/main/resources/res/configuration.properties b/import-cmdtool/src/main/resources/res/configuration.properties index 1b0f794d65..54bcd0f617 100644 --- a/import-cmdtool/src/main/resources/res/configuration.properties +++ b/import-cmdtool/src/main/resources/res/configuration.properties @@ -124,6 +124,10 @@ ingest.startIndexer=true #when true, automatically sort relations in merged RELS-EXT datastreams (after incremental import of document parts) ingest.sortRelations=true +#when true, existing foxml objects with the same PID are replaced with the newly imported; when false, existing objects are preserved - only RELS-EXT RDF relations from the newly imported objects are added to the existing ones +ingest.updateExisting=false + + # connection to fedora repository (replication target) ingest.url=${fedoraHost} ingest.user=${fedoraUser} diff --git a/import-mets/src/main/java/res/configuration.properties b/import-mets/src/main/java/res/configuration.properties index c0e2d10c5e..5d2355169b 100644 --- a/import-mets/src/main/java/res/configuration.properties +++ b/import-mets/src/main/java/res/configuration.properties @@ -119,6 +119,10 @@ ingest.startIndexer=true #when true, automatically sort relations in merged RELS-EXT datastreams (after incremental import of document parts) ingest.sortRelations=true +#when true, existing foxml objects with the same PID are replaced with the newly imported; when false, existing objects are preserved - only RELS-EXT RDF relations from the newly imported objects are added to the existing ones +ingest.updateExisting=false + + # connection to fedora repository (replication target) ingest.url=${fedoraHost} ingest.user=${fedoraUser} diff --git a/import-mets/src/main/resources/res/configuration.properties b/import-mets/src/main/resources/res/configuration.properties index c0e2d10c5e..5d2355169b 100644 --- a/import-mets/src/main/resources/res/configuration.properties +++ b/import-mets/src/main/resources/res/configuration.properties @@ -119,6 +119,10 @@ ingest.startIndexer=true #when true, automatically sort relations in merged RELS-EXT datastreams (after incremental import of document parts) ingest.sortRelations=true +#when true, existing foxml objects with the same PID are replaced with the newly imported; when false, existing objects are preserved - only RELS-EXT RDF relations from the newly imported objects are added to the existing ones +ingest.updateExisting=false + + # connection to fedora repository (replication target) ingest.url=${fedoraHost} ingest.user=${fedoraUser} diff --git a/import/src/main/java/org/kramerius/Import.java b/import/src/main/java/org/kramerius/Import.java index b3156f329b..a2ca00172f 100644 --- a/import/src/main/java/org/kramerius/Import.java +++ b/import/src/main/java/org/kramerius/Import.java @@ -90,6 +90,11 @@ public static void ingest(final String url, final String user, final String pwd, log.info("INGEST CONFIGURED TO BE SKIPPED, RETURNING"); return; } + + boolean updateExisting = Boolean.valueOf (System.getProperties().containsKey("ingest.updateExisting") ? System.getProperty("ingest.updateExisting") : KConfiguration.getInstance().getConfiguration().getString("ingest.updateExisting", "false")); + log.info("INGEST updateExisting: "+updateExisting); + + long start = System.currentTimeMillis(); File importFile = new File(importRoot); @@ -104,7 +109,7 @@ public static void ingest(final String url, final String user, final String pwd, Set roots = new HashSet(); Set sortRelations = new HashSet(); if (importFile.isDirectory()) { - visitAllDirsAndFiles(importFile, roots, sortRelations); + visitAllDirsAndFiles(importFile, roots, sortRelations, updateExisting); } else { BufferedReader reader = null; try { @@ -128,7 +133,7 @@ public static void ingest(final String url, final String user, final String pwd, continue; } log.info("Importing " + importItem.getAbsolutePath()); - visitAllDirsAndFiles(importItem, roots, sortRelations); + visitAllDirsAndFiles(importItem, roots, sortRelations, updateExisting); } reader.close(); } catch (IOException e) { @@ -196,7 +201,7 @@ protected PasswordAuthentication getPasswordAuthentication() { of = new ObjectFactory(); } - private static void visitAllDirsAndFiles(File importFile, Set roots, Set sortRelations) { + private static void visitAllDirsAndFiles(File importFile, Set roots, Set sortRelations, boolean updateExisting) { if (importFile == null) { return; } @@ -207,7 +212,7 @@ private static void visitAllDirsAndFiles(File importFile, Set roo Arrays.sort(children); } for (int i = 0; i < children.length; i++) { - visitAllDirsAndFiles(children[i], roots, sortRelations); + visitAllDirsAndFiles(children[i], roots, sortRelations, updateExisting); } } else { DigitalObject dobj = null; @@ -222,12 +227,12 @@ private static void visitAllDirsAndFiles(File importFile, Set roo log.log(Level.FINE, "Underlying error was:", e); return; } - ingest(importFile, dobj.getPID(), sortRelations, roots); + ingest(importFile, dobj.getPID(), sortRelations, roots, updateExisting); checkRoot(dobj, roots); } } - public static void ingest(InputStream is, String pid, Set sortRelations, Set roots) throws IOException { + public static void ingest(InputStream is, String pid, Set sortRelations, Set roots, boolean updateExisting) throws IOException { long start = System.currentTimeMillis(); ByteArrayOutputStream bos = new ByteArrayOutputStream(); @@ -239,16 +244,39 @@ public static void ingest(InputStream is, String pid, Set sortRelations, //if (sfex.getMessage().contains("ObjectExistsException")) { if (objectExists(pid)) { - log.info("Merging with existing object " + pid); - if (merge(bytes)){ - if (sortRelations != null) { - sortRelations.add(pid); - log.info("Added merged object for sorting relations:"+pid); + if (updateExisting){ + log.info("Replacing existing object " + pid); + try{ + port.purgeObject(pid, "", false); + log.info("purged old object "+pid); + }catch(Exception ex){ + log.severe("Cannot purge object "+pid+", skipping: "+ex); + throw new RuntimeException(ex); + } + try { + port.ingest(bytes, "info:fedora/fedora-system:FOXML-1.1", "Initial ingest"); + log.info("Ingested new object "+pid); + } catch (SOAPFaultException rsfex) { + log.severe("Replace ingest SOAP fault:" + rsfex); + throw new RuntimeException(rsfex); } - if(roots!= null ){ + if (roots != null) { TitlePidTuple npt = new TitlePidTuple("", pid); roots.add(npt); - log.info("Added merged object for indexing:"+pid); + log.info("Added replaced object for indexing:" + pid); + } + }else { + log.info("Merging with existing object " + pid); + if (merge(bytes)) { + if (sortRelations != null) { + sortRelations.add(pid); + log.info("Added merged object for sorting relations:" + pid); + } + if (roots != null) { + TitlePidTuple npt = new TitlePidTuple("", pid); + roots.add(npt); + log.info("Added merged object for indexing:" + pid); + } } } } else { @@ -263,7 +291,7 @@ public static void ingest(InputStream is, String pid, Set sortRelations, log.info("Ingested:" + pid + " in " + (System.currentTimeMillis() - start) + "ms, count:" + counter); } - public static void ingest(File file, String pid, Set sortRelations, Set roots) { + public static void ingest(File file, String pid, Set sortRelations, Set roots, boolean updateExisting) { if (pid == null) { try { Object obj = unmarshaller.unmarshal(file); @@ -277,7 +305,7 @@ public static void ingest(File file, String pid, Set sortRelations, Set< try { FileInputStream is = new FileInputStream(file); - ingest(is, pid, sortRelations, roots); + ingest(is, pid, sortRelations, roots, updateExisting); } catch (Exception ex) { log.log(Level.SEVERE, "Ingestion error ", ex); throw new RuntimeException(ex); diff --git a/import/src/main/java/org/kramerius/imports/ParametrizedImport.java b/import/src/main/java/org/kramerius/imports/ParametrizedImport.java index 0e7e92cf10..ec176a4a53 100644 --- a/import/src/main/java/org/kramerius/imports/ParametrizedImport.java +++ b/import/src/main/java/org/kramerius/imports/ParametrizedImport.java @@ -16,15 +16,14 @@ */ package org.kramerius.imports; -import java.io.File; -import java.io.IOException; -import java.util.logging.Level; - -import org.kramerius.Import; - import cz.incad.kramerius.processes.annotations.ParameterName; import cz.incad.kramerius.processes.annotations.Process; import cz.incad.kramerius.processes.impl.ProcessStarter; +import org.kramerius.Import; + +import java.io.File; +import java.io.IOException; +import java.util.logging.Level; /** * Parametrized import proces @@ -39,10 +38,12 @@ public class ParametrizedImport { @Process public static void process( @ParameterName("importDirectory") File importDirectory, - @ParameterName("startIndexer")Boolean startIndexer) { + @ParameterName("startIndexer")Boolean startIndexer, + @ParameterName("updateExisting")Boolean updateExisting) { System.setProperty("import.directory", importDirectory.getAbsolutePath()); System.setProperty("ingest.startIndexer", startIndexer.toString()); + System.setProperty("ingest.updateExisting", updateExisting.toString()); System.setProperty("ingest.skip", "false"); //import se bude vždy spouštět try { diff --git a/import/src/main/java/org/kramerius/imports/input/ParametrizedImportInputTemplate.java b/import/src/main/java/org/kramerius/imports/input/ParametrizedImportInputTemplate.java index 0128950226..07bf0566fc 100644 --- a/import/src/main/java/org/kramerius/imports/input/ParametrizedImportInputTemplate.java +++ b/import/src/main/java/org/kramerius/imports/input/ParametrizedImportInputTemplate.java @@ -16,36 +16,24 @@ */ package org.kramerius.imports.input; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Writer; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Properties; -import java.util.ResourceBundle; -import java.util.Set; - -import org.antlr.stringtemplate.StringTemplate; -import org.antlr.stringtemplate.StringTemplateGroup; -import org.antlr.stringtemplate.language.DefaultTemplateLexer; -import org.kramerius.processes.filetree.TreeItem; -import org.kramerius.processes.filetree.TreeModelFilter; -import org.kramerius.processes.utils.BasicStringTemplateGroup; -import org.kramerius.processes.utils.TreeModelUtils; - import com.google.inject.Inject; import com.google.inject.Provider; - import cz.incad.kramerius.processes.LRProcessDefinition; import cz.incad.kramerius.processes.template.ProcessInputTemplate; import cz.incad.kramerius.service.ResourceBundleService; import cz.incad.kramerius.utils.conf.KConfiguration; import cz.incad.kramerius.utils.stemplates.ResourceBundleUtils; +import org.antlr.stringtemplate.StringTemplate; +import org.antlr.stringtemplate.StringTemplateGroup; +import org.antlr.stringtemplate.language.DefaultTemplateLexer; +import org.kramerius.processes.filetree.TreeItem; +import org.kramerius.processes.filetree.TreeModelFilter; +import org.kramerius.processes.utils.TreeModelUtils; + +import java.io.*; +import java.util.Locale; +import java.util.Properties; +import java.util.ResourceBundle; public class ParametrizedImportInputTemplate implements ProcessInputTemplate { @@ -88,7 +76,11 @@ public boolean accept(File file) { Boolean startIndexer = configuration.getConfiguration().getBoolean("ingest.startIndexer"); template.setAttribute("startIndexer",startIndexer); - + + Boolean updateExisting = configuration.getConfiguration().getBoolean("ingest.updateExisting"); + template.setAttribute("updateExisting",updateExisting); + + writer.write(template.toString()); } diff --git a/import/src/main/java/org/kramerius/replications/SecondPhase.java b/import/src/main/java/org/kramerius/replications/SecondPhase.java index f70e966eb4..4491689433 100644 --- a/import/src/main/java/org/kramerius/replications/SecondPhase.java +++ b/import/src/main/java/org/kramerius/replications/SecondPhase.java @@ -102,7 +102,7 @@ public void ingest(File foxmlfile) throws PhaseException{ LOGGER.info("ingesting '"+foxmlfile.getAbsolutePath()+"'"); Import.initialize(KConfiguration.getInstance().getProperty("ingest.user"), KConfiguration.getInstance().getProperty("ingest.password")); try { - Import.ingest(foxmlfile, null, null, null); //TODO třetí parametr má být List, inicializovaný na začátku této fáze a předaný třetí fázi, kde se budou třídit vazby + Import.ingest(foxmlfile, null, null, null, false); //TODO třetí parametr má být List, inicializovaný na začátku této fáze a předaný třetí fázi, kde se budou třídit vazby } catch (RuntimeException e) { if (e.getCause() != null) throw new PhaseException(this, e.getCause()); else throw new PhaseException(this,e); diff --git a/import/src/main/resources/org/kramerius/imports/input/parametrizedimport.stg b/import/src/main/resources/org/kramerius/imports/input/parametrizedimport.stg index 7e1b446188..ffe1038104 100644 --- a/import/src/main/resources/org/kramerius/imports/input/parametrizedimport.stg +++ b/import/src/main/resources/org/kramerius/imports/input/parametrizedimport.stg @@ -29,7 +29,7 @@ tree(root) ::=<< -form(importDirectory,importRootDirectory, startIndexer, bundle) ::=<< +form(importDirectory,importRootDirectory, startIndexer, updateExisting, bundle) ::=<<