Skip to content

Commit

Permalink
Fix #576
Browse files Browse the repository at this point in the history
  • Loading branch information
pavels committed Feb 7, 2018
1 parent d9a365a commit 220d9c7
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public boolean acceptElement(Element elm) {
return batches;
}

static void printPid(Element sourceDocElm) {
static String pid(Element sourceDocElm) {
Element pidElm = XMLUtils.findElement(sourceDocElm, new XMLUtils.ElementsFilter() {
@Override
public boolean acceptElement(Element element) {
Expand All @@ -59,21 +59,22 @@ public boolean acceptElement(Element element) {
}
});
if (pidElm != null) {
System.out.println(pidElm.getTextContent());
}
return pidElm.getTextContent().trim();
} else return "";
}

public static void transform(Element sourceDocElm, Document destDocument,Element destDocElem) throws MigrateSolrIndexException {
String pid = pid(sourceDocElm);
if (sourceDocElm.getNodeName().equals("doc")) {
NodeList childNodes = sourceDocElm.getChildNodes();
for (int j = 0,lj=childNodes.getLength(); j < lj; j++) {
Node node = childNodes.item(j);
if (node.getNodeType() == Node.ELEMENT_NODE) {
List<String> primitiveVals = Arrays.asList("str","int","bool", "date");
if (primitiveVals.contains(node.getNodeName())) {
simpleValue(destDocument,destDocElem, node,null);
simpleValue(pid, destDocument,destDocElem, node,null);
} else {
arrayValue(destDocument,destDocElem,node);
arrayValue(pid, destDocument,destDocElem,node);
}
}
}
Expand Down Expand Up @@ -111,10 +112,10 @@ public boolean acceptElement(Element paramElement) {
compositeIdElm.setAttribute("name", compositeIdName);
compositeIdElm.setTextContent(txt);
docElm.appendChild(compositeIdElm);

}

public static void simpleValue(Document ndoc, Element docElm, Node node, String derivedName) {
public static void simpleValue(String pid, Document ndoc, Element docElm, Node node, String derivedName) {
String attributeName = derivedName != null ? derivedName : ((Element)node).getAttribute("name");
if (!nonCopiingField(attributeName)) {
Element strElm = ndoc.createElement("field");
Expand All @@ -125,23 +126,44 @@ public static void simpleValue(Document ndoc, Element docElm, Node node, String
}
}

public static void arrayValue(Document ndoc, Element docElm, Node node) {
public static void arrayValue(String pid, Document ndoc, Element docElm, Node node) {
String attributeName = ((Element) node).getAttribute("name");
if (!nonCopiingField(attributeName)) {
NodeList childNodes = node.getChildNodes();
for (int i = 0,ll=childNodes.getLength(); i < ll; i++) {
Node n = childNodes.item(i);
if (n.getNodeType() == Node.ELEMENT_NODE) {
simpleValue(ndoc,docElm, n, attributeName);
if (exceptionField(attributeName) && pid.contains("/@")) {
NodeList childNodes = node.getChildNodes();
for (int i = 0,ll=childNodes.getLength(); i < ll; i++) {
Node n = childNodes.item(i);
if (n.getNodeType() == Node.ELEMENT_NODE) {
simpleValue(pid, ndoc,docElm, n, attributeName);
}
}
} else if (!exceptionField(attributeName)) {
NodeList childNodes = node.getChildNodes();
for (int i = 0,ll=childNodes.getLength(); i < ll; i++) {
Node n = childNodes.item(i);
if (n.getNodeType() == Node.ELEMENT_NODE) {
simpleValue(pid, ndoc,docElm, n, attributeName);
}
}
}
}
}

public static final List<String> COPIED_FIELDS = Arrays.asList("text","title", "search_title","facet_autor","search_autor");
// "text" is treated like a copied field (skipped), except for PDF documents (PID contains "/@"), where its values are migrated
public static final List<String> EXCEPTION_FIELDS = Arrays.asList("text");

// copied
public static final List<String> COPIED_FIELDS = Arrays.asList("title", "search_title","facet_autor","search_autor");
// copied but identified by postfix
public static final List<String> COPIED_POSTFIXES = Arrays.asList("_lemmatized","_lemmatized_ascii","_lemmatized_nostopwords");

/**
 * Tells whether the given field name is listed in {@code EXCEPTION_FIELDS}.
 *
 * @param attributeName field name to look up
 * @return {@code true} when the name is listed, {@code false} otherwise
 */
private static boolean exceptionField(String attributeName) {
    return EXCEPTION_FIELDS.contains(attributeName);
}

private static boolean nonCopiingField(String attributeName) {
if (COPIED_FIELDS.contains(attributeName)) {
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,26 @@ public class MigrationUtils {
private static final String DEST_SOLR_HOST = ".dest.solrHost";
private static final String SOLR_MIGRATION_QUERY_KEY = ".migration.solr.query";
private static final String SOLR_MIGRATION_FIELD_LIST_KEY = ".migration.solr.fieldlist";
private static final String SOLR_MIGRATION_SORT_FIELD_KEY = ".migration.solr.sort";

private static final String SOLR_MIGRATION_ROWS_KEY = ".migration.solr.rows";

private static final String SOLR_MIGRATION_THREAD_KEY = ".migration.threads";
private static final String SOLR_MIGRATION_BATCHSIZE_KEY = ".migration.solr.batchsize";


private static final String SOLR_MIGRATION_BUIDLD_COMPOSITE =".migration.build.composite";


public static final String DEFAULT_QEURY = "*:*";
public static final String DEFAULT_FIELDLIST = "PID timestamp fedora.model document_type handle status create_date modified_date parent_model " +
public static final String DEFAULT_FIELDLIST = "PID timestamp fedora.model document_type handle status created_date modified_date parent_model " +
"parent_pid parent_pid parent_title root_model root_pid root_title text_ocr pages_count " +
"datum_str datum rok datum_begin datum_end datum_page issn mdt ddt dostupnost keywords " +
"geographic_names collection sec model_path pid_path rels_ext_index level dc.title title_sort " +
"title_sort dc.creator language dc.description details facet_title browse_title browse_autor img_full_mime viewable " +
"virtual location range";
"title_sort dc.creator dc.identifier language dc.description details facet_title browse_title browse_autor img_full_mime viewable " +
"virtual location range mods.shelfLocator mods.physicalLocation text";

public static final String DEFAULT_SORT_FIELD="modified_date asc";


public static final int DEFAULT_NUMBER_OF_ROWS = 500;
Expand Down Expand Up @@ -197,7 +200,8 @@ public static String configuredMigrationQuery() throws MigrateSolrIndexException
try {
String query = KConfiguration.getInstance().getConfiguration().getString(SOLR_MIGRATION_QUERY_KEY, DEFAULT_QEURY);
String fieldlist = KConfiguration.getInstance().getConfiguration().getString(SOLR_MIGRATION_FIELD_LIST_KEY, DEFAULT_FIELDLIST);
return "select?q="+URLEncoder.encode(query, "UTF-8")+"&fl="+URLEncoder.encode(fieldlist, "UTF-8");
String sortfield = KConfiguration.getInstance().getConfiguration().getString(SOLR_MIGRATION_SORT_FIELD_KEY, DEFAULT_SORT_FIELD);
return "select?q="+URLEncoder.encode(query, "UTF-8")+"&fl="+URLEncoder.encode(fieldlist, "UTF-8") +"&sort="+URLEncoder.encode(sortfield, "UTF-8");
} catch (UnsupportedEncodingException e) {
LOGGER.log(Level.SEVERE, e.getMessage(), e);
throw new MigrateSolrIndexException(e.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,11 +234,11 @@ public boolean acceptElement(Element elm) {
public void testConstructQuery() throws MigrateSolrIndexException {
KConfiguration.getInstance().getConfiguration().setProperty(".migration.solr.query", "*:*");
String url =MigrationUtils.constructedQueryURL();
Assert.assertTrue(url.endsWith("select?q=*%3A*&fl=PID+timestamp+fedora.model+document_type+handle+status+create_date+modified_date+parent_model+parent_pid+parent_pid+parent_title+root_model+root_pid+root_title+text_ocr+pages_count+datum_str+datum+rok+datum_begin+datum_end+datum_page+issn+mdt+ddt+dostupnost+keywords+geographic_names+collection+sec+model_path+pid_path+rels_ext_index+level+dc.title+title_sort+title_sort+dc.creator+language+dc.description+details+facet_title+browse_title+browse_autor+img_full_mime+viewable+virtual+location+range"));
Assert.assertTrue(url.endsWith("select?q=*%3A*&fl=PID+timestamp+fedora.model+document_type+handle+status+created_date+modified_date+parent_model+parent_pid+parent_pid+parent_title+root_model+root_pid+root_title+text_ocr+pages_count+datum_str+datum+rok+datum_begin+datum_end+datum_page+issn+mdt+ddt+dostupnost+keywords+geographic_names+collection+sec+model_path+pid_path+rels_ext_index+level+dc.title+title_sort+title_sort+dc.creator+dc.identifier+language+dc.description+details+facet_title+browse_title+browse_autor+img_full_mime+viewable+virtual+location+range+mods.shelfLocator+mods.physicalLocation+text&sort=modified_date+asc"));
KConfiguration.getInstance().getConfiguration().setProperty(".migration.solr.query", "*:* AND parent_pid:uuid\\:xxxx");
KConfiguration.getInstance().getConfiguration().setProperty(".migration.solr.fieldlist", "*:* AND parent_pid:uuid\\:xxxx&fl=PID");
url =MigrationUtils.constructedQueryURL();
Assert.assertTrue(url.endsWith("*%3A*+AND+parent_pid%3Auuid%5C%3Axxxx&fl=*%3A*+AND+parent_pid%3Auuid%5C%3Axxxx%26fl%3DPID"));
Assert.assertTrue(url.endsWith("select?q=*%3A*+AND+parent_pid%3Auuid%5C%3Axxxx&fl=*%3A*+AND+parent_pid%3Auuid%5C%3Axxxx%26fl%3DPID&sort=modified_date+asc"));
}


Expand Down

0 comments on commit 220d9c7

Please sign in to comment.