diff --git a/common/build.gradle b/common/build.gradle index c6990dd9fd..41af623853 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -24,7 +24,8 @@ dependencies { compile 'org.apache.pdfbox:pdfbox:1.8.2' compile 'com.levigo.jbig2:levigo-jbig2-imageio:1.6.1' - compile 'com.ibm.icu:icu4j:3.8' + compile 'com.ibm.icu:icu4j:56.1' + compile 'com.google.guava:guava:19.0' compile 'org.bouncycastle:bcprov-jdk15:1.44' compile 'org.bouncycastle:bcmail-jdk15:1.44' /* diff --git a/common/src/main/java/cz/incad/kramerius/service/SortingService.java b/common/src/main/java/cz/incad/kramerius/service/SortingService.java index 1b45266da4..2128c5cebf 100644 --- a/common/src/main/java/cz/incad/kramerius/service/SortingService.java +++ b/common/src/main/java/cz/incad/kramerius/service/SortingService.java @@ -18,8 +18,7 @@ public interface SortingService { * Sort given list of FOXML objects (their PIDs) based on the content of the BIBLIO-MODS datastream * @param pids list of FOXML PIDs to sort * @param xpath XPath expression to extract the data (upon which the objects will be sorted) from BIBLIO-MODS - * @param numeric when true, the data from xpath will be sorted as numeric (integer) values, otherwise alphabetically * @return sorted list of PIDs */ - List sortObjects(List pids, String xpath, boolean numeric); + List sortObjects(List pids, String xpath); } diff --git a/common/src/main/java/cz/incad/kramerius/service/impl/SortingServiceImpl.java b/common/src/main/java/cz/incad/kramerius/service/impl/SortingServiceImpl.java index 8263df23be..444e7635d4 100644 --- a/common/src/main/java/cz/incad/kramerius/service/impl/SortingServiceImpl.java +++ b/common/src/main/java/cz/incad/kramerius/service/impl/SortingServiceImpl.java @@ -1,8 +1,32 @@ package cz.incad.kramerius.service.impl; -import com.google.inject.*; +import javax.annotation.PostConstruct; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathExpression; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.logging.Logger; + +import org.w3c.dom.Document; + +import com.google.common.collect.Ordering; +import com.google.common.collect.TreeMultimap; +import com.google.inject.AbstractModule; +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import com.google.inject.Scopes; import com.google.inject.name.Named; import com.google.inject.name.Names; +import com.ibm.icu.text.Collator; + import cz.incad.kramerius.FedoraAccess; import cz.incad.kramerius.FedoraNamespaceContext; import cz.incad.kramerius.KrameriusModels; @@ -16,17 +40,8 @@ import cz.incad.kramerius.relation.impl.RelationServiceImpl; import cz.incad.kramerius.service.SortingService; import cz.incad.kramerius.statistics.StatisticsAccessLog; +import cz.incad.kramerius.utils.NaturalOrderCollator; import cz.incad.kramerius.utils.conf.KConfiguration; -import org.w3c.dom.Document; - -import javax.annotation.PostConstruct; -import javax.xml.xpath.XPath; -import javax.xml.xpath.XPathExpression; -import javax.xml.xpath.XPathExpressionException; -import javax.xml.xpath.XPathFactory; -import java.io.IOException; -import java.util.*; -import java.util.logging.Logger; /** * @ author vlahoda @@ -44,53 +59,64 @@ public class SortingServiceImpl implements SortingService { RelationService relationService; + private XPathFactory xpathFactory = XPathFactory.newInstance(); + private Map sortingConfigMap = new HashMap(); @Inject - public SortingServiceImpl(@Named("rawFedoraAccess") FedoraAccess fedoraAccess, KConfiguration configuration, RelationService relationService){ + public SortingServiceImpl(@Named("rawFedoraAccess") FedoraAccess fedoraAccess, KConfiguration configuration, RelationService relationService) { this.fedoraAccess = fedoraAccess; this.configuration = configuration; this.relationService = relationService; initSortingConfigMap(); } + public static void main(String[] args) throws IOException { + LOGGER.info("SortRelations service: " + Arrays.toString(args)); + Injector injector = Guice.createInjector(new SortingModule()); + SortingService inst = injector.getInstance(SortingService.class); + inst.sortRelations(args[0], true); + LOGGER.info("SortRelations finished."); + } @Override public void sortRelations(String pid, boolean startIndexer) { try { //TODO: I18n - if (startIndexer){ - try{ + if (startIndexer) { + try { ProcessStarter.updateName("Sort relations (" + pid + ")"); - }catch(Exception ex){} + } catch (Exception ex) { + } } String lastTime = fedoraAccess.getAPIA().getObjectProfile(pid, null).getObjLastModDate(); RelationModel model = relationService.load(pid); for (KrameriusModels kind : model.getRelationKinds()) { - if (KrameriusModels.DONATOR.equals(kind)) continue; + if (KrameriusModels.DONATOR.equals(kind)) + continue; List relations = model.getRelations(kind); List originalPids = new ArrayList(relations.size()); for (Relation relation : relations) { originalPids.add(relation.getPID()); } - SortingConfig sortingConfig = sortingConfigMap.get(kind.getValue()); - if (sortingConfig == null){ - LOGGER.warning("Unsupported relation type for sorting: "+kind.getValue()); + String xpath = sortingConfigMap.get(kind.getValue()); + if (xpath == null) { + LOGGER.warning("Unsupported relation type for sorting: " + kind.getValue()); continue; } - List sortedPids = sortObjects(originalPids, sortingConfig.xpath, sortingConfig.numeric); + List sortedPids = sortObjects(originalPids, xpath); relations.clear(); for (String sortedPid : sortedPids) { relations.add(new Relation(sortedPid, kind)); } } String currTime = fedoraAccess.getAPIA().getObjectProfile(pid, null).getObjLastModDate(); - if (currTime.equals(lastTime)){ + if (currTime.equals(lastTime)) { relationService.save(pid, model); - if (startIndexer){ + if (startIndexer) { IndexerProcessStarter.spawnIndexer(true, "Reindexing sorted relations", pid); } - }else{ - LOGGER.warning("Cannot save sorted relations, object "+pid+" was modified."); + } else { + LOGGER.warning("Cannot save sorted relations, object " + pid + " was modified."); } } catch (IOException e) { throw new RuntimeException(e); @@ -98,8 +124,9 @@ public void sortRelations(String pid, boolean startIndexer) { } @Override - public List sortObjects(List pids, String xpathString, boolean numeric) { - TreeMap sortedMap = new TreeMap(); + public List sortObjects(List pids, String xpathString) { + Collator stringCollator = Collator.getInstance(new Locale(configuration.getConfiguration().getString("sort.locale", "cs_CZ"))); + TreeMultimap sortedMap = TreeMultimap.create(new NaturalOrderCollator(stringCollator), Ordering.natural()); List failedList = new ArrayList(); XPathExpression expr = null; try { @@ -111,66 +138,40 @@ public List sortObjects(List pids, String xpathString, boolean n } for (String pid : pids) { String sortingValue = null; - try{ + try { Document mods = RelationUtils.getMods(pid, fedoraAccess); sortingValue = expr.evaluate(mods); } catch (Exception e) { //ignore, will be logged in next step (sortingValue test) } - if (sortingValue == null || "".equals(sortingValue)){ + if (sortingValue == null || "".equals(sortingValue)) { failedList.add(pid); - LOGGER.info("Cannot sort relation for invalid value:"+sortingValue + " ("+pid+")"); - }else{ - if (numeric){ - try{ - Integer ordinal = Integer.parseInt(sortingValue); - String existing = sortedMap.put(ordinal,pid); - if (existing != null){ - failedList.add(existing); - } - }catch (Exception ex){ - failedList.add(pid); - LOGGER.info("Cannot sort relation for invalid numeric value:"+sortingValue + " ("+pid+")"); - } - }else{ - String existing = sortedMap.put(sortingValue,pid); - if (existing != null){ - failedList.add(existing); - } + LOGGER.info("Cannot sort relation for invalid value:" + sortingValue + " (" + pid + ")"); + } else { + try { + sortedMap.put(sortingValue, pid); + } catch (Exception ex) { + failedList.add(pid); + LOGGER.info("Cannot sort relation for invalid value:" + sortingValue + " (" + pid + ")"); } } } List result = new ArrayList(pids.size()); - for (Map.Entry entry:sortedMap.entrySet()){ - result.add(entry.getValue()); + for (String o : sortedMap.values()) { + result.add(o); } result.addAll(failedList); return result; } - private XPathFactory xpathFactory = XPathFactory.newInstance(); - private Map sortingConfigMap = new HashMap(); - @PostConstruct - private void initSortingConfigMap(){ + private void initSortingConfigMap() { String[] rawConfig = configuration.getConfiguration().getStringArray(CONFIG_KEY); - for (String modelConfig:rawConfig){ + for (String modelConfig : rawConfig) { String[] configItems = modelConfig.split(";"); - SortingConfig sortingConfig = new SortingConfig(); - sortingConfig.xpath = configItems[1]; - sortingConfig.numeric = Boolean.parseBoolean(configItems[2]); - sortingConfigMap.put(configItems[0], sortingConfig); - } - } - - - public static void main(String[] args) throws IOException { - LOGGER.info("SortRelations service: " + Arrays.toString(args)); - Injector injector = Guice.createInjector(new SortingModule()); - SortingService inst = injector.getInstance(SortingService.class); - inst.sortRelations(args[0], true); - LOGGER.info("SortRelations finished."); + sortingConfigMap.put(configItems[0], configItems[1]); + } } } @@ -185,7 +186,4 @@ protected void configure() { } } -class SortingConfig { - String xpath; - boolean numeric; -} + diff --git a/common/src/main/java/cz/incad/kramerius/utils/NaturalOrderCollator.java b/common/src/main/java/cz/incad/kramerius/utils/NaturalOrderCollator.java new file mode 100644 index 0000000000..295dba6219 --- /dev/null +++ b/common/src/main/java/cz/incad/kramerius/utils/NaturalOrderCollator.java @@ -0,0 +1,114 @@ +package cz.incad.kramerius.utils; + + +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Locale; + +import com.ibm.icu.text.Collator; + + +public class NaturalOrderCollator implements Comparator + + +{ + + Collator stringCollator; + + public static void main(String[] args) { + String[] strings = new String[]{"1-2", "1-02", "1-20", "10-20", "fred", "jane", "pic01", + "pic2", "pic02", "pic02a", "pic3", "pic4", "pic 4 else", "pic 5", "pic05", "pic 5", + "pic 5 something", "pic 6", "pic 7", "pic100", "pic100a", "pic120", "pic121", + "pic02000", "tom", "x2-g8", "x2-y7", "x2-y08", "x8-y8", "anča1chtěla", "čtenář5a", "cizinec", "anča1hleděla", "an-ča1čuměla", "1", "10", "2", "20", "2a", "pic", "pic0100", ",", "#", "#1", "9", "6", "7-8", "[7,8]", "motýl noční", "motýlek"}; + + List orig = Arrays.asList(strings); + + System.out.println("Original: " + orig); + + List scrambled = Arrays.asList(strings); + Collections.shuffle(scrambled); + + System.out.println("Scrambled: " + scrambled); + + Collections.sort(scrambled, new NaturalOrderCollator()); + //Collections.sort(scrambled, Collator.getInstance(new Locale("cs"))); + + System.out.println("Sorted: " + scrambled); + } + + public NaturalOrderCollator(Collator stringCollator) { + this.stringCollator = stringCollator; + } + + public NaturalOrderCollator() { + this.stringCollator = Collator.getInstance(new Locale("cs_CZ")); + } + + private final boolean isDigit(char ch) { + return ch >= 48 && ch <= 57; + } + + /** + * Length of string is passed in for improved efficiency (only need to calculate it once) + **/ + private final String getChunk(String s, int slength, int marker) { + StringBuilder chunk = new StringBuilder(); + char c = s.charAt(marker); + chunk.append(c); + marker++; + if (isDigit(c)) { + while (marker < slength) { + c = s.charAt(marker); + if (!isDigit(c)) + break; + chunk.append(c); + marker++; + } + } else { + while (marker < slength) { + c = s.charAt(marker); + if (isDigit(c)) + break; + chunk.append(c); + marker++; + } + } + return chunk.toString(); + } + + public int compare(String s1, String s2) { + + int thisMarker = 0; + int thatMarker = 0; + int s1Length = s1.length(); + int s2Length = s2.length(); + + while (thisMarker < s1Length && thatMarker < s2Length) { + String thisChunk = getChunk(s1, s1Length, thisMarker); + thisMarker += thisChunk.length(); + + String thatChunk = getChunk(s2, s2Length, thatMarker); + thatMarker += thatChunk.length(); + + // If both chunks contain numeric characters, sort them numerically + int result = 0; + if (isDigit(thisChunk.charAt(0)) && isDigit(thatChunk.charAt(0))) { + int firstInt = Integer.parseInt(thisChunk); + int secondInt = Integer.parseInt(thatChunk); + result = firstInt - secondInt; + if (result != 0) { + return result; + } + } else { + result = stringCollator.compare(thisChunk, thatChunk); + } + + if (result != 0) + return result; + } + + return s1Length - s2Length; + } +} \ No newline at end of file diff --git a/common/src/main/java/res/configuration.properties b/common/src/main/java/res/configuration.properties index 51bfb4f101..0dfc22c98b 100644 --- a/common/src/main/java/res/configuration.properties +++ b/common/src/main/java/res/configuration.properties @@ -116,6 +116,9 @@ sort.xpaths=page;//mods:mods/mods:part/mods:detail[@type='pageIndex']/mods:numbe supplement;//mods:mods/mods:part/mods:detail[@type='pageNumber']/mods:number | //mods:mods/mods:titleInfo/mods:partNumber;true,\ picture;//mods:mods/mods:part/mods:detail[@type='pageNumber']/mods:number | //mods:mods/mods:titleInfo/mods:partNumber;true +## Locale used for relations sorting +sort.locale=cs_CZ + ## Podporovane jazyky v rozhrani interface.languages=\u010desky,cs,english,en