Skip to content

Commit

Permalink
optimised matching algorithm for speed up
Browse files Browse the repository at this point in the history
  • Loading branch information
pasqLisena committed Apr 10, 2020
1 parent c13f066 commit 3116e06
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 76 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
group 'org.doremus'
version '0.5.1'
version '0.6.0'

apply plugin: 'java'
apply plugin: 'application'
Expand Down
152 changes: 77 additions & 75 deletions src/main/java/org/doremus/string2vocabulary/SKOSVocabulary.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,24 @@
import org.apache.jena.vocabulary.RDF;
import org.apache.jena.vocabulary.SKOS;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;

public class SKOSVocabulary extends Vocabulary {
private Map<String, List<Resource>> substitutionMap;
private Map<String, List<Resource>> substitutionMapNoBrackets;
private final Map<String, Resource> substitutionMap;
private final Map<String, Resource> substitutionMapNoBrackets;
private final Map<String, Resource> substitutionMapPlain;
private final Map<String, Resource> substitutionMapPlainNoBrackets;

public SKOSVocabulary(String name, Model model) {
super(name, model);

setSchemePathFromType(SKOS.ConceptScheme);

// Build a map
// Build maps
substitutionMap = new HashMap<>();
substitutionMapNoBrackets = new HashMap<>();
substitutionMapPlain = new HashMap<>();
substitutionMapPlainNoBrackets = new HashMap<>();

// for each concept
StmtIterator conceptIter =
Expand All @@ -31,83 +32,84 @@ public SKOSVocabulary(String name, Model model) {
return;
}

while (conceptIter.hasNext()) {
Resource resource = conceptIter.nextStatement().getSubject();
// get the labels
StmtIterator labelIterator = resource.listProperties(SKOS.prefLabel);
//for each label
while (labelIterator.hasNext()) {
Literal nx = labelIterator.nextStatement().getLiteral();
String value = norm(nx.getLexicalForm());
String valueNb = normNb(nx.getLexicalForm());
String lang = nx.getLanguage();
if (lang != null && !lang.isEmpty()){
value += "@" + nx.getLanguage();
valueNb += "@" + nx.getLanguage();
}

// get the list or create a new one
List<Resource> ls = substitutionMap.computeIfAbsent(value, k -> new ArrayList<>());
List<Resource> lsNb = substitutionMapNoBrackets.computeIfAbsent(valueNb, k -> new ArrayList<>());
// add it to the list
ls.add(resource);
lsNb.add(resource);
while (conceptIter.hasNext())
processConcept(conceptIter.nextStatement().getSubject());
}

private void processConcept(Resource resource) {
// get the labels
StmtIterator labelIterator = resource.listProperties(SKOS.prefLabel);
//for each label
while (labelIterator.hasNext()) {
Literal nx = labelIterator.nextStatement().getLiteral();
String value = norm(nx.getLexicalForm());
String valueNb = normNb(nx.getLexicalForm());

// add the value or - if already there - keep the best one
putKeepBest(value, resource, substitutionMapPlain);
putKeepBest(valueNb, resource, substitutionMapPlainNoBrackets);

// now, manage the lang tag
String lang = nx.getLanguage();
if (lang != null && !lang.isEmpty()) {
value += "@" + nx.getLanguage();
valueNb += "@" + nx.getLanguage();
}

labelIterator = resource.listProperties(SKOS.altLabel);
//for each label
while (labelIterator.hasNext()) {
Literal nx = labelIterator.nextStatement().getLiteral();
String value = norm(nx.getLexicalForm());
String valueNb = normNb(nx.getLexicalForm());
String lang = nx.getLanguage();
if (lang != null && !lang.isEmpty()){
value += "@" + nx.getLanguage();
valueNb += "@" + nx.getLanguage();
}

// get the list or create a new one
List<Resource> ls = substitutionMap.computeIfAbsent(value, k -> new ArrayList<>());
List<Resource> lsNb = substitutionMapNoBrackets.computeIfAbsent(valueNb, k -> new ArrayList<>());
// add it to the list
ls.add(resource);
lsNb.add(resource);
// add the value or - if already there - keep the best one
putKeepBest(value, resource, substitutionMap);
putKeepBest(valueNb, resource, substitutionMapNoBrackets);
}

labelIterator = resource.listProperties(SKOS.altLabel);
//for each label
while (labelIterator.hasNext()) {
Literal nx = labelIterator.nextStatement().getLiteral();
String value = norm(nx.getLexicalForm());
String valueNb = normNb(nx.getLexicalForm());
// add the value or - if already there, skip
putOrSkip(value, resource, substitutionMapPlain);
putOrSkip(valueNb, resource, substitutionMapPlainNoBrackets);

String lang = nx.getLanguage();
if (lang != null && !lang.isEmpty()) {
value += "@" + nx.getLanguage();
valueNb += "@" + nx.getLanguage();
}

// add the value or - if already there, skip
putOrSkip(value, resource, substitutionMap);
putOrSkip(valueNb, resource, substitutionMapNoBrackets);
}
}

/**
 * Registers {@code resource} under {@code value} only when that key has no
 * mapping yet, so the first resource stored for a label is the one kept.
 *
 * <p>Uses {@link Map#putIfAbsent} so the check and the insert need a single
 * lookup. Note that {@code putIfAbsent} also fills a key that is mapped to
 * {@code null}; the callers visible in this class only ever store the
 * resource they were handed, so this presumably never matters here.
 *
 * @param value    the normalised label used as map key
 * @param resource the concept to associate with the label
 * @param map      the substitution map to update
 */
private void putOrSkip(String value, Resource resource, Map<String, Resource> map) {
  map.putIfAbsent(value, resource);
}

/**
 * Stores {@code resource} under {@code value}, resolving collisions in
 * favour of the more specific concept.
 *
 * <p>If the key is new, the resource is simply inserted. If the key is
 * already taken, the stored resource is replaced only when it carries a
 * {@code skos:narrower} property; otherwise the existing entry is kept.
 *
 * @param value    the normalised label used as map key
 * @param resource the candidate concept for this label
 * @param map      the substitution map to update
 */
private void putKeepBest(String value, Resource resource, Map<String, Resource> map) {
  // Unknown label: nothing to compare against, just register it.
  if (!map.containsKey(value)) {
    map.put(value, resource);
    return;
  }

  // Known label: the current holder loses its place if it has narrower concepts.
  Resource current = map.get(value);
  boolean currentHasNarrower = current.getProperty(SKOS.narrower) != null;
  if (currentHasNarrower) {
    map.put(value, resource);
  }
}


@Override
public Resource findConcept(String text, boolean strict, boolean excludeBrackets) {
String textOnly = text.replaceAll("@[a-z]{2,3}$", "");

Map<String, List<Resource>> map = excludeBrackets ? substitutionMapNoBrackets : substitutionMap;
for (Map.Entry<String, List<Resource>> entry : map.entrySet()) {
String key = entry.getKey();
String keyPlain = key.replaceAll("@[a-z]{2,3}$", "");

boolean textLangMatch = text.equalsIgnoreCase(key);
boolean textOnlyMatch = !strict && textOnly.equalsIgnoreCase(keyPlain);

if (textLangMatch || textOnlyMatch) {
List<Resource> matches = entry.getValue();
Resource bestMatch = null;
for (Resource m : matches) {
if (bestMatch == null) {
bestMatch = m;
continue;
}

// if I already had a "bestMatch"
// choose the most specific one (skos:narrower)
Statement narrower = bestMatch.getProperty(SKOS.narrower);
if (narrower != null) bestMatch = m;
}

return bestMatch;
}
}
return null;
// remove the lang tag if not strict
text = strict ? text : text.replaceAll("@[a-z]{2,3}$", "");

// select the right substitution map
Map<String, Resource> map;
if (strict)
map = excludeBrackets ? substitutionMapNoBrackets : substitutionMap;
else
map = excludeBrackets ? substitutionMapPlainNoBrackets : substitutionMapPlain;

return map.get(text.toLowerCase());
}

}

0 comments on commit 3116e06

Please sign in to comment.