diff --git a/build.gradle b/build.gradle index 0fafbbd..fa43e60 100644 --- a/build.gradle +++ b/build.gradle @@ -1,5 +1,5 @@ group 'org.doremus' -version '0.5.1' +version '0.6.0' apply plugin: 'java' apply plugin: 'application' diff --git a/src/main/java/org/doremus/string2vocabulary/SKOSVocabulary.java b/src/main/java/org/doremus/string2vocabulary/SKOSVocabulary.java index c51aaba..0f52e2e 100644 --- a/src/main/java/org/doremus/string2vocabulary/SKOSVocabulary.java +++ b/src/main/java/org/doremus/string2vocabulary/SKOSVocabulary.java @@ -4,23 +4,24 @@ import org.apache.jena.vocabulary.RDF; import org.apache.jena.vocabulary.SKOS; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; public class SKOSVocabulary extends Vocabulary { - private Map> substitutionMap; - private Map> substitutionMapNoBrackets; + private final Map substitutionMap; + private final Map substitutionMapNoBrackets; + private final Map substitutionMapPlain; + private final Map substitutionMapPlainNoBrackets; public SKOSVocabulary(String name, Model model) { super(name, model); setSchemePathFromType(SKOS.ConceptScheme); - // Build a map + // Build maps substitutionMap = new HashMap<>(); substitutionMapNoBrackets = new HashMap<>(); + substitutionMapPlain = new HashMap<>(); + substitutionMapPlainNoBrackets = new HashMap<>(); // for each concept StmtIterator conceptIter = @@ -31,83 +32,84 @@ public SKOSVocabulary(String name, Model model) { return; } - while (conceptIter.hasNext()) { - Resource resource = conceptIter.nextStatement().getSubject(); - // get the labels - StmtIterator labelIterator = resource.listProperties(SKOS.prefLabel); - //for each label - while (labelIterator.hasNext()) { - Literal nx = labelIterator.nextStatement().getLiteral(); - String value = norm(nx.getLexicalForm()); - String valueNb = normNb(nx.getLexicalForm()); - String lang = nx.getLanguage(); - if (lang != null && !lang.isEmpty()){ - value += "@" + nx.getLanguage(); - valueNb += "@" + nx.getLanguage(); - } - - // get the list or create a new one - List ls = substitutionMap.computeIfAbsent(value, k -> new ArrayList<>()); - List lsNb = substitutionMapNoBrackets.computeIfAbsent(valueNb, k -> new ArrayList<>()); - // add it to the list - ls.add(resource); - lsNb.add(resource); + while (conceptIter.hasNext()) + processConcept(conceptIter.nextStatement().getSubject()); + } + + private void processConcept(Resource resource) { + // get the labels + StmtIterator labelIterator = resource.listProperties(SKOS.prefLabel); + //for each label + while (labelIterator.hasNext()) { + Literal nx = labelIterator.nextStatement().getLiteral(); + String value = norm(nx.getLexicalForm()); + String valueNb = normNb(nx.getLexicalForm()); + + // add the value or - if already there - keep the best one + putKeepBest(value, resource, substitutionMapPlain); + putKeepBest(valueNb, resource, substitutionMapPlainNoBrackets); + + // now, manage the lang tag + String lang = nx.getLanguage(); + if (lang != null && !lang.isEmpty()) { + value += "@" + nx.getLanguage(); + valueNb += "@" + nx.getLanguage(); } - labelIterator = resource.listProperties(SKOS.altLabel); - //for each label - while (labelIterator.hasNext()) { - Literal nx = labelIterator.nextStatement().getLiteral(); - String value = norm(nx.getLexicalForm()); - String valueNb = normNb(nx.getLexicalForm()); - String lang = nx.getLanguage(); - if (lang != null && !lang.isEmpty()){ - value += "@" + nx.getLanguage(); - valueNb += "@" + nx.getLanguage(); - } - - // get the list or create a new one - List ls = substitutionMap.computeIfAbsent(value, k -> new ArrayList<>()); - List lsNb = substitutionMapNoBrackets.computeIfAbsent(valueNb, k -> new ArrayList<>()); - // add it to the list - ls.add(resource); - lsNb.add(resource); + // add the value or - if already there - keep the best one + putKeepBest(value, resource, substitutionMap); + putKeepBest(valueNb, resource, substitutionMapNoBrackets); + } + + labelIterator = resource.listProperties(SKOS.altLabel); + //for each label + while (labelIterator.hasNext()) { + Literal nx = labelIterator.nextStatement().getLiteral(); + String value = norm(nx.getLexicalForm()); + String valueNb = normNb(nx.getLexicalForm()); + // add the value or - if already there, skip + putOrSkip(value, resource, substitutionMapPlain); + putOrSkip(valueNb, resource, substitutionMapPlainNoBrackets); + + String lang = nx.getLanguage(); + if (lang != null && !lang.isEmpty()) { + value += "@" + nx.getLanguage(); + valueNb += "@" + nx.getLanguage(); } + + // add the value or - if already there, skip + putOrSkip(value, resource, substitutionMap); + putOrSkip(valueNb, resource, substitutionMapNoBrackets); } } + private void putOrSkip(String value, Resource resource, Map map) { + if (!map.containsKey(value)) + map.put(value, resource); + } + + private void putKeepBest(String value, Resource resource, Map map) { + if (map.containsKey(value)) { + Resource oldRes = map.get(value); + Statement narrower = oldRes.getProperty(SKOS.narrower); + if (narrower != null) map.put(value, resource); + } else map.put(value, resource); + } + @Override public Resource findConcept(String text, boolean strict, boolean excludeBrackets) { - String textOnly = text.replaceAll("@[a-z]{2,3}$", ""); - - Map> map = excludeBrackets ? substitutionMapNoBrackets : substitutionMap; - for (Map.Entry> entry : map.entrySet()) { - String key = entry.getKey(); - String keyPlain = key.replaceAll("@[a-z]{2,3}$", ""); - - boolean textLangMatch = text.equalsIgnoreCase(key); - boolean textOnlyMatch = !strict && textOnly.equalsIgnoreCase(keyPlain); - - if (textLangMatch || textOnlyMatch) { - List matches = entry.getValue(); - Resource bestMatch = null; - for (Resource m : matches) { - if (bestMatch == null) { - bestMatch = m; - continue; - } - - // if I already had a "bestMatch" - // choose the most specific one (skos:narrower) - Statement narrower = bestMatch.getProperty(SKOS.narrower); - if (narrower != null) bestMatch = m; - } - - return bestMatch; - } - } - return null; + // remove the lang tag if not strict + text = strict ? text : text.replaceAll("@[a-z]{2,3}$", ""); + + // select the right substitution map + Map map; + if (strict) + map = excludeBrackets ? substitutionMapNoBrackets : substitutionMap; + else + map = excludeBrackets ? substitutionMapPlainNoBrackets : substitutionMapPlain; + + return map.get(text.toLowerCase()); } }