diff --git a/build.gradle b/build.gradle index c2c2c4b..12fd009 100644 --- a/build.gradle +++ b/build.gradle @@ -1,5 +1,5 @@ group 'org.doremus' -version '0.4' +version '0.5' apply plugin: 'java' apply plugin: 'application' diff --git a/src/main/java/org/doremus/string2vocabulary/MODS.java b/src/main/java/org/doremus/string2vocabulary/MODS.java index a71cb03..54d274d 100644 --- a/src/main/java/org/doremus/string2vocabulary/MODS.java +++ b/src/main/java/org/doremus/string2vocabulary/MODS.java @@ -24,7 +24,7 @@ public MODS(String name, Model model) { } @Override - public Resource findConcept(String text, boolean strict) { + public Resource findConcept(String text, boolean strict, boolean excludeBrackets) { return findModsResource(text, null); } @@ -32,14 +32,14 @@ public Resource findModsResource(String identifier, List subjects) { if (identifier == null || identifier.isEmpty()) return null; String modsSearch = - "prefix modsrdf: \n" + - "select distinct ?cat where {\n" + - " { ?cat modsrdf:identifier ?id}\n" + - " UNION {\n" + - " ?cat modsrdf:identifierGroup / modsrdf:identifierGroupValue ?id\n" + - " }\n" + - " FILTER (lcase(str(?id)) = \"" + identifier.toLowerCase() + "\")\n" + - "}"; + "prefix modsrdf: \n" + + "select distinct ?cat where {\n" + + " { ?cat modsrdf:identifier ?id}\n" + + " UNION {\n" + + " ?cat modsrdf:identifierGroup / modsrdf:identifierGroupValue ?id\n" + + " }\n" + + " FILTER (lcase(str(?id)) = \"" + identifier.toLowerCase() + "\")\n" + + "}"; // search all catalogs with that identifier QueryExecution qexec = QueryExecutionFactory.create(modsSearch, vocabulary); diff --git a/src/main/java/org/doremus/string2vocabulary/SKOSVocabulary.java b/src/main/java/org/doremus/string2vocabulary/SKOSVocabulary.java index bd6e567..c51aaba 100644 --- a/src/main/java/org/doremus/string2vocabulary/SKOSVocabulary.java +++ b/src/main/java/org/doremus/string2vocabulary/SKOSVocabulary.java @@ -11,6 +11,7 @@ public class SKOSVocabulary extends Vocabulary { private Map> substitutionMap; + private Map> substitutionMapNoBrackets; public SKOSVocabulary(String name, Model model) { super(name, model); @@ -19,6 +20,7 @@ public SKOSVocabulary(String name, Model model) { // Build a map substitutionMap = new HashMap<>(); + substitutionMapNoBrackets = new HashMap<>(); // for each concept StmtIterator conceptIter = @@ -37,13 +39,19 @@ public SKOSVocabulary(String name, Model model) { while (labelIterator.hasNext()) { Literal nx = labelIterator.nextStatement().getLiteral(); String value = norm(nx.getLexicalForm()); + String valueNb = normNb(nx.getLexicalForm()); String lang = nx.getLanguage(); - if (lang != null && !lang.isEmpty()) value += "@" + nx.getLanguage(); + if (lang != null && !lang.isEmpty()){ + value += "@" + nx.getLanguage(); + valueNb += "@" + nx.getLanguage(); + } // get the list or create a new one List ls = substitutionMap.computeIfAbsent(value, k -> new ArrayList<>()); + List lsNb = substitutionMapNoBrackets.computeIfAbsent(valueNb, k -> new ArrayList<>()); // add it to the list ls.add(resource); + lsNb.add(resource); } labelIterator = resource.listProperties(SKOS.altLabel); @@ -51,26 +59,35 @@ public SKOSVocabulary(String name, Model model) { while (labelIterator.hasNext()) { Literal nx = labelIterator.nextStatement().getLiteral(); String value = norm(nx.getLexicalForm()); + String valueNb = normNb(nx.getLexicalForm()); String lang = nx.getLanguage(); - if (lang != null && !lang.isEmpty()) value += "@" + nx.getLanguage(); + if (lang != null && !lang.isEmpty()){ + value += "@" + nx.getLanguage(); + valueNb += "@" + nx.getLanguage(); + } // get the list or create a new one List ls = substitutionMap.computeIfAbsent(value, k -> new ArrayList<>()); + List lsNb = substitutionMapNoBrackets.computeIfAbsent(valueNb, k -> new ArrayList<>()); // add it to the list ls.add(resource); + lsNb.add(resource); } } } @Override - public Resource findConcept(String text, boolean strict) { - for (Map.Entry> entry : substitutionMap.entrySet()) { + public Resource findConcept(String text, boolean strict, boolean excludeBrackets) { + String textOnly = text.replaceAll("@[a-z]{2,3}$", ""); + + Map> map = excludeBrackets ? substitutionMapNoBrackets : substitutionMap; + for (Map.Entry> entry : map.entrySet()) { String key = entry.getKey(); String keyPlain = key.replaceAll("@[a-z]{2,3}$", ""); boolean textLangMatch = text.equalsIgnoreCase(key); - boolean textOnlyMatch = !strict && text.replaceAll("@[a-z]{2,3}$", "").equalsIgnoreCase(keyPlain); + boolean textOnlyMatch = !strict && textOnly.equalsIgnoreCase(keyPlain); if (textLangMatch || textOnlyMatch) { List matches = entry.getValue(); diff --git a/src/main/java/org/doremus/string2vocabulary/Vocabulary.java b/src/main/java/org/doremus/string2vocabulary/Vocabulary.java index fcdf6bb..6e86075 100644 --- a/src/main/java/org/doremus/string2vocabulary/Vocabulary.java +++ b/src/main/java/org/doremus/string2vocabulary/Vocabulary.java @@ -57,7 +57,11 @@ public Resource getConcept(String code) { else return null; } - public abstract Resource findConcept(String text, boolean strict); + public Resource findConcept(String text, boolean strict) { + return findConcept(text, strict, false); + } + + public abstract Resource findConcept(String text, boolean strict, boolean excludeBrackets); @Override @@ -108,4 +112,10 @@ protected static String norm(String input) { // lowercase return seed.toLowerCase(); } + + protected static String normNb(String input) { + // remove brackets + input = input.replaceAll("\\([^)]+\\)", "").trim(); + return norm(input); + } } diff --git a/src/test/java/org/doremus/string2vocabulary/ModuleTest.java b/src/test/java/org/doremus/string2vocabulary/ModuleTest.java index 5a729ba..de0df24 100644 --- a/src/test/java/org/doremus/string2vocabulary/ModuleTest.java +++ b/src/test/java/org/doremus/string2vocabulary/ModuleTest.java @@ -1,10 +1,12 @@ package org.doremus.string2vocabulary; import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.Resource; import org.apache.jena.riot.RDFDataMgr; import org.junit.Assert; import org.junit.Test; +import java.io.File; import java.io.IOException; import java.io.StringWriter; @@ -31,6 +33,21 @@ public void string2uri() { } } + @Test + public void matchNoBrackets() { + ClassLoader classLoader = getClass().getClassLoader(); + String vocabularyFolder = classLoader.getResource("vocabulary").getPath(); + + + Vocabulary v = Vocabulary.fromFile(new File(vocabularyFolder + "/test.ttl")); + + Resource brackMatch = v.findConcept("test@en", true); + Resource noBrackMatch = v.findConcept("test@en", true, true); + System.out.println(noBrackMatch); + Assert.assertNull(brackMatch); + Assert.assertNotNull(noBrackMatch); + } + private String toTtlString(Model m) { StringWriter sw = new StringWriter(); m.write(sw, syntax); @@ -38,4 +55,4 @@ private String toTtlString(Model m) { } -} \ No newline at end of file +} diff --git a/src/test/resources/vocabulary/test.ttl b/src/test/resources/vocabulary/test.ttl new file mode 100644 index 0000000..c0abdfc --- /dev/null +++ b/src/test/resources/vocabulary/test.ttl @@ -0,0 +1,13 @@ +@prefix dct: . +@prefix rdf: . +@prefix owl: . +@prefix skos: . +@prefix rdfs: . + + + a skos:ConceptScheme . + + + a skos:Concept ; + skos:prefLabel "test (to be ignored) "@en ; + skos:topConceptOf .