Skip to content

Commit

Permalink
excludeBrackets
Browse files Browse the repository at this point in the history
  • Loading branch information
pasqLisena committed Jan 27, 2020
1 parent cf992cf commit d60ef6a
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 17 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
group 'org.doremus'
version '0.4'
version '0.5'

apply plugin: 'java'
apply plugin: 'application'
Expand Down
18 changes: 9 additions & 9 deletions src/main/java/org/doremus/string2vocabulary/MODS.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,22 @@ public MODS(String name, Model model) {
}

@Override
public Resource findConcept(String text, boolean strict) {
public Resource findConcept(String text, boolean strict, boolean excludeBrackets) {
return findModsResource(text, null);
}

public Resource findModsResource(String identifier, List<String> subjects) {
if (identifier == null || identifier.isEmpty()) return null;

String modsSearch =
"prefix modsrdf: <http://www.loc.gov/standards/mods/rdf/v1/#>\n" +
"select distinct ?cat where {\n" +
" { ?cat modsrdf:identifier ?id}\n" +
" UNION {\n" +
" ?cat modsrdf:identifierGroup / modsrdf:identifierGroupValue ?id\n" +
" }\n" +
" FILTER (lcase(str(?id)) = \"" + identifier.toLowerCase() + "\")\n" +
"}";
"prefix modsrdf: <http://www.loc.gov/standards/mods/rdf/v1/#>\n" +
"select distinct ?cat where {\n" +
" { ?cat modsrdf:identifier ?id}\n" +
" UNION {\n" +
" ?cat modsrdf:identifierGroup / modsrdf:identifierGroupValue ?id\n" +
" }\n" +
" FILTER (lcase(str(?id)) = \"" + identifier.toLowerCase() + "\")\n" +
"}";

// search all catalogs with that identifier
QueryExecution qexec = QueryExecutionFactory.create(modsSearch, vocabulary);
Expand Down
27 changes: 22 additions & 5 deletions src/main/java/org/doremus/string2vocabulary/SKOSVocabulary.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

public class SKOSVocabulary extends Vocabulary {
private Map<String, List<Resource>> substitutionMap;
private Map<String, List<Resource>> substitutionMapNoBrackets;

public SKOSVocabulary(String name, Model model) {
super(name, model);
Expand All @@ -19,6 +20,7 @@ public SKOSVocabulary(String name, Model model) {

// Build a map
substitutionMap = new HashMap<>();
substitutionMapNoBrackets = new HashMap<>();

// for each concept
StmtIterator conceptIter =
Expand All @@ -37,40 +39,55 @@ public SKOSVocabulary(String name, Model model) {
while (labelIterator.hasNext()) {
Literal nx = labelIterator.nextStatement().getLiteral();
String value = norm(nx.getLexicalForm());
String valueNb = normNb(nx.getLexicalForm());
String lang = nx.getLanguage();
if (lang != null && !lang.isEmpty()) value += "@" + nx.getLanguage();
if (lang != null && !lang.isEmpty()){
value += "@" + nx.getLanguage();
valueNb += "@" + nx.getLanguage();
}

// get the list or create a new one
List<Resource> ls = substitutionMap.computeIfAbsent(value, k -> new ArrayList<>());
List<Resource> lsNb = substitutionMapNoBrackets.computeIfAbsent(valueNb, k -> new ArrayList<>());
// add it to the list
ls.add(resource);
lsNb.add(resource);
}

labelIterator = resource.listProperties(SKOS.altLabel);
//for each label
while (labelIterator.hasNext()) {
Literal nx = labelIterator.nextStatement().getLiteral();
String value = norm(nx.getLexicalForm());
String valueNb = normNb(nx.getLexicalForm());
String lang = nx.getLanguage();
if (lang != null && !lang.isEmpty()) value += "@" + nx.getLanguage();
if (lang != null && !lang.isEmpty()){
value += "@" + nx.getLanguage();
valueNb += "@" + nx.getLanguage();
}

// get the list or create a new one
List<Resource> ls = substitutionMap.computeIfAbsent(value, k -> new ArrayList<>());
List<Resource> lsNb = substitutionMapNoBrackets.computeIfAbsent(valueNb, k -> new ArrayList<>());
// add it to the list
ls.add(resource);
lsNb.add(resource);
}
}
}


@Override
public Resource findConcept(String text, boolean strict) {
for (Map.Entry<String, List<Resource>> entry : substitutionMap.entrySet()) {
public Resource findConcept(String text, boolean strict, boolean excludeBrackets) {
String textOnly = text.replaceAll("@[a-z]{2,3}$", "");

Map<String, List<Resource>> map = excludeBrackets ? substitutionMapNoBrackets : substitutionMap;
for (Map.Entry<String, List<Resource>> entry : map.entrySet()) {
String key = entry.getKey();
String keyPlain = key.replaceAll("@[a-z]{2,3}$", "");

boolean textLangMatch = text.equalsIgnoreCase(key);
boolean textOnlyMatch = !strict && text.replaceAll("@[a-z]{2,3}$", "").equalsIgnoreCase(keyPlain);
boolean textOnlyMatch = !strict && textOnly.equalsIgnoreCase(keyPlain);

if (textLangMatch || textOnlyMatch) {
List<Resource> matches = entry.getValue();
Expand Down
12 changes: 11 additions & 1 deletion src/main/java/org/doremus/string2vocabulary/Vocabulary.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,11 @@ public Resource getConcept(String code) {
else return null;
}

public abstract Resource findConcept(String text, boolean strict);
/**
 * Convenience overload that looks up a concept with bracket exclusion switched off.
 *
 * <p>Forwards to {@link #findConcept(String, boolean, boolean)} passing
 * {@code false} as the third argument.
 *
 * @param text   the text to look up, passed through unchanged
 * @param strict the strictness flag, passed through unchanged
 * @return whatever the three-argument variant returns for the same input
 */
public Resource findConcept(String text, boolean strict) {
  final boolean excludeBrackets = false;
  return findConcept(text, strict, excludeBrackets);
}

public abstract Resource findConcept(String text, boolean strict, boolean excludeBrackets);


@Override
Expand Down Expand Up @@ -108,4 +112,10 @@ protected static String norm(String input) {
// lowercase
return seed.toLowerCase();
}

/**
 * "No brackets" variant of {@code norm}: removes parenthesised fragments from
 * the input before normalising it.
 *
 * <p>Every {@code (...)} group holding at least one character other than
 * {@code ')'} is deleted; an empty pair {@code ()} is left in place. The
 * remainder is trimmed and then handed to {@code norm}.
 *
 * @param input the raw label; must not be {@code null}
 * @return the result of {@code norm} applied to the bracket-free, trimmed text
 */
protected static String normNb(String input) {
  String withoutBrackets = input.replaceAll("\\([^)]+\\)", "");
  return norm(withoutBrackets.trim());
}
}
19 changes: 18 additions & 1 deletion src/test/java/org/doremus/string2vocabulary/ModuleTest.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package org.doremus.string2vocabulary;

import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.riot.RDFDataMgr;
import org.junit.Assert;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.io.StringWriter;

Expand All @@ -31,11 +33,26 @@ public void string2uri() {
}
}

/**
 * Checks that a label carrying a parenthesised suffix is only matched when
 * bracket exclusion is requested.
 *
 * <p>The fixture {@code vocabulary/test.ttl} declares a single concept whose
 * prefLabel is {@code "test (to be ignored) "@en}.
 */
@Test
public void matchNoBrackets() {
  ClassLoader classLoader = getClass().getClassLoader();
  // Fail with an explicit message instead of a bare NullPointerException
  // when the fixture folder is not on the test classpath.
  Assert.assertNotNull("resource folder 'vocabulary' not found on the classpath",
      classLoader.getResource("vocabulary"));
  String vocabularyFolder = classLoader.getResource("vocabulary").getPath();

  Vocabulary v = Vocabulary.fromFile(new File(vocabularyFolder + "/test.ttl"));

  // Two-argument lookup: brackets are kept, so the plain text must not match.
  Resource brackMatch = v.findConcept("test@en", true);
  // Three-argument lookup with excludeBrackets = true: the bracketed part is dropped first.
  Resource noBrackMatch = v.findConcept("test@en", true, true);

  Assert.assertNull("label with brackets must not match the bare text", brackMatch);
  Assert.assertNotNull("label must match once its brackets are excluded", noBrackMatch);
}

/**
 * Serialises the given model into a string.
 *
 * <p>The model is written to an in-memory writer using this test class's
 * {@code syntax} setting.
 *
 * @param m the model to serialise
 * @return the text produced by {@code m.write}
 */
private String toTtlString(Model m) {
  StringWriter buffer = new StringWriter();
  m.write(buffer, syntax);
  return buffer.toString();
}


}
}
13 changes: 13 additions & 0 deletions src/test/resources/vocabulary/test.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
@prefix dct: <http://purl.org/dc/terms/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

<http://example.org/vocabulary/>
a skos:ConceptScheme .

<http://example.org/vocabulary/brackets>
a skos:Concept ;
skos:prefLabel "test (to be ignored) "@en ;
skos:topConceptOf <http://example.org/vocabulary/> .

0 comments on commit d60ef6a

Please sign in to comment.