From 3422b937d094a83889b2e14d923a21254ed87fc5 Mon Sep 17 00:00:00 2001 From: haider Date: Tue, 1 Oct 2024 13:05:22 +0100 Subject: [PATCH 1/3] - Update curie formation logic --- .../annotators/ShortFormAnnotator.java | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java index 1b2d5b1b0..11f5f8e57 100644 --- a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java +++ b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java @@ -36,7 +36,25 @@ public static void annotateShortForms(OntologyGraph graph) { } String shortForm = extractShortForm(graph, ontologyBaseUris, preferredPrefix, c.uri); - String curie = shortForm.replaceFirst("_", ":"); + + /* + CURIEs are formed by following rules: + If there are more than one underscore "_" in the shortform just keep the curie same as shortform + If there is only one underscore "_" AND the characters after the underscore are numbers then replace the underscore with colon ":" + If there is only one underscore "_" and the characters after the underscore are not just numbers then just keep the curie same as shortform + */ + + String curie; + if (shortForm.chars().filter(ch -> ch == '_').count() > 1) { + curie = shortForm; + } else { + int underscoreIndex = shortForm.indexOf('_'); + if (underscoreIndex != -1 && shortForm.substring(underscoreIndex + 1).matches("\\d+")) { + curie = shortForm.replaceFirst("_", ":"); + } else { + curie = shortForm; + } + } c.properties.addProperty("shortForm", PropertyValueLiteral.fromString(shortForm)); c.properties.addProperty("curie", PropertyValueLiteral.fromString(curie)); From 4eb0e5f3d37a07ad287fd61530b1a19a1c1da829 Mon Sep 17 00:00:00 2001 From: haider Date: Tue, 1 Oct 2024 13:52:19 +0100 Subject: [PATCH 2/3] - Improve code logic --- 
.../rdf2json/annotators/ShortFormAnnotator.java | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java index 11f5f8e57..d110fb6e3 100644 --- a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java +++ b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java @@ -44,26 +44,16 @@ public static void annotateShortForms(OntologyGraph graph) { If there is only one underscore "_" and the characters after the underscore are not just numbers then just keep the curie same as shortform */ - String curie; - if (shortForm.chars().filter(ch -> ch == '_').count() > 1) { - curie = shortForm; - } else { - int underscoreIndex = shortForm.indexOf('_'); - if (underscoreIndex != -1 && shortForm.substring(underscoreIndex + 1).matches("\\d+")) { - curie = shortForm.replaceFirst("_", ":"); - } else { - curie = shortForm; - } + String curie = shortForm; + if (shortForm.matches("^[^_]+_\\d+$")) { + curie = shortForm.replaceFirst("_", ":"); } - c.properties.addProperty("shortForm", PropertyValueLiteral.fromString(shortForm)); c.properties.addProperty("curie", PropertyValueLiteral.fromString(curie)); } } long endTime3 = System.nanoTime(); logger.info("annotate short forms: {}", ((endTime3 - startTime3) / 1000 / 1000 / 1000)); - - } private static String extractShortForm(OntologyGraph graph, Set ontologyBaseUris, String preferredPrefix, From 5e161a17e9be95c61a8aa023bf5e3402c49fd4ec Mon Sep 17 00:00:00 2001 From: haider Date: Wed, 2 Oct 2024 14:41:57 +0100 Subject: [PATCH 3/3] - Improve code logic to handle edge cases --- .../uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java 
b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java index d110fb6e3..bdb9b8809 100644 --- a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java +++ b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/ShortFormAnnotator.java @@ -39,15 +39,12 @@ public static void annotateShortForms(OntologyGraph graph) { /* CURIEs are formed by following rules: - If there are more than one underscore "_" in the shortform just keep the curie same as shortform If there is only one underscore "_" AND the characters after the underscore are numbers then replace the underscore with colon ":" If there is only one underscore "_" and the characters after the underscore are not just numbers then just keep the curie same as shortform + If there are multiple underscores but only digits after the last underscore, then the last underscore is replaced with a colon */ - String curie = shortForm; - if (shortForm.matches("^[^_]+_\\d+$")) { - curie = shortForm.replaceFirst("_", ":"); - } + String curie = shortForm.replaceFirst("_(\\d+)$", ":$1"); c.properties.addProperty("shortForm", PropertyValueLiteral.fromString(shortForm)); c.properties.addProperty("curie", PropertyValueLiteral.fromString(curie)); }