diff --git a/Dockerfile.dataseer b/Dockerfile.dataseer index 392de0a..79adcce 100644 --- a/Dockerfile.dataseer +++ b/Dockerfile.dataseer @@ -125,7 +125,7 @@ RUN rm /opt/grobid/grobid-home/lib/lin-64/jep/libjep.so COPY --from=builder /opt/grobid-source/grobid-home/scripts/preload_embeddings.py . # embeddings will be loaded when building and running tests -RUN ln -s /opt/grobid /opt/delft +RUN ln -s /opt/grobid/delft /opt/delft COPY --from=builder /opt/grobid-source/dataseer-ml /opt/grobid/dataseer-ml #COPY --from=builder /root/.m2/repository/org /opt/grobid/dataseer-ml/lib/org diff --git a/build.gradle b/build.gradle index 5efee44..2b47908 100644 --- a/build.gradle +++ b/build.gradle @@ -86,8 +86,8 @@ dependencies { //implementation fileTree(dir: new File(rootProject.rootDir, 'lib'), include: localLibs) //Grobid - implementation group: 'org.grobid', name: 'grobid-core', version: '0.7.3' - implementation group: 'org.grobid', name: 'grobid-trainer', version: '0.7.3' + implementation group: 'org.grobid', name: 'grobid-core', version: '0.8.0' + implementation group: 'org.grobid', name: 'grobid-trainer', version: '0.8.0' implementation group: 'net.arnx', name: 'jsonic', version: '1.3.10' @@ -145,32 +145,22 @@ dependencies { testImplementation group: 'org.hamcrest', name: 'hamcrest-all', version: '1.3' } -/*configurations { +configurations { implementation.exclude group: "org.slf4j", module: "slf4j-jdk14" -}*/ + implementation.exclude group: 'org.slf4j', module: "slf4j-log4j12" + implementation.exclude group: 'log4j', module: "log4j" +} -/*configurations.all { +configurations.all { resolutionStrategy { force 'xml-apis:xml-apis:1.4.01' } -}*/ - -configurations.implementation.setCanBeResolved(true) -configurations.all { - resolutionStrategy { - force 'xml-apis:xml-apis:1.4.01' + configurations { + all*.exclude group: 'org.slf4j', module: "slf4j-log4j12" + all*.exclude group: 'log4j', module: "log4j" + implementation.setCanBeResolved(true) } - exclude group: 'org.slf4j', module: "slf4j-log4j12" - //exclude group: 'log4j', module: "log4j" - exclude group: 'org.slf4j', module: "slf4j-jdk14" - - //if (project.gradle.startParameter.taskNames.contains('run') || - // project.gradle.startParameter.taskNames.contains('post_process_corpus')) - // exclude group: 'org.slf4j', module: "slf4j-jdk14" - - if (project.gradle.startParameter.taskNames.contains('run')) - exclude group: 'org.slf4j', module: "slf4j-jdk14" } // return the default value if the property has not been specified in command line @@ -250,7 +240,7 @@ test { //} wrapper { - gradleVersion "7.1.1" + gradleVersion "7.3" } // Custom Tasks @@ -311,5 +301,28 @@ application { run { args = ['server', 'resources/config/server.yml'] + if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) { + jvmArgs "--add-opens", "java.base/java.lang=ALL-UNNAMED" + } + + def envPrefix = "" + if (System.env.CONDA_PREFIX) { + envPrefix = "${System.env.CONDA_PREFIX}" + } else if (System.env.VIRTUAL_ENV) { + envPrefix = "${System.env.VIRTUAL_ENV}" + } + def javaLibraryPath = "${System.getProperty('java.library.path')}:" + + if (envPrefix) { + def envLibs = "${envPrefix}/lib" + def pythonDirectory = file(envLibs).listFiles({ it.toString().contains("/lib/python") } as FileFilter)?.first() + def pythonVersion = (pythonDirectory =~ /python([0-9]\.[0-9]+)/)[0][1] + + javaLibraryPath = "${System.getProperty('java.library.path')}:" + + "${envLibs}:" + + "${envLibs}/python${pythonVersion}/site-packages/jep" + } + println("Running with the java.library.path: ${javaLibraryPath}") + systemProperty "java.library.path", javaLibraryPath } } diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index c41933c..5bb8ddd 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -3,4 +3,4 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.1.1-all.zip \ No newline at end of file +distributionUrl=https\://services.gradle.org/distributions/gradle-7.3-all.zip \ No newline at end of file diff --git a/src/main/java/org/grobid/core/engines/DataseerClassifier.java b/src/main/java/org/grobid/core/engines/DataseerClassifier.java index d32d67a..1817605 100644 --- a/src/main/java/org/grobid/core/engines/DataseerClassifier.java +++ b/src/main/java/org/grobid/core/engines/DataseerClassifier.java @@ -713,7 +713,7 @@ private void enrich(org.w3c.dom.Document doc, Node node) { JsonNode noDatasetNode = classificationNode.findPath("no_dataset"); JsonNode textNode = classificationNode.findPath("text"); - Boolean localResult = new Boolean(false); + Boolean localResult = Boolean.FALSE; if ((datasetNode != null) && (!datasetNode.isMissingNode()) && (noDatasetNode != null) && (!noDatasetNode.isMissingNode()) ) { double probDataset = datasetNode.asDouble(); @@ -830,7 +830,7 @@ private void enrich(org.w3c.dom.Document doc, Node node) { datasetMap.put("dataset-"+dataSetId, Pair.of(bestDataTypeWithProb.getLeft(), null)); dataInstanceMap.put("dataInstance-"+dataSetId, "dataset-"+dataSetId); dataInstanceScoreMap.put("dataInstance-"+dataSetId, bestDataTypeWithProb.getRight()); - dataInstanceReuseMap.put("dataInstance-"+dataSetId, new Boolean(isReuse)); + dataInstanceReuseMap.put("dataInstance-"+dataSetId, isReuse); dataSetId++; // we also need to add a dataseer subtype attribute to the parent
@@ -1012,7 +1012,7 @@ private Pair getBestDataType(JsonNode classificationsNode) { bestDataType = className; } } - return Pair.of(bestDataType, new Double(bestProb)); + return Pair.of(bestDataType, bestProb); } private boolean getReuseInfo(JsonNode classificationsNode) { diff --git a/src/main/java/org/grobid/core/engines/DataseerParser.java b/src/main/java/org/grobid/core/engines/DataseerParser.java index a15470e..b47b9e5 100644 --- a/src/main/java/org/grobid/core/engines/DataseerParser.java +++ b/src/main/java/org/grobid/core/engines/DataseerParser.java @@ -114,9 +114,9 @@ public List processing(List> segments, List s values = line.split(" "); String label = values[values.length-1]; if (label.endsWith("no_dataset")) - result.add(new Boolean(false)); + result.add(Boolean.FALSE); else - result.add(new Boolean(true)); + result.add(Boolean.TRUE); if (indexMatMetSection == -1 && values[values.length-2].equals("1")) { indexMatMetSection = i; @@ -145,7 +145,7 @@ public List processing(List> segments, List s for(int j=indexMatMetSection; j < lines.length; j++) { // set the section to true String line = lines[j].toLowerCase(); - result.set(j, new Boolean(true)); + result.set(j, Boolean.TRUE); if (j == indexMatMetSection) continue; @@ -163,7 +163,7 @@ public List processing(List> segments, List s break; if (line.indexOf("acknowledgement") != -1 || line.indexOf("funding") != -1 || line.indexOf("conclusion") != -1) { - result.set(j, new Boolean(false)); + result.set(j, Boolean.FALSE); break; } } @@ -191,7 +191,7 @@ public List processing(List> segments, List s if (nbDataset > 2) { for(int j=0; jindexMatMetSection+10) - result.set(j, new Boolean(false)); + result.set(j, Boolean.FALSE); } } } diff --git a/src/main/java/org/grobid/trainer/AnnotatedCorpusGeneratorCSV.java b/src/main/java/org/grobid/trainer/AnnotatedCorpusGeneratorCSV.java index 4d1be20..9166a0d 100644 --- a/src/main/java/org/grobid/trainer/AnnotatedCorpusGeneratorCSV.java +++ b/src/main/java/org/grobid/trainer/AnnotatedCorpusGeneratorCSV.java @@ -202,7 +202,7 @@ public void processXML(String documentPath, String csvPath, String xmlPath) thro if (localSentence.equals(sentence)) { totalMatchedAnnotations++; System.out.println("matched sentence!"); - solvedAnnotations.add(new Integer(k)); + solvedAnnotations.add(k); // add annotation attributes to the DOM sentence break; @@ -505,7 +505,7 @@ public void process(String documentPath, String pdfPath, String csvPath, String } String sentence = annotation.getContext().trim(); - if (sentence.toLowerCase().equals("n/a")) { + if (sentence.equalsIgnoreCase("n/a")) { k++; continue; } @@ -535,7 +535,7 @@ public void process(String documentPath, String pdfPath, String csvPath, String totalMatchedAnnotations++; //System.out.println("matched sentence! " + sentence); - solvedAnnotations.add(new Integer(k)); + solvedAnnotations.add(k); if (!docMatchedSentences.contains(sentenceSimplified)) { docMatchedSentences.add(sentenceSimplified); diff --git a/src/main/java/org/grobid/trainer/DataseerAnnotationSaxHandler.java b/src/main/java/org/grobid/trainer/DataseerAnnotationSaxHandler.java index ff56a32..e40e898 100644 --- a/src/main/java/org/grobid/trainer/DataseerAnnotationSaxHandler.java +++ b/src/main/java/org/grobid/trainer/DataseerAnnotationSaxHandler.java @@ -244,7 +244,7 @@ private void writeData(String qName) { } } - nbDatasets.add(new Integer(nb)); + nbDatasets.add(nb); datasetTypes.add(datasetType); accumulator.setLength(0); }