diff --git a/Dockerfile.dataseer b/Dockerfile.dataseer index 392de0a..79adcce 100644 --- a/Dockerfile.dataseer +++ b/Dockerfile.dataseer @@ -125,7 +125,7 @@ RUN rm /opt/grobid/grobid-home/lib/lin-64/jep/libjep.so COPY --from=builder /opt/grobid-source/grobid-home/scripts/preload_embeddings.py . # embeddings will be loaded when building and running tests -RUN ln -s /opt/grobid /opt/delft +RUN ln -s /opt/grobid/delft /opt/delft COPY --from=builder /opt/grobid-source/dataseer-ml /opt/grobid/dataseer-ml #COPY --from=builder /root/.m2/repository/org /opt/grobid/dataseer-ml/lib/org diff --git a/build.gradle b/build.gradle index 5efee44..2b47908 100644 --- a/build.gradle +++ b/build.gradle @@ -86,8 +86,8 @@ dependencies { //implementation fileTree(dir: new File(rootProject.rootDir, 'lib'), include: localLibs) //Grobid - implementation group: 'org.grobid', name: 'grobid-core', version: '0.7.3' - implementation group: 'org.grobid', name: 'grobid-trainer', version: '0.7.3' + implementation group: 'org.grobid', name: 'grobid-core', version: '0.8.0' + implementation group: 'org.grobid', name: 'grobid-trainer', version: '0.8.0' implementation group: 'net.arnx', name: 'jsonic', version: '1.3.10' @@ -145,32 +145,22 @@ dependencies { testImplementation group: 'org.hamcrest', name: 'hamcrest-all', version: '1.3' } -/*configurations { +configurations { implementation.exclude group: "org.slf4j", module: "slf4j-jdk14" -}*/ + implementation.exclude group: 'org.slf4j', module: "slf4j-log4j12" + implementation.exclude group: 'log4j', module: "log4j" +} -/*configurations.all { +configurations.all { resolutionStrategy { force 'xml-apis:xml-apis:1.4.01' } -}*/ - -configurations.implementation.setCanBeResolved(true) -configurations.all { - resolutionStrategy { - force 'xml-apis:xml-apis:1.4.01' + configurations { + all*.exclude group: 'org.slf4j', module: "slf4j-log4j12" + all*.exclude group: 'log4j', module: "log4j" + implementation.setCanBeResolved(true) } - exclude group: 'org.slf4j', module: "slf4j-log4j12" - //exclude group: 'log4j', module: "log4j" - exclude group: 'org.slf4j', module: "slf4j-jdk14" - - //if (project.gradle.startParameter.taskNames.contains('run') || - // project.gradle.startParameter.taskNames.contains('post_process_corpus')) - // exclude group: 'org.slf4j', module: "slf4j-jdk14" - - if (project.gradle.startParameter.taskNames.contains('run')) - exclude group: 'org.slf4j', module: "slf4j-jdk14" } // return the default value if the property has not been specified in command line @@ -250,7 +240,7 @@ test { //} wrapper { - gradleVersion "7.1.1" + gradleVersion "7.3" } // Custom Tasks @@ -311,5 +301,28 @@ application { run { args = ['server', 'resources/config/server.yml'] + if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) { + jvmArgs "--add-opens", "java.base/java.lang=ALL-UNNAMED" + } + + def envPrefix = "" + if (System.env.CONDA_PREFIX) { + envPrefix = "${System.env.CONDA_PREFIX}" + } else if (System.env.VIRTUAL_ENV) { + envPrefix = "${System.env.VIRTUAL_ENV}" + } + def javaLibraryPath = "${System.getProperty('java.library.path')}:" + + if (envPrefix) { + def envLibs = "${envPrefix}/lib" + def pythonDirectory = file(envLibs).listFiles({ it.toString().contains("/lib/python") } as FileFilter)?.first() + def pythonVersion = (pythonDirectory =~ /python([0-9]\.[0-9]+)/)[0][1] + + javaLibraryPath = "${System.getProperty('java.library.path')}:" + + "${envLibs}:" + + "${envLibs}/python${pythonVersion}/site-packages/jep" + } + println("Running with the java.library.path: ${javaLibraryPath}") + systemProperty "java.library.path", javaLibraryPath } } diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index c41933c..5bb8ddd 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -3,4 +3,4 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.1.1-all.zip \ No newline at end of file +distributionUrl=https\://services.gradle.org/distributions/gradle-7.3-all.zip \ No newline at end of file diff --git a/src/main/java/org/grobid/core/engines/DataseerClassifier.java b/src/main/java/org/grobid/core/engines/DataseerClassifier.java index d32d67a..1817605 100644 --- a/src/main/java/org/grobid/core/engines/DataseerClassifier.java +++ b/src/main/java/org/grobid/core/engines/DataseerClassifier.java @@ -713,7 +713,7 @@ private void enrich(org.w3c.dom.Document doc, Node node) { JsonNode noDatasetNode = classificationNode.findPath("no_dataset"); JsonNode textNode = classificationNode.findPath("text"); - Boolean localResult = new Boolean(false); + Boolean localResult = Boolean.FALSE; if ((datasetNode != null) && (!datasetNode.isMissingNode()) && (noDatasetNode != null) && (!noDatasetNode.isMissingNode()) ) { double probDataset = datasetNode.asDouble(); @@ -830,7 +830,7 @@ private void enrich(org.w3c.dom.Document doc, Node node) { datasetMap.put("dataset-"+dataSetId, Pair.of(bestDataTypeWithProb.getLeft(), null)); dataInstanceMap.put("dataInstance-"+dataSetId, "dataset-"+dataSetId); dataInstanceScoreMap.put("dataInstance-"+dataSetId, bestDataTypeWithProb.getRight()); - dataInstanceReuseMap.put("dataInstance-"+dataSetId, new Boolean(isReuse)); + dataInstanceReuseMap.put("dataInstance-"+dataSetId, isReuse); dataSetId++; // we also need to add a dataseer subtype attribute to the parent