Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile.dataseer
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ RUN rm /opt/grobid/grobid-home/lib/lin-64/jep/libjep.so
COPY --from=builder /opt/grobid-source/grobid-home/scripts/preload_embeddings.py .
# embeddings will be loaded when building and running tests

RUN ln -s /opt/grobid /opt/delft
RUN ln -s /opt/grobid/delft /opt/delft

COPY --from=builder /opt/grobid-source/dataseer-ml /opt/grobid/dataseer-ml
#COPY --from=builder /root/.m2/repository/org /opt/grobid/dataseer-ml/lib/org
Expand Down
57 changes: 35 additions & 22 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ dependencies {
//implementation fileTree(dir: new File(rootProject.rootDir, 'lib'), include: localLibs)

//Grobid
implementation group: 'org.grobid', name: 'grobid-core', version: '0.7.3'
implementation group: 'org.grobid', name: 'grobid-trainer', version: '0.7.3'
implementation group: 'org.grobid', name: 'grobid-core', version: '0.8.0'
implementation group: 'org.grobid', name: 'grobid-trainer', version: '0.8.0'

implementation group: 'net.arnx', name: 'jsonic', version: '1.3.10'

Expand Down Expand Up @@ -145,32 +145,22 @@ dependencies {
testImplementation group: 'org.hamcrest', name: 'hamcrest-all', version: '1.3'
}

/*configurations {
configurations {
implementation.exclude group: "org.slf4j", module: "slf4j-jdk14"
}*/
implementation.exclude group: 'org.slf4j', module: "slf4j-log4j12"
implementation.exclude group: 'log4j', module: "log4j"
}

/*configurations.all {
configurations.all {
resolutionStrategy {
force 'xml-apis:xml-apis:1.4.01'
}
}*/

configurations.implementation.setCanBeResolved(true)

configurations.all {
resolutionStrategy {
force 'xml-apis:xml-apis:1.4.01'
configurations {
all*.exclude group: 'org.slf4j', module: "slf4j-log4j12"
all*.exclude group: 'log4j', module: "log4j"
implementation.setCanBeResolved(true)
}
exclude group: 'org.slf4j', module: "slf4j-log4j12"
//exclude group: 'log4j', module: "log4j"
exclude group: 'org.slf4j', module: "slf4j-jdk14"

//if (project.gradle.startParameter.taskNames.contains('run') ||
// project.gradle.startParameter.taskNames.contains('post_process_corpus'))
// exclude group: 'org.slf4j', module: "slf4j-jdk14"

if (project.gradle.startParameter.taskNames.contains('run'))
exclude group: 'org.slf4j', module: "slf4j-jdk14"
}

// return the default value if the property has not been specified in command line
Expand Down Expand Up @@ -250,7 +240,7 @@ test {
//}

wrapper {
gradleVersion "7.1.1"
gradleVersion "7.3"
}

// Custom Tasks
Expand Down Expand Up @@ -311,5 +301,28 @@ application {

run {
    // Start the service in server mode with the bundled configuration file.
    args = ['server', 'resources/config/server.yml']

    // On JVMs newer than Java 8, open java.lang to unnamed modules for reflective access.
    if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
        jvmArgs "--add-opens", "java.base/java.lang=ALL-UNNAMED"
    }

    // Locate the active Python environment: a conda environment takes precedence
    // over a virtualenv when both variables are set.
    def envPrefix = ""
    if (System.env.CONDA_PREFIX) {
        envPrefix = "${System.env.CONDA_PREFIX}"
    } else if (System.env.VIRTUAL_ENV) {
        envPrefix = "${System.env.VIRTUAL_ENV}"
    }

    // Default: the JVM's current library path (used as-is when no usable
    // Python environment is found).
    def javaLibraryPath = "${System.getProperty('java.library.path')}:"

    if (envPrefix) {
        def envLibs = "${envPrefix}/lib"

        // listFiles() returns null when envLibs is not a directory and an empty
        // array when nothing matches; both are falsy here, so neither case throws.
        def pythonCandidates = file(envLibs).listFiles({ it.toString().contains("/lib/python") } as FileFilter)
        def pythonDirectory = pythonCandidates ? pythonCandidates[0] : null

        // Extract the "X.Y" version from the directory name (e.g. ".../lib/python3.8").
        def versionMatcher = pythonDirectory == null ? null : (pythonDirectory.toString() =~ /python([0-9]\.[0-9]+)/)

        if (versionMatcher != null && versionMatcher.find()) {
            def pythonVersion = versionMatcher.group(1)

            // Append the environment's lib directory and the jep package directory
            // inside its site-packages.
            javaLibraryPath = "${System.getProperty('java.library.path')}:" +
                    "${envLibs}:" +
                    "${envLibs}/python${pythonVersion}/site-packages/jep"
        } else {
            // Do not abort the build configuration: keep the default library path.
            logger.warn("No python<version> directory found under ${envLibs}; " +
                    "java.library.path is left unchanged")
        }
    }

    println("Running with the java.library.path: ${javaLibraryPath}")
    systemProperty "java.library.path", javaLibraryPath
}
}
2 changes: 1 addition & 1 deletion gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.1.1-all.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3-all.zip
6 changes: 3 additions & 3 deletions src/main/java/org/grobid/core/engines/DataseerClassifier.java
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,7 @@ private void enrich(org.w3c.dom.Document doc, Node node) {
JsonNode noDatasetNode = classificationNode.findPath("no_dataset");
JsonNode textNode = classificationNode.findPath("text");

Boolean localResult = new Boolean(false);
Boolean localResult = Boolean.FALSE;
if ((datasetNode != null) && (!datasetNode.isMissingNode()) &&
(noDatasetNode != null) && (!noDatasetNode.isMissingNode()) ) {
double probDataset = datasetNode.asDouble();
Expand Down Expand Up @@ -830,7 +830,7 @@ private void enrich(org.w3c.dom.Document doc, Node node) {
datasetMap.put("dataset-"+dataSetId, Pair.of(bestDataTypeWithProb.getLeft(), null));
dataInstanceMap.put("dataInstance-"+dataSetId, "dataset-"+dataSetId);
dataInstanceScoreMap.put("dataInstance-"+dataSetId, bestDataTypeWithProb.getRight());
dataInstanceReuseMap.put("dataInstance-"+dataSetId, new Boolean(isReuse));
dataInstanceReuseMap.put("dataInstance-"+dataSetId, isReuse);
dataSetId++;

// we also need to add a dataseer subtype attribute to the parent <div>
Expand Down Expand Up @@ -1012,7 +1012,7 @@ private Pair<String, Double> getBestDataType(JsonNode classificationsNode) {
bestDataType = className;
}
}
return Pair.of(bestDataType, new Double(bestProb));
return Pair.of(bestDataType, bestProb);
}

private boolean getReuseInfo(JsonNode classificationsNode) {
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/org/grobid/core/engines/DataseerParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ public List<Boolean> processing(List<List<LayoutToken>> segments, List<String> s
values = line.split(" ");
String label = values[values.length-1];
if (label.endsWith("no_dataset"))
result.add(new Boolean(false));
result.add(Boolean.FALSE);
else
result.add(new Boolean(true));
result.add(Boolean.TRUE);

if (indexMatMetSection == -1 && values[values.length-2].equals("1")) {
indexMatMetSection = i;
Expand Down Expand Up @@ -145,7 +145,7 @@ public List<Boolean> processing(List<List<LayoutToken>> segments, List<String> s
for(int j=indexMatMetSection; j < lines.length; j++) {
// set the section to true
String line = lines[j].toLowerCase();
result.set(j, new Boolean(true));
result.set(j, Boolean.TRUE);
if (j == indexMatMetSection)
continue;

Expand All @@ -163,7 +163,7 @@ public List<Boolean> processing(List<List<LayoutToken>> segments, List<String> s
break;

if (line.indexOf("acknowledgement") != -1 || line.indexOf("funding") != -1 || line.indexOf("conclusion") != -1) {
result.set(j, new Boolean(false));
result.set(j, Boolean.FALSE);
break;
}
}
Expand Down Expand Up @@ -191,7 +191,7 @@ public List<Boolean> processing(List<List<LayoutToken>> segments, List<String> s
if (nbDataset > 2) {
for(int j=0; j<result.size(); j++) {
if (j<indexMatMetSection || j>indexMatMetSection+10)
result.set(j, new Boolean(false));
result.set(j, Boolean.FALSE);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ public void processXML(String documentPath, String csvPath, String xmlPath) thro
if (localSentence.equals(sentence)) {
totalMatchedAnnotations++;
System.out.println("matched sentence!");
solvedAnnotations.add(new Integer(k));
solvedAnnotations.add(k);
// add annotation attributes to the DOM sentence

break;
Expand Down Expand Up @@ -505,7 +505,7 @@ public void process(String documentPath, String pdfPath, String csvPath, String
}

String sentence = annotation.getContext().trim();
if (sentence.toLowerCase().equals("n/a")) {
if (sentence.equalsIgnoreCase("n/a")) {
k++;
continue;
}
Expand Down Expand Up @@ -535,7 +535,7 @@ public void process(String documentPath, String pdfPath, String csvPath, String

totalMatchedAnnotations++;
//System.out.println("matched sentence! " + sentence);
solvedAnnotations.add(new Integer(k));
solvedAnnotations.add(k);

if (!docMatchedSentences.contains(sentenceSimplified)) {
docMatchedSentences.add(sentenceSimplified);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ private void writeData(String qName) {
}
}

nbDatasets.add(new Integer(nb));
nbDatasets.add(nb);
datasetTypes.add(datasetType);
accumulator.setLength(0);
}
Expand Down