diff --git a/pom.xml b/pom.xml
index 7420d48..186fb68 100644
--- a/pom.xml
+++ b/pom.xml
@@ -23,7 +23,7 @@
edu.umn.biomedicus
biomedicus-gpl
jar
- 1.6.0-SNAPSHOT
+ 1.6.0
biomedicus-gpl
BioMedICUS Annotation System - GPL Extensions
@@ -31,7 +31,7 @@
UTF-8
UTF-8
- 1.6.0-SNAPSHOT
+ 1.6.0
diff --git a/src/main/java/edu/umn/biomedicus/gpl/penntree/PennTreebankInputFileAdapter.java b/src/main/java/edu/umn/biomedicus/gpl/penntree/PennTreebankInputFileAdapter.java
new file mode 100644
index 0000000..c2778ca
--- /dev/null
+++ b/src/main/java/edu/umn/biomedicus/gpl/penntree/PennTreebankInputFileAdapter.java
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2016 Regents of the University of Minnesota
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package edu.umn.biomedicus.gpl.penntree;
+
+import com.google.inject.Inject;
+import edu.stanford.nlp.ling.TaggedWord;
+import edu.stanford.nlp.trees.PennTreeReaderFactory;
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.TreeReader;
+import edu.umn.biomedicus.common.types.syntax.PartOfSpeech;
+import edu.umn.biomedicus.common.types.syntax.PartsOfSpeech;
+import edu.umn.biomedicus.common.types.text.ImmutableParseToken;
+import edu.umn.biomedicus.common.types.text.Sentence;
+import edu.umn.biomedicus.framework.store.Document;
+import edu.umn.biomedicus.framework.store.Label;
+import edu.umn.biomedicus.framework.store.Span;
+import edu.umn.biomedicus.framework.store.TextView;
+import edu.umn.biomedicus.uima.adapter.UimaAdapters;
+import edu.umn.biomedicus.uima.files.InputFileAdapter;
+import edu.umn.biomedicus.uima.labels.LabelAdapters;
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CollectionException;
+
+/**
+ * Adapts Penn Treebank format files to CAS documents labeled with sentences, parse tokens, and
+ * parts of speech.
+ *
+ * @author Ben Knoll
+ * @since 1.3.0
+ */
+public final class PennTreebankInputFileAdapter implements InputFileAdapter {
+
+  /**
+   * Creates the tree readers used to read each input file.
+   */
+  private final PennTreeReaderFactory pennTreeReaderFactory = new PennTreeReaderFactory();
+
+  /**
+   * Label adapters handed to {@link UimaAdapters#createDocument} when the document is created.
+   */
+  private final LabelAdapters labelAdapters;
+
+  /**
+   * The name of the text view to load into, assigned by {@link #setTargetView(String)}.
+   */
+  private String viewName;
+
+  @Inject
+  public PennTreebankInputFileAdapter(LabelAdapters labelAdapters) {
+    this.labelAdapters = labelAdapters;
+  }
+
+  /**
+   * Reads every tree in the file, rebuilds the text by writing a space followed by each tagged
+   * word, and labels the new text view with sentences, parse tokens, and parts of speech.
+   *
+   * @param cas the CAS the document is created in
+   * @param path the Penn Treebank file to read
+   * @throws CollectionException if reading the file fails with an {@link IOException}
+   */
+  @Override
+  public void adaptFile(CAS cas, Path path) throws CollectionException {
+    StringBuilder text = new StringBuilder();
+    List<SentenceBuilder> sentences = new ArrayList<>();
+    // One generator per call; it is only used to choose between the parts of an "A|B" tag.
+    Random random = new Random();
+    try (Reader reader = Files.newBufferedReader(path)) {
+      TreeReader treeReader = pennTreeReaderFactory.newTreeReader(reader);
+      Tree tree;
+      while ((tree = treeReader.readTree()) != null) {
+        int sentenceStart = text.length();
+        List<TaggedWord> taggedWords = tree.taggedYield();
+        SentenceBuilder sentenceBuilder = new SentenceBuilder();
+        sentenceBuilder.tokenBuilders = new ArrayList<>(taggedWords.size());
+        for (TaggedWord taggedWord : taggedWords) {
+          String tag = taggedWord.tag();
+          String word = taggedWord.word();
+          text.append(" ");
+          // The -NONE- tag occurs in some documents when there is an assumed phrase; such words
+          // get no token, but the separating space above is still written.
+          if (!"-NONE-".equals(tag)) {
+            int tokenStart = text.length();
+            if ("-LRB-".equals(word)) {
+              text.append('(');
+            } else if ("-RRB-".equals(word)) {
+              text.append(')');
+            } else if ("-LCB-".equals(word)) {
+              text.append('{');
+            } else if ("-RCB-".equals(word)) {
+              text.append('}');
+            } else if ("-LSB-".equals(word)) {
+              text.append('[');
+            } else if ("-RSB-".equals(word)) {
+              text.append(']');
+            } else if ("``".equals(word)) {
+              text.append("\"");
+            } else if ("''".equals(word)) {
+              text.append("\"");
+            } else {
+              text.append(word);
+            }
+            int tokenEnd = text.length();
+            PartOfSpeech partOfSpeech;
+            if ("-LRB-".equals(tag)) {
+              partOfSpeech = PartOfSpeech.LEFT_PAREN;
+            } else if ("-RRB-".equals(tag)) {
+              partOfSpeech = PartOfSpeech.RIGHT_PAREN;
+            } else if (tag.contains("|")) {
+              // Ambiguous tag written as alternatives separated by '|': pick one at random.
+              String[] tags = tag.split("\\|");
+              partOfSpeech = PartsOfSpeech.forTag(tags[random.nextInt(tags.length)]);
+            } else {
+              partOfSpeech = PartsOfSpeech.forTag(tag);
+            }
+            if (partOfSpeech == null) {
+              throw new AssertionError("part of speech should not be null");
+            }
+            TokenBuilder tokenBuilder = new TokenBuilder();
+            tokenBuilder.tokenSpan = new Span(tokenStart, tokenEnd);
+            tokenBuilder.partOfSpeech = partOfSpeech;
+            sentenceBuilder.tokenBuilders.add(tokenBuilder);
+          }
+        }
+
+        sentenceBuilder.sentenceSpan = new Span(sentenceStart, text.length());
+        sentences.add(sentenceBuilder);
+      }
+    } catch (IOException e) {
+      throw new CollectionException(e);
+    }
+
+    Document document = UimaAdapters.createDocument(cas, labelAdapters,
+        path.getFileName().toString());
+    TextView textView = document.newTextView()
+        .withText(text.toString())
+        .withName(viewName)
+        .build();
+
+    for (SentenceBuilder sentence : sentences) {
+      textView.label(Label.create(sentence.sentenceSpan, Sentence.create()));
+
+      for (TokenBuilder tokenBuilder : sentence.tokenBuilders) {
+        Span tokenSpan = tokenBuilder.tokenSpan;
+
+        textView.label(Label.create(tokenSpan,
+            ImmutableParseToken.builder()
+                .text(tokenSpan.getCovered(text).toString())
+                .hasSpaceAfter(true)
+                .build()
+        ));
+
+        PartOfSpeech partOfSpeech = tokenBuilder.partOfSpeech;
+        if (partOfSpeech != null) {
+          String pos = partOfSpeech.toString();
+          textView.label(Label.create(tokenSpan, PartsOfSpeech.forTag(pos)));
+        }
+      }
+    }
+  }
+
+  @Override
+  public void setTargetView(String viewName) {
+    this.viewName = viewName;
+  }
+
+  /**
+   * Used to build sentences.
+   */
+  private static class SentenceBuilder {
+
+    /**
+     * The begin and end of the sentence.
+     */
+    private Span sentenceSpan;
+
+    /**
+     * The tokens of the sentence.
+     */
+    private List<TokenBuilder> tokenBuilders;
+  }
+
+  /**
+   * Used to build tokens
+   */
+  private static class TokenBuilder {
+
+    /**
+     * The begin and end of the token
+     */
+    private Span tokenSpan;
+
+    /**
+     * The part of speech.
+     */
+    private PartOfSpeech partOfSpeech;
+  }
+}
diff --git a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/SHStanfordConstituencyParser.java b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/SHStanfordConstituencyParser.java
index 5b87889..8f7b1a3 100644
--- a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/SHStanfordConstituencyParser.java
+++ b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/SHStanfordConstituencyParser.java
@@ -17,41 +17,44 @@
package edu.umn.biomedicus.gpl.stanford.parser;
-import edu.umn.biomedicus.framework.DocumentProcessor;
-import edu.umn.biomedicus.framework.store.TextView;
-import edu.umn.biomedicus.framework.store.Label;
-import edu.umn.biomedicus.framework.store.LabelIndex;
-import edu.umn.biomedicus.framework.store.Labeler;
import edu.umn.biomedicus.common.types.semantics.SocialHistoryCandidate;
import edu.umn.biomedicus.common.types.syntax.PartOfSpeech;
import edu.umn.biomedicus.common.types.text.ConstituencyParse;
import edu.umn.biomedicus.common.types.text.ParseToken;
import edu.umn.biomedicus.exc.BiomedicusException;
-
+import edu.umn.biomedicus.framework.DocumentProcessor;
+import edu.umn.biomedicus.framework.store.Document;
+import edu.umn.biomedicus.framework.store.Label;
+import edu.umn.biomedicus.framework.store.LabelIndex;
+import edu.umn.biomedicus.framework.store.Labeler;
+import edu.umn.biomedicus.framework.store.TextView;
import javax.inject.Inject;
public class SHStanfordConstituencyParser implements DocumentProcessor {
- private final LabelIndex partOfSpeechLabelIndex;
- private final Labeler constituencyParseLabeler;
- private final LabelIndex parseTokenLabelIndex;
- private final StanfordConstituencyParserModel stanfordConstituencyParserModel;
- private final LabelIndex socialHistoryCandidateLabelIndex;
- @Inject
- SHStanfordConstituencyParser(TextView document,
- StanfordConstituencyParserModel stanfordConstituencyParserModel) {
- socialHistoryCandidateLabelIndex = document.getLabelIndex(SocialHistoryCandidate.class);
- parseTokenLabelIndex = document.getLabelIndex(ParseToken.class);
- partOfSpeechLabelIndex = document.getLabelIndex(PartOfSpeech.class);
- constituencyParseLabeler = document.getLabeler(ConstituencyParse.class);
- this.stanfordConstituencyParserModel = stanfordConstituencyParserModel;
- }
+ private final LabelIndex partOfSpeechLabelIndex;
+ private final Labeler constituencyParseLabeler;
+ private final LabelIndex parseTokenLabelIndex;
+ private final StanfordConstituencyParserModel stanfordConstituencyParserModel;
+ private final LabelIndex labelIndex; // the SocialHistoryCandidate labels iterated by process()
+ /** Looks up the label indices and the ConstituencyParse labeler on the given text view. */
+ @Inject
+ public SHStanfordConstituencyParser(
+ TextView textView,
+ StanfordConstituencyParserModel stanfordConstituencyParserModel
+ ) {
+ labelIndex = textView.getLabelIndex(SocialHistoryCandidate.class);
+ parseTokenLabelIndex = textView.getLabelIndex(ParseToken.class);
+ partOfSpeechLabelIndex = textView.getLabelIndex(PartOfSpeech.class);
+ constituencyParseLabeler = textView.getLabeler(ConstituencyParse.class);
+ this.stanfordConstituencyParserModel = stanfordConstituencyParserModel;
+ }
- @Override
- public void process() throws BiomedicusException {
- for (Label socialHistoryCandidateLabel : socialHistoryCandidateLabelIndex) {
- stanfordConstituencyParserModel.parseSentence(socialHistoryCandidateLabel, parseTokenLabelIndex,
- partOfSpeechLabelIndex, constituencyParseLabeler);
- }
+ @Override
+ public void process() throws BiomedicusException {
+ for (Label label : labelIndex) { // hand every social history candidate label to the model
+ stanfordConstituencyParserModel.parseSentence(label, parseTokenLabelIndex,
+ partOfSpeechLabelIndex, constituencyParseLabeler);
 }
+ }
}
diff --git a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParser.java b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParser.java
index 4aedfda..a48988a 100644
--- a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParser.java
+++ b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParser.java
@@ -17,41 +17,41 @@
package edu.umn.biomedicus.gpl.stanford.parser;
-import edu.umn.biomedicus.framework.DocumentProcessor;
-import edu.umn.biomedicus.framework.store.TextView;
-import edu.umn.biomedicus.framework.store.Label;
-import edu.umn.biomedicus.framework.store.LabelIndex;
-import edu.umn.biomedicus.framework.store.Labeler;
import edu.umn.biomedicus.common.types.syntax.PartOfSpeech;
import edu.umn.biomedicus.common.types.text.ConstituencyParse;
import edu.umn.biomedicus.common.types.text.ParseToken;
import edu.umn.biomedicus.common.types.text.Sentence;
import edu.umn.biomedicus.exc.BiomedicusException;
-
+import edu.umn.biomedicus.framework.DocumentProcessor;
+import edu.umn.biomedicus.framework.store.Label;
+import edu.umn.biomedicus.framework.store.LabelIndex;
+import edu.umn.biomedicus.framework.store.Labeler;
+import edu.umn.biomedicus.framework.store.TextView;
import javax.inject.Inject;
public class StanfordConstituencyParser implements DocumentProcessor {
- private final LabelIndex sentenceLabelIndex;
- private final LabelIndex partOfSpeechLabelIndex;
- private final Labeler constituencyParseLabeler;
- private final LabelIndex parseTokenLabelIndex;
- private final StanfordConstituencyParserModel stanfordConstituencyParserModel;
- @Inject
- public StanfordConstituencyParser(TextView document,
- StanfordConstituencyParserModel stanfordConstituencyParserModel) {
- sentenceLabelIndex = document.getLabelIndex(Sentence.class);
- parseTokenLabelIndex = document.getLabelIndex(ParseToken.class);
- partOfSpeechLabelIndex = document.getLabelIndex(PartOfSpeech.class);
- constituencyParseLabeler = document.getLabeler(ConstituencyParse.class);
- this.stanfordConstituencyParserModel = stanfordConstituencyParserModel;
- }
+ private final LabelIndex sentenceLabelIndex; // the Sentence labels iterated by process()
+ private final LabelIndex partOfSpeechLabelIndex;
+ private final Labeler constituencyParseLabeler;
+ private final LabelIndex parseTokenLabelIndex;
+ private final StanfordConstituencyParserModel stanfordConstituencyParserModel;
+ /** Looks up the label indices and the ConstituencyParse labeler on the given text view. */
+ @Inject
+ public StanfordConstituencyParser(TextView textView,
+ StanfordConstituencyParserModel stanfordConstituencyParserModel) {
+ sentenceLabelIndex = textView.getLabelIndex(Sentence.class);
+ parseTokenLabelIndex = textView.getLabelIndex(ParseToken.class);
+ partOfSpeechLabelIndex = textView.getLabelIndex(PartOfSpeech.class);
+ constituencyParseLabeler = textView.getLabeler(ConstituencyParse.class);
+ this.stanfordConstituencyParserModel = stanfordConstituencyParserModel;
+ }
- @Override
- public void process() throws BiomedicusException {
- for (Label sentenceLabel : sentenceLabelIndex) {
- stanfordConstituencyParserModel.parseSentence(sentenceLabel, parseTokenLabelIndex, partOfSpeechLabelIndex,
- constituencyParseLabeler);
- }
+ @Override
+ public void process() throws BiomedicusException {
+ for (Label sentenceLabel : sentenceLabelIndex) { // hand every sentence label to the model
+ stanfordConstituencyParserModel.parseSentence(sentenceLabel, parseTokenLabelIndex,
+ partOfSpeechLabelIndex, constituencyParseLabeler);
 }
+ }
}
diff --git a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParserModel.java b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParserModel.java
index c6bf75f..7f71d3c 100644
--- a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParserModel.java
+++ b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParserModel.java
@@ -23,78 +23,73 @@
import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser;
import edu.stanford.nlp.trees.Tree;
import edu.umn.biomedicus.annotations.Setting;
-import edu.umn.biomedicus.framework.DataLoader;
-import edu.umn.biomedicus.framework.store.Label;
-import edu.umn.biomedicus.framework.store.LabelIndex;
-import edu.umn.biomedicus.framework.store.Labeler;
import edu.umn.biomedicus.common.types.syntax.PartOfSpeech;
import edu.umn.biomedicus.common.types.syntax.PartsOfSpeech;
import edu.umn.biomedicus.common.types.text.ConstituencyParse;
import edu.umn.biomedicus.common.types.text.ImmutableConstituencyParse;
import edu.umn.biomedicus.common.types.text.ParseToken;
import edu.umn.biomedicus.exc.BiomedicusException;
-
-import javax.inject.Inject;
+import edu.umn.biomedicus.framework.DataLoader;
+import edu.umn.biomedicus.framework.store.Label;
+import edu.umn.biomedicus.framework.store.LabelIndex;
+import edu.umn.biomedicus.framework.store.Labeler;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
+import javax.inject.Inject;
@Singleton
@ProvidedBy(StanfordConstituencyParserModel.Loader.class)
public class StanfordConstituencyParserModel {
- private final ShiftReduceParser srParser;
- private StanfordConstituencyParserModel(ShiftReduceParser srParser) {
- this.srParser = srParser;
- }
+ private final ShiftReduceParser shiftReduceParser; // parser applied to each sentence's tagged words
- void parseSentence(Label> sentenceLabel,
- LabelIndex parseTokenLabelIndex,
- LabelIndex partOfSpeechLabelIndex,
- Labeler constituencyParseLabeler)
- throws BiomedicusException {
- List taggedWordList = new ArrayList<>();
- for (Label parseTokenLabel : parseTokenLabelIndex
- .insideSpan(sentenceLabel)) {
- String word = parseTokenLabel.value().text();
- PartOfSpeech partOfSpeech = partOfSpeechLabelIndex
- .withTextLocation(parseTokenLabel)
- .orElseThrow(() -> new BiomedicusException(
- "parse token did not have part of speech."))
- .value();
+ private StanfordConstituencyParserModel(ShiftReduceParser shiftReduceParser) {
+ this.shiftReduceParser = shiftReduceParser;
+ }
- TaggedWord taggedWord = new TaggedWord(word,
- PartsOfSpeech.tagForPartOfSpeech(partOfSpeech));
- taggedWordList.add(taggedWord);
- }
- Tree tree = srParser.apply(taggedWordList);
- StringWriter stringWriter = new StringWriter();
- PrintWriter pw = new PrintWriter(stringWriter);
- tree.pennPrint(pw);
- String pennPrint = stringWriter.toString();
- ConstituencyParse constituencyParse = ImmutableConstituencyParse
- .builder().parse(pennPrint).build();
- constituencyParseLabeler.value(constituencyParse).label(sentenceLabel);
+ void parseSentence( // tags the tokens inside the sentence, parses them, and labels the sentence
+ Label> sentenceLabel,
+ LabelIndex parseTokenLabelIndex,
+ LabelIndex partOfSpeechLabelIndex,
+ Labeler constituencyParseLabeler
+ ) throws BiomedicusException {
+ List taggedWordList = new ArrayList<>();
+ for (Label parseTokenLabel : parseTokenLabelIndex.insideSpan(sentenceLabel)) {
+ String word = parseTokenLabel.value().text();
+ PartOfSpeech partOfSpeech = partOfSpeechLabelIndex.withTextLocation(parseTokenLabel)
+ .orElseThrow(() -> new BiomedicusException("parse token did not have part of speech."))
+ .value(); // a token without a part-of-speech label aborts the parse with an exception
+
+ TaggedWord taggedWord = new TaggedWord(word, PartsOfSpeech.tagForPartOfSpeech(partOfSpeech));
+ taggedWordList.add(taggedWord);
 }
+ Tree tree = shiftReduceParser.apply(taggedWordList);
+ StringWriter stringWriter = new StringWriter();
+ tree.pennPrint(new PrintWriter(stringWriter));
+ String pennPrint = stringWriter.toString(); // the tree text written by Tree.pennPrint
+ ConstituencyParse constituencyParse = ImmutableConstituencyParse.builder()
+ .parse(pennPrint)
+ .build();
+ constituencyParseLabeler.value(constituencyParse).label(sentenceLabel);
+ }
+ /** Loads the model from the path supplied by the "stanford.srParser.path" setting. */
+ @Singleton
+ public static class Loader extends DataLoader {
- @Singleton
- public static class Loader
- extends DataLoader {
- private final Path path;
+ private final Path path;
- @Inject
- public Loader(@Setting("stanford.srParser.path") Path path) {
- this.path = path;
- }
+ @Inject
+ public Loader(@Setting("stanford.srParser.path") Path path) {
+ this.path = path;
+ }
- @Override
- protected StanfordConstituencyParserModel loadModel()
- throws BiomedicusException {
- ShiftReduceParser shiftReduceParser = ShiftReduceParser
- .loadModel(path.toString());
- return new StanfordConstituencyParserModel(shiftReduceParser);
- }
+ @Override
+ protected StanfordConstituencyParserModel loadModel() throws BiomedicusException {
+ ShiftReduceParser shiftReduceParser = ShiftReduceParser.loadModel(path.toString());
+ return new StanfordConstituencyParserModel(shiftReduceParser);
 }
+ }
}
diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifier.java b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifier.java
index 08e9452..d4ba811 100644
--- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifier.java
+++ b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifier.java
@@ -18,29 +18,28 @@
package edu.umn.biomedicus.internal.docclass;
import com.google.inject.Inject;
-import edu.umn.biomedicus.framework.store.Document;
+import edu.umn.biomedicus.exc.BiomedicusException;
import edu.umn.biomedicus.framework.DocumentProcessor;
+import edu.umn.biomedicus.framework.store.Document;
import edu.umn.biomedicus.framework.store.TextView;
-import edu.umn.biomedicus.exc.BiomedicusException;
public class SeverityClassifier implements DocumentProcessor {
- private final SeverityClassifierModel severityClassifierModel;
- private final TextView textView;
- private final Document document;
+ private final SeverityClassifierModel severityClassifierModel;
+ private final Document document; // receives the prediction as metadata
+ private final TextView textView; // the view passed to the model for prediction
- @Inject
- public SeverityClassifier(SeverityClassifierModel severityClassifierModel,
- TextView textView,
- Document document) {
- this.severityClassifierModel = severityClassifierModel;
- this.textView = textView;
- this.document = document;
- }
+ @Inject
+ public SeverityClassifier(SeverityClassifierModel severityClassifierModel, Document document,
+ TextView textView) {
+ this.severityClassifierModel = severityClassifierModel;
+ this.document = document;
+ this.textView = textView;
+ }
- @Override
- public void process() throws BiomedicusException {
- String prediction = severityClassifierModel.predict(textView);
- document.putMetadata("Severity", prediction);
- }
+ @Override
+ public void process() throws BiomedicusException {
+ String prediction = severityClassifierModel.predict(textView);
+ document.putMetadata("Severity", prediction); // stored under the "Severity" metadata key
+ }
}
diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierModel.java b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierModel.java
index e8895bd..98a747e 100644
--- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierModel.java
+++ b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierModel.java
@@ -21,9 +21,9 @@
import com.google.inject.ProvidedBy;
import edu.umn.biomedicus.annotations.ProcessorScoped;
import edu.umn.biomedicus.annotations.ProcessorSetting;
+import edu.umn.biomedicus.exc.BiomedicusException;
import edu.umn.biomedicus.framework.DataLoader;
import edu.umn.biomedicus.framework.store.TextView;
-import edu.umn.biomedicus.exc.BiomedicusException;
import weka.classifiers.Classifier;
import weka.core.Instance;
import weka.filters.Filter;
@@ -76,12 +76,12 @@ public class SeverityClassifierModel implements Serializable {
/**
* Perform attribute selection and then classification using the stored Weka objects
* Where classes are tied, err on the side of higher class
- * @param document the document
- * @return a string (from the predefined classes) representing this document's symptom severity
+ * @param textView the textView
+ * @return a string (from the predefined classes) representing this textView's symptom severity
* @throws BiomedicusException
*/
- public String predict(TextView document) throws BiomedicusException {
- Instance inst = severityWekaProcessor.getTestData(document);
+ public String predict(TextView textView) throws BiomedicusException {
+ Instance inst = severityWekaProcessor.getTestData(textView);
double result;
try {
if(attSel.input(inst)) {
diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierTrainer.java b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierTrainer.java
index 10591f5..2a92c1b 100644
--- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierTrainer.java
+++ b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierTrainer.java
@@ -20,9 +20,10 @@
import com.google.inject.Inject;
import edu.umn.biomedicus.annotations.ProcessorScoped;
import edu.umn.biomedicus.annotations.ProcessorSetting;
+import edu.umn.biomedicus.exc.BiomedicusException;
import edu.umn.biomedicus.framework.PostProcessor;
+import edu.umn.biomedicus.framework.store.Document;
import edu.umn.biomedicus.framework.store.TextView;
-import edu.umn.biomedicus.exc.BiomedicusException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import weka.attributeSelection.ASEvaluation;
@@ -84,10 +85,10 @@ public SeverityClassifierTrainer(@ProcessorSetting("docclass.severity.output.pat
/**
* Add the document to the collection, which will be trained all at once at the end
- * @param document a document
+ * @param textView the text view passed to the Weka processor as a training document
*/
- public void processDocument(TextView document) {
- wekaProcessor.addTrainingDocument(document);
+ public void processDocument(TextView textView) {
+ wekaProcessor.addTrainingDocument(textView);
}
@Override
diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityTrainerProcessor.java b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityTrainerProcessor.java
index 66e31c6..787a0c9 100644
--- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityTrainerProcessor.java
+++ b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityTrainerProcessor.java
@@ -18,22 +18,22 @@
package edu.umn.biomedicus.internal.docclass;
import com.google.inject.Inject;
+import edu.umn.biomedicus.exc.BiomedicusException;
import edu.umn.biomedicus.framework.DocumentProcessor;
import edu.umn.biomedicus.framework.store.TextView;
-import edu.umn.biomedicus.exc.BiomedicusException;
public class SeverityTrainerProcessor implements DocumentProcessor {
private final SeverityClassifierTrainer severityClassifierTrainer;
- private final TextView document;
+ private final TextView textView; // the view handed to the trainer in process()
@Inject
- public SeverityTrainerProcessor(SeverityClassifierTrainer severityClassifierTrainer, TextView document) {
+ public SeverityTrainerProcessor(SeverityClassifierTrainer severityClassifierTrainer, TextView textView) {
this.severityClassifierTrainer = severityClassifierTrainer;
- this.document = document;
+ this.textView = textView;
}
@Override
public void process() throws BiomedicusException {
- severityClassifierTrainer.processDocument(document);
+ severityClassifierTrainer.processDocument(textView); // passes this view to the trainer
}
}
diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityWekaProcessor.java b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityWekaProcessor.java
index 5e11680..74db22e 100644
--- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityWekaProcessor.java
+++ b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityWekaProcessor.java
@@ -17,6 +17,7 @@
package edu.umn.biomedicus.internal.docclass;
+import edu.umn.biomedicus.framework.store.Document;
import edu.umn.biomedicus.framework.store.TextView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -94,10 +95,10 @@ Instances getTrainingData() {
/**
* Add a document for training. Will extract this doc's text but will not train on it until getTrainingData called
- * @param document a document
+ * @param textView the text view whose text is extracted as a training instance
*/
- void addTrainingDocument(TextView document) {
- Instance trainingInstance = getTextInstance(document.getText());
+ void addTrainingDocument(TextView textView) {
+ Instance trainingInstance = getTextInstance(textView.getText());
if (trainingInstance != null) {
trainingTextInstances.add(trainingInstance);
}
@@ -105,11 +106,11 @@ void addTrainingDocument(TextView document) {
/**
* Convert a document into a vector instance. buildDictionary() needs to have been run.
- * @param document a document
+ * @param textView the text view whose text is converted into a vector instance
* @return an Instance with real-valued data
*/
- Instance getTestData(TextView document) {
- Instance textInstance = getTextInstance(document.getText());
+ Instance getTestData(TextView textView) {
+ Instance textInstance = getTextInstance(textView.getText());
Instance vectorInstance = vectorizeInstance(textInstance);
vectorInstance.setDataset(vectorTemplate);
return vectorInstance;