diff --git a/pom.xml b/pom.xml index 7420d48..186fb68 100644 --- a/pom.xml +++ b/pom.xml @@ -23,7 +23,7 @@ edu.umn.biomedicus biomedicus-gpl jar - 1.6.0-SNAPSHOT + 1.6.0 biomedicus-gpl BioMedICUS Annotation System - GPL Extensions @@ -31,7 +31,7 @@ UTF-8 UTF-8 - 1.6.0-SNAPSHOT + 1.6.0 diff --git a/src/main/java/edu/umn/biomedicus/gpl/penntree/PennTreebankInputFileAdapter.java b/src/main/java/edu/umn/biomedicus/gpl/penntree/PennTreebankInputFileAdapter.java new file mode 100644 index 0000000..c2778ca --- /dev/null +++ b/src/main/java/edu/umn/biomedicus/gpl/penntree/PennTreebankInputFileAdapter.java @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2016 Regents of the University of Minnesota + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +package edu.umn.biomedicus.gpl.penntree; + +import com.google.inject.Inject; +import edu.stanford.nlp.ling.TaggedWord; +import edu.stanford.nlp.trees.PennTreeReaderFactory; +import edu.stanford.nlp.trees.Tree; +import edu.stanford.nlp.trees.TreeReader; +import edu.umn.biomedicus.common.types.syntax.PartOfSpeech; +import edu.umn.biomedicus.common.types.syntax.PartsOfSpeech; +import edu.umn.biomedicus.common.types.text.ImmutableParseToken; +import edu.umn.biomedicus.common.types.text.Sentence; +import edu.umn.biomedicus.framework.store.Document; +import edu.umn.biomedicus.framework.store.Label; +import edu.umn.biomedicus.framework.store.Span; +import edu.umn.biomedicus.framework.store.TextView; +import edu.umn.biomedicus.uima.adapter.UimaAdapters; +import edu.umn.biomedicus.uima.files.InputFileAdapter; +import edu.umn.biomedicus.uima.labels.LabelAdapters; +import java.io.IOException; +import java.io.Reader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import org.apache.uima.cas.CAS; +import org.apache.uima.collection.CollectionException; + +/** + * Adapts Penn treebank format files to CAS files. + * + * @author Ben Knoll + * @since 1.3.0 + */ +public final class PennTreebankInputFileAdapter implements InputFileAdapter { + + /** + * The penn tree reader factory. + */ + private final PennTreeReaderFactory pennTreeReaderFactory = new PennTreeReaderFactory(); + + private final LabelAdapters labelAdapters; + + /** + * The view name to load into. 
+ */ + private String viewName; + + @Inject + public PennTreebankInputFileAdapter(LabelAdapters labelAdapters) { + this.labelAdapters = labelAdapters; + } + + @Override + public void adaptFile(CAS cas, Path path) throws CollectionException { + StringBuilder text = new StringBuilder(); + ArrayList<SentenceBuilder> sentences = new ArrayList<>(); + try (Reader reader = Files.newBufferedReader(path)) { + TreeReader treeReader = pennTreeReaderFactory.newTreeReader(reader); + Tree tree; + while ((tree = treeReader.readTree()) != null) { + int sentenceStart = text.length(); + + ArrayList<TaggedWord> taggedWords = tree.taggedYield(); + SentenceBuilder sentenceBuilder = new SentenceBuilder(); + sentenceBuilder.tokenBuilders = new ArrayList<>(taggedWords.size()); + for (TaggedWord taggedWord : taggedWords) { + String tag = taggedWord.tag(); + String word = taggedWord.word(); + text.append(" "); + + /** + * This -NONE- tag occurs in some documents when there is an assumed phrase. + */ + if (!"-NONE-".equals(tag)) { + int tokenStart = text.length(); + if ("-LRB-".equals(word)) { + text.append('('); + } else if ("-RRB-".equals(word)) { + text.append(')'); + } else if ("-LCB-".equals(word)) { + text.append('{'); + } else if ("-RCB-".equals(word)) { + text.append('}'); + } else if ("-LSB-".equals(word)) { + text.append('['); + } else if ("-RSB-".equals(word)) { + text.append(']'); + } else if ("``".equals(word)) { + text.append("\""); + } else if ("''".equals(word)) { + text.append("\""); + } else { + text.append(word); + } + int tokenEnd = text.length(); + + PartOfSpeech partOfSpeech; + if ("-LRB-".equals(tag)) { + partOfSpeech = PartOfSpeech.LEFT_PAREN; + } else if ("-RRB-".equals(tag)) { + partOfSpeech = PartOfSpeech.RIGHT_PAREN; + } else { + if (tag.contains("|")) { + String[] tags = tag.split("\\|"); + Random random = new Random(); + int randomIndex = random.nextInt(tags.length); + partOfSpeech = PartsOfSpeech.forTag(tags[randomIndex]); + } else { + partOfSpeech = PartsOfSpeech.forTag(tag); + } + + } + + 
TokenBuilder tokenBuilder = new TokenBuilder(); + tokenBuilder.tokenSpan = new Span(tokenStart, tokenEnd); + tokenBuilder.partOfSpeech = partOfSpeech; + if (partOfSpeech == null) { + throw new AssertionError("part of speech should not be null"); + } + sentenceBuilder.tokenBuilders.add(tokenBuilder); + } + } + + int sentenceEnd = text.length(); + + sentenceBuilder.sentenceSpan = new Span(sentenceStart, sentenceEnd); + sentences.add(sentenceBuilder); + } + } catch (IOException e) { + throw new CollectionException(e); + } + + Document document = UimaAdapters.createDocument(cas, labelAdapters, + path.getFileName().toString()); + TextView textView = document.newTextView() + .withText(text.toString()) + .withName(viewName) + .build(); + + for (SentenceBuilder sentence : sentences) { + Span sentenceSpan = sentence.sentenceSpan; + + textView.label(Label.create(sentenceSpan, Sentence.create())); + + for (TokenBuilder tokenBuilder : sentence.tokenBuilders) { + Span tokenSpan = tokenBuilder.tokenSpan; + + textView.label(Label.create(tokenSpan, + ImmutableParseToken.builder() + .text(tokenSpan.getCovered(text).toString()) + .hasSpaceAfter(true) + .build() + )); + + PartOfSpeech partOfSpeech = tokenBuilder.partOfSpeech; + if (partOfSpeech != null) { + String pos = partOfSpeech.toString(); + textView.label(Label.create(tokenSpan, PartsOfSpeech.forTag(pos))); + } + } + } + + } + + @Override + public void setTargetView(String viewName) { + this.viewName = viewName; + } + + /** + * Used to build sentences. + */ + private static class SentenceBuilder { + + /** + * The begin and end of the sentence. + */ + private Span sentenceSpan; + + /** + * The tokens of the sentence. + */ + private List<TokenBuilder> tokenBuilders; + } + + /** + * Used to build tokens + */ + private static class TokenBuilder { + + /** + * The begin and end of the token + */ + private Span tokenSpan; + + /** + * The part of speech. 
+ */ + private PartOfSpeech partOfSpeech; + } +} diff --git a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/SHStanfordConstituencyParser.java b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/SHStanfordConstituencyParser.java index 5b87889..8f7b1a3 100644 --- a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/SHStanfordConstituencyParser.java +++ b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/SHStanfordConstituencyParser.java @@ -17,41 +17,44 @@ package edu.umn.biomedicus.gpl.stanford.parser; -import edu.umn.biomedicus.framework.DocumentProcessor; -import edu.umn.biomedicus.framework.store.TextView; -import edu.umn.biomedicus.framework.store.Label; -import edu.umn.biomedicus.framework.store.LabelIndex; -import edu.umn.biomedicus.framework.store.Labeler; import edu.umn.biomedicus.common.types.semantics.SocialHistoryCandidate; import edu.umn.biomedicus.common.types.syntax.PartOfSpeech; import edu.umn.biomedicus.common.types.text.ConstituencyParse; import edu.umn.biomedicus.common.types.text.ParseToken; import edu.umn.biomedicus.exc.BiomedicusException; - +import edu.umn.biomedicus.framework.DocumentProcessor; +import edu.umn.biomedicus.framework.store.Document; +import edu.umn.biomedicus.framework.store.Label; +import edu.umn.biomedicus.framework.store.LabelIndex; +import edu.umn.biomedicus.framework.store.Labeler; +import edu.umn.biomedicus.framework.store.TextView; import javax.inject.Inject; public class SHStanfordConstituencyParser implements DocumentProcessor { - private final LabelIndex partOfSpeechLabelIndex; - private final Labeler constituencyParseLabeler; - private final LabelIndex parseTokenLabelIndex; - private final StanfordConstituencyParserModel stanfordConstituencyParserModel; - private final LabelIndex socialHistoryCandidateLabelIndex; - @Inject - SHStanfordConstituencyParser(TextView document, - StanfordConstituencyParserModel stanfordConstituencyParserModel) { - socialHistoryCandidateLabelIndex = 
document.getLabelIndex(SocialHistoryCandidate.class); - parseTokenLabelIndex = document.getLabelIndex(ParseToken.class); - partOfSpeechLabelIndex = document.getLabelIndex(PartOfSpeech.class); - constituencyParseLabeler = document.getLabeler(ConstituencyParse.class); - this.stanfordConstituencyParserModel = stanfordConstituencyParserModel; - } + private final LabelIndex partOfSpeechLabelIndex; + private final Labeler constituencyParseLabeler; + private final LabelIndex parseTokenLabelIndex; + private final StanfordConstituencyParserModel stanfordConstituencyParserModel; + private final LabelIndex labelIndex; + + @Inject + public SHStanfordConstituencyParser( + TextView textView, + StanfordConstituencyParserModel stanfordConstituencyParserModel + ) { + labelIndex = textView.getLabelIndex(SocialHistoryCandidate.class); + parseTokenLabelIndex = textView.getLabelIndex(ParseToken.class); + partOfSpeechLabelIndex = textView.getLabelIndex(PartOfSpeech.class); + constituencyParseLabeler = textView.getLabeler(ConstituencyParse.class); + this.stanfordConstituencyParserModel = stanfordConstituencyParserModel; + } - @Override - public void process() throws BiomedicusException { - for (Label socialHistoryCandidateLabel : socialHistoryCandidateLabelIndex) { - stanfordConstituencyParserModel.parseSentence(socialHistoryCandidateLabel, parseTokenLabelIndex, - partOfSpeechLabelIndex, constituencyParseLabeler); - } + @Override + public void process() throws BiomedicusException { + for (Label label : labelIndex) { + stanfordConstituencyParserModel.parseSentence(label, parseTokenLabelIndex, + partOfSpeechLabelIndex, constituencyParseLabeler); } + } } diff --git a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParser.java b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParser.java index 4aedfda..a48988a 100644 --- a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParser.java +++ 
b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParser.java @@ -17,41 +17,41 @@ package edu.umn.biomedicus.gpl.stanford.parser; -import edu.umn.biomedicus.framework.DocumentProcessor; -import edu.umn.biomedicus.framework.store.TextView; -import edu.umn.biomedicus.framework.store.Label; -import edu.umn.biomedicus.framework.store.LabelIndex; -import edu.umn.biomedicus.framework.store.Labeler; import edu.umn.biomedicus.common.types.syntax.PartOfSpeech; import edu.umn.biomedicus.common.types.text.ConstituencyParse; import edu.umn.biomedicus.common.types.text.ParseToken; import edu.umn.biomedicus.common.types.text.Sentence; import edu.umn.biomedicus.exc.BiomedicusException; - +import edu.umn.biomedicus.framework.DocumentProcessor; +import edu.umn.biomedicus.framework.store.Label; +import edu.umn.biomedicus.framework.store.LabelIndex; +import edu.umn.biomedicus.framework.store.Labeler; +import edu.umn.biomedicus.framework.store.TextView; import javax.inject.Inject; public class StanfordConstituencyParser implements DocumentProcessor { - private final LabelIndex sentenceLabelIndex; - private final LabelIndex partOfSpeechLabelIndex; - private final Labeler constituencyParseLabeler; - private final LabelIndex parseTokenLabelIndex; - private final StanfordConstituencyParserModel stanfordConstituencyParserModel; - @Inject - public StanfordConstituencyParser(TextView document, - StanfordConstituencyParserModel stanfordConstituencyParserModel) { - sentenceLabelIndex = document.getLabelIndex(Sentence.class); - parseTokenLabelIndex = document.getLabelIndex(ParseToken.class); - partOfSpeechLabelIndex = document.getLabelIndex(PartOfSpeech.class); - constituencyParseLabeler = document.getLabeler(ConstituencyParse.class); - this.stanfordConstituencyParserModel = stanfordConstituencyParserModel; - } + private final LabelIndex sentenceLabelIndex; + private final LabelIndex partOfSpeechLabelIndex; + private final Labeler constituencyParseLabeler; + private 
final LabelIndex parseTokenLabelIndex; + private final StanfordConstituencyParserModel stanfordConstituencyParserModel; + + @Inject + public StanfordConstituencyParser(TextView textView, + StanfordConstituencyParserModel stanfordConstituencyParserModel) { + sentenceLabelIndex = textView.getLabelIndex(Sentence.class); + parseTokenLabelIndex = textView.getLabelIndex(ParseToken.class); + partOfSpeechLabelIndex = textView.getLabelIndex(PartOfSpeech.class); + constituencyParseLabeler = textView.getLabeler(ConstituencyParse.class); + this.stanfordConstituencyParserModel = stanfordConstituencyParserModel; + } - @Override - public void process() throws BiomedicusException { - for (Label sentenceLabel : sentenceLabelIndex) { - stanfordConstituencyParserModel.parseSentence(sentenceLabel, parseTokenLabelIndex, partOfSpeechLabelIndex, - constituencyParseLabeler); - } + @Override + public void process() throws BiomedicusException { + for (Label sentenceLabel : sentenceLabelIndex) { + stanfordConstituencyParserModel.parseSentence(sentenceLabel, parseTokenLabelIndex, + partOfSpeechLabelIndex, constituencyParseLabeler); } + } } diff --git a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParserModel.java b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParserModel.java index c6bf75f..7f71d3c 100644 --- a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParserModel.java +++ b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParserModel.java @@ -23,78 +23,73 @@ import edu.stanford.nlp.parser.shiftreduce.ShiftReduceParser; import edu.stanford.nlp.trees.Tree; import edu.umn.biomedicus.annotations.Setting; -import edu.umn.biomedicus.framework.DataLoader; -import edu.umn.biomedicus.framework.store.Label; -import edu.umn.biomedicus.framework.store.LabelIndex; -import edu.umn.biomedicus.framework.store.Labeler; import edu.umn.biomedicus.common.types.syntax.PartOfSpeech; import 
edu.umn.biomedicus.common.types.syntax.PartsOfSpeech; import edu.umn.biomedicus.common.types.text.ConstituencyParse; import edu.umn.biomedicus.common.types.text.ImmutableConstituencyParse; import edu.umn.biomedicus.common.types.text.ParseToken; import edu.umn.biomedicus.exc.BiomedicusException; - -import javax.inject.Inject; +import edu.umn.biomedicus.framework.DataLoader; +import edu.umn.biomedicus.framework.store.Label; +import edu.umn.biomedicus.framework.store.LabelIndex; +import edu.umn.biomedicus.framework.store.Labeler; import java.io.PrintWriter; import java.io.StringWriter; import java.nio.file.Path; import java.util.ArrayList; import java.util.List; +import javax.inject.Inject; @Singleton @ProvidedBy(StanfordConstituencyParserModel.Loader.class) public class StanfordConstituencyParserModel { - private final ShiftReduceParser srParser; - private StanfordConstituencyParserModel(ShiftReduceParser srParser) { - this.srParser = srParser; - } + private final ShiftReduceParser shiftReduceParser; - void parseSentence(Label sentenceLabel, - LabelIndex parseTokenLabelIndex, - LabelIndex partOfSpeechLabelIndex, - Labeler constituencyParseLabeler) - throws BiomedicusException { - List taggedWordList = new ArrayList<>(); - for (Label parseTokenLabel : parseTokenLabelIndex - .insideSpan(sentenceLabel)) { - String word = parseTokenLabel.value().text(); - PartOfSpeech partOfSpeech = partOfSpeechLabelIndex - .withTextLocation(parseTokenLabel) - .orElseThrow(() -> new BiomedicusException( - "parse token did not have part of speech.")) - .value(); + private StanfordConstituencyParserModel(ShiftReduceParser shiftReduceParser) { + this.shiftReduceParser = shiftReduceParser; + } - TaggedWord taggedWord = new TaggedWord(word, - PartsOfSpeech.tagForPartOfSpeech(partOfSpeech)); - taggedWordList.add(taggedWord); - } - Tree tree = srParser.apply(taggedWordList); - StringWriter stringWriter = new StringWriter(); - PrintWriter pw = new PrintWriter(stringWriter); - tree.pennPrint(pw); 
- String pennPrint = stringWriter.toString(); - ConstituencyParse constituencyParse = ImmutableConstituencyParse - .builder().parse(pennPrint).build(); - constituencyParseLabeler.value(constituencyParse).label(sentenceLabel); + void parseSentence( + Label sentenceLabel, + LabelIndex parseTokenLabelIndex, + LabelIndex partOfSpeechLabelIndex, + Labeler constituencyParseLabeler + ) throws BiomedicusException { + List taggedWordList = new ArrayList<>(); + for (Label parseTokenLabel : parseTokenLabelIndex.insideSpan(sentenceLabel)) { + String word = parseTokenLabel.value().text(); + PartOfSpeech partOfSpeech = partOfSpeechLabelIndex.withTextLocation(parseTokenLabel) + .orElseThrow(() -> new BiomedicusException("parse token did not have part of speech.")) + .value(); + + TaggedWord taggedWord = new TaggedWord(word, PartsOfSpeech.tagForPartOfSpeech(partOfSpeech)); + taggedWordList.add(taggedWord); } + Tree tree = shiftReduceParser.apply(taggedWordList); + StringWriter stringWriter = new StringWriter(); + tree.pennPrint(new PrintWriter(stringWriter)); + String pennPrint = stringWriter.toString(); + ConstituencyParse constituencyParse = ImmutableConstituencyParse.builder() + .parse(pennPrint) + .build(); + constituencyParseLabeler.value(constituencyParse).label(sentenceLabel); + } + + @Singleton + public static class Loader extends DataLoader { - @Singleton - public static class Loader - extends DataLoader { - private final Path path; + private final Path path; - @Inject - public Loader(@Setting("stanford.srParser.path") Path path) { - this.path = path; - } + @Inject + public Loader(@Setting("stanford.srParser.path") Path path) { + this.path = path; + } - @Override - protected StanfordConstituencyParserModel loadModel() - throws BiomedicusException { - ShiftReduceParser shiftReduceParser = ShiftReduceParser - .loadModel(path.toString()); - return new StanfordConstituencyParserModel(shiftReduceParser); - } + @Override + protected StanfordConstituencyParserModel loadModel() 
throws BiomedicusException { + ShiftReduceParser shiftReduceParser = ShiftReduceParser.loadModel(path.toString()); + return new StanfordConstituencyParserModel(shiftReduceParser); } + } } diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifier.java b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifier.java index 08e9452..d4ba811 100644 --- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifier.java +++ b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifier.java @@ -18,29 +18,28 @@ package edu.umn.biomedicus.internal.docclass; import com.google.inject.Inject; -import edu.umn.biomedicus.framework.store.Document; +import edu.umn.biomedicus.exc.BiomedicusException; import edu.umn.biomedicus.framework.DocumentProcessor; +import edu.umn.biomedicus.framework.store.Document; import edu.umn.biomedicus.framework.store.TextView; -import edu.umn.biomedicus.exc.BiomedicusException; public class SeverityClassifier implements DocumentProcessor { - private final SeverityClassifierModel severityClassifierModel; - private final TextView textView; - private final Document document; + private final SeverityClassifierModel severityClassifierModel; + private final Document document; + private final TextView textView; - @Inject - public SeverityClassifier(SeverityClassifierModel severityClassifierModel, - TextView textView, - Document document) { - this.severityClassifierModel = severityClassifierModel; - this.textView = textView; - this.document = document; - } + @Inject + public SeverityClassifier(SeverityClassifierModel severityClassifierModel, Document document, + TextView textView) { + this.severityClassifierModel = severityClassifierModel; + this.document = document; + this.textView = textView; + } - @Override - public void process() throws BiomedicusException { - String prediction = severityClassifierModel.predict(textView); - document.putMetadata("Severity", prediction); - } + @Override + public void 
process() throws BiomedicusException { + String prediction = severityClassifierModel.predict(textView); + document.putMetadata("Severity", prediction); + } } diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierModel.java b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierModel.java index e8895bd..98a747e 100644 --- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierModel.java +++ b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierModel.java @@ -21,9 +21,9 @@ import com.google.inject.ProvidedBy; import edu.umn.biomedicus.annotations.ProcessorScoped; import edu.umn.biomedicus.annotations.ProcessorSetting; +import edu.umn.biomedicus.exc.BiomedicusException; import edu.umn.biomedicus.framework.DataLoader; import edu.umn.biomedicus.framework.store.TextView; -import edu.umn.biomedicus.exc.BiomedicusException; import weka.classifiers.Classifier; import weka.core.Instance; import weka.filters.Filter; @@ -76,12 +76,12 @@ public class SeverityClassifierModel implements Serializable { /** * Perform attribute selection and then classification using the stored Weka objects * Where classes are tied, err on the side of higher class - * @param document the document - * @return a string (from the predefined classes) representing this document's symptom severity + * @param textView the textView + * @return a string (from the predefined classes) representing this textView's symptom severity * @throws BiomedicusException */ - public String predict(TextView document) throws BiomedicusException { - Instance inst = severityWekaProcessor.getTestData(document); + public String predict(TextView textView) throws BiomedicusException { + Instance inst = severityWekaProcessor.getTestData(textView); double result; try { if(attSel.input(inst)) { diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierTrainer.java 
b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierTrainer.java index 10591f5..2a92c1b 100644 --- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierTrainer.java +++ b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierTrainer.java @@ -20,9 +20,10 @@ import com.google.inject.Inject; import edu.umn.biomedicus.annotations.ProcessorScoped; import edu.umn.biomedicus.annotations.ProcessorSetting; +import edu.umn.biomedicus.exc.BiomedicusException; import edu.umn.biomedicus.framework.PostProcessor; +import edu.umn.biomedicus.framework.store.Document; import edu.umn.biomedicus.framework.store.TextView; -import edu.umn.biomedicus.exc.BiomedicusException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import weka.attributeSelection.ASEvaluation; @@ -84,10 +85,10 @@ public SeverityClassifierTrainer(@ProcessorSetting("docclass.severity.output.pat /** * Add the document to the collection, which will be trained all at once at the end - * @param document a document + * @param textView a document */ - public void processDocument(TextView document) { - wekaProcessor.addTrainingDocument(document); + public void processDocument(TextView textView) { + wekaProcessor.addTrainingDocument(textView); } @Override diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityTrainerProcessor.java b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityTrainerProcessor.java index 66e31c6..787a0c9 100644 --- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityTrainerProcessor.java +++ b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityTrainerProcessor.java @@ -18,22 +18,22 @@ package edu.umn.biomedicus.internal.docclass; import com.google.inject.Inject; +import edu.umn.biomedicus.exc.BiomedicusException; import edu.umn.biomedicus.framework.DocumentProcessor; import edu.umn.biomedicus.framework.store.TextView; -import edu.umn.biomedicus.exc.BiomedicusException; public class 
SeverityTrainerProcessor implements DocumentProcessor { private final SeverityClassifierTrainer severityClassifierTrainer; - private final TextView document; + private final TextView textView; @Inject - public SeverityTrainerProcessor(SeverityClassifierTrainer severityClassifierTrainer, TextView document) { + public SeverityTrainerProcessor(SeverityClassifierTrainer severityClassifierTrainer, TextView textView) { this.severityClassifierTrainer = severityClassifierTrainer; - this.document = document; + this.textView = textView; } @Override public void process() throws BiomedicusException { - severityClassifierTrainer.processDocument(document); + severityClassifierTrainer.processDocument(textView); } } diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityWekaProcessor.java b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityWekaProcessor.java index 5e11680..74db22e 100644 --- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityWekaProcessor.java +++ b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityWekaProcessor.java @@ -17,6 +17,7 @@ package edu.umn.biomedicus.internal.docclass; +import edu.umn.biomedicus.framework.store.Document; import edu.umn.biomedicus.framework.store.TextView; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -94,10 +95,10 @@ Instances getTrainingData() { /** * Add a document for training. Will extract this doc's text but will not train on it until getTrainingData called - * @param document a document + * @param textView a document */ - void addTrainingDocument(TextView document) { - Instance trainingInstance = getTextInstance(document.getText()); + void addTrainingDocument(TextView textView) { + Instance trainingInstance = getTextInstance(textView.getText()); if (trainingInstance != null) { trainingTextInstances.add(trainingInstance); } @@ -105,11 +106,11 @@ void addTrainingDocument(TextView document) { /** * Convert a document into a vector instance. 
buildDictionary() needs to have been run. - * @param document a document + * @param textView a document * @return an Instance with real-valued data */ - Instance getTestData(TextView document) { - Instance textInstance = getTextInstance(document.getText()); + Instance getTestData(TextView textView) { + Instance textInstance = getTextInstance(textView.getText()); Instance vectorInstance = vectorizeInstance(textInstance); vectorInstance.setDataset(vectorTemplate); return vectorInstance;