From b78bbafce12569150355ce9e05f48876a5fa48ae Mon Sep 17 00:00:00 2001
From: Ben Knoll <benknoll@umn.edu>
Date: Thu, 25 May 2017 13:40:29 -0500
Subject: [PATCH 1/5] Version bump.

---
 pom.xml | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/pom.xml b/pom.xml
index 186fb68..2eae1e6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -23,7 +23,7 @@
     <groupId>edu.umn.biomedicus</groupId>
     <artifactId>biomedicus-gpl</artifactId>
     <packaging>jar</packaging>
-    <version>1.6.0</version>
+    <version>1.7.0-SNAPSHOT</version>
 
     <name>biomedicus-gpl</name>
     <description>BioMedICUS Annotation System - GPL Extensions</description>
@@ -31,19 +31,18 @@
     <properties>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
         <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
-        <biomedicus.version>1.6.0</biomedicus.version>
     </properties>
 
     <dependencies>
         <dependency>
             <groupId>edu.umn.biomedicus</groupId>
             <artifactId>biomedicus-core</artifactId>
-            <version>${biomedicus.version}</version>
+            <version>1.7.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>edu.umn.biomedicus</groupId>
             <artifactId>biomedicus-uima</artifactId>
-            <version>${biomedicus.version}</version>
+            <version>1.7.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>edu.stanford.nlp</groupId>

From 2b1703a759c31d037e2cf9d366699cbd47d7a7fe Mon Sep 17 00:00:00 2001
From: Ben Knoll <benknoll@umn.edu>
Date: Thu, 25 May 2017 13:56:57 -0500
Subject: [PATCH 2/5] README update.

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7d10633..610f9a0 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,9 @@ The system is being developed by our biomedical NLP/IE program at the University
 This is a collaborative project that aims to serve biomedical and clinical researchers, allowing for customization
 with different texts.
 
-This project is a collection of GPL-licensed extensions and utilities for the BioMedICUS system.
+This project is a collection of GPL-licensed extensions and utilities for the BioMedICUS system.
+Any extension to, or use of, the BioMedICUS project with this extension installed must comply with
+the GPLv3 license.
 
 
 Wiki

From 1f625a653f277168ada024b9163e9ffed9b10996 Mon Sep 17 00:00:00 2001
From: Ben Knoll <benknoll@umn.edu>
Date: Tue, 11 Jul 2017 15:21:27 -0500
Subject: [PATCH 3/5] Google style guide update.

---
 pom.xml                                       | 232 ++++----
 src/assembly/descriptor.xml                   | 110 ++--
 .../SHStanfordConstituencyParser.xml          | 212 ++++----
 .../desc/ae/annotator/SeverityClassifier.xml  | 196 +++----
 .../annotator/StanfordConstituencyParser.xml  | 213 ++++----
 .../ae/training/SeverityClassifierTrainer.xml | 284 +++++-----
 .../parser/SHStanfordConstituencyParser.java  |   2 +-
 .../parser/StanfordConstituencyParser.java    |   3 +-
 .../internal/docclass/SeverityClassifier.java |   4 +-
 .../docclass/SeverityClassifierModel.java     | 163 +++---
 .../docclass/SeverityClassifierTrainer.java   | 147 ++---
 .../docclass/SeverityTrainerProcessor.java    |  39 --
 .../docclass/SeverityWekaProcessor.java       | 502 +++++++++---------
 13 files changed, 1054 insertions(+), 1053 deletions(-)
 delete mode 100644 src/main/java/edu/umn/biomedicus/internal/docclass/SeverityTrainerProcessor.java

diff --git a/pom.xml b/pom.xml
index 2eae1e6..17b043b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -16,127 +16,127 @@
   ~ along with this program.  If not, see <http://www.gnu.org/licenses/>.
   -->
 
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <modelVersion>4.0.0</modelVersion>
-    <groupId>edu.umn.biomedicus</groupId>
-    <artifactId>biomedicus-gpl</artifactId>
-    <packaging>jar</packaging>
-    <version>1.7.0-SNAPSHOT</version>
+<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xmlns="http://maven.apache.org/POM/4.0.0"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>edu.umn.biomedicus</groupId>
+  <artifactId>biomedicus-gpl</artifactId>
+  <packaging>jar</packaging>
+  <version>1.7.0-SNAPSHOT</version>
 
-    <name>biomedicus-gpl</name>
-    <description>BioMedICUS Annotation System - GPL Extensions</description>
+  <name>biomedicus-gpl</name>
+  <description>BioMedICUS Annotation System - GPL Extensions</description>
 
-    <properties>
-        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
-    </properties>
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+  </properties>
 
-    <dependencies>
-        <dependency>
-            <groupId>edu.umn.biomedicus</groupId>
-            <artifactId>biomedicus-core</artifactId>
-            <version>1.7.0-SNAPSHOT</version>
-        </dependency>
-        <dependency>
-            <groupId>edu.umn.biomedicus</groupId>
-            <artifactId>biomedicus-uima</artifactId>
-            <version>1.7.0-SNAPSHOT</version>
-        </dependency>
-        <dependency>
-            <groupId>edu.stanford.nlp</groupId>
-            <artifactId>stanford-corenlp</artifactId>
-            <version>3.6.0</version>
-        </dependency>
-        <dependency>
-            <groupId>nz.ac.waikato.cms.weka</groupId>
-            <artifactId>weka-stable</artifactId>
-            <version>3.8.0</version>
-        </dependency>
-        <dependency>
-            <groupId>com.google.code.findbugs</groupId>
-            <artifactId>jsr305</artifactId>
-            <version>3.0.0</version>
-            <optional>true</optional>
-        </dependency>
-    </dependencies>
+  <dependencies>
+    <dependency>
+      <groupId>edu.umn.biomedicus</groupId>
+      <artifactId>biomedicus-core</artifactId>
+      <version>1.7.0-SNAPSHOT</version>
+    </dependency>
+    <dependency>
+      <groupId>edu.umn.biomedicus</groupId>
+      <artifactId>biomedicus-uima</artifactId>
+      <version>1.7.0-SNAPSHOT</version>
+    </dependency>
+    <dependency>
+      <groupId>edu.stanford.nlp</groupId>
+      <artifactId>stanford-corenlp</artifactId>
+      <version>3.6.0</version>
+    </dependency>
+    <dependency>
+      <groupId>nz.ac.waikato.cms.weka</groupId>
+      <artifactId>weka-stable</artifactId>
+      <version>3.8.0</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.code.findbugs</groupId>
+      <artifactId>jsr305</artifactId>
+      <version>3.0.0</version>
+      <optional>true</optional>
+    </dependency>
+  </dependencies>
 
 
-    <build>
-        <resources>
-            <resource>
-                <directory>src/main/resources</directory>
-                <filtering>true</filtering>
-                <includes>
-                    <include>**/*.xml</include>
-                </includes>
-            </resource>
-            <resource>
-                <directory>src/main/resources</directory>
-                <filtering>false</filtering>
-                <excludes>
-                    <exclude>**/*.xml</exclude>
-                </excludes>
-            </resource>
-        </resources>
-        <plugins>
-            <plugin>
-                <artifactId>maven-assembly-plugin</artifactId>
-                <version>2.5.2</version>
-                <executions>
-                    <execution>
-                        <id>make-bundles</id>
-                        <goals>
-                            <goal>single</goal>
-                        </goals>
-                        <phase>package</phase>
-                        <configuration>
-                            <descriptors>
-                                <descriptor>src/assembly/descriptor.xml</descriptor>
-                            </descriptors>
-                        </configuration>
-                    </execution>
-                </executions>
-            </plugin>
-        </plugins>
-        <pluginManagement>
-            <plugins>
-                <plugin>
-                    <artifactId>maven-compiler-plugin</artifactId>
-                    <configuration>
-                        <source>1.8</source>
-                        <target>1.8</target>
-                    </configuration>
-                </plugin>
-            </plugins>
-        </pluginManagement>
-    </build>
+  <build>
+    <resources>
+      <resource>
+        <directory>src/main/resources</directory>
+        <filtering>true</filtering>
+        <includes>
+          <include>**/*.xml</include>
+        </includes>
+      </resource>
+      <resource>
+        <directory>src/main/resources</directory>
+        <filtering>false</filtering>
+        <excludes>
+          <exclude>**/*.xml</exclude>
+        </excludes>
+      </resource>
+    </resources>
+    <plugins>
+      <plugin>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <version>2.5.2</version>
+        <executions>
+          <execution>
+            <id>make-bundles</id>
+            <goals>
+              <goal>single</goal>
+            </goals>
+            <phase>package</phase>
+            <configuration>
+              <descriptors>
+                <descriptor>src/assembly/descriptor.xml</descriptor>
+              </descriptors>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <artifactId>maven-compiler-plugin</artifactId>
+          <configuration>
+            <source>1.8</source>
+            <target>1.8</target>
+          </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
 
-    <organization>
-        <name>University of Minnesota Institute for Health Informatics NLP/IE Program</name>
-        <url>http://www.bmhi.umn.edu/ihi/research/nlpie/index.htm</url>
-    </organization>
-    <developers>
-        <developer>
-            <name>Ben Knoll</name>
-            <email>benknoll@umn.edu</email>
-            <organization>NLP/IE Group at the University of Minnesota Institute for Health Informatics
-            </organization>
-            <organizationUrl>http://www.bmhi.umn.edu/ihi/research/nlpie/index.htm</organizationUrl>
-        </developer>
-    </developers>
-    <licenses>
-        <license>
-            <name>GNU General Public License, Version 3.0 (GPLv3)</name>
-            <url>https://www.gnu.org/licenses/gpl-3.0.en.html</url>
-            <distribution>repo</distribution>
-        </license>
-    </licenses>
-    <scm>
-        <url>https://github.com/NLPIE/BioMedICUS</url>
-        <connection>scm:git:https://github.com/NLPIE/BioMedICUS.git</connection>
-        <developerConnection>scm:git:https://github.com/NLPIE/BioMedICUS.git</developerConnection>
-    </scm>
+  <organization>
+    <name>University of Minnesota Institute for Health Informatics NLP/IE Program</name>
+    <url>http://www.bmhi.umn.edu/ihi/research/nlpie/index.htm</url>
+  </organization>
+  <developers>
+    <developer>
+      <name>Ben Knoll</name>
+      <email>benknoll@umn.edu</email>
+      <organization>NLP/IE Group at the University of Minnesota Institute for Health Informatics
+      </organization>
+      <organizationUrl>http://www.bmhi.umn.edu/ihi/research/nlpie/index.htm</organizationUrl>
+    </developer>
+  </developers>
+  <licenses>
+    <license>
+      <name>GNU General Public License, Version 3.0 (GPLv3)</name>
+      <url>https://www.gnu.org/licenses/gpl-3.0.en.html</url>
+      <distribution>repo</distribution>
+    </license>
+  </licenses>
+  <scm>
+    <url>https://github.com/NLPIE/BioMedICUS</url>
+    <connection>scm:git:https://github.com/NLPIE/BioMedICUS.git</connection>
+    <developerConnection>scm:git:https://github.com/NLPIE/BioMedICUS.git</developerConnection>
+  </scm>
 
 </project>
\ No newline at end of file
diff --git a/src/assembly/descriptor.xml b/src/assembly/descriptor.xml
index 5fbe75e..dfbf380 100644
--- a/src/assembly/descriptor.xml
+++ b/src/assembly/descriptor.xml
@@ -15,59 +15,59 @@
   ~ along with this program.  If not, see <http://www.gnu.org/licenses/>.
   -->
 
-<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
-          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-          xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
-    <id>release</id>
-    <formats>
-        <format>zip</format>
-    </formats>
-    <includeBaseDirectory>false</includeBaseDirectory>
-    <dependencySets>
-        <dependencySet>
-            <useProjectArtifact>true</useProjectArtifact>
-            <useTransitiveDependencies>true</useTransitiveDependencies>
-            <outputDirectory>lib</outputDirectory>
-            <unpack>false</unpack>
-        </dependencySet>
-    </dependencySets>
-    <fileSets>
-        <fileSet>
-            <directoryMode>0755</directoryMode>
-            <fileMode>0755</fileMode>
-            <directory>src/main/bin</directory>
-            <outputDirectory>bin</outputDirectory>
-            <includes>
-                <include>**/*</include>
-            </includes>
-        </fileSet>
-        <fileSet>
-            <directory>src/main/config</directory>
-            <outputDirectory>config</outputDirectory>
-            <includes>
-                <include>**/*</include>
-            </includes>
-        </fileSet>
-        <fileSet>
-            <directory>src/main/desc</directory>
-            <outputDirectory>desc</outputDirectory>
-            <includes>
-                <include>**/*</include>
-            </includes>
-        </fileSet>
-        <fileSet>
-            <directory>src/main/top</directory>
-            <outputDirectory>/</outputDirectory>
-            <includes>
-                <include>**/*</include>
-            </includes>
-        </fileSet>
-        <fileSet>
-            <directory>.</directory>
-            <includes>
-                <include>LICENSE.txt</include>
-                <include>README.md</include>
-            </includes>
-        </fileSet>
-    </fileSets>
+<assembly xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
+  xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
+  <id>release</id>
+  <formats>
+    <format>zip</format>
+  </formats>
+  <includeBaseDirectory>false</includeBaseDirectory>
+  <dependencySets>
+    <dependencySet>
+      <useProjectArtifact>true</useProjectArtifact>
+      <useTransitiveDependencies>true</useTransitiveDependencies>
+      <outputDirectory>lib</outputDirectory>
+      <unpack>false</unpack>
+    </dependencySet>
+  </dependencySets>
+  <fileSets>
+    <fileSet>
+      <directoryMode>0755</directoryMode>
+      <fileMode>0755</fileMode>
+      <directory>src/main/bin</directory>
+      <outputDirectory>bin</outputDirectory>
+      <includes>
+        <include>**/*</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>src/main/config</directory>
+      <outputDirectory>config</outputDirectory>
+      <includes>
+        <include>**/*</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>src/main/desc</directory>
+      <outputDirectory>desc</outputDirectory>
+      <includes>
+        <include>**/*</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>src/main/top</directory>
+      <outputDirectory>/</outputDirectory>
+      <includes>
+        <include>**/*</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>.</directory>
+      <includes>
+        <include>LICENSE.txt</include>
+        <include>README.md</include>
+      </includes>
+    </fileSet>
+  </fileSets>
 </assembly>
diff --git a/src/main/desc/ae/annotator/SHStanfordConstituencyParser.xml b/src/main/desc/ae/annotator/SHStanfordConstituencyParser.xml
index a4a4e09..5bdfa48 100644
--- a/src/main/desc/ae/annotator/SHStanfordConstituencyParser.xml
+++ b/src/main/desc/ae/annotator/SHStanfordConstituencyParser.xml
@@ -17,114 +17,116 @@
   -->
 
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
-    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-    <primitive>true</primitive>
-    <!-- Do not change this -->
-    <annotatorImplementationName>
-        edu.umn.biomedicus.uima.adapter.DocumentProcessorRunnerAnnotator
-    </annotatorImplementationName>
-    <!-- Biomedicus document processors are all run using the DocumentProcessorRunnerAnnotator -->
-    <analysisEngineMetaData>
-        <name>Social History Stanford Constituency Parser</name>
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <!-- Do not change this -->
+  <annotatorImplementationName>
+    edu.umn.biomedicus.uima.adapter.DocumentProcessorUimaAdapter
+  </annotatorImplementationName>
+  <!-- Biomedicus document processors are all run using the DocumentProcessorUimaAdapter -->
+  <analysisEngineMetaData>
+    <name>Social History Stanford Constituency Parser</name>
+    <description>
+      Uses Stanford's Shift-reduce parser to parse social history candidates for constituency.
+    </description>
+    <version>${project.version}</version>
+    <vendor>${organization.name}</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>documentProcessor</name>
+        <description>The document processor class to instantiate.</description>
+        <type>String</type>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>viewName</name>
+        <description>The name of the UIMA view to use.</description>
+        <type>String</type>
+      </configurationParameter>
+      <configurationParameter>
+        <name>eagerLoad</name>
         <description>
-            Uses Stanford's Shift-reduce parser to parse social history candidates for constituency.
+          The name of any classes that need to be eagerly loaded by the Guice injector. Classes
+          which are
+          instances of LoadableDataModel will have the loadData method called.
         </description>
-        <version>${project.version}</version>
-        <vendor>${organization.name}</vendor>
-        <configurationParameters>
-            <configurationParameter>
-                <name>documentProcessor</name>
-                <description>The document processor class to instantiate.</description>
-                <type>String</type>
-                <mandatory>true</mandatory>
-            </configurationParameter>
-            <configurationParameter>
-                <name>viewName</name>
-                <description>The name of the UIMA view to use.</description>
-                <type>String</type>
-            </configurationParameter>
-            <configurationParameter>
-                <name>eagerLoad</name>
-                <description>
-                    The name of any classes that need to be eagerly loaded by the Guice injector. Classes which are
-                    instances of LoadableDataModel will have the loadData method called.
-                </description>
-                <type>String</type>
-                <multiValued>true</multiValued>
-                <mandatory>false</mandatory>
-            </configurationParameter>
-            <configurationParameter>
-                <name>postProcessors</name>
-                <description>
-                    The class names of any post processors that should be run after all documents have been processed.
-                </description>
-                <type>String</type>
-                <multiValued>true</multiValued>
-                <mandatory>false</mandatory>
-            </configurationParameter>
-        </configurationParameters>
-        <configurationParameterSettings>
-            <nameValuePair>
-                <name>documentProcessor</name>
-                <value>
-                    <string>edu.umn.biomedicus.gpl.stanford.parser.SHStanfordConstituencyParser</string>
-                </value>
-            </nameValuePair>
-            <nameValuePair>
-                <name>viewName</name>
-                <value>
-                    <string>SystemView</string>
-                </value>
-            </nameValuePair>
-            <nameValuePair>
-                <name>eagerLoad</name>
-                <value>
-                    <array>
-                        <string>edu.umn.biomedicus.gpl.stanford.parser.StanfordConstituencyParserModel</string>
-                    </array>
-                </value>
-            </nameValuePair>
-        </configurationParameterSettings>
-        <typeSystemDescription>
-            <imports>
-                <import name="edu.umn.biomedicus.types.TypeSystem"/>
-            </imports>
-        </typeSystemDescription>
-        <typePriorities>
-            <imports>
-                <import name="edu.umn.biomedicus.types.TypeSystemTypePriorities"/>
-            </imports>
-        </typePriorities>
-        <operationalProperties>
-            <modifiesCas>true</modifiesCas>
-            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-            <outputsNewCASes>false</outputsNewCASes>
-        </operationalProperties>
-    </analysisEngineMetaData>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>postProcessors</name>
+        <description>
+          The class names of any post processors that should be run after all documents have been
+          processed.
+        </description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>documentProcessor</name>
+        <value>
+          <string>edu.umn.biomedicus.gpl.stanford.parser.SHStanfordConstituencyParser</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>viewName</name>
+        <value>
+          <string>SystemView</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>eagerLoad</name>
+        <value>
+          <array>
+            <string>edu.umn.biomedicus.gpl.stanford.parser.StanfordConstituencyParserModel</string>
+          </array>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="edu.umn.biomedicus.types.TypeSystem"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities>
+      <imports>
+        <import name="edu.umn.biomedicus.types.TypeSystemTypePriorities"/>
+      </imports>
+    </typePriorities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
 
-    <externalResourceDependencies>
-        <externalResourceDependency>
-            <key>guiceInjector</key>
-            <description>The guice injector resource.</description>
-        </externalResourceDependency>
-    </externalResourceDependencies>
+  <externalResourceDependencies>
+    <externalResourceDependency>
+      <key>guiceInjector</key>
+      <description>The guice injector resource.</description>
+    </externalResourceDependency>
+  </externalResourceDependencies>
 
-    <resourceManagerConfiguration>
-        <externalResources>
-            <externalResource>
-                <name>guiceInjector</name>
-                <description>The guice resource.</description>
-                <customResourceSpecifier>
-                    <resourceClassName>edu.umn.biomedicus.uima.adapter.GuiceInjector</resourceClassName>
-                </customResourceSpecifier>
-            </externalResource>
-        </externalResources>
-        <externalResourceBindings>
-            <externalResourceBinding>
-                <key>guiceInjector</key>
-                <resourceName>guiceInjector</resourceName>
-            </externalResourceBinding>
-        </externalResourceBindings>
-    </resourceManagerConfiguration>
+  <resourceManagerConfiguration>
+    <externalResources>
+      <externalResource>
+        <name>guiceInjector</name>
+        <description>The guice resource.</description>
+        <customResourceSpecifier>
+          <resourceClassName>edu.umn.biomedicus.uima.adapter.GuiceInjector</resourceClassName>
+        </customResourceSpecifier>
+      </externalResource>
+    </externalResources>
+    <externalResourceBindings>
+      <externalResourceBinding>
+        <key>guiceInjector</key>
+        <resourceName>guiceInjector</resourceName>
+      </externalResourceBinding>
+    </externalResourceBindings>
+  </resourceManagerConfiguration>
 </analysisEngineDescription>
 
diff --git a/src/main/desc/ae/annotator/SeverityClassifier.xml b/src/main/desc/ae/annotator/SeverityClassifier.xml
index 4fa85d6..32f823f 100644
--- a/src/main/desc/ae/annotator/SeverityClassifier.xml
+++ b/src/main/desc/ae/annotator/SeverityClassifier.xml
@@ -17,104 +17,106 @@
   -->
 
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
-    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-    <primitive>true</primitive>
-    <!-- Do not change this -->
-    <annotatorImplementationName>
-        edu.umn.biomedicus.uima.adapter.DocumentProcessorRunnerAnnotator
-    </annotatorImplementationName>
-    <!-- Biomedicus document processors are all run using the DocumentProcessorRunnerAnnotator -->
-    <analysisEngineMetaData>
-        <name>Severity Classifier</name>
-        <description>Severity Classifier</description>
-        <version>${project.version}</version>
-        <vendor>${organization.name}</vendor>
-        <configurationParameters>
-            <configurationParameter>
-                <name>documentProcessor</name>
-                <description>The document processor class to instantiate.</description>
-                <type>String</type>
-                <mandatory>true</mandatory>
-            </configurationParameter>
-            <configurationParameter>
-                <name>viewName</name>
-                <description>The name of the UIMA view to use.</description>
-                <type>String</type>
-            </configurationParameter>
-            <configurationParameter>
-                <name>eagerLoad</name>
-                <description>
-                    The name of any classes that need to be eagerly loaded by the Guice injector. Classes which are
-                    instances of LoadableDataModel will have the loadData method called.
-                </description>
-                <type>String</type>
-                <multiValued>true</multiValued>
-                <mandatory>false</mandatory>
-            </configurationParameter>
-            <configurationParameter>
-                <name>postProcessors</name>
-                <description>
-                    The class names of any post processors that should be run after all documents have been processed.
-                </description>
-                <type>String</type>
-                <multiValued>true</multiValued>
-                <mandatory>false</mandatory>
-            </configurationParameter>
-        </configurationParameters>
-        <configurationParameterSettings>
-            <nameValuePair>
-                <name>documentProcessor</name>
-                <value>
-                    <string>edu.umn.biomedicus.internal.docclass.SeverityClassifier</string>
-                </value>
-            </nameValuePair>
-            <nameValuePair>
-                <name>viewName</name>
-                <value>
-                    <string>SystemView</string>
-                </value>
-            </nameValuePair>
-        </configurationParameterSettings>
-        <typeSystemDescription>
-            <imports>
-                <import name="edu.umn.biomedicus.types.TypeSystem"/>
-            </imports>
-        </typeSystemDescription>
-        <typePriorities>
-            <imports>
-                <import name="edu.umn.biomedicus.types.TypeSystemTypePriorities"/>
-            </imports>
-        </typePriorities>
-        <operationalProperties>
-            <modifiesCas>true</modifiesCas>
-            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-            <outputsNewCASes>false</outputsNewCASes>
-        </operationalProperties>
-    </analysisEngineMetaData>
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <!-- Do not change this -->
+  <annotatorImplementationName>
+    edu.umn.biomedicus.uima.adapter.DocumentProcessorUimaAdapter
+  </annotatorImplementationName>
+  <!-- Biomedicus document processors are all run using the DocumentProcessorUimaAdapter -->
+  <analysisEngineMetaData>
+    <name>Severity Classifier</name>
+    <description>Severity Classifier</description>
+    <version>${project.version}</version>
+    <vendor>${organization.name}</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>documentProcessor</name>
+        <description>The document processor class to instantiate.</description>
+        <type>String</type>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>viewName</name>
+        <description>The name of the UIMA view to use.</description>
+        <type>String</type>
+      </configurationParameter>
+      <configurationParameter>
+        <name>eagerLoad</name>
+        <description>
+          The name of any classes that need to be eagerly loaded by the Guice injector. Classes
+          which are
+          instances of LoadableDataModel will have the loadData method called.
+        </description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>postProcessors</name>
+        <description>
+          The class names of any post processors that should be run after all documents have been
+          processed.
+        </description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>documentProcessor</name>
+        <value>
+          <string>edu.umn.biomedicus.internal.docclass.SeverityClassifier</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>viewName</name>
+        <value>
+          <string>SystemView</string>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="edu.umn.biomedicus.types.TypeSystem"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities>
+      <imports>
+        <import name="edu.umn.biomedicus.types.TypeSystemTypePriorities"/>
+      </imports>
+    </typePriorities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
 
-    <externalResourceDependencies>
-        <externalResourceDependency>
-            <key>guiceInjector</key>
-            <description>The guice injector resource.</description>
-        </externalResourceDependency>
-    </externalResourceDependencies>
+  <externalResourceDependencies>
+    <externalResourceDependency>
+      <key>guiceInjector</key>
+      <description>The guice injector resource.</description>
+    </externalResourceDependency>
+  </externalResourceDependencies>
 
-    <resourceManagerConfiguration>
-        <externalResources>
-            <externalResource>
-                <name>guiceInjector</name>
-                <description>The guice resource.</description>
-                <customResourceSpecifier>
-                    <resourceClassName>edu.umn.biomedicus.uima.adapter.GuiceInjector</resourceClassName>
-                </customResourceSpecifier>
-            </externalResource>
-        </externalResources>
-        <externalResourceBindings>
-            <externalResourceBinding>
-                <key>guiceInjector</key>
-                <resourceName>guiceInjector</resourceName>
-            </externalResourceBinding>
-        </externalResourceBindings>
-    </resourceManagerConfiguration>
+  <resourceManagerConfiguration>
+    <externalResources>
+      <externalResource>
+        <name>guiceInjector</name>
+        <description>The guice resource.</description>
+        <customResourceSpecifier>
+          <resourceClassName>edu.umn.biomedicus.uima.adapter.GuiceInjector</resourceClassName>
+        </customResourceSpecifier>
+      </externalResource>
+    </externalResources>
+    <externalResourceBindings>
+      <externalResourceBinding>
+        <key>guiceInjector</key>
+        <resourceName>guiceInjector</resourceName>
+      </externalResourceBinding>
+    </externalResourceBindings>
+  </resourceManagerConfiguration>
 </analysisEngineDescription>
 
diff --git a/src/main/desc/ae/annotator/StanfordConstituencyParser.xml b/src/main/desc/ae/annotator/StanfordConstituencyParser.xml
index af9bb25..a3b0a12 100644
--- a/src/main/desc/ae/annotator/StanfordConstituencyParser.xml
+++ b/src/main/desc/ae/annotator/StanfordConstituencyParser.xml
@@ -17,112 +17,115 @@
   -->
 
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
-    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-    <primitive>true</primitive>
-    <!-- Do not change this -->
-    <annotatorImplementationName>
-        edu.umn.biomedicus.uima.adapter.DocumentProcessorRunnerAnnotator
-    </annotatorImplementationName>
-    <!-- Biomedicus document processors are all run using the DocumentProcessorRunnerAnnotator -->
-    <analysisEngineMetaData>
-        <name>Stanford Constituency Parser</name>
-        <description>Uses Stanford's Shift-reduce parser to parse all sentences for constituency.</description>
-        <version>${project.version}</version>
-        <vendor>${organization.name}</vendor>
-        <configurationParameters>
-            <configurationParameter>
-                <name>documentProcessor</name>
-                <description>The document processor class to instantiate.</description>
-                <type>String</type>
-                <mandatory>true</mandatory>
-            </configurationParameter>
-            <configurationParameter>
-                <name>viewName</name>
-                <description>The name of the UIMA view to use.</description>
-                <type>String</type>
-            </configurationParameter>
-            <configurationParameter>
-                <name>eagerLoad</name>
-                <description>
-                    The name of any classes that need to be eagerly loaded by the Guice injector. Classes which are
-                    instances of LoadableDataModel will have the loadData method called.
-                </description>
-                <type>String</type>
-                <multiValued>true</multiValued>
-                <mandatory>false</mandatory>
-            </configurationParameter>
-            <configurationParameter>
-                <name>postProcessors</name>
-                <description>
-                    The class names of any post processors that should be run after all documents have been processed.
-                </description>
-                <type>String</type>
-                <multiValued>true</multiValued>
-                <mandatory>false</mandatory>
-            </configurationParameter>
-        </configurationParameters>
-        <configurationParameterSettings>
-            <nameValuePair>
-                <name>documentProcessor</name>
-                <value>
-                    <string>edu.umn.biomedicus.gpl.stanford.parser.StanfordConstituencyParser</string>
-                </value>
-            </nameValuePair>
-            <nameValuePair>
-                <name>viewName</name>
-                <value>
-                    <string>SystemView</string>
-                </value>
-            </nameValuePair>
-            <nameValuePair>
-                <name>eagerLoad</name>
-                <value>
-                    <array>
-                        <string>edu.umn.biomedicus.gpl.stanford.parser.StanfordConstituencyParserModel</string>
-                    </array>
-                </value>
-            </nameValuePair>
-        </configurationParameterSettings>
-        <typeSystemDescription>
-            <imports>
-                <import name="edu.umn.biomedicus.types.TypeSystem"/>
-            </imports>
-        </typeSystemDescription>
-        <typePriorities>
-            <imports>
-                <import name="edu.umn.biomedicus.types.TypeSystemTypePriorities"/>
-            </imports>
-        </typePriorities>
-        <operationalProperties>
-            <modifiesCas>true</modifiesCas>
-            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-            <outputsNewCASes>false</outputsNewCASes>
-        </operationalProperties>
-    </analysisEngineMetaData>
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <!-- Do not change this -->
+  <annotatorImplementationName>
+    edu.umn.biomedicus.uima.adapter.DocumentProcessorUimaAdapter
+  </annotatorImplementationName>
+  <!-- Biomedicus document processors are all run using the DocumentProcessorUimaAdapter -->
+  <analysisEngineMetaData>
+    <name>Stanford Constituency Parser</name>
+    <description>Uses Stanford's Shift-reduce parser to parse all sentences for constituency.
+    </description>
+    <version>${project.version}</version>
+    <vendor>${organization.name}</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>documentProcessor</name>
+        <description>The document processor class to instantiate.</description>
+        <type>String</type>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>viewName</name>
+        <description>The name of the UIMA view to use.</description>
+        <type>String</type>
+      </configurationParameter>
+      <configurationParameter>
+        <name>eagerLoad</name>
+        <description>
+          The name of any classes that need to be eagerly loaded by the Guice injector. Classes
+          which are
+          instances of LoadableDataModel will have the loadData method called.
+        </description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>postProcessors</name>
+        <description>
+          The class names of any post processors that should be run after all documents have been
+          processed.
+        </description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>documentProcessor</name>
+        <value>
+          <string>edu.umn.biomedicus.gpl.stanford.parser.StanfordConstituencyParser</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>viewName</name>
+        <value>
+          <string>SystemView</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>eagerLoad</name>
+        <value>
+          <array>
+            <string>edu.umn.biomedicus.gpl.stanford.parser.StanfordConstituencyParserModel</string>
+          </array>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="edu.umn.biomedicus.types.TypeSystem"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities>
+      <imports>
+        <import name="edu.umn.biomedicus.types.TypeSystemTypePriorities"/>
+      </imports>
+    </typePriorities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
 
-    <externalResourceDependencies>
-        <externalResourceDependency>
-            <key>guiceInjector</key>
-            <description>The guice injector resource.</description>
-        </externalResourceDependency>
-    </externalResourceDependencies>
+  <externalResourceDependencies>
+    <externalResourceDependency>
+      <key>guiceInjector</key>
+      <description>The guice injector resource.</description>
+    </externalResourceDependency>
+  </externalResourceDependencies>
 
-    <resourceManagerConfiguration>
-        <externalResources>
-            <externalResource>
-                <name>guiceInjector</name>
-                <description>The guice resource.</description>
-                <customResourceSpecifier>
-                    <resourceClassName>edu.umn.biomedicus.uima.adapter.GuiceInjector</resourceClassName>
-                </customResourceSpecifier>
-            </externalResource>
-        </externalResources>
-        <externalResourceBindings>
-            <externalResourceBinding>
-                <key>guiceInjector</key>
-                <resourceName>guiceInjector</resourceName>
-            </externalResourceBinding>
-        </externalResourceBindings>
-    </resourceManagerConfiguration>
+  <resourceManagerConfiguration>
+    <externalResources>
+      <externalResource>
+        <name>guiceInjector</name>
+        <description>The guice resource.</description>
+        <customResourceSpecifier>
+          <resourceClassName>edu.umn.biomedicus.uima.adapter.GuiceInjector</resourceClassName>
+        </customResourceSpecifier>
+      </externalResource>
+    </externalResources>
+    <externalResourceBindings>
+      <externalResourceBinding>
+        <key>guiceInjector</key>
+        <resourceName>guiceInjector</resourceName>
+      </externalResourceBinding>
+    </externalResourceBindings>
+  </resourceManagerConfiguration>
 </analysisEngineDescription>
 
diff --git a/src/main/desc/ae/training/SeverityClassifierTrainer.xml b/src/main/desc/ae/training/SeverityClassifierTrainer.xml
index 7124b97..aa3201c 100644
--- a/src/main/desc/ae/training/SeverityClassifierTrainer.xml
+++ b/src/main/desc/ae/training/SeverityClassifierTrainer.xml
@@ -17,148 +17,150 @@
   -->
 
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
-    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-    <primitive>true</primitive>
-    <!-- Do not change this -->
-    <annotatorImplementationName>
-        edu.umn.biomedicus.uima.adapter.DocumentProcessorRunnerAnnotator
-    </annotatorImplementationName>
-    <!-- Biomedicus document processors are all run using the DocumentProcessorRunnerAnnotator -->
-    <analysisEngineMetaData>
-        <name>Severity Trainer</name>
-        <description>Trains the severity classification model.</description>
-        <version>${project.version}</version>
-        <vendor>${organization.name}</vendor>
-        <configurationParameters>
-            <configurationParameter>
-                <name>docclass.severity.model.path</name>
-                <description>Where to write the model to.</description>
-                <type>String</type>
-                <mandatory>false</mandatory>
-            </configurationParameter>
-            <configurationParameter>
-                <name>docclass.stopwords.path</name>
-                <description>Location of stopwords file</description>
-                <type>String</type>
-                <mandatory>false</mandatory>
-            </configurationParameter>
-            <configurationParameter>
-                <name>documentProcessor</name>
-                <description>The document processor class to instantiate.</description>
-                <type>String</type>
-                <mandatory>true</mandatory>
-            </configurationParameter>
-            <configurationParameter>
-                <name>docclass.severity.attributesToKeep</name>
-                <description>The number of attributes to keep.</description>
-                <type>Integer</type>
-                <mandatory>true</mandatory>
-            </configurationParameter>
-            <configurationParameter>
-                <name>docclass.severity.minWordCount</name>
-                <description>Minimum word count</description>
-                <type>Integer</type>
-                <mandatory>true</mandatory>
-            </configurationParameter>
-            <configurationParameter>
-                <name>viewName</name>
-                <description>The name of the UIMA view to use.</description>
-                <type>String</type>
-            </configurationParameter>
-            <configurationParameter>
-                <name>eagerLoad</name>
-                <description>
-                    The name of any classes that need to be eagerly loaded by the Guice injector. Classes which are
-                    instances of LoadableDataModel will have the loadData method called.
-                </description>
-                <type>String</type>
-                <multiValued>true</multiValued>
-                <mandatory>false</mandatory>
-            </configurationParameter>
-            <configurationParameter>
-                <name>postProcessors</name>
-                <description>
-                    The class names of any post processors that should be run after all documents have been processed.
-                </description>
-                <type>String</type>
-                <multiValued>true</multiValued>
-                <mandatory>false</mandatory>
-            </configurationParameter>
-        </configurationParameters>
-        <configurationParameterSettings>
-            <nameValuePair>
-                <name>docclass.severity.attributesToKeep</name>
-                <value>
-                    <integer>1000</integer>
-                </value>
-            </nameValuePair>
-            <nameValuePair>
-                <name>docclass.severity.minWordCount</name>
-                <value>
-                    <integer>2</integer>
-                </value>
-            </nameValuePair>
-            <nameValuePair>
-                <name>documentProcessor</name>
-                <value>
-                    <string>edu.umn.biomedicus.internal.docclass.SeverityTrainerProcessor</string>
-                </value>
-            </nameValuePair>
-            <nameValuePair>
-                <name>viewName</name>
-                <value>
-                    <string>SystemView</string>
-                </value>
-            </nameValuePair>
-            <nameValuePair>
-                <name>postProcessors</name>
-                <value>
-                    <array>
-                        <string>edu.umn.biomedicus.internal.docclass.SeverityClassifierTrainer</string>
-                    </array>
-                </value>
-            </nameValuePair>
-        </configurationParameterSettings>
-        <typeSystemDescription>
-            <imports>
-                <import name="edu.umn.biomedicus.types.TypeSystem"/>
-            </imports>
-        </typeSystemDescription>
-        <typePriorities>
-            <imports>
-                <import name="edu.umn.biomedicus.types.TypeSystemTypePriorities"/>
-            </imports>
-        </typePriorities>
-        <operationalProperties>
-            <modifiesCas>false</modifiesCas>
-            <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
-            <outputsNewCASes>false</outputsNewCASes>
-        </operationalProperties>
-    </analysisEngineMetaData>
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <!-- Do not change this -->
+  <annotatorImplementationName>
+    edu.umn.biomedicus.uima.adapter.DocumentProcessorUimaAdapter
+  </annotatorImplementationName>
+  <!-- Biomedicus document processors are all run using the DocumentProcessorUimaAdapter -->
+  <analysisEngineMetaData>
+    <name>Severity Trainer</name>
+    <description>Trains the severity classification model.</description>
+    <version>${project.version}</version>
+    <vendor>${organization.name}</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>docclass.severity.model.path</name>
+        <description>Where to write the model to.</description>
+        <type>String</type>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>docclass.stopwords.path</name>
+        <description>Location of stopwords file</description>
+        <type>String</type>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>documentProcessor</name>
+        <description>The document processor class to instantiate.</description>
+        <type>String</type>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>docclass.severity.attributesToKeep</name>
+        <description>The number of attributes to keep.</description>
+        <type>Integer</type>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>docclass.severity.minWordCount</name>
+        <description>Minimum word count</description>
+        <type>Integer</type>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>viewName</name>
+        <description>The name of the UIMA view to use.</description>
+        <type>String</type>
+      </configurationParameter>
+      <configurationParameter>
+        <name>eagerLoad</name>
+        <description>
+          The name of any classes that need to be eagerly loaded by the Guice injector. Classes
+          which are
+          instances of LoadableDataModel will have the loadData method called.
+        </description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>postProcessors</name>
+        <description>
+          The class names of any post processors that should be run after all documents have been
+          processed.
+        </description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>docclass.severity.attributesToKeep</name>
+        <value>
+          <integer>1000</integer>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>docclass.severity.minWordCount</name>
+        <value>
+          <integer>2</integer>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>documentProcessor</name>
+        <value>
+          <string>edu.umn.biomedicus.internal.docclass.SeverityTrainerProcessor</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>viewName</name>
+        <value>
+          <string>SystemView</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>postProcessors</name>
+        <value>
+          <array>
+            <string>edu.umn.biomedicus.internal.docclass.SeverityClassifierTrainer</string>
+          </array>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="edu.umn.biomedicus.types.TypeSystem"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities>
+      <imports>
+        <import name="edu.umn.biomedicus.types.TypeSystemTypePriorities"/>
+      </imports>
+    </typePriorities>
+    <operationalProperties>
+      <modifiesCas>false</modifiesCas>
+      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
 
-    <externalResourceDependencies>
-        <externalResourceDependency>
-            <key>guiceInjector</key>
-            <description>The guice injector resource.</description>
-        </externalResourceDependency>
-    </externalResourceDependencies>
+  <externalResourceDependencies>
+    <externalResourceDependency>
+      <key>guiceInjector</key>
+      <description>The guice injector resource.</description>
+    </externalResourceDependency>
+  </externalResourceDependencies>
 
-    <resourceManagerConfiguration>
-        <externalResources>
-            <externalResource>
-                <name>guiceInjector</name>
-                <description>The guice resource.</description>
-                <customResourceSpecifier>
-                    <resourceClassName>edu.umn.biomedicus.uima.adapter.GuiceInjector</resourceClassName>
-                </customResourceSpecifier>
-            </externalResource>
-        </externalResources>
-        <externalResourceBindings>
-            <externalResourceBinding>
-                <key>guiceInjector</key>
-                <resourceName>guiceInjector</resourceName>
-            </externalResourceBinding>
-        </externalResourceBindings>
-    </resourceManagerConfiguration>
+  <resourceManagerConfiguration>
+    <externalResources>
+      <externalResource>
+        <name>guiceInjector</name>
+        <description>The guice resource.</description>
+        <customResourceSpecifier>
+          <resourceClassName>edu.umn.biomedicus.uima.adapter.GuiceInjector</resourceClassName>
+        </customResourceSpecifier>
+      </externalResource>
+    </externalResources>
+    <externalResourceBindings>
+      <externalResourceBinding>
+        <key>guiceInjector</key>
+        <resourceName>guiceInjector</resourceName>
+      </externalResourceBinding>
+    </externalResourceBindings>
+  </resourceManagerConfiguration>
 </analysisEngineDescription>
 
diff --git a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/SHStanfordConstituencyParser.java b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/SHStanfordConstituencyParser.java
index 8f7b1a3..61a5655 100644
--- a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/SHStanfordConstituencyParser.java
+++ b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/SHStanfordConstituencyParser.java
@@ -51,7 +51,7 @@ public SHStanfordConstituencyParser(
   }
 
   @Override
-  public void process() throws BiomedicusException {
+  public void process(Document document) throws BiomedicusException {
     for (Label<SocialHistoryCandidate> label : labelIndex) {
       stanfordConstituencyParserModel.parseSentence(label, parseTokenLabelIndex,
           partOfSpeechLabelIndex, constituencyParseLabeler);
diff --git a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParser.java b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParser.java
index a48988a..0b40239 100644
--- a/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParser.java
+++ b/src/main/java/edu/umn/biomedicus/gpl/stanford/parser/StanfordConstituencyParser.java
@@ -23,6 +23,7 @@
 import edu.umn.biomedicus.common.types.text.Sentence;
 import edu.umn.biomedicus.exc.BiomedicusException;
 import edu.umn.biomedicus.framework.DocumentProcessor;
+import edu.umn.biomedicus.framework.store.Document;
 import edu.umn.biomedicus.framework.store.Label;
 import edu.umn.biomedicus.framework.store.LabelIndex;
 import edu.umn.biomedicus.framework.store.Labeler;
@@ -48,7 +49,7 @@ public StanfordConstituencyParser(TextView textView,
   }
 
   @Override
-  public void process() throws BiomedicusException {
+  public void process(Document document) throws BiomedicusException {
     for (Label<Sentence> sentenceLabel : sentenceLabelIndex) {
       stanfordConstituencyParserModel.parseSentence(sentenceLabel, parseTokenLabelIndex,
           partOfSpeechLabelIndex, constituencyParseLabeler);
diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifier.java b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifier.java
index d4ba811..fee8d5c 100644
--- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifier.java
+++ b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifier.java
@@ -38,8 +38,8 @@ public SeverityClassifier(SeverityClassifierModel severityClassifierModel, Docum
   }
 
   @Override
-  public void process() throws BiomedicusException {
+  public void process(Document document) throws BiomedicusException {
     String prediction = severityClassifierModel.predict(textView);
-    document.putMetadata("Severity", prediction);
+    this.document.putMetadata("Severity", prediction);
   }
 }
diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierModel.java b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierModel.java
index 98a747e..886e62c 100644
--- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierModel.java
+++ b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierModel.java
@@ -24,16 +24,15 @@
 import edu.umn.biomedicus.exc.BiomedicusException;
 import edu.umn.biomedicus.framework.DataLoader;
 import edu.umn.biomedicus.framework.store.TextView;
-import weka.classifiers.Classifier;
-import weka.core.Instance;
-import weka.filters.Filter;
-
 import java.io.FileInputStream;
 import java.io.ObjectInputStream;
 import java.io.Serializable;
 import java.nio.file.Path;
 import java.util.HashMap;
 import java.util.Map;
+import weka.classifiers.Classifier;
+import weka.core.Instance;
+import weka.filters.Filter;
 
 /**
  * Classify documents based on symptom severity
@@ -43,93 +42,95 @@
  */
 @ProvidedBy(SeverityClassifierModel.Loader.class)
 public class SeverityClassifierModel implements Serializable {
-    // For unknown classes (test data or poorly formatted training data)
-    static final String UNK = "unknown";
-    private final Classifier classifier;
-    private final Filter attSel;
-    private final SeverityWekaProcessor severityWekaProcessor;
 
-    private final Map<Double, String> severityMap;
+  // For unknown classes (test data or poorly formatted training data)
+  static final String UNK = "unknown";
+  private final Classifier classifier;
+  private final Filter attSel;
+  private final SeverityWekaProcessor severityWekaProcessor;
 
-    /**
-     * Initialize this model
-     * All training happens in the trainer; just store what we need to keep for classification
-     * @param classifier a Weka Classifier object
-     * @param attSel an attribute selection object
-     * @param severityWekaProcessor a processor to convert Document objects into Weka Instance objects
-     * @throws BiomedicusException
-     */
-    SeverityClassifierModel(Classifier classifier,
-                            Filter attSel,
-                            SeverityWekaProcessor severityWekaProcessor) throws BiomedicusException {
-        severityMap = new HashMap<>();
-        severityMap.put(0., "ABSENT");
-        severityMap.put(1., "MILD");
-        severityMap.put(2., "MODERATE");
-        severityMap.put(3., "SEVERE");
-        severityMap.put(4., UNK);
-        this.classifier = classifier;
-        this.attSel = attSel;
-        this.severityWekaProcessor = severityWekaProcessor;
-    }
+  private final Map<Double, String> severityMap;
+
+  /**
+   * Initialize this model.
+   * All training happens in the trainer; just store what we need to keep for classification.
+   *
+   * @param classifier a Weka Classifier object
+   * @param attSel an attribute selection object
+   * @param severityWekaProcessor a processor to convert Document objects into Weka Instance
+   * objects
+   */
+  SeverityClassifierModel(Classifier classifier,
+      Filter attSel,
+      SeverityWekaProcessor severityWekaProcessor) throws BiomedicusException {
+    severityMap = new HashMap<>();
+    severityMap.put(0., "ABSENT");
+    severityMap.put(1., "MILD");
+    severityMap.put(2., "MODERATE");
+    severityMap.put(3., "SEVERE");
+    severityMap.put(4., UNK);
+    this.classifier = classifier;
+    this.attSel = attSel;
+    this.severityWekaProcessor = severityWekaProcessor;
+  }
 
-    /**
-     * Perform attribute selection and then classification using the stored Weka objects
-     * Where classes are tied, err on the side of higher class
-     * @param textView the textView
-     * @return a string (from the predefined classes) representing this textView's symptom severity
-     * @throws BiomedicusException
-     */
-    public String predict(TextView textView) throws BiomedicusException {
-        Instance inst = severityWekaProcessor.getTestData(textView);
-        double result;
-        try {
-            if(attSel.input(inst)) {
-                inst = attSel.output();
-                double[] dist = classifier.distributionForInstance(inst);
-                result=-1;
-                double max=-Double.MAX_VALUE;
-                for(int i=0; i<dist.length; i++) {
-                    if (dist[i] >= max) {
-                        max = dist[i];
-                        result = i;
-                    }
-                }
-            } else {
-                throw new Exception();
-            }
-        } catch(Exception e) {
-            throw new BiomedicusException();
+  /**
+   * Perform attribute selection and then classification using the stored Weka objects.
+   * Where classes are tied, err on the side of the higher class.
+   *
+   * @param textView the textView
+   * @return a string (from the predefined classes) representing this textView's symptom severity
+   */
+  public String predict(TextView textView) throws BiomedicusException {
+    Instance inst = severityWekaProcessor.getTestData(textView);
+    double result;
+    try {
+      if (attSel.input(inst)) {
+        inst = attSel.output();
+        double[] dist = classifier.distributionForInstance(inst);
+        result = -1;
+        double max = -Double.MAX_VALUE;
+        for (int i = 0; i < dist.length; i++) {
+          if (dist[i] >= max) {
+            max = dist[i];
+            result = i;
+          }
         }
-        return severityMap.get(result);
+      } else {
+        throw new Exception();
+      }
+    } catch (Exception e) {
+      throw new BiomedicusException();
     }
+    return severityMap.get(result);
+  }
 
-    public String getMetadataKey() {
-        return "Severity";
-    }
+  public String getMetadataKey() {
+    return "Severity";
+  }
 
-    /**
-     * Load a serialized model
-     */
-    @ProcessorScoped
-    static class Loader extends DataLoader<SeverityClassifierModel> {
+  /**
+   * Load a serialized model
+   */
+  @ProcessorScoped
+  static class Loader extends DataLoader<SeverityClassifierModel> {
 
-        private final Path modelPath;
+    private final Path modelPath;
 
-        @Inject
-        public Loader(@ProcessorSetting("docclass.severity.model.path") Path modelPath) {
-            this.modelPath = modelPath;
-        }
+    @Inject
+    public Loader(@ProcessorSetting("docclass.severity.model.path") Path modelPath) {
+      this.modelPath = modelPath;
+    }
 
-        @Override
-        protected SeverityClassifierModel loadModel() throws BiomedicusException {
-            try {
-                ObjectInputStream ois = new ObjectInputStream(new FileInputStream(modelPath.toFile()));
-                return (SeverityClassifierModel) ois.readObject();
-            } catch(Exception e) {
-                throw new BiomedicusException();
-            }
-        }
+    @Override
+    protected SeverityClassifierModel loadModel() throws BiomedicusException {
+      try {
+        ObjectInputStream ois = new ObjectInputStream(new FileInputStream(modelPath.toFile()));
+        return (SeverityClassifierModel) ois.readObject();
+      } catch (Exception e) {
+        throw new BiomedicusException();
+      }
     }
+  }
 
 }
diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierTrainer.java b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierTrainer.java
index 2a92c1b..74b992b 100644
--- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierTrainer.java
+++ b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityClassifierTrainer.java
@@ -20,10 +20,19 @@
 import com.google.inject.Inject;
 import edu.umn.biomedicus.annotations.ProcessorScoped;
 import edu.umn.biomedicus.annotations.ProcessorSetting;
+import edu.umn.biomedicus.common.StandardViews;
 import edu.umn.biomedicus.exc.BiomedicusException;
-import edu.umn.biomedicus.framework.PostProcessor;
+import edu.umn.biomedicus.framework.Aggregator;
 import edu.umn.biomedicus.framework.store.Document;
 import edu.umn.biomedicus.framework.store.TextView;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectOutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Set;
+import java.util.stream.Collectors;
+import javax.annotation.Nullable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import weka.attributeSelection.ASEvaluation;
@@ -36,15 +45,6 @@
 import weka.filters.Filter;
 import weka.filters.unsupervised.attribute.Remove;
 
-import javax.annotation.Nullable;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.ObjectOutputStream;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.Set;
-import java.util.stream.Collectors;
-
 /**
  * Train a Weka model to classify documents according to symptom severity
  * Created for the 2016 i2b2 NLP Shared Task
@@ -52,78 +52,79 @@
  * @author Greg Finley
  */
 @ProcessorScoped
-public class SeverityClassifierTrainer implements PostProcessor {
+public class SeverityClassifierTrainer implements Aggregator {
 
-    private static final Logger LOGGER = LoggerFactory.getLogger(SeverityClassifierTrainer.class);
+  private static final Logger LOGGER = LoggerFactory.getLogger(SeverityClassifierTrainer.class);
 
-    private final Path outPath;
-    private final SeverityWekaProcessor wekaProcessor;
-    private final int attributesToKeep;
+  private final Path outPath;
+  private final SeverityWekaProcessor wekaProcessor;
+  private final int attributesToKeep;
 
-    /**
-     * Initialize this trainer. If the stopwords file is not present or can't be read from, trainer will still work
-     * @param outPath the path to write the model to
-     * @param stopWordsPath path to a stopwords file
-     */
-    @Inject
-    public SeverityClassifierTrainer(@ProcessorSetting("docclass.severity.output.path") Path outPath,
-                                     @ProcessorSetting("docclass.stopwords.path") @Nullable Path stopWordsPath,
-                                     @ProcessorSetting("docclass.severity.attributesToKeep") Integer attributesToKeep,
-                                     @ProcessorSetting("docclass.severity.minWordCount") Integer minWordCount) {
-        Set<String> stopWords = null;
-        if(stopWordsPath != null) {
-            try {
-                stopWords = Files.lines(stopWordsPath).collect(Collectors.toSet());
-            } catch (IOException e) {
-                LOGGER.warn("Could not load stopwords file; will not exclude stopwords");
-            }
-        }
-        this.outPath = outPath;
-        this.attributesToKeep = attributesToKeep;
-        wekaProcessor = new SeverityWekaProcessor(stopWords, minWordCount, true);
+  /**
+   * Initialize this trainer. Training still works if the stopwords file cannot be read.
+   * @param outPath the path to write the model to
+   * @param stopWordsPath path to a stopwords file (one entry per line); may be null
+   * @param attributesToKeep number of attributes the ranker is told to select
+   * @param minWordCount minimum term count passed to the SeverityWekaProcessor
+   */
+  @Inject
+  public SeverityClassifierTrainer(@ProcessorSetting("docclass.severity.output.path") Path outPath,
+      @ProcessorSetting("docclass.stopwords.path") @Nullable Path stopWordsPath,
+      @ProcessorSetting("docclass.severity.attributesToKeep") Integer attributesToKeep,
+      @ProcessorSetting("docclass.severity.minWordCount") Integer minWordCount) {
+    Set<String> stopWords = null;
+    if (stopWordsPath != null) {
+      try { // readAllLines closes the file before returning
+        stopWords = Files.readAllLines(stopWordsPath).stream().collect(Collectors.toSet());
+      } catch (IOException e) {
+        LOGGER.warn("Could not load stopwords file; will not exclude stopwords", e);
+      }
     }
+    this.outPath = outPath;
+    this.attributesToKeep = attributesToKeep;
+    wekaProcessor = new SeverityWekaProcessor(stopWords, minWordCount, true);
+  }
 
-    /**
-     * Add the document to the collection, which will be trained all at once at the end
-     * @param textView a document
-     */
-    public void processDocument(TextView textView) {
-        wekaProcessor.addTrainingDocument(textView);
-    }
+  @Override
+  public void addDocument(Document document) throws BiomedicusException {
+    TextView textView = document.getTextView(StandardViews.ORIGINAL_DOCUMENT)
+        .orElseThrow(() -> new BiomedicusException("No original document view"));
+    wekaProcessor.addTrainingDocument(textView);
+  }
 
-    @Override
-    public void afterProcessing() throws BiomedicusException {
-        Instances trainSet = wekaProcessor.getTrainingData();
-        Classifier classifier = new SMO();
-        AttributeSelection sel = new AttributeSelection();
-        ASEvaluation infogain = new InfoGainAttributeEval();
-        Ranker ranker = new Ranker();
-        Remove remove = new Remove();
+  @Override
+  public void done() throws BiomedicusException { // trains on all added documents, saves model
+    Instances trainSet = wekaProcessor.getTrainingData();
+    Classifier classifier = new SMO();
+    AttributeSelection sel = new AttributeSelection();
+    ASEvaluation infogain = new InfoGainAttributeEval();
+    Ranker ranker = new Ranker();
+    Remove remove = new Remove();
 
-        ranker.setNumToSelect(attributesToKeep);
-        sel.setEvaluator(infogain);
-        sel.setSearch(ranker);
+    ranker.setNumToSelect(attributesToKeep);
+    sel.setEvaluator(infogain);
+    sel.setSearch(ranker);
 
-        try {
-            sel.SelectAttributes(trainSet);
-            int[] selected = sel.selectedAttributes();
-            remove.setInvertSelection(true);
-            remove.setAttributeIndicesArray(selected);
-            remove.setInputFormat(trainSet);
-            trainSet = Filter.useFilter(trainSet, remove);
-            classifier.buildClassifier(trainSet);
-        } catch (Exception e) {
-            throw new BiomedicusException();
-        }
+    try {
+      sel.SelectAttributes(trainSet);
+      remove.setInvertSelection(true);
+      remove.setAttributeIndicesArray(sel.selectedAttributes());
+      remove.setInputFormat(trainSet);
+      trainSet = Filter.useFilter(trainSet, remove);
+      classifier.buildClassifier(trainSet);
+    } catch (Exception e) {
+      LOGGER.error("Could not train the severity classifier", e);
+      throw new BiomedicusException("Could not train the severity classifier");
+    }
 
-        SeverityClassifierModel model = new SeverityClassifierModel(classifier, remove, wekaProcessor);
+    SeverityClassifierModel model = new SeverityClassifierModel(classifier, remove, wekaProcessor);
 
-        try {
-            ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(outPath.toFile()));
-            oos.writeObject(model);
-            oos.close();
-        } catch(IOException e) {
-            throw new BiomedicusException();
-        }
+    try (FileOutputStream fos = new FileOutputStream(outPath.toFile());
+        ObjectOutputStream oos = new ObjectOutputStream(fos)) { // closed even if the write fails
+      oos.writeObject(model);
+    } catch (IOException e) {
+      LOGGER.error("Could not write the severity model to {}", outPath, e);
+      throw new BiomedicusException("Could not write the severity model to " + outPath);
     }
+  }
 }
diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityTrainerProcessor.java b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityTrainerProcessor.java
deleted file mode 100644
index 787a0c9..0000000
--- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityTrainerProcessor.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2016 Regents of the University of Minnesota
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-package edu.umn.biomedicus.internal.docclass;
-
-import com.google.inject.Inject;
-import edu.umn.biomedicus.exc.BiomedicusException;
-import edu.umn.biomedicus.framework.DocumentProcessor;
-import edu.umn.biomedicus.framework.store.TextView;
-
-public class SeverityTrainerProcessor implements DocumentProcessor {
-    private final SeverityClassifierTrainer severityClassifierTrainer;
-    private final TextView textView;
-
-    @Inject
-    public SeverityTrainerProcessor(SeverityClassifierTrainer severityClassifierTrainer, TextView textView) {
-        this.severityClassifierTrainer = severityClassifierTrainer;
-        this.textView = textView;
-    }
-
-    @Override
-    public void process() throws BiomedicusException {
-        severityClassifierTrainer.processDocument(textView);
-    }
-}
diff --git a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityWekaProcessor.java b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityWekaProcessor.java
index 74db22e..fc36bab 100644
--- a/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityWekaProcessor.java
+++ b/src/main/java/edu/umn/biomedicus/internal/docclass/SeverityWekaProcessor.java
@@ -17,278 +17,306 @@
 
 package edu.umn.biomedicus.internal.docclass;
 
-import edu.umn.biomedicus.framework.store.Document;
 import edu.umn.biomedicus.framework.store.TextView;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import weka.core.*;
-
-import javax.annotation.Nullable;
 import java.io.Serializable;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import javax.annotation.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instance;
+import weka.core.Instances;
+import weka.core.SparseInstance;
 
 /**
- * Text processing used for the symptom severity annotator, as written for the 2016 i2b2 NLP Shared Task
- * Currently works on raw document text; could be modified to work on richer data (i.e., biomedicus's NLP results)
+ * Text processing used for the symptom severity annotator, as written for the 2016 i2b2 NLP Shared
+ * Task. Currently works on raw document text; could be modified to work on richer data (i.e.,
+ * biomedicus's NLP results).
  *
  * @author Greg Finley
  */
 class SeverityWekaProcessor implements Serializable {
-    private static final Logger LOGGER = LoggerFactory.getLogger(SeverityWekaProcessor.class);
 
-    // Build this incrementally with each added document
-    private Instances trainingTextInstances;
+  private static final Logger LOGGER = LoggerFactory.getLogger(SeverityWekaProcessor.class);
+  // de-weight instances in the 'absent' and 'mild' classes to deal with sparsity issues
+  private final double[] byClassWeights = {1, .3, 3, 3};
+  private final List<String> classValues;
+  private final boolean sortWordsByDescendingFreq;
+  private final int minTermCount;
+  private final Set<String> stopWords;
+  private final Attribute classAttribute;
+  // Created specifically for the i2b2-format XML files
+  private final Pattern fileTextPattern = Pattern
+      .compile("\\|(.*)\\[report_end\\]", Pattern.DOTALL);
+  private final Pattern scorePattern = Pattern.compile("score=\"(\\w+)\"");
+  private final Pattern annotatedBy = Pattern.compile("annotated_by=\"(.)\"");
+  // Build this incrementally with each added document
+  private Instances trainingTextInstances;
+  private Map<String, Integer> dictionary;
+  // Empty Instances objects used to maintain consistent format between individual Instance objects
+  private Instances textTemplate;
+  private Instances vectorTemplate;
 
-    // de-weight instances in the 'absent' and 'mild' classes to deal with sparsity issues
-    private final double[] byClassWeights = {1, .3, 3, 3};
+  /**
+   * Initialize this processor
+   *
+   * @param stopWords an optional set of words to exclude from the vector space
+   * @param minTermCount minimum number of occurrences to use a term in the vector space (2 is a
+   * good value)
+   * @param sortWordsByDescendingFreq whether to sort words by global frequency (this helps
+   * attribute selection)
+   */
+  SeverityWekaProcessor(@Nullable Set<String> stopWords, int minTermCount,
+      boolean sortWordsByDescendingFreq) {
+    this.stopWords = stopWords == null ? new HashSet<>() : stopWords;
+    this.sortWordsByDescendingFreq = sortWordsByDescendingFreq;
+    this.minTermCount = minTermCount;
 
-    private final List<String> classValues;
+    ArrayList<Attribute> textInstanceAttributes = new ArrayList<>();
+    classValues = Arrays
+        .asList("ABSENT", "MILD", "MODERATE", "SEVERE", SeverityClassifierModel.UNK);
+    classAttribute = new Attribute("_class", classValues);
+    textInstanceAttributes.add(classAttribute);
+    textInstanceAttributes.add(new Attribute("text", (List<String>) null));
 
-    private final boolean sortWordsByDescendingFreq;
-    private final int minTermCount;
-    private final Set<String> stopWords;
-    private Map<String, Integer> dictionary;
+    textTemplate = new Instances("textTemplate", textInstanceAttributes, 0);
+    textTemplate.setClassIndex(0);
 
-    // Empty Instances objects used to maintain consistent format between individual Instance objects
-    private Instances textTemplate;
-    private Instances vectorTemplate;
-    private final Attribute classAttribute;
+    trainingTextInstances = new Instances(textTemplate);
+  }
 
-    // Created specifically for the i2b2-format XML files
-    private final Pattern fileTextPattern = Pattern.compile("\\|(.*)\\[report_end\\]", Pattern.DOTALL);
-    private final Pattern scorePattern = Pattern.compile("score=\"(\\w+)\"");
-    private final Pattern annotatedBy = Pattern.compile("annotated_by=\"(.)\"");
+  /**
+   * Once all documents have been passed, vectorize the text and return the real-valued feature
+   * vectors
+   *
+   * @return Instances containing all training data
+   */
+  Instances getTrainingData() {
+    buildDictionary(trainingTextInstances);
+    return vectorizeInstances(trainingTextInstances);
+  }
 
-    /**
-     * Initialize this processor
-     * @param stopWords an optional set of words to exclude from the vector space
-     * @param minTermCount minimum number of occurrences to use a term in the vector space (2 is a good value)
-     * @param sortWordsByDescendingFreq whether to sort words by global frequency (this helps attribute selection)
-     */
-    SeverityWekaProcessor(@Nullable Set<String> stopWords, int minTermCount, boolean sortWordsByDescendingFreq) {
-        this.stopWords = stopWords == null ? new HashSet<>() : stopWords;
-        this.sortWordsByDescendingFreq = sortWordsByDescendingFreq;
-        this.minTermCount = minTermCount;
-
-        ArrayList<Attribute> textInstanceAttributes = new ArrayList<>();
-        classValues = Arrays.asList("ABSENT", "MILD", "MODERATE", "SEVERE", SeverityClassifierModel.UNK);
-        classAttribute = new Attribute("_class", classValues);
-        textInstanceAttributes.add(classAttribute);
-        textInstanceAttributes.add(new Attribute("text", (List<String>) null));
+  /**
+   * Add a document for training. Will extract this doc's text but will not train on it until
+   * getTrainingData is called.
+   *
+   * @param textView a document
+   */
+  void addTrainingDocument(TextView textView) {
+    Instance trainingInstance = getTextInstance(textView.getText());
+    if (trainingInstance != null) {
+      trainingTextInstances.add(trainingInstance);
+    }
+  }
 
-        textTemplate = new Instances("textTemplate", textInstanceAttributes, 0);
-        textTemplate.setClassIndex(0);
+  /**
+   * Convert a document into a vector instance. buildDictionary() needs to have been run.
+   *
+   * @param textView a document
+   * @return an Instance with real-valued data
+   */
+  Instance getTestData(TextView textView) {
+    Instance textInstance = getTextInstance(textView.getText());
+    Instance vectorInstance = vectorizeInstance(textInstance);
+    vectorInstance.setDataset(vectorTemplate);
+    return vectorInstance;
+  }
 
-        trainingTextInstances = new Instances(textTemplate);
+  /**
+   * Process the text and class of this document and put it into an Instance
+   *
+   * @param docText raw text from the file (assumed XML format)
+   * @return an Instance with two attributes: class, and doctext
+   */
+  @Nullable
+  private Instance getTextInstance(String docText) {
+    String fileText;
+    String docClass;
+    double weight = 1;
+    Matcher matcher = fileTextPattern.matcher(docText);
+    if (matcher.find()) {
+      fileText = matcher.group(1);
+    } else {
+      fileText = docText;
     }
-
-    /**
-     * Once all documents have been passed, vectorize the text and return the real-valued feature vectors
-     * @return Instances containing all training data
-     */
-    Instances getTrainingData() {
-        buildDictionary(trainingTextInstances);
-        return vectorizeInstances(trainingTextInstances);
+    fileText = processText(fileText);
+    matcher = scorePattern.matcher(docText);
+    if (matcher.find() && classValues.contains(docClass = matcher.group(1))) {
+      weight *= byClassWeights[classValues.indexOf(docClass)];
+    } else {
+      docClass = SeverityClassifierModel.UNK;
+      if (dictionary == null) {
+        LOGGER.warn("Added document with unknown class during training; will ignore!");
+        return null;
+      }
     }
-
-    /**
-     * Add a document for training. Will extract this doc's text but will not train on it until getTrainingData called
-     * @param textView a document
-     */
-    void addTrainingDocument(TextView textView) {
-        Instance trainingInstance = getTextInstance(textView.getText());
-        if (trainingInstance != null) {
-            trainingTextInstances.add(trainingInstance);
-        }
+    // add the annotator as a word (this helps classification a little)
+    matcher = annotatedBy.matcher(docText);
+    if (matcher.find()) {
+      fileText += " thisNoteAnnotatedBy" + matcher.group(1);
+      if (matcher.group(1).equals("1")) {
+        weight /= 2;
+      }
     }
 
-    /**
-     * Convert a document into a vector instance. buildDictionary() needs to have been run.
-     * @param textView a document
-     * @return an Instance with real-valued data
-     */
-    Instance getTestData(TextView textView) {
-        Instance textInstance = getTextInstance(textView.getText());
-        Instance vectorInstance = vectorizeInstance(textInstance);
-        vectorInstance.setDataset(vectorTemplate);
-        return vectorInstance;
-    }
+    Instance inst = new DenseInstance(2);
+    inst.setDataset(textTemplate);
+    inst.setValue(0, docClass);
+    inst.attribute(1).addStringValue(fileText);
+    inst.setValue(1, fileText);
+    inst.setWeight(weight);
+    return inst;
+  }
 
-    /**
-     * Process the text and class of this document and put it into an Instance
-     * @param docText raw text from the file (assumed XML format)
-     * @return an Instance with two attributes: class, and doctext
-     */
-    @Nullable
-    private Instance getTextInstance(String docText) {
-        String fileText;
-        String docClass;
-        double weight = 1;
-        Matcher matcher = fileTextPattern.matcher(docText);
-        if(matcher.find()) {
-            fileText = matcher.group(1);
-        } else {
-            fileText = docText;
-        }
-        fileText = processText(fileText);
-        matcher = scorePattern.matcher(docText);
-        if(matcher.find() && classValues.contains(docClass = matcher.group(1))) {
-            weight *= byClassWeights[classValues.indexOf(docClass)];
-        } else {
-            docClass = SeverityClassifierModel.UNK;
-            if(dictionary == null) {
-                LOGGER.warn("Added document with unknown class during training; will ignore!");
-                return null;
-            }
-        }
-        // add the annotator as a word (this helps classification a little)
-        matcher = annotatedBy.matcher(docText);
-        if(matcher.find()) {
-            fileText += " thisNoteAnnotatedBy" + matcher.group(1);
-            if (matcher.group(1).equals("1")) {
-                weight /= 2;
-            }
-        }
+  /**
+   * Prepare text for vectorization (lowercasing, fixing bad line breaks, rough tokenization)
+   *
+   * @param origText entire text of a document
+   * @return the processed text
+   */
+  private String processText(String origText) {
+    String text = fixTableRows(origText);
+    text = text.toLowerCase();
+    String[] words = text.split("\\W+");
+    if (words.length > 0) {
+      StringBuilder builder = new StringBuilder();
+      builder.append(words[0]);
+      for (int i = 1; i < words.length; i++) {
+        builder.append(" ");
+        builder.append(words[i]);
+      }
+      addBigrams(words, builder);
+      return builder.toString();
+    } else {
+      LOGGER.warn("Empty document");
+      return "";
+    }
+  }
 
-        Instance inst = new DenseInstance(2);
-        inst.setDataset(textTemplate);
-        inst.setValue(0, docClass);
-        inst.attribute(1).addStringValue(fileText);
-        inst.setValue(1, fileText);
-        inst.setWeight(weight);
-        return inst;
+  /**
+   * Given a list of words and a StringBuilder, continue to build bigrams/trigrams/etc. onto the
+   * builder
+   *
+   * @param words array of words in their natural order
+   * @param builder an active StringBuilder
+   */
+  private void addBigrams(String[] words, StringBuilder builder) {
+    for (int i = 1; i < words.length; i++) {
+      builder.append(" ");
+      builder.append(words[i - 1]);
+      builder.append("_");
+      builder.append(words[i]);
     }
+  }
 
-    /**
-     * Prepare text for vectorization (lowercasing, fixing bad line breaks, rough tokenization)
-     * @param origText entire text of a document
-     * @return the processed text
-     */
-    private String processText(String origText) {
-        String text = fixTableRows(origText);
-        text = text.toLowerCase();
-        String[] words = text.split("\\W+");
-        if(words.length > 0) {
-            StringBuilder builder = new StringBuilder();
-            builder.append(words[0]);
-            for (int i = 1; i < words.length; i++) {
-                builder.append(" ");
-                builder.append(words[i]);
-            }
-            addBigrams(words, builder);
-            return builder.toString();
-        } else {
-            LOGGER.warn("Empty document");
-            return "";
+  /**
+   * Fixes a problem common in the i2b2 text: sometimes new table lines start without any whitespace
+   *
+   * @param origText text with table problems
+   * @return text with line breaks inserted
+   */
+  private String fixTableRows(String origText) {
+    String pattern = "(:.*)\n+(.*[^A-Z\\-\\( \\s/])([A-Z].*:)";
+    String repl = "$1 $2\n$3";
+    // have to run this a few times to be sure we get it all (adjacent ones won't both be matched)
+    String fixed = origText.replaceAll(pattern, repl);
+    fixed = fixed.replaceAll(pattern, repl);
+    fixed = fixed.replaceAll(pattern, repl);
+    return fixed;
+  }
+
+  /**
+   * Builds a dictionary from known text and sets the attributes for vector instances. In the
+   * current implementation, this is done all at once, not incrementally, since total word counts
+   * must be known. This function must be called before vectorizing any text instances.
+   *
+   * @param textInstances Instances containing text (whitespace-delimited words)
+   */
+  private void buildDictionary(Instances textInstances) {
+    Map<String, Integer> globalCounts = new LinkedHashMap<>();
+    for (Instance inst : textInstances) {
+      String processed = inst.stringValue(1);
+      String[] words = processed.split("\\s+");
+      for (String uni : words) {
+        if (!stopWords.contains(uni)) {
+          if (!globalCounts.containsKey(uni)) {
+            globalCounts.put(uni, 0);
+          }
+          globalCounts.put(uni, globalCounts.get(uni) + 1);
         }
+      }
     }
-    /**
-     * Given a list of words and a StringBuilder, continue to build bigrams/trigrams/etc. onto the builder
-     * @param words array of words in their natural order
-     * @param builder an active StringBuilder
-     */
-    private void addBigrams(String[] words, StringBuilder builder) {
-        for(int i=1; i<words.length; i++) {
-            builder.append(" ");
-            builder.append(words[i-1]);
-            builder.append("_");
-            builder.append(words[i]);
-        }
+    List<String> sortedWords = new ArrayList<>();
+    sortedWords.addAll(globalCounts.keySet());
+    if (sortWordsByDescendingFreq) {
+      sortedWords.sort((word1, word2) -> {
+        int cmp = Integer.compare(globalCounts.get(word2), globalCounts.get(word1));
+        return cmp == 0 ? word2.compareTo(word1) : cmp;
+      });
     }
-
-    /**
-     * Fixes a problem common in the i2b2 text: sometimes new table lines start without any whitespace
-     * @param origText text with table problems
-     * @return text with line breaks inserted
-     */
-    private String fixTableRows(String origText) {
-        String pattern = "(:.*)\n+(.*[^A-Z\\-\\( \\s/])([A-Z].*:)";
-        String repl = "$1 $2\n$3";
-        // have to run this a few times to be sure we get it all (adjacent ones won't both be matched)
-        String fixed = origText.replaceAll(pattern, repl);
-        fixed = fixed.replaceAll(pattern, repl);
-        fixed = fixed.replaceAll(pattern, repl);
-        return fixed;
+    dictionary = new HashMap<>();
+    ArrayList<Attribute> vectorInstanceAttributes = new ArrayList<>();
+    vectorInstanceAttributes.add(classAttribute);
+    for (String word : sortedWords) {
+      if (globalCounts.get(word) >= minTermCount) {
+        dictionary.put(word, dictionary.size());
+        vectorInstanceAttributes.add(new Attribute(word));
+      }
     }
+    vectorTemplate = new Instances("vectorTemplate", vectorInstanceAttributes, 0);
+    vectorTemplate.setClassIndex(0);
+  }
 
-    /**
-     * Builds a dictionary from known text and set the attributes for vector instances
-     * In current implementation, this is done all at once, not incrementally, since total word counts must be known
-     * This function must be called before vectorizing any text instances
-     * @param textInstances Instances containing text (whitespace-delimited words)
-     */
-    private void buildDictionary(Instances textInstances) {
-        Map<String, Integer> globalCounts = new LinkedHashMap<>();
-        for (Instance inst : textInstances) {
-            String processed = inst.stringValue(1);
-            String[] words = processed.split("\\s+");
-            for (String uni : words) {
-                if (!stopWords.contains(uni)) {
-                    if (!globalCounts.containsKey(uni)) {
-                        globalCounts.put(uni, 0);
-                    }
-                    globalCounts.put(uni, globalCounts.get(uni) + 1);
-                }
-            }
-        }
-        List<String> sortedWords = new ArrayList<>();
-        sortedWords.addAll(globalCounts.keySet());
-        if (sortWordsByDescendingFreq) {
-            sortedWords.sort((word1, word2) -> {
-                int cmp = Integer.compare(globalCounts.get(word2), globalCounts.get(word1));
-                return cmp == 0 ? word2.compareTo(word1) : cmp;
-            });
-        }
-        dictionary = new HashMap<>();
-        ArrayList<Attribute> vectorInstanceAttributes = new ArrayList<>();
-        vectorInstanceAttributes.add(classAttribute);
-        for (String word : sortedWords) {
-            if (globalCounts.get(word) >= minTermCount) {
-                dictionary.put(word, dictionary.size());
-                vectorInstanceAttributes.add(new Attribute(word));
-            }
-        }
-        vectorTemplate = new Instances("vectorTemplate", vectorInstanceAttributes, 0);
-        vectorTemplate.setClassIndex(0);
+  /**
+   * Vectorize a bunch of text instances and put them into a single Instances object, probably to
+   * train a classifier
+   *
+   * @param textInstances Instances that have a class and text attribute
+   * @return Instances that have a class and many real-valued attributes
+   */
+  private Instances vectorizeInstances(Instances textInstances) {
+    List<Instance> listInstance = new ArrayList<>();
+    for (Instance textInstance : textInstances) {
+      listInstance.add(vectorizeInstance(textInstance));
     }
-
-    /**
-     * Vectorize a bunch of text instances and put them into a single Instances object, probably to train a classifier
-     * @param textInstances Instances that have a class and text attribute
-     * @return Instances that have a class and many real-valued attributes
-     */
-    private Instances vectorizeInstances(Instances textInstances) {
-        List<Instance> listInstance = new ArrayList<>();
-        for(Instance textInstance : textInstances) {
-            listInstance.add(vectorizeInstance(textInstance));
-        }
-        Instances vectorized = new Instances(vectorTemplate, textInstances.numInstances());
-        for(Instance inst : listInstance) vectorized.add(inst);
-        return vectorized;
+    Instances vectorized = new Instances(vectorTemplate, textInstances.numInstances());
+    for (Instance inst : listInstance) {
+      vectorized.add(inst);
     }
+    return vectorized;
+  }
 
-    /**
-     * Vectorize a text instance, probably for a classifier to evaluate
-     * @param textInstance Instance that has a class and text attribute
-     * @return Instance that has a class and many real-valued attributes
-     */
-    private Instance vectorizeInstance(Instance textInstance) {
-        // Put the class and word counts for this doc into an array, then build an Instance from that
-        // counts[0] is the doc class, not actually a word count
-        double[] counts = new double[dictionary.size() + 1];
-        counts[0] = textInstance.classValue();
-        String processed = textInstance.stringValue(1);
-        String[] words = processed.split("\\s+");
-        for (String uni : words) {
-            if (!stopWords.contains(uni) && dictionary.containsKey(uni)) {
-                counts[dictionary.get(uni) + 1]++;
-            }
-        }
-        Instance vec = new SparseInstance(1, counts);
-        vec.setWeight(textInstance.weight());
-        return vec;
+  /**
+   * Vectorize a text instance, probably for a classifier to evaluate
+   *
+   * @param textInstance Instance that has a class and text attribute
+   * @return Instance that has a class and many real-valued attributes
+   */
+  private Instance vectorizeInstance(Instance textInstance) {
+    // Put the class and word counts for this doc into an array, then build an Instance from that
+    // counts[0] is the doc class, not actually a word count
+    double[] counts = new double[dictionary.size() + 1];
+    counts[0] = textInstance.classValue();
+    String processed = textInstance.stringValue(1);
+    String[] words = processed.split("\\s+");
+    for (String uni : words) {
+      if (!stopWords.contains(uni) && dictionary.containsKey(uni)) {
+        counts[dictionary.get(uni) + 1]++;
+      }
     }
+    Instance vec = new SparseInstance(1, counts);
+    vec.setWeight(textInstance.weight());
+    return vec;
+  }
 
 }

From 0a7580f1ee2ce48b90e400c4a20f8576f9e70918 Mon Sep 17 00:00:00 2001
From: Ben Knoll <benknoll@umn.edu>
Date: Fri, 21 Jul 2017 18:21:23 -0500
Subject: [PATCH 4/5] =?UTF-8?q?Wrote=20our=20own=20version=20of=20a=20PTB?=
 =?UTF-8?q?=20reader=20so=20this=20doesn=E2=80=99t=20need=20to=20be=20GPL?=
 =?UTF-8?q?=20anymore.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../PennTreebankInputFileAdapter.java         | 219 ------------------
 1 file changed, 219 deletions(-)
 delete mode 100644 src/main/java/edu/umn/biomedicus/gpl/penntree/PennTreebankInputFileAdapter.java

diff --git a/src/main/java/edu/umn/biomedicus/gpl/penntree/PennTreebankInputFileAdapter.java b/src/main/java/edu/umn/biomedicus/gpl/penntree/PennTreebankInputFileAdapter.java
deleted file mode 100644
index c2778ca..0000000
--- a/src/main/java/edu/umn/biomedicus/gpl/penntree/PennTreebankInputFileAdapter.java
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Copyright (C) 2016 Regents of the University of Minnesota
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-package edu.umn.biomedicus.gpl.penntree;
-
-import com.google.inject.Inject;
-import edu.stanford.nlp.ling.TaggedWord;
-import edu.stanford.nlp.trees.PennTreeReaderFactory;
-import edu.stanford.nlp.trees.Tree;
-import edu.stanford.nlp.trees.TreeReader;
-import edu.umn.biomedicus.common.types.syntax.PartOfSpeech;
-import edu.umn.biomedicus.common.types.syntax.PartsOfSpeech;
-import edu.umn.biomedicus.common.types.text.ImmutableParseToken;
-import edu.umn.biomedicus.common.types.text.Sentence;
-import edu.umn.biomedicus.framework.store.Document;
-import edu.umn.biomedicus.framework.store.Label;
-import edu.umn.biomedicus.framework.store.Span;
-import edu.umn.biomedicus.framework.store.TextView;
-import edu.umn.biomedicus.uima.adapter.UimaAdapters;
-import edu.umn.biomedicus.uima.files.InputFileAdapter;
-import edu.umn.biomedicus.uima.labels.LabelAdapters;
-import java.io.IOException;
-import java.io.Reader;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.collection.CollectionException;
-
-/**
- * Adapts Penn treebank format files to CAS files.
- *
- * @author Ben Knoll
- * @since 1.3.0
- */
-public final class PennTreebankInputFileAdapter implements InputFileAdapter {
-
-  /**
-   * The penn tree reader factory.
-   */
-  private final PennTreeReaderFactory pennTreeReaderFactory = new PennTreeReaderFactory();
-
-  private final LabelAdapters labelAdapters;
-
-  /**
-   * The view name to load into.
-   */
-  private String viewName;
-
-  @Inject
-  public PennTreebankInputFileAdapter(LabelAdapters labelAdapters) {
-    this.labelAdapters = labelAdapters;
-  }
-
-  @Override
-  public void adaptFile(CAS cas, Path path) throws CollectionException {
-    StringBuilder text = new StringBuilder();
-    ArrayList<SentenceBuilder> sentences = new ArrayList<>();
-    try (Reader reader = Files.newBufferedReader(path)) {
-      TreeReader treeReader = pennTreeReaderFactory.newTreeReader(reader);
-      Tree tree;
-      while ((tree = treeReader.readTree()) != null) {
-        int sentenceStart = text.length();
-
-        ArrayList<TaggedWord> taggedWords = tree.taggedYield();
-        SentenceBuilder sentenceBuilder = new SentenceBuilder();
-        sentenceBuilder.tokenBuilders = new ArrayList<>(taggedWords.size());
-        for (TaggedWord taggedWord : taggedWords) {
-          String tag = taggedWord.tag();
-          String word = taggedWord.word();
-          text.append(" ");
-
-          /**
-           * This -NONE- tag occurs in some documents when there is a assumed phrase.
-           */
-          if (!"-NONE-".equals(tag)) {
-            int tokenStart = text.length();
-            if ("-LRB-".equals(word)) {
-              text.append('(');
-            } else if ("-RRB-".equals(word)) {
-              text.append(')');
-            } else if ("-LCB-".equals(word)) {
-              text.append('{');
-            } else if ("-RCB-".equals(word)) {
-              text.append('}');
-            } else if ("-LSB-".equals(word)) {
-              text.append('[');
-            } else if ("-RSB-".equals(word)) {
-              text.append(']');
-            } else if ("``".equals(word)) {
-              text.append("\"");
-            } else if ("''".equals(word)) {
-              text.append("\"");
-            } else {
-              text.append(word);
-            }
-            int tokenEnd = text.length();
-
-            PartOfSpeech partOfSpeech;
-            if ("-LRB-".equals(tag)) {
-              partOfSpeech = PartOfSpeech.LEFT_PAREN;
-            } else if ("-RRB-".equals(tag)) {
-              partOfSpeech = PartOfSpeech.RIGHT_PAREN;
-            } else {
-              if (tag.contains("|")) {
-                String[] tags = tag.split("\\|");
-                Random random = new Random();
-                int randomIndex = random.nextInt(tags.length);
-                partOfSpeech = PartsOfSpeech.forTag(tags[randomIndex]);
-              } else {
-                partOfSpeech = PartsOfSpeech.forTag(tag);
-              }
-
-            }
-
-            TokenBuilder tokenBuilder = new TokenBuilder();
-            tokenBuilder.tokenSpan = new Span(tokenStart, tokenEnd);
-            tokenBuilder.partOfSpeech = partOfSpeech;
-            if (partOfSpeech == null) {
-              throw new AssertionError("part of speech should not be null");
-            }
-            sentenceBuilder.tokenBuilders.add(tokenBuilder);
-          }
-        }
-
-        int sentenceEnd = text.length();
-
-        sentenceBuilder.sentenceSpan = new Span(sentenceStart, sentenceEnd);
-        sentences.add(sentenceBuilder);
-      }
-    } catch (IOException e) {
-      throw new CollectionException(e);
-    }
-
-    Document document = UimaAdapters.createDocument(cas, labelAdapters,
-        path.getFileName().toString());
-    TextView textView = document.newTextView()
-        .withText(text.toString())
-        .withName(viewName)
-        .build();
-
-    for (SentenceBuilder sentence : sentences) {
-      Span sentenceSpan = sentence.sentenceSpan;
-
-      textView.label(Label.create(sentenceSpan, Sentence.create()));
-
-      for (TokenBuilder tokenBuilder : sentence.tokenBuilders) {
-        Span tokenSpan = tokenBuilder.tokenSpan;
-
-        textView.label(Label.create(tokenSpan,
-            ImmutableParseToken.builder()
-                .text(tokenSpan.getCovered(text).toString())
-                .hasSpaceAfter(true)
-                .build()
-        ));
-
-        PartOfSpeech partOfSpeech = tokenBuilder.partOfSpeech;
-        if (partOfSpeech != null) {
-          String pos = partOfSpeech.toString();
-          textView.label(Label.create(tokenSpan, PartsOfSpeech.forTag(pos)));
-        }
-      }
-    }
-
-  }
-
-  @Override
-  public void setTargetView(String viewName) {
-    this.viewName = viewName;
-  }
-
-  /**
-   * Used to build sentences.
-   */
-  private static class SentenceBuilder {
-
-    /**
-     * The begin and end of the sentence.
-     */
-    private Span sentenceSpan;
-
-    /**
-     * The tokens of the sentence.
-     */
-    private List<TokenBuilder> tokenBuilders;
-  }
-
-  /**
-   * Used to build tokens
-   */
-  private static class TokenBuilder {
-
-    /**
-     * The begin and end of the token
-     */
-    private Span tokenSpan;
-
-    /**
-     * The part of speech.
-     */
-    private PartOfSpeech partOfSpeech;
-  }
-}

From a9d786fa1cc89207c91449e998039b77efc984ea Mon Sep 17 00:00:00 2001
From: Ben Knoll <benknoll@umn.edu>
Date: Mon, 24 Jul 2017 09:16:22 -0500
Subject: [PATCH 5/5] 1.7.0 pom version change

---
 pom.xml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pom.xml b/pom.xml
index 17b043b..d0b5e25 100644
--- a/pom.xml
+++ b/pom.xml
@@ -23,7 +23,7 @@
   <groupId>edu.umn.biomedicus</groupId>
   <artifactId>biomedicus-gpl</artifactId>
   <packaging>jar</packaging>
-  <version>1.7.0-SNAPSHOT</version>
+  <version>1.7.0</version>
 
   <name>biomedicus-gpl</name>
   <description>BioMedICUS Annotation System - GPL Extensions</description>
@@ -37,12 +37,12 @@
     <dependency>
       <groupId>edu.umn.biomedicus</groupId>
       <artifactId>biomedicus-core</artifactId>
-      <version>1.7.0-SNAPSHOT</version>
+      <version>1.7.0</version>
     </dependency>
     <dependency>
       <groupId>edu.umn.biomedicus</groupId>
       <artifactId>biomedicus-uima</artifactId>
-      <version>1.7.0-SNAPSHOT</version>
+      <version>1.7.0</version>
     </dependency>
     <dependency>
       <groupId>edu.stanford.nlp</groupId>