stream results (#107)
Foxcapades authored Oct 30, 2024
1 parent b7d016f commit 29ce5da
Showing 2 changed files with 55 additions and 55 deletions.
@@ -2,21 +2,21 @@

import java.io.OutputStream;
import java.io.PrintStream;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Matcher;

import javax.sql.DataSource;

import org.gusdb.fgputil.Tuples;
import org.gusdb.fgputil.db.SqlUtils;
import org.gusdb.fgputil.db.stream.ResultSetIterator;
import org.gusdb.wdk.model.WdkModel;
import org.gusdb.wdk.model.WdkModelException;
import org.gusdb.wdk.model.WdkRuntimeException;
import org.gusdb.wdk.model.answer.AnswerValue;
import org.gusdb.wdk.model.query.Column;
import org.gusdb.wdk.model.query.SqlQuery;
import org.gusdb.wdk.model.record.PrimaryKeyDefinition;
import org.gusdb.wdk.model.record.PrimaryKeyValue;
import org.gusdb.wdk.model.record.RecordClass;
import org.gusdb.wdk.model.record.attribute.AttributeField;
@@ -27,6 +27,8 @@
import org.gusdb.wdk.model.record.attribute.TextAttributeField;
import org.json.JSONObject;

import static org.gusdb.fgputil.functional.Functions.mapException;

public abstract class AbstractAttributeReporter extends AbstractReporter {

protected static final String ATTRIBUTE_COLUMN = "wdk_attribute";
@@ -158,37 +160,32 @@ private String formatColumn(AnswerValue answerValue, AttributeField attribute,
return builder.append("'").toString();
}

/**
* @return the values of the associated attribute. the key of the map is the
* primary key of a record instance.
*/
protected Map<PrimaryKeyValue, Object> getAttributeValues(AnswerValue answerValue)
throws WdkModelException, SQLException {
WdkModel wdkModel = answerValue.getWdkModel();
Map<PrimaryKeyValue, Object> values = new LinkedHashMap<>();
RecordClass recordClass = answerValue.getAnswerSpec().getQuestion().getRecordClass();
PrimaryKeyDefinition pkDef = recordClass.getPrimaryKeyDefinition();
String[] pkColumns = pkDef.getColumnRefs();
String sql = getAttributeSql(answerValue);
DataSource dataSource = wdkModel.getAppDb().getDataSource();
ResultSet resultSet = null;
try {
resultSet = SqlUtils.executeQuery(dataSource, sql,
answerValue.getAnswerSpec().getQuestion().getQuery().getFullName()
+ "__attribute-plugin-combined", 5000);
while (resultSet.next()) {
Map<String, Object> pkValues = new LinkedHashMap<>();
for (String pkColumn : pkColumns) {
pkValues.put(pkColumn, resultSet.getObject(pkColumn));
}
PrimaryKeyValue pkValue = new PrimaryKeyValue(pkDef, pkValues);
Object value = resultSet.getObject(ATTRIBUTE_COLUMN);
values.put(pkValue, value);
}
} finally {
SqlUtils.closeResultSetAndStatement(resultSet, null);
}
return values;
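/**
 * @return a streaming iterator over the values of the associated attribute; each
 *         element pairs the primary key of a record instance with that record's value
 */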
protected ResultSetIterator<Tuples.TwoTuple<PrimaryKeyValue, Object>> getAttributeValueStream(AnswerValue answerValue)
throws WdkModelException, SQLException {
var pkDef = answerValue.getAnswerSpec()
.getQuestion()
.getRecordClass()
.getPrimaryKeyDefinition();

var pkColumns = pkDef.getColumnRefs();

var resultSet = SqlUtils.executeQuery(
answerValue.getWdkModel().getAppDb().getDataSource(),
getAttributeSql(answerValue),
answerValue.getAnswerSpec().getQuestion().getQuery().getFullName() + "__attribute-plugin-combined",
5000
);

return new ResultSetIterator<>(resultSet, row -> {
var pkValues = new LinkedHashMap<String, Object>(pkColumns.length);

for (var pkColumn : pkColumns)
pkValues.put(pkColumn, resultSet.getObject(pkColumn));

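// mapException wraps any checked exception from the PrimaryKeyValue constructor
// in an unchecked WdkRuntimeException so it can be constructed inside this
// row-mapping lambda.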
return Optional.of(new Tuples.TwoTuple<>(
mapException(() -> new PrimaryKeyValue(pkDef, pkValues), WdkRuntimeException::new),
resultSet.getObject(ATTRIBUTE_COLUMN)
));
});
}

}
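For orientation only (not part of the commit): the new method hands back a ResultSetIterator that owns the open ResultSet, so callers are expected to consume it in a try-with-resources block, as the WordCloudAttributeReporter change below does. A minimal hypothetical consumer inside a reporter subclass might look like the sketch here; the getFirst() accessor on Tuples.TwoTuple is assumed by symmetry with the getSecond() call this commit uses, and the throws clause is kept broad since the iterator's exact exception contract is not shown in the diff.

// Hypothetical consumer sketch, not part of this commit.
private void consumeAttributeValues(AnswerValue answerValue) throws Exception {
  try (var rows = getAttributeValueStream(answerValue)) {
    while (rows.hasNext()) {
      var row = rows.next();
      PrimaryKeyValue pk = row.getFirst();   // primary key of the record instance (assumed accessor)
      Object value = row.getSecond();        // that record's attribute value
      // process one row at a time instead of materializing the full map up front
    }
  }
}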
@@ -12,13 +12,12 @@
import org.apache.log4j.Logger;
import org.gusdb.wdk.model.WdkModelException;
import org.gusdb.wdk.model.answer.AnswerValue;
import org.gusdb.wdk.model.record.PrimaryKeyValue;
import org.gusdb.wdk.model.report.AbstractAttributeReporter;
import org.json.JSONArray;
import org.json.JSONObject;

public class WordCloudAttributeReporter extends AbstractAttributeReporter {

private static final String PROP_SPLIT_PATTERN = "split-pattern";
private static final String PROP_MIN_WORD_LENGTH = "min-word-length";
private static final String PROP_EXCLUDE_NUMBERS = "exclude-numbers";
@@ -42,32 +41,36 @@ public JSONObject getJsonResult(AnswerValue answerValue) throws WdkModelException
List<WordTag> tags = loadTags(answerValue);

JSONObject jsonResult = new JSONObject();

JSONArray jsonWordTags = new JSONArray();
for (WordTag tag : tags) {
JSONObject tagJson = new JSONObject();
tagJson.put("word", tag.getWord());
tagJson.put("count", tag.getCount());
jsonWordTags.put(tagJson);
}

jsonResult.put(ATTR_TAGS, jsonWordTags);

return jsonResult;
}

private List<WordTag> loadTags(AnswerValue answerValue) {
List<WordTag> tags = new ArrayList<>();
List<WordTag> tags;

resolveProperties();
try {
Map<String, WordTag> tagMap = new HashMap<String, WordTag>();
Map<PrimaryKeyValue, Object> values = getAttributeValues(answerValue);
for (Object value : values.values()) {
if (value == null)
var tagMap = new HashMap<String, WordTag>();

try (var valueStream = getAttributeValueStream(answerValue)) {
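// try-with-resources closes the iterator, and with it the JDBC resources opened
// by getAttributeValueStream, whether streaming completes or fails part way.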
while (valueStream.hasNext()) {
var record = valueStream.next();

if (record.getSecond() == null)
continue;
splitWords(value.toString(), tagMap);

splitWords(record.getSecond().toString(), tagMap);
}

// the tags are sorted by count
tags = processTags(tagMap);
return tags;
@@ -134,18 +137,18 @@ private void splitWords(String content, Map<String, WordTag> tags) {
WordTag tag = tags.get(word);

if (tag == null) tag = new WordTag(word, originalWord);

else {
tag.increment();

// In addition to incrementing the overall count for the word, we need to amend the
// mixedCaseCounter map either by adding a new case sensitive version of the word or
// incrementing the count for an existing case sensitive version.
Map<String, Integer> mixedCaseCounter = tag.getMixedCaseCounter();
Integer count = mixedCaseCounter.get(originalWord);
if(count == null) mixedCaseCounter.put(originalWord, 1);
else mixedCaseCounter.put(originalWord, ++count);
}
}
tags.put(word, tag);
// logger.debug("word count: '" + word + "' = " + count);
}
@@ -175,7 +178,7 @@ private List<WordTag> processTags(Map<String, WordTag> tags) {
WordTag partTag = tags.get(part);
int count = tag.getCount() + partTag.getCount();
partTag.setCount(count);

// In addition to absorbing overall plural counts for the case neutral word, we
// need to absorb plural counts for the case sensitive versions of the word as
// well.
@@ -204,14 +207,14 @@ private List<WordTag> processTags(Map<String, WordTag> tags) {
// weights and scores are easier.
Collections.sort(list);
}

return list.stream().map(tag -> {
String dominantCase = tag.getDominantCase();
tag.setWord(dominantCase);
return tag;
}).collect(Collectors.toList());
}

/**
* Mixed case plurals are stored in the mixed case counter map as plurals since no
determination about plurals can easily be made. But since the ending of the
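For reference (not part of the diff): the getJsonResult method shown earlier in this file serializes each WordTag as a {word, count} object under the ATTR_TAGS key. The snippet below builds the same shape by hand so the output format is easy to picture; the literal key name "tags" and the word/count values are illustrative assumptions, since ATTR_TAGS is defined outside the hunks shown here.

// Illustrative only; mirrors the structure built in getJsonResult.
JSONObject sample = new JSONObject()
    .put("tags", new JSONArray()
        .put(new JSONObject().put("word", "example").put("count", 12))
        .put(new JSONObject().put("word", "another").put("count", 7)));
// Serialized: {"tags":[{"word":"example","count":12},{"word":"another","count":7}]}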
