stream results (#107)
Foxcapades authored Oct 30, 2024
1 parent b7d016f commit 29ce5da
Showing 2 changed files with 55 additions and 55 deletions.
@@ -2,21 +2,21 @@

import java.io.OutputStream;
import java.io.PrintStream;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Matcher;

import javax.sql.DataSource;

import org.gusdb.fgputil.Tuples;
import org.gusdb.fgputil.db.SqlUtils;
import org.gusdb.fgputil.db.stream.ResultSetIterator;
import org.gusdb.wdk.model.WdkModel;
import org.gusdb.wdk.model.WdkModelException;
import org.gusdb.wdk.model.WdkRuntimeException;
import org.gusdb.wdk.model.answer.AnswerValue;
import org.gusdb.wdk.model.query.Column;
import org.gusdb.wdk.model.query.SqlQuery;
import org.gusdb.wdk.model.record.PrimaryKeyDefinition;
import org.gusdb.wdk.model.record.PrimaryKeyValue;
import org.gusdb.wdk.model.record.RecordClass;
import org.gusdb.wdk.model.record.attribute.AttributeField;
@@ -27,6 +27,8 @@
import org.gusdb.wdk.model.record.attribute.TextAttributeField;
import org.json.JSONObject;

import static org.gusdb.fgputil.functional.Functions.mapException;

public abstract class AbstractAttributeReporter extends AbstractReporter {

protected static final String ATTRIBUTE_COLUMN = "wdk_attribute";
@@ -158,37 +160,32 @@ private String formatColumn(AnswerValue answerValue, AttributeField attribute,
return builder.append("'").toString();
}

/**
* @return the values of the associated attribute. the key of the map is the
* primary key of a record instance.
*/
protected Map<PrimaryKeyValue, Object> getAttributeValues(AnswerValue answerValue)
throws WdkModelException, SQLException {
WdkModel wdkModel = answerValue.getWdkModel();
Map<PrimaryKeyValue, Object> values = new LinkedHashMap<>();
RecordClass recordClass = answerValue.getAnswerSpec().getQuestion().getRecordClass();
PrimaryKeyDefinition pkDef = recordClass.getPrimaryKeyDefinition();
String[] pkColumns = pkDef.getColumnRefs();
String sql = getAttributeSql(answerValue);
DataSource dataSource = wdkModel.getAppDb().getDataSource();
ResultSet resultSet = null;
try {
resultSet = SqlUtils.executeQuery(dataSource, sql,
answerValue.getAnswerSpec().getQuestion().getQuery().getFullName()
+ "__attribute-plugin-combined", 5000);
while (resultSet.next()) {
Map<String, Object> pkValues = new LinkedHashMap<>();
for (String pkColumn : pkColumns) {
pkValues.put(pkColumn, resultSet.getObject(pkColumn));
}
PrimaryKeyValue pkValue = new PrimaryKeyValue(pkDef, pkValues);
Object value = resultSet.getObject(ATTRIBUTE_COLUMN);
values.put(pkValue, value);
}
} finally {
SqlUtils.closeResultSetAndStatement(resultSet, null);
}
return values;
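/**
 * @return a streaming iterator over the values of the associated attribute; each
 *         element pairs the primary key of a record instance with that record's value
 */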
protected ResultSetIterator<Tuples.TwoTuple<PrimaryKeyValue, Object>> getAttributeValueStream(AnswerValue answerValue)
throws WdkModelException, SQLException {
var pkDef = answerValue.getAnswerSpec()
.getQuestion()
.getRecordClass()
.getPrimaryKeyDefinition();

var pkColumns = pkDef.getColumnRefs();

var resultSet = SqlUtils.executeQuery(
answerValue.getWdkModel().getAppDb().getDataSource(),
getAttributeSql(answerValue),
answerValue.getAnswerSpec().getQuestion().getQuery().getFullName() + "__attribute-plugin-combined",
5000
);

return new ResultSetIterator<>(resultSet, row -> {
var pkValues = new LinkedHashMap<String, Object>(pkColumns.length);

for (var pkColumn : pkColumns)
pkValues.put(pkColumn, resultSet.getObject(pkColumn));

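// mapException wraps any checked exception from the PrimaryKeyValue constructor
// in an unchecked WdkRuntimeException so it can be constructed inside this
// row-mapping lambda.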
return Optional.of(new Tuples.TwoTuple<>(
mapException(() -> new PrimaryKeyValue(pkDef, pkValues), WdkRuntimeException::new),
resultSet.getObject(ATTRIBUTE_COLUMN)
));
});
}

}
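For orientation only (not part of the commit): the new method hands back a ResultSetIterator that owns the open ResultSet, so callers are expected to consume it in a try-with-resources block, as the WordCloudAttributeReporter change below does. A minimal hypothetical consumer inside a reporter subclass might look like the sketch here; the getFirst() accessor on Tuples.TwoTuple is assumed by symmetry with the getSecond() call this commit uses, and the throws clause is kept broad since the iterator's exact exception contract is not shown in the diff.

// Hypothetical consumer sketch, not part of this commit.
private void consumeAttributeValues(AnswerValue answerValue) throws Exception {
  try (var rows = getAttributeValueStream(answerValue)) {
    while (rows.hasNext()) {
      var row = rows.next();
      PrimaryKeyValue pk = row.getFirst();   // primary key of the record instance (assumed accessor)
      Object value = row.getSecond();        // that record's attribute value
      // process one row at a time instead of materializing the full map up front
    }
  }
}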
@@ -12,13 +12,12 @@
import org.apache.log4j.Logger;
import org.gusdb.wdk.model.WdkModelException;
import org.gusdb.wdk.model.answer.AnswerValue;
import org.gusdb.wdk.model.record.PrimaryKeyValue;
import org.gusdb.wdk.model.report.AbstractAttributeReporter;
import org.json.JSONArray;
import org.json.JSONObject;

public class WordCloudAttributeReporter extends AbstractAttributeReporter {

private static final String PROP_SPLIT_PATTERN = "split-pattern";
private static final String PROP_MIN_WORD_LENGTH = "min-word-length";
private static final String PROP_EXCLUDE_NUMBERS = "exclude-numbers";
@@ -42,32 +41,36 @@ public JSONObject getJsonResult(AnswerValue answerValue) throws WdkModelException
List<WordTag> tags = loadTags(answerValue);

JSONObject jsonResult = new JSONObject();

JSONArray jsonWordTags = new JSONArray();
for (WordTag tag : tags) {
JSONObject tagJson = new JSONObject();
tagJson.put("word", tag.getWord());
tagJson.put("count", tag.getCount());
jsonWordTags.put(tagJson);
}

jsonResult.put(ATTR_TAGS, jsonWordTags);

return jsonResult;
}

private List<WordTag> loadTags(AnswerValue answerValue) {
List<WordTag> tags = new ArrayList<>();
List<WordTag> tags;

resolveProperties();
try {
Map<String, WordTag> tagMap = new HashMap<String, WordTag>();
Map<PrimaryKeyValue, Object> values = getAttributeValues(answerValue);
for (Object value : values.values()) {
if (value == null)
var tagMap = new HashMap<String, WordTag>();

try (var valueStream = getAttributeValueStream(answerValue)) {
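// try-with-resources closes the iterator, and with it the JDBC resources opened
// by getAttributeValueStream, whether streaming completes or fails part way.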
while (valueStream.hasNext()) {
var record = valueStream.next();

if (record.getSecond() == null)
continue;
splitWords(value.toString(), tagMap);

splitWords(record.getSecond().toString(), tagMap);
}

// the tags are sorted by count
tags = processTags(tagMap);
return tags;
@@ -134,18 +137,18 @@ private void splitWords(String content, Map<String, WordTag> tags) {
WordTag tag = tags.get(word);

if (tag == null) tag = new WordTag(word, originalWord);

else {
tag.increment();

// In addition to incrementing the overall count for the word, we need to amend the
// mixedCaseCounter map either by adding a new case sensitive version of the word or
// incrementing the count for an existing case sensitive version.
Map<String, Integer> mixedCaseCounter = tag.getMixedCaseCounter();
Integer count = mixedCaseCounter.get(originalWord);
if(count == null) mixedCaseCounter.put(originalWord, 1);
else mixedCaseCounter.put(originalWord, ++count);
}
}
tags.put(word, tag);
// logger.debug("word count: '" + word + "' = " + count);
}
@@ -175,7 +178,7 @@ private List<WordTag> processTags(Map<String, WordTag> tags) {
WordTag partTag = tags.get(part);
int count = tag.getCount() + partTag.getCount();
partTag.setCount(count);

// In addition to absorbing overall plural counts for the case neutral word, we
// need to absorb plural counts for the case sensitive versions of the word as
// well.
@@ -204,14 +207,14 @@ private List<WordTag> processTags(Map<String, WordTag> tags) {
// weights and scores are easier.
Collections.sort(list);
}

return list.stream().map(tag -> {
String dominantCase = tag.getDominantCase();
tag.setWord(dominantCase);
return tag;
}).collect(Collectors.toList());
}

/**
* Mixed case plurals are stored in the mixed case counter map as plurals since no
determination about plurals can easily be made. But since the ending of the
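For reference (not part of the diff): the getJsonResult method shown earlier in this file serializes each WordTag as a {word, count} object under the ATTR_TAGS key. The snippet below builds the same shape by hand so the output format is easy to picture; the literal key name "tags" and the word/count values are illustrative assumptions, since ATTR_TAGS is defined outside the hunks shown here.

// Illustrative only; mirrors the structure built in getJsonResult.
JSONObject sample = new JSONObject()
    .put("tags", new JSONArray()
        .put(new JSONObject().put("word", "example").put("count", 12))
        .put(new JSONObject().put("word", "another").put("count", 7)));
// Serialized: {"tags":[{"word":"example","count":12},{"word":"another","count":7}]}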
