Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SingleGeneAiExpressionReporter #255

Open
wants to merge 43 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
8d16493
WIP
bobular Feb 12, 2025
f85f2a0
it compiles - at least
bobular Feb 13, 2025
089511c
all wired together and compiles
bobular Feb 13, 2025
5752cc4
extra comment
bobular Feb 13, 2025
4ce3f5f
one more comment
bobular Feb 13, 2025
eb203df
move CacheMode into separate file
bobular Feb 17, 2025
9ce38a4
Merge branch 'master' into ai-expression
ryanrdoherty Feb 18, 2025
dfab346
WIP cache wiring
bobular Feb 18, 2025
813d2f1
more cache wrangling
bobular Feb 18, 2025
5c323c3
a few changes for error handling and to set up caching
ryanrdoherty Feb 18, 2025
e53e86e
Merge branch 'ai-expression' into ai-expression-rrd
ryanrdoherty Feb 18, 2025
0e6b945
Massage class roles
ryanrdoherty Feb 20, 2025
135881e
Merge branch 'master' into ai-expression
ryanrdoherty Feb 20, 2025
8e7b3aa
Merge branch 'ai-expression' into ai-expression-rrd
ryanrdoherty Feb 20, 2025
86dad07
Checkpoint commit; finished up AiExpressionCache and just need to tri…
ryanrdoherty Feb 21, 2025
be6a61a
Clean cache logic out of summarizer
ryanrdoherty Feb 21, 2025
afeb850
Merge branch 'master' into ai-expression
ryanrdoherty Feb 22, 2025
13d47e6
Merge branch 'ai-expression' into ai-expression-rrd
ryanrdoherty Feb 22, 2025
32a7351
make cache validation digest symmetrical for both levels of AI query
bobular Feb 22, 2025
44b65aa
improved code formatting of JSONSchema definitions
bobular Feb 22, 2025
4bcb86f
Incorporate AI chat model string into result digests; they will becom…
ryanrdoherty Feb 24, 2025
be79018
Reparallelize experiment lookups
ryanrdoherty Feb 24, 2025
18f0870
increase timeout and use ChatModel.toString() to fix exception
bobular Feb 24, 2025
a579969
getGeneId() fix
bobular Feb 24, 2025
6693be5
bugfix
bobular Feb 24, 2025
4f52d37
reworked summary prompt to avoid generalities and for clarity
bobular Feb 25, 2025
6a5a078
prompt for structured summary paragraph
bobular Feb 25, 2025
d60cb84
Merge branch 'master' into ai-expression
ryanrdoherty Feb 25, 2025
7dd234a
Merge branch 'ai-expression' into ai-expression-rrd
ryanrdoherty Feb 25, 2025
f2e7845
Merge pull request #256 from VEuPathDB/ai-expression-rrd
ryanrdoherty Feb 25, 2025
d216343
Merge branch 'master' into ai-expression
ryanrdoherty Feb 28, 2025
f35a43e
Remove openai version (now in base pom)
ryanrdoherty Feb 28, 2025
6dd5380
pretty print JSON sent to the model
bobular Feb 28, 2025
6607b0d
sections renamed to topics
bobular Feb 28, 2025
3d3625e
add assay_type and experiment_name to first phase outputs to aid seco…
bobular Feb 28, 2025
e85ad1f
sort second level inputs and add DATA_MODEL_VERSION for better cache …
bobular Feb 28, 2025
dada1a5
increase concurrency and fix bugs
bobular Feb 28, 2025
5c20b32
apply experiment summary reporting in proper place
bobular Feb 28, 2025
5bd7344
Other topic section wording improved
bobular Feb 28, 2025
ac11caf
banish empty topics
bobular Feb 28, 2025
27fa586
preserve sort order during consolidation step
bobular Feb 28, 2025
b378524
Merge pull request #261 from VEuPathDB/ai-expression-topics
bobular Mar 2, 2025
70d42d5
Merge remote-tracking branch 'origin/master' into ai-expression
bobular Mar 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Model/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,12 @@
<groupId>io.vulpine.lib</groupId>
<artifactId>Jackfish</artifactId>
</dependency>

<dependency>
<groupId>com.openai</groupId>
<artifactId>openai-java</artifactId>
</dependency>

</dependencies>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package org.apidb.apicommon.model.report.ai;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apidb.apicommon.model.TranscriptUtil;
import org.apidb.apicommon.model.report.ai.expression.AiExpressionCache;
import org.apidb.apicommon.model.report.ai.expression.GeneRecordProcessor;
import org.apidb.apicommon.model.report.ai.expression.GeneRecordProcessor.GeneSummaryInputs;
import org.apidb.apicommon.model.report.ai.expression.Summarizer;
import org.gusdb.wdk.model.WdkModelException;
import org.gusdb.wdk.model.answer.stream.RecordStream;
import org.gusdb.wdk.model.answer.stream.RecordStreamFactory;
import org.gusdb.wdk.model.record.RecordClass;
import org.gusdb.wdk.model.record.RecordInstance;
import org.gusdb.wdk.model.record.TableField;
import org.gusdb.wdk.model.report.AbstractReporter;
import org.gusdb.wdk.model.report.Reporter;
import org.gusdb.wdk.model.report.ReporterConfigException;
import org.json.JSONException;
import org.json.JSONObject;

/**
 * Reporter that writes a JSON object mapping gene IDs to AI-generated
 * expression summaries.
 *
 * <p>The reporter may only be assigned to the gene record class and refuses
 * results larger than {@link #MAX_RESULT_SIZE}. Summaries are obtained through
 * {@link AiExpressionCache}; when the {@code populateIfNotPresent} config
 * property is {@code true} the cache is asked to populate a summary (using the
 * {@link Summarizer}), otherwise the cache is only read.
 */
public class SingleGeneAiExpressionReporter extends AbstractReporter {

  private static final int MAX_RESULT_SIZE = 1; // one gene at a time for now

  private static final String POPULATION_MODE_PROP_KEY = "populateIfNotPresent";

  private boolean _populateIfNotPresent;

  /**
   * Reads the reporter configuration and validates that this reporter is being
   * applied to a gene answer of acceptable size.
   *
   * @param config reporter configuration; the optional boolean property
   *        {@code populateIfNotPresent} defaults to {@code false}
   * @return this reporter
   * @throws ReporterConfigException if the result is larger than
   *         {@link #MAX_RESULT_SIZE} or the configuration cannot be interpreted
   * @throws WdkModelException if the answer's record class is not the gene
   *         record class
   */
  @Override
  public Reporter configure(JSONObject config) throws ReporterConfigException, WdkModelException {
    try {
      // assign population mode (absent or non-boolean values fall back to false)
      _populateIfNotPresent = config.optBoolean(POPULATION_MODE_PROP_KEY, false);

      // check model config; this should only be assigned to genes
      RecordClass geneRecordClass = TranscriptUtil.getGeneRecordClass(_wdkModel);
      if (_baseAnswer.getQuestion().getRecordClass() != geneRecordClass) {
        throw new WdkModelException(SingleGeneAiExpressionReporter.class.getName() +
            " should only be assigned to " + geneRecordClass.getFullName());
      }

      // check result size; limit to small results due to OpenAI cost
      if (_baseAnswer.getResultSizeFactory().getResultSize() > MAX_RESULT_SIZE) {
        throw new ReporterConfigException(
            "This reporter cannot be called with results of size greater than " + MAX_RESULT_SIZE);
      }
    }
    catch (JSONException | IllegalArgumentException e) {
      // Use opt() rather than get(): get() throws JSONException when the key is
      // absent, which would replace the original failure with an unrelated one.
      throw new ReporterConfigException("Invalid value for '" + POPULATION_MODE_PROP_KEY + "': " +
          config.opt(POPULATION_MODE_PROP_KEY), e);
    }
    return this;
  }

  /**
   * Streams the answer's records and writes a single JSON object whose keys are
   * gene IDs and whose values are the corresponding expression summaries.
   *
   * @param out stream the JSON is written to, encoded as UTF-8
   * @throws IOException if writing to {@code out} fails
   * @throws WdkModelException if a required table is not defined on the record
   *         class, or if record streaming or summary retrieval fails
   */
  @Override
  protected void write(OutputStream out) throws IOException, WdkModelException {

    // get table fields needed to produce summary inputs
    Map<String, TableField> tableFields = _baseAnswer.getQuestion().getRecordClass().getTableFieldMap();
    List<TableField> tables = GeneRecordProcessor.REQUIRED_TABLE_NAMES.stream()
        .map(tableFields::get)
        .collect(Collectors.toList());

    // fail fast with a clear message instead of passing null table fields along
    if (tables.contains(null)) {
      throw new WdkModelException("Record class is missing one or more tables required by " +
          SingleGeneAiExpressionReporter.class.getName() + ": " + GeneRecordProcessor.REQUIRED_TABLE_NAMES);
    }

    // open summary cache (manages persistence of expression data)
    AiExpressionCache cache = AiExpressionCache.getInstance(_wdkModel);

    // create summarizer (interacts with OpenAI)
    Summarizer summarizer = new Summarizer(_wdkModel);

    // open record and output streams; JSON is always emitted as UTF-8 rather
    // than whatever the platform default charset happens to be
    try (RecordStream recordStream = RecordStreamFactory.getRecordStream(_baseAnswer, List.of(), tables);
         BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8))) {

      // write a JSON object with gene ID keys and expression summary values
      writer.write("{");
      boolean firstRecord = true;
      for (RecordInstance record : recordStream) {

        // create summary inputs
        GeneSummaryInputs summaryInputs =
            GeneRecordProcessor.getSummaryInputsFromRecord(record, Summarizer.OPENAI_CHAT_MODEL.toString(),
                Summarizer::getExperimentMessage, Summarizer::getFinalSummaryMessage);

        // fetch summary, producing if necessary and requested
        JSONObject expressionSummary = _populateIfNotPresent
            ? cache.populateSummary(summaryInputs, summarizer::describeExperiment, summarizer::summarizeExperiments)
            : cache.readSummary(summaryInputs);

        // join entries with commas
        if (firstRecord) {
          firstRecord = false;
        }
        else {
          writer.write(",");
        }

        // write JSON object property, keyed by gene ID; quote() escapes any
        // characters that would otherwise break the JSON key
        writer.write(JSONObject.quote(String.valueOf(summaryInputs.getGeneId())) + ":" +
            expressionSummary.toString());
      }
      writer.write("}");
    }
  }
}
Loading