diff --git a/pom.xml b/pom.xml index 42bc3aca55..d85d0a2e1b 100644 --- a/pom.xml +++ b/pom.xml @@ -1,102 +1,102 @@ - - - 4.0.0 - org.myrobotlab - mrl - 0.0.1-SNAPSHOT - MyRobotLab - Open Source Creative Machine Control - - - false - - - - 1.1. - - ${maven.build.timestamp} - yyyyMMddHHmm - ${timestamp} - ${version.prefix}${build.number} - ${git.branch} - ${NODE_NAME} - ${NODE_LABELS} - - - - 11 - 11 - UTF-8 - - - + + + 4.0.0 + org.myrobotlab + mrl + 0.0.1-SNAPSHOT + MyRobotLab + Open Source Creative Machine Control + + + false + + + + 1.1. + + ${maven.build.timestamp} + yyyyMMddHHmm + ${timestamp} + ${version.prefix}${build.number} + ${git.branch} + ${NODE_NAME} + ${NODE_LABELS} + + + + 11 + 11 + UTF-8 + + + @@ -135,9 +135,9 @@ https://m2.dv8tion.net/releases - - - + + + javazoom @@ -343,6 +343,12 @@ + + + + + + pl.allegro.tech @@ -1185,6 +1191,10 @@ 0.0.8.10 provided + + org.apache.lucene + * + ch.qos.logback logback-classic @@ -1399,6 +1409,18 @@ + + org.apache.solr + solr-scripting + 9.0.0 + provided + + + com.google.guava + * + + + org.apache.solr solr-test-framework @@ -1452,6 +1474,24 @@ + + com.robrua.nlp + easy-bert + 1.0.3 + provided + + + com.robrua.nlp.models + easy-bert-uncased-L-12-H-768-A-12 + 1.0.0 + provided + + + org.tensorflow + tensorflow + 1.15.0 + provided + @@ -1470,12 +1510,7 @@ - - org.tensorflow - tensorflow - 1.8.0 - provided - + @@ -1658,377 +1693,377 @@ - - - org.mockito - mockito-core - 3.12.4 - test - - - - - - - false - src/main/resources - - - false - src/main/java - - ** - - - **/*.java - - - - - - false - src/test/resources - - - false - src/test/java - - ** - - - **/*.java - - - - - - - - - org.codehaus.mojo - properties-maven-plugin - 1.0.0 - - - org.apache.maven.plugins - maven-enforcer-plugin - 3.1.0 - - - - - - - - org.apache.maven.plugins - maven-enforcer-plugin - - - no-duplicate-declared-dependencies - - enforce - - - - - - - - - - - - org.codehaus.mojo - properties-maven-plugin - - - initialize - - read-project-properties - 
- - - build.properties - - - - - - - - - - org.apache.maven.plugins - maven-shade-plugin - 3.1.0 - - - package - - shade - - - myrobotlab - - true - myrobotlab-full - false - - - - - org.myrobotlab.service.Runtime - ${version} - ${version} - - ${build.number} - ${maven.build.timestamp} - ${agent.name} - ${user.name} - - - ${git.tags} - ${git.branch} - ${git.dirty} - ${git.remote.origin.url} - ${git.commit.id} - ${git.commit.id.abbrev} - ${git.commit.id.full} - ${git.commit.id.describe} - ${git.commit.id.describe-short} - ${git.commit.user.name} - ${git.commit.user.email} - - ${git.commit.time} - ${git.closest.tag.name} - ${git.closest.tag.commit.count} - ${git.build.user.name} - ${git.build.user.email} - ${git.build.time} - ${git.build.version} - - - - - - - *:* - - module-info.class - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - org.apache.maven.plugins - maven-assembly-plugin - - - assembly.xml - - myrobotlab - false - - - - trigger-assembly - package - - single - - - - - - - true - org.apache.maven.plugins - maven-compiler-plugin - 2.3.2 - - 11 - 11 - true - true - -parameters - - - - - org.apache.maven.plugins - maven-resources-plugin - 2.4.3 - - - - pl.project13.maven - git-commit-id-plugin - 4.9.10 - - - initialize - get-the-git-infos - - revision - - - - - ${project.basedir}/.git - git - false - true - ${project.build.outputDirectory}/git.properties - - - false - false - -dirty - - - - - - maven-surefire-plugin - org.apache.maven.plugins - 2.22.2 - - -Djava.library.path=libraries/native -Djna.library.path=libraries/native ${argLine} - - **/*Test.java - - - **/integration/* - - - - - - - - - org.apache.maven.plugins - maven-clean-plugin - 2.3 - - - - data/.myrobotlab - false - - - libraries - - ** - - false - - - data - - ** - - - - resource - - ** - - - - src/main/resources/resource/framework - - **/serviceData.json - - false - - - - - - - - - - - - org.apache.maven.plugins - maven-surefire-report-plugin - 2.22.2 - - - 
org.apache.maven.plugins - maven-javadoc-plugin - 3.0.1 - - - - - myrobotlab - http://myrobotlab.org - - - github - https://github.com/MyRobotLab/myrobotlab/issues - - + + + org.mockito + mockito-core + 3.12.4 + test + + + + + + + false + src/main/resources + + + false + src/main/java + + ** + + + **/*.java + + + + + + false + src/test/resources + + + false + src/test/java + + ** + + + **/*.java + + + + + + + + + org.codehaus.mojo + properties-maven-plugin + 1.0.0 + + + org.apache.maven.plugins + maven-enforcer-plugin + 3.1.0 + + + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + no-duplicate-declared-dependencies + + enforce + + + + + + + + + + + + org.codehaus.mojo + properties-maven-plugin + + + initialize + + read-project-properties + + + + build.properties + + + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.1.0 + + + package + + shade + + + myrobotlab + + true + myrobotlab-full + false + + + + + org.myrobotlab.service.Runtime + ${version} + ${version} + + ${build.number} + ${maven.build.timestamp} + ${agent.name} + ${user.name} + + + ${git.tags} + ${git.branch} + ${git.dirty} + ${git.remote.origin.url} + ${git.commit.id} + ${git.commit.id.abbrev} + ${git.commit.id.full} + ${git.commit.id.describe} + ${git.commit.id.describe-short} + ${git.commit.user.name} + ${git.commit.user.email} + + ${git.commit.time} + ${git.closest.tag.name} + ${git.closest.tag.commit.count} + ${git.build.user.name} + ${git.build.user.email} + ${git.build.time} + ${git.build.version} + + + + + + + *:* + + module-info.class + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + assembly.xml + + myrobotlab + false + + + + trigger-assembly + package + + single + + + + + + + true + org.apache.maven.plugins + maven-compiler-plugin + 2.3.2 + + 11 + 11 + true + true + -parameters + + + + + org.apache.maven.plugins + maven-resources-plugin + 2.4.3 + + + + pl.project13.maven + 
git-commit-id-plugin + 4.9.10 + + + initialize + get-the-git-infos + + revision + + + + + ${project.basedir}/.git + git + false + true + ${project.build.outputDirectory}/git.properties + + + false + false + -dirty + + + + + + maven-surefire-plugin + org.apache.maven.plugins + 2.22.2 + + -Djava.library.path=libraries/native -Djna.library.path=libraries/native ${argLine} + + **/*Test.java + + + **/integration/* + + + + + + + + + org.apache.maven.plugins + maven-clean-plugin + 2.3 + + + + data/.myrobotlab + false + + + libraries + + ** + + false + + + data + + ** + + + + resource + + ** + + + + src/main/resources/resource/framework + + **/serviceData.json + + false + + + + + + + + + + + + org.apache.maven.plugins + maven-surefire-report-plugin + 2.22.2 + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.0.1 + + + + + myrobotlab + http://myrobotlab.org + + + github + https://github.com/MyRobotLab/myrobotlab/issues + + diff --git a/src/main/java/org/myrobotlab/document/Document.java b/src/main/java/org/myrobotlab/document/Document.java index fbdf020c8d..7b76a80c5e 100644 --- a/src/main/java/org/myrobotlab/document/Document.java +++ b/src/main/java/org/myrobotlab/document/Document.java @@ -17,7 +17,7 @@ public class Document { private String id; - private HashMap> data; + private final HashMap> data; private ProcessingStatus status; public Document(String id) { @@ -27,11 +27,7 @@ public Document(String id) { } public ArrayList getField(String fieldName) { - if (data.containsKey(fieldName)) { - return data.get(fieldName); - } else { - return null; - } + return data.getOrDefault(fieldName, null); } public void setField(String fieldName, ArrayList value) { @@ -151,9 +147,7 @@ public boolean equals(Object obj) { return false; } else if (!id.equals(other.id)) return false; - if (status != other.status) - return false; - return true; + return status == other.status; } @Override diff --git a/src/main/java/org/myrobotlab/service/EasyBert.java 
b/src/main/java/org/myrobotlab/service/EasyBert.java new file mode 100644 index 0000000000..9f56068f20 --- /dev/null +++ b/src/main/java/org/myrobotlab/service/EasyBert.java @@ -0,0 +1,53 @@ +package org.myrobotlab.service; + +import com.google.common.primitives.Floats; +import com.robrua.nlp.bert.Bert; +import org.myrobotlab.framework.Service; +import org.myrobotlab.service.interfaces.TextEmbeddingGenerator; + +import java.io.File; +import java.util.List; + +public class EasyBert extends Service implements TextEmbeddingGenerator { + + private Bert bert; + public final String DEFAULT_BERT_MODEL = "com/robrua/nlp/easy-bert/bert-uncased-L-12-H-768-A-12"; + + + /** + * Constructor of service, reservedkey typically is a services name and inId + * will be its process id + * + * @param reservedKey the service name + * @param inId process id + */ + public EasyBert(String reservedKey, String inId) { + super(reservedKey, inId); + bert = Bert.load(DEFAULT_BERT_MODEL); + } + + @Override + public List generateEmbeddings(String words) { + List embeddings = Floats.asList(bert.embedSequence(words)); + invoke("publishEmbeddings", embeddings); + return embeddings; + } + + @Override + public List publishEmbeddings(List embeddings) { + return embeddings; + } + + @Override + public void onText(String text) throws Exception { + generateEmbeddings(text); + } + + public void setBertModel(String resource) { + bert = Bert.load(resource); + } + + public void setBertModel(File model) { + bert = Bert.load(model); + } +} diff --git a/src/main/java/org/myrobotlab/service/Solr.java b/src/main/java/org/myrobotlab/service/Solr.java index fe3932c5fb..b244f1725e 100644 --- a/src/main/java/org/myrobotlab/service/Solr.java +++ b/src/main/java/org/myrobotlab/service/Solr.java @@ -18,6 +18,8 @@ import javax.imageio.ImageIO; +import com.google.common.primitives.Floats; +import com.robrua.nlp.bert.Bert; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang3.StringUtils; import 
org.apache.solr.client.solrj.SolrClient; @@ -52,6 +54,8 @@ import org.myrobotlab.opencv.OpenCVData; import org.myrobotlab.opencv.YoloDetectedObject; import org.myrobotlab.programab.Response; +import org.myrobotlab.service.data.ChatMessage; +import org.myrobotlab.service.interfaces.ChatMessageVectorStore; import org.myrobotlab.service.interfaces.DocumentListener; import org.myrobotlab.service.interfaces.SpeechRecognizer; import org.myrobotlab.service.interfaces.TextListener; @@ -71,7 +75,7 @@ * @author kwatters * */ -public class Solr extends Service implements DocumentListener, TextListener, MessageListener { +public class Solr extends Service implements DocumentListener, TextListener, MessageListener, ChatMessageVectorStore { private static final String CORE_NAME = "core1"; public final static Logger log = LoggerFactory.getLogger(Solr.class); @@ -92,6 +96,15 @@ public class Solr extends Service implements DocumentListener, TextListener, Mes public int yoloPersonTrainingCount = 0; public String yoloPersonLabel = null; + /** + * The maximum number of memories that can be recalled + * at once via {@link ChatMessageVectorStore#recallMemories(List)}. + */ + private int maxNumRecalledMemories = 3; + + private final String CONVERSATION_DOC_ID_TEMPLATE = "conversation_%d"; + private final Bert bert = Bert.load("com/robrua/nlp/easy-bert/bert-uncased-L-12-H-768-A-12"); + public Solr(String n, String id) { super(n, id); } @@ -140,6 +153,9 @@ public void startEmbedded(String path) throws SolrServerException, IOException { // File.separator + "solr.xml"); // load up the solr core container and start solr +// System.setProperty("solr.modules", "scripting"); +// System.setProperty("solr.install.dir", "."); + // FIXME - a bit unsatisfactory File f = new File(getDataInstanceDir()); f.mkdirs(); @@ -247,6 +263,7 @@ public void deleteDocument(String docId) { /** * Returns a document given the doc id from the index if it exists otherwise + * null. 
* * @param docId * - the doc id @@ -985,9 +1002,17 @@ public static void main(String[] args) { try { Solr solr = (Solr) Runtime.start("solr", "Solr"); solr.startEmbedded(); + solr.deleteEmbeddedIndex(); + // WebGui webgui = (WebGui)Runtime.start("webgui", "WebGui"); // Create a test document SolrInputDocument doc = new SolrInputDocument(); + solr.memorize(new ChatMessage("AP", "I have a cat named Sunny", 1234)); + solr.memorize(new ChatMessage(ChatMessage.AI, "Hello AP, I am Hugo. How may I help you?", 1234)); + solr.memorize(new ChatMessage("AP", "Write a Python program for me.", 1234)); + solr.memorize(new ChatMessage(ChatMessage.AI, "Certainly, here is a hello world program:\n```python\nprint(\"hello world\")\n```", 1234)); + + System.out.println(solr.recallMemories(new ChatMessage("AP", "What is my cat's name?", 1234))); /* * doc.setField("id", "Doc1"); doc.setField("title", "My title"); * doc.setField("content", @@ -996,38 +1021,94 @@ public static void main(String[] args) { * index solr.commit(); */ - doc = new SolrInputDocument(); - doc.setField("id", "Doc3"); - doc.setField("title", "My title 3"); - doc.setField("content", "This is the text field, for a sample document in myrobotlab. 
2 "); - doc.setField("annoyance", 1); - // add the document to the index - solr.addDocument(doc); - // commit the index - solr.commit(); - +// // Loading a BERT model that is stored in one of our Maven dependencies +// try (Bert bert = Bert.load("com/robrua/nlp/easy-bert/bert-uncased-L-12-H-768-A-12")) { +// String sentence = "Hello, my name is AP."; +//// solr.deleteDocument("doc1"); +//// solr.deleteDocument("doc2"); +// doc.addField("id", "doc1"); +// doc.addField("text_field", sentence); +// // I don't know what I should be doing here, I need a dense vector field but can't figure out the type +// doc.addField("test_vector", Floats.asList(bert.embedSequence(sentence))); +// +// +//// solr.addDocument(doc); +// +// for (int i = 0; i < 1; i++) { +// SolrInputDocument doc2 = new SolrInputDocument(); +// doc2.addField("id", "doc2" + i); +// doc2.addField("text_field", "Make a Python program: " + i * i); +// doc2.addField("test_vector", Floats.asList(bert.embedSequence("Make a Python program: " + i * i))); +//// solr.deleteDocument("doc2" + i); +//// solr.addDocument(doc2); +// +// } +// +// solr.commit(); +// SolrQuery query = new SolrQuery(); +// float[] embeddings = bert.embedSequence("What is my name."); +// +// query.setQuery("*:*"); +// query.setParam("q", "{!knn f=test_vector topK=3}" + Arrays.toString(embeddings)); +// query.setParam("fl", "*,score"); +// +// +// String vector = IntStream.range(0, embeddings.length) +// .mapToObj(i -> String.valueOf(embeddings[i])) +// .collect(Collectors.joining(",")); +//// query.setParam("vector", String.join(",", vector)); +// QueryResponse response = solr.search(query); +// SolrDocumentList results = response.getResults(); +// for (SolrDocument docResult : results) { +// String textData = ((ArrayList) docResult.getFieldValue("text_field")).toString(); +// System.out.println(docResult.getFieldValue("score")); +// System.out.println(textData); +// } + +// } // search for the word myrobotlab - String queryString = 
"content:myrobotlab"; - QueryResponse resp = solr.search(queryString); - for (int i = 0; i < resp.getResults().size(); i++) { - System.out.println("---------------------------------"); - System.out.println("-- Printing Result number :" + i); - // grab a document out of the result set. - SolrDocument d = resp.getResults().get(i); - // iterate over the fields on the returned document - for (String fieldName : d.getFieldNames()) { - - System.out.print(fieldName + "\t"); - // fields can be multi-valued - for (Object value : d.getFieldValues(fieldName)) { - System.out.print(value); - System.out.print("\t"); - } - System.out.println(""); - } - } - System.out.println("---------------------------------"); - System.out.println("Done."); +// String queryString = "content:myrobotlab"; +// QueryResponse resp = solr.search(queryString); +// for (int i = 0; i < resp.getResults().size(); i++) { +// System.out.println("---------------------------------"); +// System.out.println("-- Printing Result number :" + i); +// // grab a document out of the result set. +// SolrDocument d = resp.getResults().get(i); +// // iterate over the fields on the returned document +// for (String fieldName : d.getFieldNames()) { + +// doc = new SolrInputDocument(); +// doc.setField("id", "Doc3"); +// doc.setField("title", "My title 3"); +// doc.setField("content", "This is the text field, for a sample document in myrobotlab. 2 "); +// doc.setField("annoyance", 1); +// // add the document to the index +// solr.addDocument(doc); +// // commit the index +// solr.commit(); +// +// // search for the word myrobotlab +// String queryString = "myrobotlab"; +// QueryResponse resp = solr.search(queryString); +// for (int i = 0; i < resp.getResults().size(); i++) { +// System.out.println("---------------------------------"); +// System.out.println("-- Printing Result number :" + i); +// // grab a document out of the result set. 
+// SolrDocument d = resp.getResults().get(i); +// // iterate over the fields on the returned document +// for (String fieldName : d.getFieldNames()) { +// +// System.out.print(fieldName + "\t"); +// // fields can be multi-valued +// for (Object value : d.getFieldValues(fieldName)) { +// System.out.print(value); +// System.out.print("\t"); +// } +// System.out.println(""); +// } +// } +// System.out.println("---------------------------------"); +// System.out.println("Done."); } catch (Exception e) { Logging.logError(e); @@ -1099,4 +1180,129 @@ public void releaseService() { super.releaseService(); } + + public void memorize(ChatMessage memory) { + memorize(memory, Floats.asList(bert.embedSequence(memory.message))); + } + + /** + * Commit a piece of the conversation to memory. + * Once memorized, the memory can be recalled if a request + * has high enough similarity to the memory. + * + * @param memory The turn to be remembered. + * @param embeddings + */ + @Override + public void memorize(ChatMessage memory, List embeddings) { + SolrInputDocument memoryDoc = new SolrInputDocument(); + + memoryDoc.setField("id", String.format(CONVERSATION_DOC_ID_TEMPLATE, memory.conversationId) + memory.message.hashCode()); + memoryDoc.setField("text_field", memory.message); + memoryDoc.setField("speaker_field", memory.speaker); + memoryDoc.setField("conversation_id", memory.conversationId); + memoryDoc.setField("vector", embeddings); + addDocument(memoryDoc); + + + SolrInputDocument newConversationDoc = new SolrInputDocument(); + newConversationDoc.addField("id", String.format(CONVERSATION_DOC_ID_TEMPLATE, memory.conversationId)); + newConversationDoc.addChildDocument(memoryDoc); +// addDocument(newConversationDoc); + commit(); + } + + + public List recallMemories(ChatMessage request) { + + + float[] embeddings = bert.embedSequence(request.message); + return recallMemories(Floats.asList(embeddings)); + + + } + + /** + * Recall a number of memorized conversation turns + * that have 
similarity to the request. The maximum number + * of memories recalled is set via {@link #setMaxNumMemoriesRecalled(int)}. + * This usually corresponds to the {@code top_k} parameter in vector stores. + * + * @param embeddings The embedding vector of the request; the most similar memories are returned. + */ + @Override + public List recallMemories(List embeddings) { + SolrQuery query = new SolrQuery(); + query.setQuery("*:*"); + query.setParam("q", "{!knn f=vector topK=" + maxNumRecalledMemories + "}" + embeddings.toString()); // topK follows the configured recall limit instead of a fixed 3 + query.setParam("fl", "*,score"); + QueryResponse response = search(query); + List turns = new ArrayList<>(); + for (SolrDocument result : response.getResults()) { + System.out.println("Score: " + result.getFieldValue("score")); + turns.add( + new ChatMessage( + ((ArrayList) result.getFieldValue("speaker_field")).get(0), + ((ArrayList) result.getFieldValue("text_field")).get(0), + Long.parseLong(((ArrayList) result.getFieldValue("conversation_id")).get(0)) + ) + ); + } + return turns; + } + + /** + * Upon recalling memories, they are published through this method. + * Services that are interested in recalled memories should subscribe to this method. + * + * @param memories The memories that have been recalled. + * @return The recalled memories. + */ + @Override + public List publishMemories(List memories) { + return memories; + } + + /** + * Sets the maximum number of memories to be recalled + * via {@link ChatMessageVectorStore#recallMemories(List)}. + * + * @param number The maximum number of memories that can be recalled at once + */ + @Override + public void setMaxNumMemoriesRecalled(int number) { + maxNumRecalledMemories = number; + } + + /** + * Gets the maximum number of memories to be recalled + * via {@link ChatMessageVectorStore#recallMemories(List)}. + * + * @return The maximum number of memories that can be recalled at once. 
+ */ + @Override + public int getMaxNumMemoriesRecalled() { + return maxNumRecalledMemories; + } + + @Override + public int getEmbeddingDimensions() { + return 768; // matches the H-768 size of the bert-uncased-L-12-H-768-A-12 model this service loads (was mistyped as 786) + } + + @Override + public void setEmbeddingDimensions(int dimensions) { + throw new UnsupportedOperationException( + "Cannot change embedding dimensions with Solr, manually modify the schema instead." + ); + } + + @Override + public void clearStore() { + try { + deleteEmbeddedIndex(); + } catch (SolrServerException | IOException e) { + error("Caught exception while trying to delete embedded index.", e); + } + } } diff --git a/src/main/java/org/myrobotlab/service/data/ChatMessage.java b/src/main/java/org/myrobotlab/service/data/ChatMessage.java new file mode 100644 index 0000000000..3c08e8e662 --- /dev/null +++ b/src/main/java/org/myrobotlab/service/data/ChatMessage.java @@ -0,0 +1,96 @@ +package org.myrobotlab.service.data; + +import java.util.Objects; + +/** + * Represents one "turn" of a conversation. + * In a conversation the participants take turns + * speaking, while one is speaking the others + * should be listening. In most conversations, + * a participant can speak for as long as they like, + * but in all cases respondents must know who was speaking. + * Thus, this class contains the name of the speaker + * and what they said during their turn. + * Since one cannot unsay what has been said, + * this class is immutable. It is meant as a data object + * to pass records of the conversation around. + * + * @author AutonomicPerfectionist + */ +public class ChatMessage { + + /** + * When an AI / Chatbot is speaking during + * the conversation, its speakerName is the value + * of this constant. This allows a chatbot to be + * renamed without forgetting everything. + */ + public static final String AI = "AI"; + + /** + * The person who was speaking during this turn. + * If a chatbot was speaking, then this field should have + * the value of {@link #AI}. 
+ */ + public final String speaker; + + /** + * What the {@link #speaker} said during + * their turn. + */ + public final String message; + + /** + * The ID of the conversation this message was a part of. + * This ID can be generated through a number of ways, a simple + * way would be to add the hashcodes of the participants' names. + */ + public final long conversationId; + + public ChatMessage(String speaker, String message, long conversationId) { + Objects.requireNonNull(speaker, "Speaker may not be null"); + Objects.requireNonNull(message, "Turn contents may not be null"); + this.speaker = speaker; + this.message = message; + this.conversationId = conversationId; + } + + public String getSpeaker() { + return speaker; + } + + public String getMessage() { + return message; + } + + public long getConversationId() { + return conversationId; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + ChatMessage that = (ChatMessage) o; + + if (!speaker.equals(that.speaker)) return false; + return message.equals(that.message); + } + + @Override + public int hashCode() { + int result = speaker.hashCode(); + result = 31 * result + message.hashCode(); + return result; + } + + @Override + public String toString() { + return "ChatMessage{" + + "speaker='" + speaker + '\'' + + ", turnContents='" + message + '\'' + + ", conversationId=" + conversationId + + '}'; + } +} diff --git a/src/main/java/org/myrobotlab/service/interfaces/ChatMessageVectorStore.java b/src/main/java/org/myrobotlab/service/interfaces/ChatMessageVectorStore.java new file mode 100644 index 0000000000..ff8ee48be4 --- /dev/null +++ b/src/main/java/org/myrobotlab/service/interfaces/ChatMessageVectorStore.java @@ -0,0 +1,67 @@ +package org.myrobotlab.service.interfaces; + + +import org.myrobotlab.service.data.ChatMessage; + +import java.util.List; + +/** + * Provides a form of memory for chatbots. 
+ * The idea is to store information in an + * index of some kind during the conversation, + * and then recall specific information using + * the current input request. Usually + * this would be implemented through a vector store. + * + * @author AutonomicPerfectionist + */ +public interface ChatMessageVectorStore { + + /** + * Commit a piece of the conversation to memory. + * Once memorized, the memory can be recalled if a request + * has high enough similarity to the memory. + * + * @param memory The turn to be remembered. + * @param embeddings + */ + void memorize(ChatMessage memory, List embeddings); + + /** + * Recall a number of memorized conversation turns + * that have similarity to the request. The maximum number + * of memories recalled is set via {@link #setMaxNumMemoriesRecalled(int)}. + * This usually corresponds to the {@code top_k} parameter in vector stores. + * + * @param embeddings@return Recalled memories + */ + List recallMemories(List embeddings); + + /** + * Upon recalling memories, they are published through this method. + * Services that are interested in recalled memories should subscribe to this method. + * @param memories The memories that have been recalled. + * @return The recalled memories. + */ + List publishMemories(List memories); + + /** + * Sets the maximum number of memories to be recalled + * via {@link #recallMemories(List)}. + * @param number The maximum number of memories that can be recalled at once + */ + void setMaxNumMemoriesRecalled(int number); + + /** + * Gets the maximum number of memories to be recalled + * via {@link #recallMemories(List)}. + * @return The maximum number of memories that can be recalled at once. 
+ */ + int getMaxNumMemoriesRecalled(); + + int getEmbeddingDimensions(); + + void setEmbeddingDimensions(int dimensions); + + void clearStore(); +} diff --git a/src/main/java/org/myrobotlab/service/interfaces/TextEmbeddingGenerator.java b/src/main/java/org/myrobotlab/service/interfaces/TextEmbeddingGenerator.java new file mode 100644 index 0000000000..1fca923232 --- /dev/null +++ b/src/main/java/org/myrobotlab/service/interfaces/TextEmbeddingGenerator.java @@ -0,0 +1,9 @@ +package org.myrobotlab.service.interfaces; + +import java.util.List; + +public interface TextEmbeddingGenerator extends TextListener { + + List generateEmbeddings(String words); + List publishEmbeddings(List embeddings); +} diff --git a/src/main/java/org/myrobotlab/service/meta/EasyBertMeta.java b/src/main/java/org/myrobotlab/service/meta/EasyBertMeta.java new file mode 100644 index 0000000000..7805b5199b --- /dev/null +++ b/src/main/java/org/myrobotlab/service/meta/EasyBertMeta.java @@ -0,0 +1,28 @@ +package org.myrobotlab.service.meta; + +import org.myrobotlab.logging.LoggerFactory; +import org.myrobotlab.service.meta.abstracts.MetaData; +import org.slf4j.Logger; + +public class EasyBertMeta extends MetaData { + private static final long serialVersionUID = 1L; + public final static Logger log = LoggerFactory.getLogger(EasyBertMeta.class); + + /** + * This class contains all the meta data details of a service: its peers, + * dependencies, and all other meta data related to the service. 
+ */ + public EasyBertMeta() { + + addDescription("EasyBert service - Java BERT sentence embeddings."); + addCategory("search"); + + addDependency("com.robrua.nlp", "easy-bert", "1.0.3"); + addDependency("com.robrua.nlp.models", "easy-bert-uncased-L-12-H-768-A-12", "1.0.0"); + addDependency("org.tensorflow", "tensorflow", "1.15.0"); + + setAvailable(true); + + } + +} diff --git a/src/main/java/org/myrobotlab/service/meta/SolrMeta.java b/src/main/java/org/myrobotlab/service/meta/SolrMeta.java index 0c572a9f1f..151d0da4f2 100644 --- a/src/main/java/org/myrobotlab/service/meta/SolrMeta.java +++ b/src/main/java/org/myrobotlab/service/meta/SolrMeta.java @@ -19,12 +19,17 @@ public SolrMeta() { String solrVersion = "9.2.0"; String luceneVersion = "9.4.2"; addDependency("org.apache.lucene", "lucene-core", luceneVersion); + addDependency("org.apache.lucene", "lucene-codecs", luceneVersion); addDependency("org.apache.solr", "solr-core", solrVersion); exclude("log4j", "*"); exclude("org.apache.logging.log4j", "*"); exclude("com.fasterxml.jackson.core", "*"); exclude("io.netty", "*"); // prevent it from bringing in an old version of netty + // Some parts of Solr 8 were factored out into modules it seems + addDependency("org.apache.solr", "solr-scripting", solrVersion); + exclude("com.google.guava", "*"); + addDependency("org.apache.solr", "solr-test-framework", solrVersion); exclude("log4j", "*"); exclude("org.apache.logging.log4j", "*"); @@ -45,6 +50,11 @@ public SolrMeta() { // force correct version of netty addDependency("io.netty", "netty-all", "4.1.82.Final"); + // BERT embeddings. 
Could be moved to diff service + addDependency("com.robrua.nlp", "easy-bert", "1.0.3"); + addDependency("com.robrua.nlp.models", "easy-bert-uncased-L-12-H-768-A-12", "1.0.0"); + addDependency("org.tensorflow", "tensorflow", "1.15.0"); + // Dependencies issue setAvailable(true); diff --git a/src/main/java/org/myrobotlab/service/meta/TensorflowMeta.java b/src/main/java/org/myrobotlab/service/meta/TensorflowMeta.java index 9f8e43c10a..75dc0e9ebf 100644 --- a/src/main/java/org/myrobotlab/service/meta/TensorflowMeta.java +++ b/src/main/java/org/myrobotlab/service/meta/TensorflowMeta.java @@ -22,10 +22,12 @@ public TensorflowMeta() { addCategory("ai"); // TODO: what happens when you try to install this on an ARM processor like // RasPI or the Jetson TX2 ? - addDependency("org.tensorflow", "tensorflow", "1.8.0"); + // Needed to update because conflicts with BERT. + // FIXME our POM generation is still putting two artifacts with same ID but diff version in pom + addDependency("org.tensorflow", "tensorflow", "1.15.0"); // enable GPU support ? - boolean gpu = Boolean.valueOf(System.getProperty("gpu.enabled", "false")); + boolean gpu = Boolean.parseBoolean(System.getProperty("gpu.enabled", "false")); if (gpu) { // Currently only supported on Linux. 64 bit. 
addDependency("org.tensorflow", "libtensorflow", "1.8.0"); diff --git a/src/main/resources/resource/Solr/core1/conf/managed-schema.xml b/src/main/resources/resource/Solr/core1/conf/managed-schema.xml new file mode 100644 index 0000000000..553e762613 --- /dev/null +++ b/src/main/resources/resource/Solr/core1/conf/managed-schema.xml @@ -0,0 +1,499 @@ + + + + iddiff --git a/src/main/resources/resource/Solr/core1/conf/solrconfig.xml b/src/main/resources/resource/Solr/core1/conf/solrconfig.xml index c5f7134dbc..dc298ea84f 100755 --- a/src/main/resources/resource/Solr/core1/conf/solrconfig.xml +++ b/src/main/resources/resource/Solr/core1/conf/solrconfig.xml @@ -21,6 +21,11 @@ this file, see https://solr.apache.org/guide/solr/latest/configuration-guide/configuring-solrconfig-xml.html. --> + + true + managed-schema.xml + +