From 1370064c0c0e7b2b811e968c4e9b08e827365b95 Mon Sep 17 00:00:00 2001 From: D'Amico Massimiliano Date: Tue, 5 Dec 2023 09:20:52 +0100 Subject: [PATCH 1/9] update hooks --- hyde.py | 30 ++++++++++++++++++++++++++++-- settings.json | 12 ++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 settings.json diff --git a/hyde.py b/hyde.py index ffb38d5..4de5510 100644 --- a/hyde.py +++ b/hyde.py @@ -1,7 +1,8 @@ import json import langchain -from cat.mad_hatter.decorators import hook + from cat.log import log +from cat.mad_hatter.decorators import hook with open("cat/plugins/ccat_hyde/settings.json", "r") as json_file: @@ -21,4 +22,29 @@ def cat_recall_query(user_message, cat): hypothesis_chain = langchain.chains.LLMChain(prompt=hypothesis_prompt, llm=cat._llm) answer = hypothesis_chain(user_message) log(answer, "INFO") - return answer["text"] + + # Calculate hyde embedding + cat.working_memory["hyde_embedding"] = cat.embedder(answer["text"]) + return user_message + +#@hook(priority=0) +#def before_cat_recalls_memories(cat): +# cat.working_memory["memory_query"] = (cat.working_memory["memory_query"] + cat.working_memory["hyde_query"])/2 + +# Calculates the average between the user's message embedding and the Hyde response embedding +def _calculate_vector_average(config, cat): + user_embedding = config.embedding + hyde_embedding = cat.working_memory["hyde_embedding"] + config.embedding = (user_embedding + hyde_embedding) / 2 + +@hook(priority=0) +def before_cat_recalls_episodic_memories(config, cat): + _calculate_vector_average(config, cat) + +@hook(priority=0) +def before_cat_recalls_declarative_memories(config, cat): + _calculate_vector_average(config, cat) + +@hook(priority=0) +def before_cat_recalls_procedural_memories(config, cat): + _calculate_vector_average(config, cat) diff --git a/settings.json b/settings.json new file mode 100644 index 0000000..2a1ad2c --- /dev/null +++ b/settings.json @@ -0,0 +1,12 @@ +{ + "title": "MySettings", + "type": "object", + "properties": { + "hyde_prompt": { + "title": "HyDe prompt", + "default": "You will be given a sentence.\n If the sentence is a question, convert it to a plausible answer. If the sentence does not contain a question, \n just repeat the sentence as is without adding anything to it.\n\n Examples:\n - what furniture there is in my room? --> In my room there is a bed, a wardrobe and a desk with my computer\n - where did you go today --> today I was at school\n - I like ice cream --> I like ice cream\n - how old is Jack --> Jack is 20 years old\n\n Sentence:\n - {input} -->", + "extra": {"type": "TextArea"}, + "type": "string" + } + } +} \ No newline at end of file From 99d174c1fbc33a6dfb9119fa16eaff31b281300c Mon Sep 17 00:00:00 2001 From: D'Amico Massimiliano Date: Tue, 5 Dec 2023 09:32:02 +0100 Subject: [PATCH 2/9] update --- hyde.py | 4 +--- plugin.json | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/hyde.py b/hyde.py index 4de5510..ac5fcbd 100644 --- a/hyde.py +++ b/hyde.py @@ -27,9 +27,6 @@ def cat_recall_query(user_message, cat): cat.working_memory["hyde_embedding"] = cat.embedder(answer["text"]) return user_message -#@hook(priority=0) -#def before_cat_recalls_memories(cat): -# cat.working_memory["memory_query"] = (cat.working_memory["memory_query"] + cat.working_memory["hyde_query"])/2 # Calculates the average between the user's message embedding and the Hyde response embedding def _calculate_vector_average(config, cat): @@ -37,6 +34,7 @@ def _calculate_vector_average(config, cat): hyde_embedding = cat.working_memory["hyde_embedding"] config.embedding = (user_embedding + hyde_embedding) / 2 + @hook(priority=0) def before_cat_recalls_episodic_memories(config, cat): _calculate_vector_average(config, cat) diff --git a/plugin.json b/plugin.json index a58d1f2..ab60930 100644 --- a/plugin.json +++ b/plugin.json @@ -1,8 +1,8 @@ { "name": "Hypothetical Document Embedding", - "version": "0.0.4", + "version": "0.0.5", "description": "Official plugin of the Cheshire Cat to add the Hypothetical Document Embedding (HyDE) technique", - "author_name": "Nicola Corbellini", + "author_name": "Nicola Corbellini, Salvatore Mirlocca, Massimiliano D'Amico", "author_url": "", "plugin_url": "https://github.com/Furrmidable-Crew/ccat_hyde", "tags": "hyde, llm, cheshire-cat, embedding", From e48fa8dbe0622f0b11495d509406479c3d81cedb Mon Sep 17 00:00:00 2001 From: D'Amico Massimiliano Date: Tue, 5 Dec 2023 09:38:04 +0100 Subject: [PATCH 3/9] priority --- hyde.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hyde.py b/hyde.py index ac5fcbd..76bb9be 100644 --- a/hyde.py +++ b/hyde.py @@ -35,14 +35,14 @@ def _calculate_vector_average(config, cat): config.embedding = (user_embedding + hyde_embedding) / 2 -@hook(priority=0) +@hook(priority=1) def before_cat_recalls_episodic_memories(config, cat): _calculate_vector_average(config, cat) -@hook(priority=0) +@hook(priority=1) def before_cat_recalls_declarative_memories(config, cat): _calculate_vector_average(config, cat) -@hook(priority=0) +@hook(priority=1) def before_cat_recalls_procedural_memories(config, cat): _calculate_vector_average(config, cat) From ef40b17160a7d68f92d2212bde1cd37860f13808 Mon Sep 17 00:00:00 2001 From: D'Amico Massimiliano Date: Tue, 5 Dec 2023 16:47:53 +0100 Subject: [PATCH 4/9] update average vectors --- hyde.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/hyde.py b/hyde.py index 76bb9be..c9daf30 100644 --- a/hyde.py +++ b/hyde.py @@ -24,25 +24,26 @@ def cat_recall_query(user_message, cat): log(answer, "INFO") # Calculate hyde embedding - cat.working_memory["hyde_embedding"] = cat.embedder(answer["text"]) + cat.working_memory["hyde_embedding"] = cat.embedder.embed_query(answer["text"]) return user_message # Calculates the average between the user's message embedding and the Hyde response embedding -def _calculate_vector_average(config, cat): - user_embedding = config.embedding +def _calculate_vector_average(config: dict, cat): + user_embedding = config['embedding'] hyde_embedding = cat.working_memory["hyde_embedding"] - config.embedding = (user_embedding + hyde_embedding) / 2 + average_embedding = [(x + y)/2 for x, y in zip(user_embedding, hyde_embedding)] + config['embedding'] = average_embedding @hook(priority=1) -def before_cat_recalls_episodic_memories(config, cat): +def before_cat_recalls_episodic_memories(config: dict, cat): _calculate_vector_average(config, cat) @hook(priority=1) -def before_cat_recalls_declarative_memories(config, cat): +def before_cat_recalls_declarative_memories(config: dict, cat): _calculate_vector_average(config, cat) @hook(priority=1) -def before_cat_recalls_procedural_memories(config, cat): +def before_cat_recalls_procedural_memories(config: dict, cat): _calculate_vector_average(config, cat) From 022910a198c6ecb834de64712022c69d3db088ae Mon Sep 17 00:00:00 2001 From: D'Amico Massimiliano Date: Tue, 5 Dec 2023 17:41:18 +0100 Subject: [PATCH 5/9] update keys --- hyde.py | 44 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/hyde.py b/hyde.py index c9daf30..f359740 100644 --- a/hyde.py +++ b/hyde.py @@ -8,6 +8,10 @@ with open("cat/plugins/ccat_hyde/settings.json", "r") as json_file: settings = json.load(json_file) +# Keys +HYDE_ANSWER = "hyde_answer" +AVERAGE_EMBEDDING = "average_embedding" + @hook(priority=1) def cat_recall_query(user_message, cat): @@ -21,19 +25,45 @@ def cat_recall_query(user_message, cat): # Run a LLM chain with the user message as input hypothesis_chain = langchain.chains.LLMChain(prompt=hypothesis_prompt, llm=cat._llm) answer = hypothesis_chain(user_message) - log(answer, "INFO") - # Calculate hyde embedding - cat.working_memory["hyde_embedding"] = cat.embedder.embed_query(answer["text"]) + # Save HyDE answer in working memory + cat.working_memory[HYDE_ANSWER] = answer["text"] + + print("------------- HYDE -------------") + print(f"user message: {user_message}") + print(f"hyde answer: {answer['text']}") + return user_message # Calculates the average between the user's message embedding and the Hyde response embedding def _calculate_vector_average(config: dict, cat): - user_embedding = config['embedding'] - hyde_embedding = cat.working_memory["hyde_embedding"] - average_embedding = [(x + y)/2 for x, y in zip(user_embedding, hyde_embedding)] - config['embedding'] = average_embedding + + # If average embedding not exists & hyde answer exists .. + if AVERAGE_EMBEDDING not in cat.working_memory.keys() and HYDE_ANSWER in cat.working_memory.keys(): + + # Get user message embedding + user_embedding = config['embedding'] + + # Calculate hyde embedding from hyde answer + hyde_answer = cat.working_memory[HYDE_ANSWER] + hyde_embedding = cat.embedder.embed_query(hyde_answer) + + # Calculate average embedding and stores it into a working memory + average_embedding = [(x + y)/2 for x, y in zip(user_embedding, hyde_embedding)] + cat.working_memory[AVERAGE_EMBEDDING] = average_embedding + + #print(f"user_embedding: {user_embedding}") + #print(f"hyde_embedding: {hyde_embedding}") + #print(f"average_embedding: {average_embedding}") + + # Delete Hyde Answer from working memory + del cat.working_memory[HYDE_ANSWER] + + # If average embedding exists, set the embedding + if AVERAGE_EMBEDDING in cat.working_memory.keys(): + average_embedding = cat.working_memory[AVERAGE_EMBEDDING] + config['embedding'] = average_embedding @hook(priority=1) From fbf3ff4952051c6edc4b2e6dc1e1d5614d765c98 Mon Sep 17 00:00:00 2001 From: D'Amico Massimiliano Date: Thu, 7 Dec 2023 09:56:03 +0100 Subject: [PATCH 6/9] fix bug --- hyde.py | 68 +++++++++++++++++++++++++++++++++++++-------------- settings.json | 12 --------- settings.py | 26 -------------------- 3 files changed, 50 insertions(+), 56 deletions(-) delete mode 100644 settings.json delete mode 100644 settings.py diff --git a/hyde.py b/hyde.py index f359740..c1224d0 100644 --- a/hyde.py +++ b/hyde.py @@ -1,12 +1,33 @@ -import json -import langchain - +from langchain.prompts import PromptTemplate +from langchain.chains import LLMChain +from cat.mad_hatter.decorators import plugin +from pydantic import BaseModel, Field from cat.log import log from cat.mad_hatter.decorators import hook -with open("cat/plugins/ccat_hyde/settings.json", "r") as json_file: - settings = json.load(json_file) +class MySettings(BaseModel): + hyde_prompt: str = Field( + title="HyDe prompt", + default="""You will be given a sentence. + If the sentence is a question, convert it to a plausible answer. If the sentence does not contain a question, + just repeat the sentence as is without adding anything to it. + + Examples: + - what furniture there is in my room? --> In my room there is a bed, a wardrobe and a desk with my computer + - where did you go today --> today I was at school + - I like ice cream --> I like ice cream + - how old is Jack --> Jack is 20 years old + + Sentence: + - {input} -->""", + extra={"type": "TextArea"} + ) + +@plugin +def settings_schema(): + return MySettings.schema() + # Keys HYDE_ANSWER = "hyde_answer" @@ -16,22 +37,28 @@ @hook(priority=1) def cat_recall_query(user_message, cat): + #This method not work yet, but in the future I hope it will work + settings = cat.mad_hatter.get_plugin().load_settings() + + log.debug(f" --------- ACQUIRE SETTINGS ---------") + log.debug(f"settings: {settings}") + # Make a prompt from template - hypothesis_prompt = langchain.PromptTemplate( + hypothesis_prompt = PromptTemplate( input_variables=["input"], template=settings["hyde_prompt"] ) # Run a LLM chain with the user message as input - hypothesis_chain = langchain.chains.LLMChain(prompt=hypothesis_prompt, llm=cat._llm) + hypothesis_chain = LLMChain(prompt=hypothesis_prompt, llm=cat._llm) answer = hypothesis_chain(user_message) # Save HyDE answer in working memory cat.working_memory[HYDE_ANSWER] = answer["text"] - print("------------- HYDE -------------") - print(f"user message: {user_message}") - print(f"hyde answer: {answer['text']}") + log.warning("------------- HYDE -------------") + log.warning(f"user message: {user_message}") + log.warning(f"hyde answer: {answer['text']}") return user_message @@ -39,10 +66,10 @@ def cat_recall_query(user_message, cat): # Calculates the average between the user's message embedding and the Hyde response embedding def _calculate_vector_average(config: dict, cat): - # If average embedding not exists & hyde answer exists .. - if AVERAGE_EMBEDDING not in cat.working_memory.keys() and HYDE_ANSWER in cat.working_memory.keys(): + # If hyde answer exists, calculate and set average embedding + if HYDE_ANSWER in cat.working_memory.keys(): - # Get user message embedding + # Get user message embedding user_embedding = config['embedding'] # Calculate hyde embedding from hyde answer @@ -53,18 +80,23 @@ def _calculate_vector_average(config: dict, cat): average_embedding = [(x + y)/2 for x, y in zip(user_embedding, hyde_embedding)] cat.working_memory[AVERAGE_EMBEDDING] = average_embedding - #print(f"user_embedding: {user_embedding}") - #print(f"hyde_embedding: {hyde_embedding}") - #print(f"average_embedding: {average_embedding}") + log.debug(f" --------- CALCULATE AVERAGE ---------") + log.debug(f"hyde answer: {hyde_answer}") + log.debug(f"user_embedding: {user_embedding}") + log.debug(f"hyde_embedding: {hyde_embedding}") + log.debug(f"average_embedding: {average_embedding}") # Delete Hyde Answer from working memory del cat.working_memory[HYDE_ANSWER] - + # If average embedding exists, set the embedding if AVERAGE_EMBEDDING in cat.working_memory.keys(): average_embedding = cat.working_memory[AVERAGE_EMBEDDING] config['embedding'] = average_embedding - + + log.debug(f" --------- SET EMBEDDING ---------") + log.debug(f"average_embedding: {average_embedding}") + @hook(priority=1) def before_cat_recalls_episodic_memories(config: dict, cat): diff --git a/settings.json b/settings.json deleted file mode 100644 index 2a1ad2c..0000000 --- a/settings.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "title": "MySettings", - "type": "object", - "properties": { - "hyde_prompt": { - "title": "HyDe prompt", - "default": "You will be given a sentence.\n If the sentence is a question, convert it to a plausible answer. If the sentence does not contain a question, \n just repeat the sentence as is without adding anything to it.\n\n Examples:\n - what furniture there is in my room? --> In my room there is a bed, a wardrobe and a desk with my computer\n - where did you go today --> today I was at school\n - I like ice cream --> I like ice cream\n - how old is Jack --> Jack is 20 years old\n\n Sentence:\n - {input} -->", - "extra": {"type": "TextArea"}, - "type": "string" - } - } -} \ No newline at end of file diff --git a/settings.py b/settings.py deleted file mode 100644 index 0801cd9..0000000 --- a/settings.py +++ /dev/null @@ -1,26 +0,0 @@ -from cat.mad_hatter.decorators import plugin -from pydantic import BaseModel, Field - - -class MySettings(BaseModel): - hyde_prompt: str = Field( - title="HyDe prompt", - default="""You will be given a sentence. - If the sentence is a question, convert it to a plausible answer. If the sentence does not contain a question, - just repeat the sentence as is without adding anything to it. - - Examples: - - what furniture there is in my room? --> In my room there is a bed, a wardrobe and a desk with my computer - - where did you go today --> today I was at school - - I like ice cream --> I like ice cream - - how old is Jack --> Jack is 20 years old - - Sentence: - - {input} -->""", - extra={"type": "TextArea"} - ) - - -@plugin -def settings_schema(): - return MySettings.schema() From c70c443e0adb5b664aa5539f087b9f66fb45f06e Mon Sep 17 00:00:00 2001 From: D'Amico Massimiliano Date: Thu, 7 Dec 2023 09:56:54 +0100 Subject: [PATCH 7/9] fix bug --- hyde.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hyde.py b/hyde.py index c1224d0..b5c5679 100644 --- a/hyde.py +++ b/hyde.py @@ -37,9 +37,8 @@ def settings_schema(): @hook(priority=1) def cat_recall_query(user_message, cat): - #This method not work yet, but in the future I hope it will work + # Acquire settings settings = cat.mad_hatter.get_plugin().load_settings() - log.debug(f" --------- ACQUIRE SETTINGS ---------") log.debug(f"settings: {settings}") From fe5aae8c5666af3d693ddb3548aaf0b8b818b97d Mon Sep 17 00:00:00 2001 From: Nicola Date: Thu, 7 Dec 2023 22:13:00 +0100 Subject: [PATCH 8/9] restore settings.py --- settings.json | 3 +++ settings.py | 0 2 files changed, 3 insertions(+) create mode 100644 settings.json create mode 100644 settings.py diff --git a/settings.json b/settings.json new file mode 100644 index 0000000..a8c0fa8 --- /dev/null +++ b/settings.json @@ -0,0 +1,3 @@ +{ + "hyde_prompt": "You will be given a sentence.\n If the sentence is a question, convert it to a plausible answer. If the sentence does not contain a question, \n just repeat the sentence as is without adding anything to it.\n\n Examples:\n - what furniture there is in my room? --> In my room there is a bed, a wardrobe and a desk with my computer\n - where did you go today --> today I was at school\n - I like ice cream --> I like ice cream\n - how old is Jack --> Jack is 20 years old\n\n Sentence:\n - {input} -->" +} \ No newline at end of file diff --git a/settings.py b/settings.py new file mode 100644 index 0000000..e69de29 From fe2a03a4ed603d76e921a98e3d3b322fb4298c57 Mon Sep 17 00:00:00 2001 From: Nicola Date: Thu, 7 Dec 2023 22:13:33 +0100 Subject: [PATCH 9/9] restore settings.py --- hyde.py | 36 ++++++------------------------------ settings.json | 2 +- settings.py | 25 +++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 31 deletions(-) diff --git a/hyde.py b/hyde.py index b5c5679..de0aa50 100644 --- a/hyde.py +++ b/hyde.py @@ -1,34 +1,10 @@ -from langchain.prompts import PromptTemplate from langchain.chains import LLMChain -from cat.mad_hatter.decorators import plugin -from pydantic import BaseModel, Field +from langchain.prompts import PromptTemplate + from cat.log import log from cat.mad_hatter.decorators import hook -class MySettings(BaseModel): - hyde_prompt: str = Field( - title="HyDe prompt", - default="""You will be given a sentence. - If the sentence is a question, convert it to a plausible answer. If the sentence does not contain a question, - just repeat the sentence as is without adding anything to it. - - Examples: - - what furniture there is in my room? --> In my room there is a bed, a wardrobe and a desk with my computer - - where did you go today --> today I was at school - - I like ice cream --> I like ice cream - - how old is Jack --> Jack is 20 years old - - Sentence: - - {input} -->""", - extra={"type": "TextArea"} - ) - -@plugin -def settings_schema(): - return MySettings.schema() - - # Keys HYDE_ANSWER = "hyde_answer" AVERAGE_EMBEDDING = "average_embedding" @@ -38,7 +14,7 @@ def settings_schema(): def cat_recall_query(user_message, cat): # Acquire settings - settings = cat.mad_hatter.get_plugin().load_settings() + settings = cat.mad_hatter.get_plugin().load_settings() log.debug(f" --------- ACQUIRE SETTINGS ---------") log.debug(f"settings: {settings}") @@ -55,9 +31,9 @@ def cat_recall_query(user_message, cat): # Save HyDE answer in working memory cat.working_memory[HYDE_ANSWER] = answer["text"] - log.warning("------------- HYDE -------------") - log.warning(f"user message: {user_message}") - log.warning(f"hyde answer: {answer['text']}") + log.debug("------------- HYDE -------------") + log.debug(f"user message: {user_message}") + log.debug(f"hyde answer: {answer['text']}") return user_message diff --git a/settings.json b/settings.json index a8c0fa8..4a80184 100644 --- a/settings.json +++ b/settings.json @@ -1,3 +1,3 @@ { - "hyde_prompt": "You will be given a sentence.\n If the sentence is a question, convert it to a plausible answer. If the sentence does not contain a question, \n just repeat the sentence as is without adding anything to it.\n\n Examples:\n - what furniture there is in my room? --> In my room there is a bed, a wardrobe and a desk with my computer\n - where did you go today --> today I was at school\n - I like ice cream --> I like ice cream\n - how old is Jack --> Jack is 20 years old\n\n Sentence:\n - {input} -->" + "hyde_prompt": "You will be given a sentence.\n If the sentence is a question, convert it to a plausible answer. If the sentence does not contain a question, \n just repeat the sentence as is without adding anything to it.\n\n Examples:\n - what furniture there is in my room? --> In my room there is a bed, a wardrobe and a desk with my computer\n - where did you go today --> today I was at school\n - I like ice cream --> I like ice cream\n - how old is Jack --> Jack is 20 years old\n\n Sentence:\n - {input} -->" } \ No newline at end of file diff --git a/settings.py b/settings.py index e69de29..cc73548 100644 --- a/settings.py +++ b/settings.py @@ -0,0 +1,25 @@ +from pydantic import BaseModel, Field +from cat.mad_hatter.decorators import plugin + + +class MySettings(BaseModel): + hyde_prompt: str = Field( + title="HyDe prompt", + default="""You will be given a sentence. + If the sentence is a question, convert it to a plausible answer. If the sentence does not contain a question, + just repeat the sentence as is without adding anything to it. + + Examples: + - what furniture there is in my room? --> In my room there is a bed, a wardrobe and a desk with my computer + - where did you go today --> today I was at school + - I like ice cream --> I like ice cream + - how old is Jack --> Jack is 20 years old + + Sentence: + - {input} -->""", + extra={"type": "TextArea"} + ) + +@plugin +def settings_schema(): + return MySettings.schema()