From 6075d4543f0bbced8d716d1a8ca899e45646b8de Mon Sep 17 00:00:00 2001 From: Joshua Aresty Date: Sun, 11 Aug 2024 22:22:07 -0700 Subject: [PATCH 1/6] Image handling is currently broken - When you fetch the image source it formats the messages - If you try to extract them as text at that point, images will not be processed at all - In order to process images you have to keep the messages intact from source extraction through to insertion --- GPT/gpt.py | 56 ++++++++++++++++++++++----------------------- lib/modelHelpers.py | 1 + lib/modelState.py | 5 ++++ 3 files changed, 33 insertions(+), 29 deletions(-) diff --git a/GPT/gpt.py b/GPT/gpt.py index 40b365cf..ba199494 100644 --- a/GPT/gpt.py +++ b/GPT/gpt.py @@ -56,7 +56,7 @@ def gpt_blend(source_text: str, destination_text: str) -> None: """ result = gpt_query(format_message(prompt), format_message(source_text)) - actions.user.gpt_insert_response(extract_message(result), "paste") + actions.user.gpt_insert_response(result, "paste") def gpt_blend_list(source_text: list[str], destination_text: str) -> None: """Blend all the source text as a list and send it to the destination""" @@ -138,11 +138,6 @@ def gpt_apply_prompt(prompt: str, source: str = "", destination: str = ""): text_to_process: GPTMessageItem = actions.user.gpt_get_source_text(source) - # If after creating the message there wasn't anything in the text_to_process, set it to None so - # we don't send meaningless data to the model - if text_to_process.get("text", "") == "": - text_to_process = None # type: ignore - # Handle special cases in the prompt ### Ask is a special case, where the text to process is the prompted question, not selected text if prompt.startswith("ask"): @@ -150,15 +145,15 @@ def gpt_apply_prompt(prompt: str, source: str = "", destination: str = ""): prompt = "Generate text that satisfies the question or request given in the input." 
response = gpt_query(format_message(prompt), text_to_process, destination) - extracted_text = extract_message(response) - actions.user.gpt_insert_response(extracted_text, destination) + actions.user.gpt_insert_response(response, destination) return response def gpt_pass(source: str = "", destination: str = "") -> None: """Passes a response from source to destination""" - source_text = extract_message(actions.user.gpt_get_source_text(source)) - actions.user.gpt_insert_response(source_text, destination) + actions.user.gpt_insert_response( + actions.user.gpt_get_source_text(source), destination + ) def gpt_help() -> None: """Open the GPT help file in the web browser""" @@ -192,7 +187,7 @@ def gpt_reformat_last(how_to_reformat: str) -> str: raise Exception("No text to reformat") def gpt_insert_response( - result: str, + gpt_message: GPTMessageItem, method: str = "", cursorless_destination: Any = None, ) -> None: @@ -212,72 +207,75 @@ def gpt_insert_response( # Skip inserting the response if the user is just viewing the thread in the window actions.user.confirmation_gui_refresh_thread() + message_text_no_images = extract_message(gpt_message) match method: case "above": actions.key("left") actions.edit.line_insert_up() GPTState.last_was_pasted = True - actions.user.paste(result) + actions.user.paste(message_text_no_images) case "below": actions.key("right") actions.edit.line_insert_down() GPTState.last_was_pasted = True - actions.user.paste(result) + actions.user.paste(message_text_no_images) case "clipboard": - clip.set_text(result) + clip.set_text(message_text_no_images) case "snip": - actions.user.insert_snippet(result) + actions.user.insert_snippet(message_text_no_images) case "context": - GPTState.push_context(format_message(result)) + GPTState.push_context(gpt_message) case "newContext": GPTState.clear_context() - GPTState.push_context(format_message(result)) + GPTState.push_context(gpt_message) case "thread": - GPTState.push_thread(format_messages("user", 
[format_message(result)])) + GPTState.push_thread(format_messages("user", [gpt_message])) case "newThread": GPTState.new_thread() - GPTState.push_thread(format_messages("user", [format_message(result)])) + GPTState.push_thread(format_messages("user", [gpt_message])) case "appendClipboard": if clip.text() is not None: - clip.set_text(clip.text() + "\n" + result) # type: ignore Unclear why this is throwing a type error in pylance + clip.set_text(clip.text() + "\n" + message_text_no_images) # type: ignore Unclear why this is throwing a type error in pylance else: - clip.set_text(result) + clip.set_text(message_text_no_images) case "browser": builder = Builder() builder.h1("Talon GPT Result") - for line in result.split("\n"): + for line in message_text_no_images.split("\n"): builder.p(line) builder.render() case "textToSpeech": try: - actions.user.tts(result) + actions.user.tts(message_text_no_images) except KeyError: notify("GPT Failure: text to speech is not installed") # Although we can insert to a cursorless destination, the cursorless_target capture # Greatly increases DFA compliation times and should be avoided if possible case "cursorless": - actions.user.cursorless_insert(cursorless_destination, result) + actions.user.cursorless_insert( + cursorless_destination, message_text_no_images + ) # Don't add to the window twice if the thread is enabled case "window": # If there was prior text in the confirmation GUI and the user # explicitly passed new text to the gui, clear the old result - GPTState.text_to_confirm = result - actions.user.confirmation_gui_append(result) + GPTState.text_to_confirm = message_text_no_images + actions.user.confirmation_gui_append(message_text_no_images) case "chain": GPTState.last_was_pasted = True - actions.user.paste(result) + actions.user.paste(message_text_no_images) actions.user.gpt_select_last() case "paste": GPTState.last_was_pasted = True - actions.user.paste(result) + actions.user.paste(message_text_no_images) # If the user 
doesn't specify a method assume they want to paste. # However if they didn't specify a method when the confirmation gui # is showing, assume they don't want anything to be inserted case _ if not confirmation_gui.showing: GPTState.last_was_pasted = True - actions.user.paste(result) + actions.user.paste(message_text_no_images) # Don't do anything if none of the previous conditions were valid case _: pass diff --git a/lib/modelHelpers.py b/lib/modelHelpers.py index b111ee20..efb15d8d 100644 --- a/lib/modelHelpers.py +++ b/lib/modelHelpers.py @@ -179,6 +179,7 @@ def send_request( data["tools"] = tools url: str = settings.get("user.model_endpoint") # type: ignore + print(data) raw_response = requests.post(url, headers=headers, data=json.dumps(data)) match raw_response.status_code: diff --git a/lib/modelState.py b/lib/modelState.py index cd064716..ec5fb2a7 100644 --- a/lib/modelState.py +++ b/lib/modelState.py @@ -40,6 +40,11 @@ def disable_thread(cls): @classmethod def push_context(cls, context: GPTMessageItem): """Add the selected text to the stored context""" + if context.get("type") != "text": + actions.app.notify( + "Only text can be added to context. To add images, try using a prompt to summarize or otherwise describe the image to the context." 
+ ) + return cls.context += [context] actions.app.notify("Appended user context") From 516cb61d498ddbf400c2ebcd2307393b7ed8da0a Mon Sep 17 00:00:00 2001 From: Joshua Aresty Date: Sun, 11 Aug 2024 22:38:39 -0700 Subject: [PATCH 2/6] Removed debug log --- lib/modelHelpers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/modelHelpers.py b/lib/modelHelpers.py index efb15d8d..b111ee20 100644 --- a/lib/modelHelpers.py +++ b/lib/modelHelpers.py @@ -179,7 +179,6 @@ def send_request( data["tools"] = tools url: str = settings.get("user.model_endpoint") # type: ignore - print(data) raw_response = requests.post(url, headers=headers, data=json.dumps(data)) match raw_response.status_code: From a5407dd9f6a2aaf16cdeef7216e85d7b7dea5171 Mon Sep 17 00:00:00 2001 From: Joshua Aresty Date: Mon, 12 Aug 2024 07:28:57 -0700 Subject: [PATCH 3/6] Split insertion destination case statement - since only some destinations support images, it makes sense to handle them in a separate case - this will now print an error message if someone tries to 'pass clip to window' with an image in their clipboard --- GPT/gpt.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/GPT/gpt.py b/GPT/gpt.py index ba199494..ce1fe225 100644 --- a/GPT/gpt.py +++ b/GPT/gpt.py @@ -207,6 +207,21 @@ def gpt_insert_response( # Skip inserting the response if the user is just viewing the thread in the window actions.user.confirmation_gui_refresh_thread() + match method: + case "thread": + GPTState.push_thread(format_messages("user", [gpt_message])) + return + case "newThread": + GPTState.new_thread() + GPTState.push_thread(format_messages("user", [gpt_message])) + return + + if gpt_message.get("type") != "text": + actions.app.notify( + f"Tried to insert an image to {method}, but that is not currently supported. To insert an image to this destination use a prompt to convert it to text." 
+ ) + return + message_text_no_images = extract_message(gpt_message) match method: case "above": @@ -228,11 +243,6 @@ def gpt_insert_response( case "newContext": GPTState.clear_context() GPTState.push_context(gpt_message) - case "thread": - GPTState.push_thread(format_messages("user", [gpt_message])) - case "newThread": - GPTState.new_thread() - GPTState.push_thread(format_messages("user", [gpt_message])) case "appendClipboard": if clip.text() is not None: clip.set_text(clip.text() + "\n" + message_text_no_images) # type: ignore Unclear why this is throwing a type error in pylance From d607dbfbb82456c1c6efdf7240933744e2df4542 Mon Sep 17 00:00:00 2001 From: Joshua Aresty Date: Mon, 12 Aug 2024 11:36:15 -0700 Subject: [PATCH 4/6] Skip processing except for ask when content is not provided --- GPT/gpt.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/GPT/gpt.py b/GPT/gpt.py index ce1fe225..f19fbc2f 100644 --- a/GPT/gpt.py +++ b/GPT/gpt.py @@ -143,6 +143,11 @@ def gpt_apply_prompt(prompt: str, source: str = "", destination: str = ""): if prompt.startswith("ask"): text_to_process = format_message(prompt.removeprefix("ask")) prompt = "Generate text that satisfies the question or request given in the input." 
+ elif ( + text_to_process.get("text", "") == "" + and text_to_process.get("image_url", "") == "" + ): + text_to_process = None # type: ignore response = gpt_query(format_message(prompt), text_to_process, destination) From 0d90daa72071c475f93d079467aebdb2aef284cb Mon Sep 17 00:00:00 2001 From: Joshua Aresty Date: Mon, 12 Aug 2024 11:43:14 -0700 Subject: [PATCH 5/6] Respond to pull request feedback --- GPT/gpt.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/GPT/gpt.py b/GPT/gpt.py index f19fbc2f..25e1cb2d 100644 --- a/GPT/gpt.py +++ b/GPT/gpt.py @@ -137,17 +137,17 @@ def gpt_apply_prompt(prompt: str, source: str = "", destination: str = ""): """Apply an arbitrary prompt to arbitrary text""" text_to_process: GPTMessageItem = actions.user.gpt_get_source_text(source) + if ( + text_to_process.get("text", "") == "" + and text_to_process.get("image_url", "") == "" + ): + text_to_process = None # type: ignore # Handle special cases in the prompt ### Ask is a special case, where the text to process is the prompted question, not selected text if prompt.startswith("ask"): text_to_process = format_message(prompt.removeprefix("ask")) prompt = "Generate text that satisfies the question or request given in the input." 
+ - elif ( - text_to_process.get("text", "") == "" - and text_to_process.get("image_url", "") == "" - ): - text_to_process = None # type: ignore response = gpt_query(format_message(prompt), text_to_process, destination) From b8097796e4401906f97ec4abb96034686d23e26d Mon Sep 17 00:00:00 2001 From: Colton Loftus <70598503+C-Loftus@users.noreply.github.com> Date: Mon, 12 Aug 2024 20:23:38 -0400 Subject: [PATCH 6/6] fix two bugs where the model was not updating the confirmation GUI correctly --- GPT/gpt.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/GPT/gpt.py b/GPT/gpt.py index 25e1cb2d..c32906c1 100644 --- a/GPT/gpt.py +++ b/GPT/gpt.py @@ -208,17 +208,21 @@ def gpt_insert_response( GPTState.thread_enabled and confirmation_gui.showing and not method == "window" + # If they ask for thread or newThread specifically, + # it should be pushed to the thread and not just refreshed + and not method == "thread" + and not method == "newThread" ): # Skip inserting the response if the user is just viewing the thread in the window actions.user.confirmation_gui_refresh_thread() + return match method: - case "thread": - GPTState.push_thread(format_messages("user", [gpt_message])) - return - case "newThread": - GPTState.new_thread() + case "thread" | "newThread" as t: + if t == "newThread": + GPTState.new_thread() GPTState.push_thread(format_messages("user", [gpt_message])) + actions.user.confirmation_gui_refresh_thread() return if gpt_message.get("type") != "text":