Commit f2ac96b: Improving prompts - Summarization and Internet
yorevs committed Mar 14, 2024 (1 parent: 25fe495)
Showing 17 changed files with 132 additions and 60 deletions.
3 changes: 3 additions & 0 deletions dependencies.hspd
@@ -25,6 +25,9 @@ package: langchain-community
package: unstructured
package: python-magic-bin
package: chromadb
package: tiktoken
package: aiohttp
package: html2text
package: PyAudio
package: SpeechRecognition
package: openai-whisper
5 changes: 2 additions & 3 deletions src/main/askai/core/askai.py
@@ -43,7 +43,6 @@
from askai.core.processor.ai_processor import AIProcessor
from askai.core.processor.generic_processor import GenericProcessor
from askai.core.processor.internet_processor import InternetProcessor
from askai.core.processor.output_processor import OutputProcessor
from askai.core.processor.processor_proxy import proxy
from askai.core.processor.summary_processor import SummaryProcessor
from askai.core.support.object_mapper import object_mapper
@@ -244,14 +243,14 @@ def _process_response(self, proxy_response: QueryResponse) -> bool:
# Intrinsic features
if not proxy_response.intelligible:
self.reply_error(msg.intelligible(proxy_response.question))
return False
return True
elif proxy_response.terminating:
log.info("User wants to terminate the conversation.")
return False
elif proxy_response.require_internet:
log.info("Internet is required to fulfill the request.")
processor = AIProcessor.get_by_name(InternetProcessor.__name__)
processor.bind(AIProcessor.get_by_query_type(proxy_response.query_type))
processor.bind(AIProcessor.get_by_name(GenericProcessor.__name__))
elif proxy_response.require_summarization:
log.info("Summarization is required to fulfill the request.")
processor = AIProcessor.get_by_name(SummaryProcessor.__name__)
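Note on the routing change above: when internet is required, the InternetProcessor is now chained to the GenericProcessor instead of the query-type processor. A minimal sketch of this bind-and-forward pattern (illustrative names only, not the project's actual classes):

from typing import Optional

class Proc:
    """Hypothetical processor that may forward its output to a bound follow-up."""

    def __init__(self, name: str) -> None:
        self.name = name
        self._next: Optional["Proc"] = None

    def bind(self, nxt: "Proc") -> None:
        self._next = nxt  # follow-up processor, mirroring AIProcessor.bind

    def process(self, text: str) -> str:
        out = f"[{self.name}] {text}"
        # Forward to the bound processor, if any.
        return self._next.process(out) if self._next else out

internet, generic = Proc("internet"), Proc("generic")
internet.bind(generic)  # internet results get a generic post-processing pass
print(internet.process("recent news"))  # [generic] [internet] recent news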
6 changes: 5 additions & 1 deletion src/main/askai/core/askai_messages.py
@@ -75,6 +75,10 @@ def summarizing(self, path: str) -> str:
def cmd_no_output(self) -> str:
return self.translate(f"The command didn't return an output !")

@lru_cache
def search_empty(self) -> str:
return self.translate(f"The google research didn't return an output !")

@lru_cache
def access_grant(self) -> str:
return self.translate(f"'AskAI' requires access to your files, folders and apps. Continue (yes/[no])?")
@@ -103,7 +107,7 @@ def cmd_failed(self, cmd_line: str) -> str:

@lru_cache
def intelligible(self, question: str) -> str:
return self.translate(f"Your question '{question}' is not clear, please rephrase !")
return self.translate(f"Your question '{question}' is not clear, please reformulate !")

@lru_cache
def llm_error(self, error: str) -> str:
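The new search_empty message follows the same cached-translation pattern as its siblings. A self-contained sketch of that pattern (translate() here is a stand-in identity, not the project's real translator):

from functools import lru_cache

class Messages:
    def translate(self, text: str) -> str:
        return text  # placeholder for the real i18n translation step

    @lru_cache
    def search_empty(self) -> str:
        # Cached so repeated calls don't re-translate the same string.
        return self.translate("The Google search didn't return any results !")

msg = Messages()
assert msg.search_empty() is msg.search_empty()  # same cached object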
46 changes: 27 additions & 19 deletions src/main/askai/core/component/internet_service.py
@@ -13,17 +13,21 @@
Copyright·(c)·2024,·HSPyLib
"""
import logging as log
import os
from functools import lru_cache
from typing import Optional
from typing import Optional, List

from hspylib.core.metaclass.singleton import Singleton
from langchain.chains import load_summarize_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders.async_html import AsyncHtmlLoader
from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import Tool
from langchain_text_splitters import CharacterTextSplitter

from askai.core.askai_events import AskAiEvents
from askai.core.askai_messages import msg
from askai.core.support.langchain_support import lc_llm
from askai.core.support.langchain_support import lc_llm, load_document


class InternetService(metaclass=Singleton):
@@ -33,37 +37,41 @@ class InternetService(metaclass=Singleton):

ASKAI_INTERNET_DATA_KEY = "askai-internet-data"

@staticmethod
def scrap_sites(*sites: str) -> Optional[str]:
"""TODO"""
log.info("Scrapping sites: '%s'", str(sites))
docs: List[Document] = load_document(AsyncHtmlLoader, *sites)
chain = load_summarize_chain(lc_llm.create_chat_model(), chain_type="stuff")
search_results = chain.invoke(docs)
return search_results['output_text']

def __init__(self):
self._google = GoogleSearchAPIWrapper()
self._tool = Tool(
name="google_search",
description="Search Google for recent results.",
func=self._google.run)

@lru_cache
def search_google(self, query: str, *sites: str) -> Optional[str]:
"""Search the web using google search API.
:param query: The google search query string.
:param sites: The sites you want google to search for.
"""
search_results: str = ''
AskAiEvents.ASKAI_BUS.events.reply.emit(message=msg.searching())
log.info("Searching GOOGLE for '%s' url: '%s'", query, str(sites))
if sites:
if len(sites) > 0:
log.info("Searching GOOGLE for '%s' url: '%s'", query, str(sites))
search_results: str = ''
for url in sites:
search_results += str(self._tool.run(f"{query} site: {url}"))
else:
search_results += str(self._tool.run(f"{query}"))
log.debug(f"Internet search output: %s", search_results)
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
docs: List[Document] = [Document(page_content=x) for x in text_splitter.split_text(search_results)]
prompt = ChatPromptTemplate.from_messages([("system", "{query}\n\n{context}")])
chain = create_stuff_documents_chain(lc_llm.create_chat_model(), prompt)
search_results = chain.invoke({"query": query, "context": docs})
return search_results

return search_results
return None


assert (internet := InternetService().INSTANCE) is not None


if __name__ == '__main__':
q = 'What is the weather like in Belo Horizonte now'
embeddings = lc_llm.create_embeddings()
c = internet.search_google(q)
eq = embeddings.embed_query(q)
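For reference, the reworked search_google flow splits the raw Google output into chunks and "stuffs" them into a single prompt. A standalone sketch of that summarization step, assuming an OpenAI key in the environment (ChatOpenAI stands in for lc_llm.create_chat_model()):

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_text_splitters import CharacterTextSplitter

def refine_search(query: str, raw_results: str) -> str:
    # Chunk the raw search output, mirroring the diff's sizes.
    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    docs = [Document(page_content=chunk) for chunk in splitter.split_text(raw_results)]
    # "Stuff" every chunk into one prompt next to the query.
    prompt = ChatPromptTemplate.from_messages([("system", "{query}\n\n{context}")])
    chain = create_stuff_documents_chain(ChatOpenAI(temperature=0.0), prompt)
    return chain.invoke({"query": query, "context": docs})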
20 changes: 16 additions & 4 deletions src/main/askai/core/engine/ai_engine.py
@@ -12,6 +12,8 @@
Copyright·(c)·2024,·HSPyLib
"""
from langchain_core.language_models import BaseLLM, BaseChatModel

from askai.core.model.ai_model import AIModel
from askai.core.model.ai_reply import AIReply
from typing import Any, List, Optional, Protocol
@@ -20,8 +22,18 @@
class AIEngine(Protocol):
"""Provide an interface for AI engines."""

def lc_model(self, temperature: float, top_p: float) -> Any:
"""Create a LangChain AI model instance."""
def lc_model(self, temperature: float, top_p: float) -> BaseLLM:
"""Create a LangChain AI model instance.
:param temperature: The model engine temperature.
:param top_p: The model engine top_p.
"""
...

def lc_chat_model(self, temperature: float = 0.0) -> BaseChatModel:
"""Create a LangChain OpenAI llm chat model instance.
:param temperature: The model engine temperature.
"""
...

def lc_embeddings(self) -> Any:
"""Create a LangChain AI embeddings instance."""
@@ -50,8 +62,8 @@ def models(self) -> List[AIModel]:
def ask(self, chat_context: List[dict], temperature: float = 0.8, top_p: float = 0.0) -> AIReply:
"""Ask AI assistance for the given question and expect a response.
:param chat_context: The chat history or context.
:param temperature: TODO
:param top_p: TODO
:param temperature: The model engine temperature.
:param top_p: The model engine top_p.
"""
...

13 changes: 10 additions & 3 deletions src/main/askai/core/engine/openai/openai_engine.py
@@ -16,11 +16,13 @@
import logging as log
import os
from threading import Thread
from typing import Any, List, Optional
from typing import List, Optional

import langchain_openai
import pause
from hspylib.core.preconditions import check_not_none
from langchain_core.embeddings import Embeddings
from langchain_core.language_models import BaseLLM, BaseChatModel
from openai import APIError, OpenAI

from askai.core.component.audio_player import AudioPlayer
@@ -52,12 +54,17 @@ def url(self) -> str:
def client(self) -> OpenAI:
return self._client

def lc_model(self, temperature: float = 0.8, top_p: float = 0.0) -> Any:
def lc_model(self, temperature: float = 0.0, top_p: float = 0.0) -> BaseLLM:
"""Create a LangChain OpenAI llm model instance."""
return langchain_openai.OpenAI(
openai_api_key=self._api_key, temperature=temperature, top_p=top_p)

def lc_embeddings(self) -> Any:
def lc_chat_model(self, temperature: float = 0.0) -> BaseChatModel:
"""Create a LangChain OpenAI llm chat model instance."""
return langchain_openai.ChatOpenAI(
openai_api_key=self._api_key, temperature=temperature)

def lc_embeddings(self) -> Embeddings:
"""Create a LangChain AI embeddings instance."""
return langchain_openai.OpenAIEmbeddings()

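A hedged usage sketch of the two factory methods above (engine construction and API-key wiring assumed to be already in place):

from askai.core.support.shared_instances import shared

engine = shared.engine                             # assumes the engine was created at startup
llm = engine.lc_model(temperature=0.0, top_p=0.0)  # completion-style model
chat = engine.lc_chat_model(temperature=0.0)       # chat-style model
print(llm.invoke("Translate 'hello' to Portuguese"))
print(chat.invoke("Translate 'hello' to Portuguese").content)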
9 changes: 5 additions & 4 deletions src/main/askai/core/model/chat_context.py
@@ -30,7 +30,7 @@ class ChatContext:

def __init__(self, token_limit: int):
self._context = defaultdict(list)
self._token_limit: int = token_limit #* 1024 # The limit is given in KB
self._token_limit: int = token_limit * 1024 # The limit is given in KB

def __str__(self):
return os.linesep.join(f"'{k}': '{v}'" for k, v in self._context.items())
@@ -79,8 +79,9 @@ def join(self, *keys: str) -> ContextRaw:
context.extend(self.get(key))
return context

def clear(self, key: str) -> int:
def clear(self, *keys: str) -> int:
"""Clear the all the chat context specified by key."""
if self._context[key]:
del self._context[key]
for key in keys:
    if key in self._context:  # membership test avoids creating an empty defaultdict entry
        del self._context[key]
return len(self._context)
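Usage sketch for the now-variadic clear() (assuming ChatContext's set(key, content, role) signature seen elsewhere in this commit):

ctx = ChatContext(token_limit=8)                  # 8 KB budget, per the comment above
ctx.set("INTERNET", "search summary", "assistant")
ctx.set("SUMMARY", "document summary", "assistant")
remaining = ctx.clear("INTERNET", "SUMMARY")      # both keys dropped in one call
assert remaining == 0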
2 changes: 1 addition & 1 deletion src/main/askai/core/model/summary_result.py
@@ -26,4 +26,4 @@ class SummaryResult:
answer: str = None

def __str__(self):
return f"Summarization: {json.dumps(self.__dict__, default=lambda obj: obj.__dict__)}"
return f"Summarization results: {json.dumps(self.__dict__, default=lambda obj: obj.__dict__)}"
1 change: 1 addition & 0 deletions src/main/askai/core/processor/generic_processor.py
@@ -53,6 +53,7 @@ def process(self, query_response: QueryResponse) -> Tuple[bool, Optional[str]]:
cache.save_reply(query_response.question, output)
cache.save_query_history()
status = True
shared.context.clear("INTERNET", "SUMMARY")
else:
output = msg.llm_error(response.message)

2 changes: 2 additions & 0 deletions src/main/askai/core/processor/internet_processor.py
@@ -55,6 +55,8 @@ def process(self, query_response: QueryResponse) -> Tuple[bool, Optional[str]]:
shared.context.set("INTERNET", output, "assistant")
cache.save_reply(query_response.question, output)
status = True
else:
output = msg.search_empty()
else:
output = msg.llm_error(response.message)
else:
21 changes: 19 additions & 2 deletions src/main/askai/core/support/langchain_support.py
@@ -1,13 +1,20 @@
from typing import Any, Dict, List
from functools import lru_cache
from typing import Any, Dict, List, Type

from hspylib.core.metaclass.singleton import Singleton
from hspylib.core.preconditions import check_not_none
from langchain_core.documents import Document
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

from askai.core.model.chat_context import ChatContext
from askai.core.support.shared_instances import shared


def load_document(loader_type: Type, url: str | List[str]) -> List[Document]:
"""TODO"""
return loader_type(url).load()


class LangChainSupport(metaclass=Singleton):
"""TODO"""

@@ -16,18 +23,28 @@ class LangChainSupport(metaclass=Singleton):
LANGCHAIN_ROLE_MAP: Dict = {"user": HumanMessage, "system": SystemMessage, "assistant": AIMessage}

@staticmethod
def create_model(temperature: float = 0.8, top_p: float = 0.0) -> Any:
@lru_cache
def create_model(temperature: float = 0.0, top_p: float = 0.0) -> Any:
"""TODO"""
check_not_none(shared.engine, "AI Engine was not created yet!")
return shared.engine.lc_model(temperature, top_p)

@staticmethod
@lru_cache
def create_chat_model(temperature: float = 0.0) -> Any:
"""TODO"""
check_not_none(shared.engine, "AI Engine was not created yet!")
return shared.engine.lc_chat_model(temperature)

@staticmethod
@lru_cache
def create_embeddings() -> Any:
"""TODO"""
check_not_none(shared.engine, "AI Engine was not created yet!")
return shared.engine.lc_embeddings()

@classmethod
@lru_cache
def get_context(cls, key: str) -> List:
"""TODO"""
context: List[ChatContext.ContextEntry] = shared.context[key]
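Illustrative use of the new load_document helper (network access assumed; the URL is a placeholder):

from langchain_community.document_loaders.async_html import AsyncHtmlLoader

docs = load_document(AsyncHtmlLoader, ["https://example.com"])
print(docs[0].page_content[:200])  # first 200 chars of the fetched HTML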
42 changes: 30 additions & 12 deletions src/main/askai/core/support/utilities.py
@@ -12,22 +12,23 @@
Copyright·(c)·2024,·HSPyLib
"""
from askai.core.support.presets import Presets
from askai.language.language import Language
import hashlib
import os
import re
from os.path import basename, dirname
from pathlib import Path
from typing import Any, Optional, Tuple

import pause
from clitt.core.term.cursor import Cursor
from hspylib.core.enums.charset import Charset
from hspylib.core.preconditions import check_argument
from hspylib.core.tools.commons import file_is_not_empty, sysout
from hspylib.core.tools.text_tools import ensure_endswith
from hspylib.modules.cli.vt100.vt_color import VtColor
from os.path import basename, dirname
from pathlib import Path
from typing import Any, Optional, Tuple

import hashlib
import os
import pause
import re
from askai.core.support.presets import Presets
from askai.language.language import Language

ASKAI_CHAT_ICONS = {
"": "%RED%",
@@ -45,8 +46,12 @@ def beautify(text: Any) -> str:
:param text: The text to be beautified.
"""
# fmt: off
re_url = r'(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})'
re_url = (
r'(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|'
r'www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))'
r'[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})')
text = str(text)
text = re.sub(r"\n{2,}", '\n\n', text)
text = re.sub(r"[Hh]ints?( and tips)?[-:\s][ \n\t]*", f"{ASKAI_CHAT_ICONS['']}{''} Tips: ", text)
text = re.sub(r"[Aa]nalysis[-:\s][ \n\t]*", f"{ASKAI_CHAT_ICONS['']}{''} Analysis: ", text)
text = re.sub(r"[Ss]ummary[-:\s][ \n\t]*", f"{ASKAI_CHAT_ICONS['']}{''} Summary:", text)
@@ -137,7 +142,7 @@ def stream_text(text: Any, tempo: int = 1, language: Language = Language.EN_US)
for i, char in enumerate(text):
if char == "%" and (i + 1) < len(text):
try:
if (color := text[i + 1 : text.index("%", i + 1)]) in VtColor.names():
if (color := text[i + 1: text.index("%", i + 1)]) in VtColor.names():
hide, idx = True, text.index("%", i + 1)
sysout(f"%{color}%", end="")
continue
@@ -188,4 +193,17 @@ def stream_text(text: Any, tempo: int = 1, language: Language = Language.EN_US)


if __name__ == '__main__':
display_text(" Error: 'LLM' returned an error: Directory not found: 'HomeSetup/docs/'")
display_text("""
Este text tem ln
aqui
este
"""
)
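Quick sanity check of the re_url pattern above (pattern copied verbatim from the diff; the sample URLs are illustrative):

import re

re_url = (
    r'(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|'
    r'www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))'
    r'[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})')

sample = "Docs at https://github.com/yorevs and www.example.com/page"
print(re.findall(re_url, sample))  # ['https://github.com/yorevs', 'www.example.com/page']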
@@ -14,4 +14,4 @@ Before responding to the user, it is imperative that you follow the step-by-step

- Start your response with the phrase: Analysing the provided data\n

- Wrap up your reply by offering a summarized analysis about the content; prefix with: \n'Analysis:'.
- Wrap up your reply by offering a summarized analysis about the content; prefix with: 'Analysis:'.