diff --git a/libs/kotaemon/kotaemon/base/component.py b/libs/kotaemon/kotaemon/base/component.py
index 6936b2a8f..230ce9ddc 100644
--- a/libs/kotaemon/kotaemon/base/component.py
+++ b/libs/kotaemon/kotaemon/base/component.py
@@ -39,7 +39,7 @@ def set_output_queue(self, queue):
if isinstance(node, BaseComponent):
node.set_output_queue(queue)
- def report_output(self, output: Optional[dict]):
+ def report_output(self, output: Optional[Document]):
if self._queue is not None:
self._queue.put_nowait(output)
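For context, a minimal sketch (not part of the patch) of how a consumer might drain the queue that report_output() now feeds with Document objects rather than plain dicts; the channel/content fields and the trailing None sentinel are assumptions taken from how the reasoning pipeline below uses them.

    import asyncio
    from typing import Optional
    from kotaemon.base import Document

    async def drain(queue: "asyncio.Queue[Optional[Document]]") -> str:
        """Collect streamed chat tokens until the pipeline signals completion."""
        answer = ""
        while True:
            doc = await queue.get()
            if doc is None:                 # pipelines push None when they are done
                break
            if doc.channel == "chat":       # incremental answer text
                answer += doc.content
        return answer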
diff --git a/libs/kotaemon/kotaemon/llms/chats/openai.py b/libs/kotaemon/kotaemon/llms/chats/openai.py
index 6f492c7ad..1a31e24f6 100644
--- a/libs/kotaemon/kotaemon/llms/chats/openai.py
+++ b/libs/kotaemon/kotaemon/llms/chats/openai.py
@@ -270,7 +270,7 @@ def prepare_client(self, async_version: bool = False):
def openai_response(self, client, **kwargs):
"""Get the openai response"""
- params = {
+ params_ = {
"model": self.model,
"temperature": self.temperature,
"max_tokens": self.max_tokens,
@@ -285,6 +285,7 @@ def openai_response(self, client, **kwargs):
"top_logprobs": self.top_logprobs,
"top_p": self.top_p,
}
+ params = {k: v for k, v in params_.items() if v is not None}
params.update(kwargs)
return client.chat.completions.create(**params)
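The effect of the new None-filter, shown as a small illustrative snippet (the parameter values here are made up): optional settings that were never configured are dropped from the request instead of being sent as explicit nulls, so the API applies its own defaults.

    params_ = {"model": "gpt-4", "temperature": 0.0, "max_tokens": None, "top_p": None}
    params = {k: v for k, v in params_.items() if v is not None}
    assert params == {"model": "gpt-4", "temperature": 0.0}  # None entries removed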
diff --git a/libs/ktem/ktem/llms/manager.py b/libs/ktem/ktem/llms/manager.py
index 0ef64e002..71ad42565 100644
--- a/libs/ktem/ktem/llms/manager.py
+++ b/libs/ktem/ktem/llms/manager.py
@@ -5,7 +5,7 @@
from theflow.settings import settings as flowsettings
from theflow.utils.modules import deserialize
-from kotaemon.base import BaseComponent
+from kotaemon.llms import ChatLLM
from .db import LLMTable, engine
@@ -14,7 +14,7 @@ class LLMManager:
"""Represent a pool of models"""
def __init__(self):
- self._models: dict[str, BaseComponent] = {}
+ self._models: dict[str, ChatLLM] = {}
self._info: dict[str, dict] = {}
self._default: str = ""
self._vendors: list[Type] = []
@@ -63,7 +63,7 @@ def load_vendors(self):
self._vendors = [ChatOpenAI, AzureChatOpenAI, LlamaCppChat, EndpointChatLLM]
- def __getitem__(self, key: str) -> BaseComponent:
+ def __getitem__(self, key: str) -> ChatLLM:
"""Get model by name"""
return self._models[key]
@@ -71,9 +71,7 @@ def __contains__(self, key: str) -> bool:
"""Check if model exists"""
return key in self._models
- def get(
- self, key: str, default: Optional[BaseComponent] = None
- ) -> Optional[BaseComponent]:
+ def get(self, key: str, default: Optional[ChatLLM] = None) -> Optional[ChatLLM]:
"""Get model by name with default value"""
return self._models.get(key, default)
@@ -119,18 +117,18 @@ def get_default_name(self) -> str:
return self._default
- def get_random(self) -> BaseComponent:
+ def get_random(self) -> ChatLLM:
"""Get random model"""
return self._models[self.get_random_name()]
- def get_default(self) -> BaseComponent:
+ def get_default(self) -> ChatLLM:
"""Get default model
In case there is no default model, choose random model from pool. In
case there are multiple default models, choose random from them.
Returns:
- BaseComponent: model
+ ChatLLM: model
"""
return self._models[self.get_default_name()]
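With the manager annotated as returning ChatLLM, callers can rely on chat-specific behaviour such as streaming without casts. A rough usage sketch, assuming the llms singleton and the HumanMessage export used elsewhere in this diff:

    from kotaemon.base import HumanMessage
    from ktem.llms.manager import llms

    llm = llms.get_default()              # typed as ChatLLM after this change
    messages = [HumanMessage(content="Hello")]
    for chunk in llm.stream(messages):    # streaming path exercised in simple.py below
        print(chunk.text, end="")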
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 3397250de..d4881d8f9 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -8,6 +8,7 @@
import tiktoken
from ktem.llms.manager import llms
+from ktem.utils.render import Render
from kotaemon.base import (
BaseComponent,
@@ -20,7 +21,7 @@
from kotaemon.indices.qa.citation import CitationPipeline
from kotaemon.indices.splitters import TokenSplitter
from kotaemon.llms import ChatLLM, PromptTemplate
-from kotaemon.loaders.utils.gpt4v import stream_gpt4v
+from kotaemon.loaders.utils.gpt4v import generate_gpt4v, stream_gpt4v
from .base import BaseReasoning
@@ -205,31 +206,10 @@ class AnswerWithContextPipeline(BaseComponent):
system_prompt: str = ""
lang: str = "English" # support English and Japanese
- async def run( # type: ignore
- self, question: str, evidence: str, evidence_mode: int = 0, **kwargs
- ) -> Document:
- """Answer the question based on the evidence
-
- In addition to the question and the evidence, this method also take into
- account evidence_mode. The evidence_mode tells which kind of evidence is.
- The kind of evidence affects:
- 1. How the evidence is represented.
- 2. The prompt to generate the answer.
-
- By default, the evidence_mode is 0, which means the evidence is plain text with
- no particular semantic representation. The evidence_mode can be:
- 1. "table": There will be HTML markup telling that there is a table
- within the evidence.
- 2. "chatbot": There will be HTML markup telling that there is a chatbot.
- This chatbot is a scenario, extracted from an Excel file, where each
- row corresponds to an interaction.
+ def get_prompt(self, question, evidence, evidence_mode: int):
+ """Prepare the prompt and other information for LLM"""
+ images = []
- Args:
- question: the original question posed by user
- evidence: the text that contain relevant information to answer the question
- (determined by retrieval pipeline)
- evidence_mode: the mode of evidence, 0 for text, 1 for table, 2 for chatbot
- """
if evidence_mode == EVIDENCE_MODE_TEXT:
prompt_template = PromptTemplate(self.qa_template)
elif evidence_mode == EVIDENCE_MODE_TABLE:
@@ -239,7 +219,6 @@ async def run( # type: ignore
else:
prompt_template = PromptTemplate(self.qa_chatbot_template)
- images = []
if evidence_mode == EVIDENCE_MODE_FIGURE:
# isolate image from evidence
evidence, images = self.extract_evidence_images(evidence)
@@ -255,6 +234,66 @@ async def run( # type: ignore
lang=self.lang,
)
+ return prompt, images
+
+ def run(
+ self, question: str, evidence: str, evidence_mode: int = 0, **kwargs
+ ) -> Document:
+ return self.invoke(question, evidence, evidence_mode, **kwargs)
+
+ def invoke(
+ self, question: str, evidence: str, evidence_mode: int = 0, **kwargs
+ ) -> Document:
+ prompt, images = self.get_prompt(question, evidence, evidence_mode)
+
+ output = ""
+ if evidence_mode == EVIDENCE_MODE_FIGURE:
+ output = generate_gpt4v(self.vlm_endpoint, images, prompt, max_tokens=768)
+ else:
+ messages = []
+ if self.system_prompt:
+ messages.append(SystemMessage(content=self.system_prompt))
+ messages.append(HumanMessage(content=prompt))
+ output = self.llm(messages).text
+
+ # retrieve the citation
+ citation = None
+ if evidence and self.enable_citation:
+ citation = self.citation_pipeline.invoke(
+ context=evidence, question=question
+ )
+
+ answer = Document(text=output, metadata={"citation": citation})
+
+ return answer
+
+ async def ainvoke( # type: ignore
+ self, question: str, evidence: str, evidence_mode: int = 0, **kwargs
+ ) -> Document:
+ """Answer the question based on the evidence
+
+        In addition to the question and the evidence, this method also takes into
+        account evidence_mode, which tells what kind of evidence is provided.
+ The kind of evidence affects:
+ 1. How the evidence is represented.
+ 2. The prompt to generate the answer.
+
+ By default, the evidence_mode is 0, which means the evidence is plain text with
+ no particular semantic representation. The evidence_mode can be:
+ 1. "table": There will be HTML markup telling that there is a table
+ within the evidence.
+ 2. "chatbot": There will be HTML markup telling that there is a chatbot.
+ This chatbot is a scenario, extracted from an Excel file, where each
+ row corresponds to an interaction.
+
+ Args:
+ question: the original question posed by user
+            evidence: the text that contains relevant information to answer the question
+ (determined by retrieval pipeline)
+ evidence_mode: the mode of evidence, 0 for text, 1 for table, 2 for chatbot
+ """
+ prompt, images = self.get_prompt(question, evidence, evidence_mode)
+
citation_task = None
if evidence and self.enable_citation:
citation_task = asyncio.create_task(
@@ -266,7 +305,7 @@ async def run( # type: ignore
if evidence_mode == EVIDENCE_MODE_FIGURE:
for text in stream_gpt4v(self.vlm_endpoint, images, prompt, max_tokens=768):
output += text
- self.report_output({"output": text})
+ self.report_output(Document(channel="chat", content=text))
await asyncio.sleep(0)
else:
messages = []
@@ -279,12 +318,12 @@ async def run( # type: ignore
print("Trying LLM streaming")
for text in self.llm.stream(messages):
output += text.text
- self.report_output({"output": text.text})
+ self.report_output(Document(content=text.text, channel="chat"))
await asyncio.sleep(0)
except NotImplementedError:
print("Streaming is not supported, falling back to normal processing")
output = self.llm(messages).text
- self.report_output({"output": output})
+ self.report_output(Document(content=output, channel="chat"))
# retrieve the citation
print("Waiting for citation task")
@@ -300,52 +339,7 @@ async def run( # type: ignore
def stream( # type: ignore
self, question: str, evidence: str, evidence_mode: int = 0, **kwargs
) -> Generator[Document, None, Document]:
- """Answer the question based on the evidence
-
- In addition to the question and the evidence, this method also take into
- account evidence_mode. The evidence_mode tells which kind of evidence is.
- The kind of evidence affects:
- 1. How the evidence is represented.
- 2. The prompt to generate the answer.
-
- By default, the evidence_mode is 0, which means the evidence is plain text with
- no particular semantic representation. The evidence_mode can be:
- 1. "table": There will be HTML markup telling that there is a table
- within the evidence.
- 2. "chatbot": There will be HTML markup telling that there is a chatbot.
- This chatbot is a scenario, extracted from an Excel file, where each
- row corresponds to an interaction.
-
- Args:
- question: the original question posed by user
- evidence: the text that contain relevant information to answer the question
- (determined by retrieval pipeline)
- evidence_mode: the mode of evidence, 0 for text, 1 for table, 2 for chatbot
- """
- if evidence_mode == EVIDENCE_MODE_TEXT:
- prompt_template = PromptTemplate(self.qa_template)
- elif evidence_mode == EVIDENCE_MODE_TABLE:
- prompt_template = PromptTemplate(self.qa_table_template)
- elif evidence_mode == EVIDENCE_MODE_FIGURE:
- prompt_template = PromptTemplate(self.qa_figure_template)
- else:
- prompt_template = PromptTemplate(self.qa_chatbot_template)
-
- images = []
- if evidence_mode == EVIDENCE_MODE_FIGURE:
- # isolate image from evidence
- evidence, images = self.extract_evidence_images(evidence)
- prompt = prompt_template.populate(
- context=evidence,
- question=question,
- lang=self.lang,
- )
- else:
- prompt = prompt_template.populate(
- context=evidence,
- question=question,
- lang=self.lang,
- )
+ prompt, images = self.get_prompt(question, evidence, evidence_mode)
output = ""
if evidence_mode == EVIDENCE_MODE_FIGURE:
@@ -425,51 +419,35 @@ class Config:
rewrite_pipeline: RewriteQuestionPipeline = RewriteQuestionPipeline.withx()
use_rewrite: bool = False
- async def ainvoke( # type: ignore
- self, message: str, conv_id: str, history: list, **kwargs # type: ignore
- ) -> Document: # type: ignore
- import markdown
-
- docs = []
- doc_ids = []
- if self.use_rewrite:
- rewrite = await self.rewrite_pipeline(question=message)
- message = rewrite.text
-
+ def retrieve(self, message: str) -> tuple[list[RetrievedDocument], list[Document]]:
+ """Retrieve the documents based on the message"""
+ docs, doc_ids = [], []
for retriever in self.retrievers:
for doc in retriever(text=message):
if doc.doc_id not in doc_ids:
docs.append(doc)
doc_ids.append(doc.doc_id)
+
+ info = []
for doc in docs:
- # TODO: a better approach to show the information
- text = markdown.markdown(
- doc.text, extensions=["markdown.extensions.tables"]
- )
- self.report_output(
- {
- "evidence": (
-                        "<details open>"
-                        f"<summary>{doc.metadata['file_name']}</summary>"
-                        f"{text}"
-                        "</details><br>"
- )
- }
+ info.append(
+ Document(
+ channel="info",
+ content=Render.collapsible(
+ header=doc.metadata["file_name"],
+ content=Render.table(doc.text),
+ open=True,
+ ),
+ )
)
- await asyncio.sleep(0.1)
- evidence_mode, evidence = self.evidence_pipeline(docs).content
- answer = await self.answering_pipeline(
- question=message,
- history=history,
- evidence=evidence,
- evidence_mode=evidence_mode,
- conv_id=conv_id,
- **kwargs,
- )
+ return docs, info
- # prepare citation
+ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document]]:
+ """Prepare the citations to show on the UI"""
+ with_citation, without_citation = [], []
spans = defaultdict(list)
+
if answer.metadata["citation"] is not None:
for fact_with_evidence in answer.metadata["citation"].answer:
for quote in fact_with_evidence.substring_quote:
@@ -500,9 +478,7 @@ async def ainvoke( # type: ignore
break
id2docs = {doc.doc_id: doc for doc in docs}
- lack_evidence = True
not_detected = set(id2docs.keys()) - set(spans.keys())
- self.report_output({"evidence": None})
for id, ss in spans.items():
if not ss:
not_detected.add(id)
@@ -510,48 +486,74 @@ async def ainvoke( # type: ignore
ss = sorted(ss, key=lambda x: x["start"])
text = id2docs[id].text[: ss[0]["start"]]
for idx, span in enumerate(ss):
- text += (
-                    "<mark>" + id2docs[id].text[span["start"] : span["end"]] + "</mark>"
- )
+ text += Render.highlight(id2docs[id].text[span["start"] : span["end"]])
if idx < len(ss) - 1:
text += id2docs[id].text[span["end"] : ss[idx + 1]["start"]]
text += id2docs[id].text[ss[-1]["end"] :]
- text_out = markdown.markdown(
- text, extensions=["markdown.extensions.tables"]
+ with_citation.append(
+ Document(
+ channel="info",
+ content=Render.collapsible(
+ header=id2docs[id].metadata["file_name"],
+ content=Render.table(text),
+ open=True,
+ ),
+ )
)
- self.report_output(
- {
- "evidence": (
-                        "<details open>"
-                        f"<summary>{id2docs[id].metadata['file_name']}</summary>"
-                        f"{text_out}"
-                        "</details><br>"
- )
- }
+
+ without_citation = [
+ Document(
+ channel="info",
+ content=Render.collapsible(
+ header=id2docs[id].metadata["file_name"],
+ content=Render.table(id2docs[id].text),
+ open=False,
+ ),
)
- lack_evidence = False
+ for id in list(not_detected)
+ ]
- if lack_evidence:
- self.report_output({"evidence": "No evidence found.\n"})
+ return with_citation, without_citation
- if not_detected:
- self.report_output(
- {"evidence": "Retrieved segments without matching evidence:\n"}
- )
- for id in list(not_detected):
- text_out = markdown.markdown(
- id2docs[id].text, extensions=["markdown.extensions.tables"]
- )
+ async def ainvoke( # type: ignore
+ self, message: str, conv_id: str, history: list, **kwargs # type: ignore
+ ) -> Document: # type: ignore
+ if self.use_rewrite:
+ rewrite = await self.rewrite_pipeline(question=message)
+ message = rewrite.text
+
+ docs, infos = self.retrieve(message)
+ for _ in infos:
+ self.report_output(_)
+ await asyncio.sleep(0.1)
+
+ evidence_mode, evidence = self.evidence_pipeline(docs).content
+ answer = await self.answering_pipeline(
+ question=message,
+ history=history,
+ evidence=evidence,
+ evidence_mode=evidence_mode,
+ conv_id=conv_id,
+ **kwargs,
+ )
+
+ # show the evidence
+ with_citation, without_citation = self.prepare_citations(answer, docs)
+ if not with_citation and not without_citation:
+ self.report_output(Document(channel="info", content="No evidence found.\n"))
+ else:
+ self.report_output(Document(channel="info", content=None))
+ for _ in with_citation:
+ self.report_output(_)
+ if without_citation:
self.report_output(
- {
- "evidence": (
-                        "<details>"
-                        f"<summary>{id2docs[id].metadata['file_name']}</summary>"
-                        f"{text_out}"
-                        "</details><br>"
- )
- }
+ Document(
+ channel="info",
+ content="Retrieved segments without matching evidence:\n",
+ )
)
+ for _ in without_citation:
+ self.report_output(_)
self.report_output(None)
return answer
@@ -559,32 +561,12 @@ async def ainvoke( # type: ignore
def stream( # type: ignore
self, message: str, conv_id: str, history: list, **kwargs # type: ignore
) -> Generator[Document, None, Document]:
- import markdown
-
- docs = []
- doc_ids = []
if self.use_rewrite:
message = self.rewrite_pipeline(question=message).text
- for retriever in self.retrievers:
- for doc in retriever(text=message):
- if doc.doc_id not in doc_ids:
- docs.append(doc)
- doc_ids.append(doc.doc_id)
- for doc in docs:
- # TODO: a better approach to show the information
- text = markdown.markdown(
- doc.text, extensions=["markdown.extensions.tables"]
- )
- yield Document(
- content=(
-                    "<details open>"
-                    f"<summary>{doc.metadata['file_name']}</summary>"
-                    f"{text}"
-                    "</details><br>"
- ),
- channel="info",
- )
+ docs, infos = self.retrieve(message)
+ for _ in infos:
+ yield _
evidence_mode, evidence = self.evidence_pipeline(docs).content
answer = yield from self.answering_pipeline.stream(
@@ -596,89 +578,21 @@ def stream( # type: ignore
**kwargs,
)
- # prepare citation
- spans = defaultdict(list)
- if answer.metadata["citation"] is not None:
- for fact_with_evidence in answer.metadata["citation"].answer:
- for quote in fact_with_evidence.substring_quote:
- for doc in docs:
- start_idx = doc.text.find(quote)
- if start_idx == -1:
- continue
-
- end_idx = start_idx + len(quote)
-
- current_idx = start_idx
- if "|" not in doc.text[start_idx:end_idx]:
- spans[doc.doc_id].append(
- {"start": start_idx, "end": end_idx}
- )
- else:
- while doc.text[current_idx:end_idx].find("|") != -1:
- match_idx = doc.text[current_idx:end_idx].find("|")
- spans[doc.doc_id].append(
- {
- "start": current_idx,
- "end": current_idx + match_idx,
- }
- )
- current_idx += match_idx + 2
- if current_idx > end_idx:
- break
- break
-
- id2docs = {doc.doc_id: doc for doc in docs}
- lack_evidence = True
- not_detected = set(id2docs.keys()) - set(spans.keys())
- yield Document(channel="info", content=None)
- for id, ss in spans.items():
- if not ss:
- not_detected.add(id)
- continue
- ss = sorted(ss, key=lambda x: x["start"])
- text = id2docs[id].text[: ss[0]["start"]]
- for idx, span in enumerate(ss):
- text += (
-                    "<mark>" + id2docs[id].text[span["start"] : span["end"]] + "</mark>"
- )
- if idx < len(ss) - 1:
- text += id2docs[id].text[span["end"] : ss[idx + 1]["start"]]
- text += id2docs[id].text[ss[-1]["end"] :]
- text_out = markdown.markdown(
- text, extensions=["markdown.extensions.tables"]
- )
- yield Document(
- content=(
-                    "<details open>"
-                    f"<summary>{id2docs[id].metadata['file_name']}</summary>"
-                    f"{text_out}"
-                    "</details><br>"
- ),
- channel="info",
- )
- lack_evidence = False
-
- if lack_evidence:
+ # show the evidence
+ with_citation, without_citation = self.prepare_citations(answer, docs)
+ if not with_citation and not without_citation:
yield Document(channel="info", content="No evidence found.\n")
-
- if not_detected:
- yield Document(
- channel="info",
- content="Retrieved segments without matching evidence:\n",
- )
- for id in list(not_detected):
- text_out = markdown.markdown(
- id2docs[id].text, extensions=["markdown.extensions.tables"]
- )
+ else:
+ yield Document(channel="info", content=None)
+ for _ in with_citation:
+ yield _
+ if without_citation:
yield Document(
- content=(
-                    "<details>"
-                    f"<summary>{id2docs[id].metadata['file_name']}</summary>"
-                    f"{text_out}"
-                    "</details><br>"
- ),
channel="info",
+ content="Retrieved segments without matching evidence:\n",
)
+ for _ in without_citation:
+ yield _
return answer
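A rough sketch of how a UI layer might consume the refactored stream() above, routing Documents by channel; the pipeline object and the render_evidence helper are hypothetical placeholders, not part of this patch.

    def run_chat(pipeline, message: str, conv_id: str, history: list) -> None:
        for doc in pipeline.stream(message=message, conv_id=conv_id, history=history):
            if doc is None:
                continue
            if doc.channel == "chat":          # incremental answer tokens
                print(doc.content, end="", flush=True)
            elif doc.channel == "info":        # collapsible evidence blocks
                render_evidence(doc.content)   # hypothetical UI hook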
diff --git a/libs/ktem/ktem/utils/__init__.py b/libs/ktem/ktem/utils/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/libs/ktem/ktem/utils/render.py b/libs/ktem/ktem/utils/render.py
new file mode 100644
index 000000000..5890d3327
--- /dev/null
+++ b/libs/ktem/ktem/utils/render.py
@@ -0,0 +1,21 @@
+import markdown
+
+
+class Render:
+ """Default text rendering into HTML for the UI"""
+
+ @staticmethod
+ def collapsible(header, content, open: bool = False) -> str:
+ """Render an HTML friendly collapsible section"""
+ o = " open" if open else ""
+        return f"<details{o}><summary>{header}</summary>{content}</details><br>"
+
+ @staticmethod
+ def table(text: str) -> str:
+ """Render table from markdown format into HTML"""
+ return markdown.markdown(text, extensions=["markdown.extensions.tables"])
+
+ @staticmethod
+ def highlight(text: str) -> str:
+ """Highlight text"""
+        return f"<mark>{text}</mark>"
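Example usage of the new Render helpers (illustrative; the markdown table content is made up), producing the collapsible evidence blocks that the reasoning pipeline emits on the "info" channel:

    html = Render.collapsible(
        header="report.pdf",
        content=Render.table("| metric | value |\n| --- | --- |\n| recall | 0.82 |"),
        open=True,
    )
    # -> "<details open><summary>report.pdf</summary><table>...</table></details><br>"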