From 32329fad98e6a3d87aa2ab4ddbe8d78d5e3be0d7 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Sat, 11 May 2024 10:35:30 +0700
Subject: [PATCH 01/56] Support hybrid vector retrieval

---
 libs/kotaemon/kotaemon/indices/vectorindex.py | 50 ++++++++++++++++---
 .../kotaemon/storages/docstores/base.py       |  7 +++
 .../storages/docstores/elasticsearch.py       |  5 +-
 .../kotaemon/storages/docstores/in_memory.py  |  6 +++
 libs/ktem/ktem/index/file/pipelines.py        |  8 ++-
 libs/ktem/ktem/reasoning/simple.py            |  4 +-
 6 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/libs/kotaemon/kotaemon/indices/vectorindex.py b/libs/kotaemon/kotaemon/indices/vectorindex.py
index 8902528d2..7eddc8700 100644
--- a/libs/kotaemon/kotaemon/indices/vectorindex.py
+++ b/libs/kotaemon/kotaemon/indices/vectorindex.py
@@ -78,7 +78,8 @@ class VectorRetrieval(BaseRetrieval):
     doc_store: Optional[BaseDocumentStore] = None
     embedding: BaseEmbeddings
     rerankers: Sequence[BaseReranking] = []
-    top_k: int = 1
+    top_k: int = 5
+    retrieval_mode: str = "hybrid"  # vector, text, hybrid
 
     def run(
         self, text: str | Document, top_k: Optional[int] = None, **kwargs
@@ -101,13 +102,46 @@ def run(
                 "retrieve the documents"
             )
 
-        emb: list[float] = self.embedding(text)[0].embedding
-        _, scores, ids = self.vector_store.query(embedding=emb, top_k=top_k, **kwargs)
-        docs = self.doc_store.get(ids)
-        result = [
-            RetrievedDocument(**doc.to_dict(), score=score)
-            for doc, score in zip(docs, scores)
-        ]
+        result: list[RetrievedDocument] = []
+        # TODO: should declare scope directly in the run params
+        scope = kwargs.pop("scope", None)
+        emb: list[float]
+
+        if self.retrieval_mode == "vector":
+            emb = self.embedding(text)[0].embedding
+            _, scores, ids = self.vector_store.query(
+                embedding=emb, top_k=top_k, **kwargs
+            )
+            docs = self.doc_store.get(ids)
+            result = [
+                RetrievedDocument(**doc.to_dict(), score=score)
+                for doc, score in zip(docs, scores)
+            ]
+        elif self.retrieval_mode == "text":
+            query = text.text if isinstance(text, Document) else text
+            docs = self.doc_store.query(query, top_k=top_k, doc_ids=scope)
+            result = [RetrievedDocument(**doc.to_dict(), score=-1.0) for doc in docs]
+        elif self.retrieval_mode == "hybrid":
+            # similarity search section
+            emb = self.embedding(text)[0].embedding
+            _, vs_scores, vs_ids = self.vector_store.query(
+                embedding=emb, top_k=top_k, **kwargs
+            )
+            vs_docs = self.doc_store.get(vs_ids)
+
+            # full-text search section
+            query = text.text if isinstance(text, Document) else text
+            docs = self.doc_store.query(query, top_k=top_k, doc_ids=scope)
+            result = [
+                RetrievedDocument(**doc.to_dict(), score=-1.0)
+                for doc in docs
+                if doc.doc_id not in vs_ids  # compare ids: vs_ids holds ids, not docs
+            ]
+            result += [
+                RetrievedDocument(**doc.to_dict(), score=score)
+                for doc, score in zip(vs_docs, vs_scores)
+            ]
+
         # use additional reranker to re-order the document list
         if self.rerankers:
             for reranker in self.rerankers:
diff --git a/libs/kotaemon/kotaemon/storages/docstores/base.py b/libs/kotaemon/kotaemon/storages/docstores/base.py
index 243584be7..4b6f397cb 100644
--- a/libs/kotaemon/kotaemon/storages/docstores/base.py
+++ b/libs/kotaemon/kotaemon/storages/docstores/base.py
@@ -41,6 +41,13 @@ def count(self) -> int:
         """Count number of documents"""
         ...
 
+    @abstractmethod
+    def query(
+        self, query: str, top_k: int = 10, doc_ids: Optional[list] = None
+    ) -> List[Document]:
+        """Search document store using search query"""
+        ...
+
     @abstractmethod
     def delete(self, ids: Union[List[str], str]):
         """Delete document by id"""
diff --git a/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py b/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py
index b58b2111b..a8cb2527f 100644
--- a/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py
+++ b/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py
@@ -92,7 +92,10 @@ def add(
                 "_id": doc_id,
             }
             requests.append(request)
-        self.es_bulk(self.client, requests)
+
+        success, failed = self.es_bulk(self.client, requests)
+        print("Added/Updated documents to index", success)
+        print("Failed documents to index", failed)
 
         if refresh_indices:
             self.client.indices.refresh(index=self.index_name)
diff --git a/libs/kotaemon/kotaemon/storages/docstores/in_memory.py b/libs/kotaemon/kotaemon/storages/docstores/in_memory.py
index 3e2ee01b5..0df7a3e58 100644
--- a/libs/kotaemon/kotaemon/storages/docstores/in_memory.py
+++ b/libs/kotaemon/kotaemon/storages/docstores/in_memory.py
@@ -81,6 +81,12 @@ def load(self, path: Union[str, Path]):
         # Also, for portability, use SQLAlchemy for document store.
         self._store = {key: Document.from_dict(value) for key, value in store.items()}
 
+    def query(
+        self, query: str, top_k: int = 10, doc_ids: Optional[list] = None
+    ) -> List[Document]:
+        """Full-text search is not implemented here; always returns an empty list"""
+        return []
+
     def __persist_flow__(self):
         return {}
 
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 450bb3a1e..9dd558ad2 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -79,6 +79,7 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
     get_extra_table: bool = False
     mmr: bool = False
     top_k: int = 5
+    retrieval_mode: str = "hybrid"
 
     @Node.auto(depends_on=["embedding", "VS", "DS"])
     def vector_retrieval(self) -> VectorRetrieval:
@@ -86,6 +87,7 @@ def vector_retrieval(self) -> VectorRetrieval:
             embedding=self.embedding,
             vector_store=self.VS,
             doc_store=self.DS,
+            retrieval_mode=self.retrieval_mode,  # type: ignore
         )
 
     def run(
@@ -105,7 +107,7 @@ def run(
             logger.info(f"Skip retrieval because of no selected files: {self}")
             return []
 
-        retrieval_kwargs = {}
+        retrieval_kwargs: dict = {}
         with Session(engine) as session:
             stmt = select(self.Index).where(
                 self.Index.relation_type == "vector",
@@ -114,6 +116,7 @@ def run(
             results = session.execute(stmt)
             vs_ids = [r[0].target_id for r in results.all()]
 
+        retrieval_kwargs["scope"] = vs_ids
         retrieval_kwargs["filters"] = MetadataFilters(
             filters=[
                 MetadataFilter(
@@ -200,7 +203,7 @@ def get_user_settings(cls) -> dict:
             },
             "retrieval_mode": {
                 "name": "Retrieval mode",
-                "value": "vector",
+                "value": "hybrid",
                 "choices": ["vector", "text", "hybrid"],
                 "component": "dropdown",
             },
@@ -241,6 +244,7 @@ def get_pipeline(cls, user_settings, index_settings, selected):
                     "embedding", embedding_models_manager.get_default_name()
                 )
             ],
+            retrieval_mode=user_settings["retrieval_mode"],
         )
         if not user_settings["use_reranking"]:
             retriever.reranker = None  # type: ignore
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 5118b8fce..dd0b71eee 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -493,7 +493,9 @@ def retrieve(
             query = message
         print(f"Rewritten query: {query}")
         if not query:
-            return [], []
+            # TODO: this previously returned [], [], treating the message as small talk
+            # such as "Hello" or "I need help"; for now fall back to the raw message.
+            query = message
 
         docs, doc_ids = [], []
         for retriever in self.retrievers:

From 3e2f98ce08dbd307f44c5328f7314cc3032b20e2 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Mon, 13 May 2024 02:27:12 +0700
Subject: [PATCH 02/56] Enable figures and table reading in Azure DI

---
 .../azureai_document_intelligence_loader.py   | 147 +++++++++++++++++-
 1 file changed, 144 insertions(+), 3 deletions(-)

diff --git a/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py b/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
index 7e4c516bd..f53db6fd4 100644
--- a/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
@@ -1,10 +1,52 @@
+import base64
 import os
+from io import BytesIO
 from pathlib import Path
 from typing import Optional
 
+import fitz
+from PIL import Image
+
 from kotaemon.base import Document, Param
 
 from .base import BaseReader
+from .utils.adobe import generate_single_figure_caption
+
+
+def crop_image(file_path: Path, bbox: list[float], page_number: int = 0) -> Image.Image:
+    """Crop a region out of one page of a PDF or image file
+
+    Args:
+        file_path (Path): path to the PDF or image file
+        bbox (list[float]): box as fractions of width/height, [x0, y0, x1, y1]
+        page_number (int, optional): zero-based page/frame index. Defaults to 0.
+
+    Returns:
+        Image.Image: cropped image
+    """
+    left, upper, right, lower = bbox
+
+    img: Image.Image
+    suffix = file_path.suffix.lower()
+    if suffix == ".pdf":
+        with fitz.open(file_path) as doc:  # release the PDF handle after rendering
+            page = doc.load_page(page_number)
+            pm = page.get_pixmap(dpi=150)
+            img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
+    elif suffix in [".tif", ".tiff"]:
+        img = Image.open(file_path)
+        img.seek(page_number)
+    else:
+        img = Image.open(file_path)
+
+    return img.crop(
+        (
+            int(left * img.width),
+            int(upper * img.height),
+            int(right * img.width),
+            int(lower * img.height),
+        )
+    )
 
 
 class AzureAIDocumentIntelligenceLoader(BaseReader):
@@ -14,7 +56,7 @@ class AzureAIDocumentIntelligenceLoader(BaseReader):
     heif, docx, xlsx, pptx and html.
     """
 
-    _dependencies = ["azure-ai-documentintelligence"]
+    _dependencies = ["azure-ai-documentintelligence", "PyMuPDF", "Pillow"]
 
     endpoint: str = Param(
         os.environ.get("AZUREAI_DOCUMENT_INTELLIGENT_ENDPOINT", None),
@@ -34,6 +76,25 @@ class AzureAIDocumentIntelligenceLoader(BaseReader):
             "#model-analysis-features)"
         ),
     )
+    output_content_format: str = Param(
+        "markdown",
+        help="Output content format. Can be 'markdown' or 'text'. Default is markdown",
+    )
+    vlm_endpoint: str = Param(
+        help=(
+            "Default VLM endpoint for figure captioning. If not provided, will not "
+            "caption the figures"
+        )
+    )
+    figure_friendly_filetypes: list[str] = Param(
+        [".pdf", ".jpeg", ".jpg", ".png", ".bmp", ".tiff", ".heif", ".tif"],
+        help=(
+            "File types that we can reliably open and extract figures. "
+            "For files like .docx or .html, the visual layout may be different "
+            "when viewed from different tools, hence we cannot use Azure DI "
+            "location to extract figures."
+        ),
+    )
 
     @Param.auto(depends_on=["endpoint", "credential"])
     def client_(self):
@@ -55,14 +116,94 @@ def run(
     def load_data(
         self, file_path: Path, extra_info: Optional[dict] = None, **kwargs
     ) -> list[Document]:
+        """Extract the input file, allowing multi-modal extraction"""
         metadata = extra_info or {}
         with open(file_path, "rb") as fi:
             poller = self.client_.begin_analyze_document(
                 self.model,
                 analyze_request=fi,
                 content_type="application/octet-stream",
-                output_content_format="markdown",
+                output_content_format=self.output_content_format,
             )
             result = poller.result()
 
-        return [Document(content=result.content, metadata=metadata)]
+        # the total text content of the document in `output_content_format` format
+        text_content = result.content
+        removed_spans: list[dict] = []
+
+        # extract the figures
+        figures = []
+        for figure_desc in result.get("figures", []):
+            if not self.vlm_endpoint:
+                continue
+            if file_path.suffix.lower() not in self.figure_friendly_filetypes:
+                continue
+
+            # read & crop the image
+            page_number = figure_desc["boundingRegions"][0]["pageNumber"]
+            page_width = result.pages[page_number - 1]["width"]
+            page_height = result.pages[page_number - 1]["height"]
+            bbox = [
+                figure_desc["boundingRegions"][0]["polygon"][0] / page_width,
+                figure_desc["boundingRegions"][0]["polygon"][1] / page_height,
+                figure_desc["boundingRegions"][0]["polygon"][4] / page_width,
+                figure_desc["boundingRegions"][0]["polygon"][5] / page_height,
+            ]
+            img = crop_image(file_path, bbox, page_number - 1)
+
+            # convert the image into base64
+            img_bytes = BytesIO()
+            img.save(img_bytes, format="PNG")
+            img_base64 = base64.b64encode(img_bytes.getvalue()).decode("utf-8")
+            img_base64 = f"data:image/png;base64,{img_base64}"
+
+            # caption the image
+            caption = generate_single_figure_caption(
+                figure=img_base64, vlm_endpoint=self.vlm_endpoint
+            )
+
+            # store the image into document
+            figure_metadata = {
+                "image_origin": img_base64,
+                "type": "image",
+                "page_label": page_number,
+            }
+            figure_metadata.update(metadata)
+
+            figures.append(
+                Document(
+                    text=caption,
+                    metadata=figure_metadata,
+                )
+            )
+            removed_spans += figure_desc["spans"]
+
+        # extract the tables
+        tables = []
+        for table_desc in result.get("tables", []):
+            # convert the tables into markdown format
+            table_metadata = {
+                "type": "table",
+                "page_label": table_desc["boundingRegions"][0],
+            }
+            table_metadata.update(metadata)
+
+            # store the tables into document
+            offset = table_desc["spans"][0]["offset"]
+            length = table_desc["spans"][0]["length"]
+            tables.append(
+                Document(
+                    text=text_content[offset : offset + length],
+                    metadata=table_metadata,
+                )
+            )
+            removed_spans += table_desc["spans"]
+
+        removed_spans = sorted(removed_spans, key=lambda x: x["offset"], reverse=True)
+        for span in removed_spans:
+            text_content = (
+                text_content[: span["offset"]]
+                + text_content[span["offset"] + span["length"] :]
+            )
+
+        return [Document(content=text_content, metadata=metadata)] + figures + tables

From d876ed27f66ab2e00721f2fbcbde470f2d6b202f Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Mon, 13 May 2024 04:24:40 +0700
Subject: [PATCH 03/56] Retrieve with multi-modal

---
 .../azureai_document_intelligence_loader.py   | 10 ++-
 .../storages/docstores/elasticsearch.py       |  5 +-
 libs/ktem/ktem/index/file/pipelines.py        | 12 ++-
 libs/ktem/ktem/reasoning/simple.py            | 73 +++++++++++++------
 libs/ktem/ktem/utils/render.py                |  9 +++
 5 files changed, 80 insertions(+), 29 deletions(-)

diff --git a/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py b/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
index f53db6fd4..f561056cf 100644
--- a/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
@@ -182,10 +182,12 @@ def load_data(
         tables = []
         for table_desc in result.get("tables", []):
             # convert the tables into markdown format
-            table_metadata = {
-                "type": "table",
-                "page_label": table_desc["boundingRegions"][0],
-            }
+            boundingRegions = table_desc["boundingRegions"]
+            if boundingRegions:
+                page_number = boundingRegions[0]["pageNumber"]
+            else:
+                page_number = 1
+            table_metadata = {"type": "table", "page_label": page_number}
             table_metadata.update(metadata)
 
             # store the tables into document
diff --git a/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py b/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py
index a8cb2527f..3e1b34e76 100644
--- a/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py
+++ b/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py
@@ -134,9 +134,10 @@ def query(
         Returns:
             List[Document]: List of result documents
         """
-        query_dict: dict = {"query": {"match": {"content": query}}, "size": top_k}
+        query_dict: dict = {"match": {"content": query}}
         if doc_ids:
-            query_dict["query"]["match"]["_id"] = {"values": doc_ids}
+            query_dict = {"bool": {"must": [query_dict, {"terms": {"_id": doc_ids}}]}}
+        query_dict = {"query": query_dict, "size": top_k}
         return self.query_raw(query_dict)
 
     def get(self, ids: Union[List[str], str]) -> List[Document]:
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 9dd558ad2..5359f3d13 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -282,7 +282,16 @@ def vector_indexing(self) -> VectorIndexing:
     def handle_docs(self, docs, file_id, file_name) -> Generator[Document, None, int]:
         chunks = []
         n_chunks = 0
-        for cidx, chunk in enumerate(self.splitter(docs)):
+
+        text_docs = []
+        non_text_docs = []
+        for doc in docs:
+            if doc.metadata.get("type", "text") == "text":
+                text_docs.append(doc)
+            else:
+                non_text_docs.append(doc)
+
+        for cidx, chunk in enumerate(self.splitter(text_docs)):
             chunks.append(chunk)
             if cidx % self.chunk_batch_size == 0:
                 self.handle_chunks(chunks, file_id)
@@ -292,6 +301,7 @@ def handle_docs(self, docs, file_id, file_name) -> Generator[Document, None, int
                     f" => [{file_name}] Processed {n_chunks} chunks", channel="debug"
                 )
 
+        chunks += non_text_docs
         if chunks:
             self.handle_chunks(chunks, file_id)
             n_chunks += len(chunks)
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index dd0b71eee..150865a81 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -9,6 +9,7 @@
 import tiktoken
 from ktem.llms.manager import llms
 from ktem.utils.render import Render
+from theflow.settings import settings as flowsettings
 
 from kotaemon.base import (
     AIMessage,
@@ -193,7 +194,7 @@ class AnswerWithContextPipeline(BaseComponent):
     """
 
     llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
-    vlm_endpoint: str = ""
+    vlm_endpoint: str = getattr(flowsettings, "KH_VLM_ENDPOINT", "")
     citation_pipeline: CitationPipeline = Node(
         default_callback=lambda _: CitationPipeline(llm=llms.get_default())
     )
@@ -506,16 +507,30 @@ def retrieve(
 
         info = []
         for doc in docs:
-            info.append(
-                Document(
-                    channel="info",
-                    content=Render.collapsible(
-                        header=doc.metadata["file_name"],
-                        content=Render.table(doc.text),
-                        open=True,
-                    ),
+            if doc.metadata.get("type", "") == "image":
+                info.append(
+                    Document(
+                        channel="info",
+                        content=Render.collapsible(
+                            header=doc.metadata["file_name"],
+                            content=Render.image(
+                                url=doc.metadata["image_origin"], text=doc.text
+                            ),
+                            open=True,
+                        ),
+                    )
+                )
+            else:
+                info.append(
+                    Document(
+                        channel="info",
+                        content=Render.collapsible(
+                            header=doc.metadata["file_name"],
+                            content=Render.table(doc.text),
+                            open=True,
+                        ),
+                    )
                 )
-            )
 
         return docs, info
 
@@ -577,18 +592,32 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
                 )
             )
 
-        without_citation = [
-            Document(
-                channel="info",
-                content=Render.collapsible(
-                    header=id2docs[id].metadata["file_name"],
-                    content=Render.table(id2docs[id].text),
-                    open=False,
-                ),
-            )
-            for id in list(not_detected)
-        ]
-
+        without_citation = []  # one info panel per id in not_detected
+        for doc in (id2docs[id_] for id_ in not_detected):
+            if doc.metadata.get("type", "") == "image":
+                without_citation.append(
+                    Document(
+                        channel="info",
+                        content=Render.collapsible(
+                            header=doc.metadata["file_name"],
+                            content=Render.image(
+                                url=doc.metadata["image_origin"], text=doc.text
+                            ),
+                            open=True,
+                        ),
+                    )
+                )
+            else:
+                without_citation.append(
+                    Document(
+                        channel="info",
+                        content=Render.collapsible(
+                            header=doc.metadata["file_name"],
+                            content=Render.table(doc.text),
+                            open=True,
+                        ),
+                    )
+                )
         return with_citation, without_citation
 
     async def ainvoke(  # type: ignore
diff --git a/libs/ktem/ktem/utils/render.py b/libs/ktem/ktem/utils/render.py
index 5890d3327..ac8d6aa3a 100644
--- a/libs/ktem/ktem/utils/render.py
+++ b/libs/ktem/ktem/utils/render.py
@@ -19,3 +19,12 @@ def table(text: str) -> str:
     def highlight(text: str) -> str:
         """Highlight text"""
         return f"<mark>{text}</mark>"
+
+    @staticmethod
+    def image(url: str, text: str = "") -> str:
+        """Render an image"""
+        img = f'<img src="{url}"><br>'
+        if text:
+            caption = f"<p>{text}</p>"
+            return f"<figure>{img}{caption}</figure><br>"
+        return img

From b9ceb007d4460aa53ac716ec59c8ada2b53f2cfe Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Mon, 13 May 2024 11:46:18 +0700
Subject: [PATCH 04/56] Fix mixing up table

---
 .../loaders/azureai_document_intelligence_loader.py  | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py b/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
index f561056cf..b3585ee4c 100644
--- a/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
@@ -181,18 +181,26 @@ def load_data(
         # extract the tables
         tables = []
         for table_desc in result.get("tables", []):
+            if not table_desc["spans"]:
+                continue
+
             # convert the tables into markdown format
             boundingRegions = table_desc["boundingRegions"]
             if boundingRegions:
                 page_number = boundingRegions[0]["pageNumber"]
             else:
                 page_number = 1
-            table_metadata = {"type": "table", "page_label": page_number}
-            table_metadata.update(metadata)
 
             # store the tables into document
             offset = table_desc["spans"][0]["offset"]
             length = table_desc["spans"][0]["length"]
+            table_metadata = {
+                "type": "table",
+                "page_label": page_number,
+                "table_origin": text_content[offset : offset + length],
+            }
+            table_metadata.update(metadata)
+
             tables.append(
                 Document(
                     text=text_content[offset : offset + length],

From 63879f2b45d433bbe1026bfc5114d0798ce99435 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Tue, 14 May 2024 10:19:51 +0700
Subject: [PATCH 05/56] Add txt loader

---
 libs/kotaemon/kotaemon/loaders/__init__.py   |  2 ++
 libs/kotaemon/kotaemon/loaders/txt_loader.py | 22 ++++++++++++++++++++
 2 files changed, 24 insertions(+)
 create mode 100644 libs/kotaemon/kotaemon/loaders/txt_loader.py

diff --git a/libs/kotaemon/kotaemon/loaders/__init__.py b/libs/kotaemon/kotaemon/loaders/__init__.py
index 6ccdbda6b..134012640 100644
--- a/libs/kotaemon/kotaemon/loaders/__init__.py
+++ b/libs/kotaemon/kotaemon/loaders/__init__.py
@@ -7,6 +7,7 @@
 from .html_loader import HtmlReader, MhtmlReader
 from .mathpix_loader import MathpixPDFReader
 from .ocr_loader import ImageReader, OCRReader
+from .txt_loader import TxtReader
 from .unstructured_loader import UnstructuredReader
 
 __all__ = [
@@ -23,4 +24,5 @@
     "HtmlReader",
     "MhtmlReader",
     "AdobeReader",
+    "TxtReader",
 ]
diff --git a/libs/kotaemon/kotaemon/loaders/txt_loader.py b/libs/kotaemon/kotaemon/loaders/txt_loader.py
new file mode 100644
index 000000000..648402985
--- /dev/null
+++ b/libs/kotaemon/kotaemon/loaders/txt_loader.py
@@ -0,0 +1,22 @@
+from pathlib import Path
+from typing import Optional
+
+from kotaemon.base import Document
+
+from .base import BaseReader
+
+
+class TxtReader(BaseReader):
+    def run(
+        self, file_path: str | Path, extra_info: Optional[dict] = None, **kwargs
+    ) -> list[Document]:
+        return self.load_data(Path(file_path), extra_info=extra_info, **kwargs)
+
+    def load_data(
+        self, file_path: Path, extra_info: Optional[dict] = None, **kwargs
+    ) -> list[Document]:
+        """Read the file as UTF-8 text and wrap it in a single Document"""
+        with open(file_path, "r", encoding="utf-8") as f:
+            text = f.read()
+
+        return [Document(text=text, metadata=extra_info or {})]

From 322a94b6881b9e11213cb8710316b61dff6dc7e6 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Thu, 16 May 2024 15:31:31 +0700
Subject: [PATCH 06/56] Add Anthropic Chat

---
 libs/kotaemon/kotaemon/llms/__init__.py       |  2 ++
 libs/kotaemon/kotaemon/llms/chats/__init__.py |  8 ++++++-
 .../kotaemon/llms/chats/langchain_based.py    | 24 +++++++++++++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/libs/kotaemon/kotaemon/llms/__init__.py b/libs/kotaemon/kotaemon/llms/__init__.py
index 266e39197..6494fc9ae 100644
--- a/libs/kotaemon/kotaemon/llms/__init__.py
+++ b/libs/kotaemon/kotaemon/llms/__init__.py
@@ -7,6 +7,7 @@
     ChatLLM,
     ChatOpenAI,
     EndpointChatLLM,
+    LCAnthropicChat,
     LCAzureChatOpenAI,
     LCChatOpenAI,
     LlamaCppChat,
@@ -27,6 +28,7 @@
     "SystemMessage",
     "AzureChatOpenAI",
     "ChatOpenAI",
+    "LCAnthropicChat",
     "LCAzureChatOpenAI",
     "LCChatOpenAI",
     "LlamaCppChat",
diff --git a/libs/kotaemon/kotaemon/llms/chats/__init__.py b/libs/kotaemon/kotaemon/llms/chats/__init__.py
index 7fc1c40ad..6e3d3d535 100644
--- a/libs/kotaemon/kotaemon/llms/chats/__init__.py
+++ b/libs/kotaemon/kotaemon/llms/chats/__init__.py
@@ -1,6 +1,11 @@
 from .base import ChatLLM
 from .endpoint_based import EndpointChatLLM
-from .langchain_based import LCAzureChatOpenAI, LCChatMixin, LCChatOpenAI
+from .langchain_based import (
+    LCAnthropicChat,
+    LCAzureChatOpenAI,
+    LCChatMixin,
+    LCChatOpenAI,
+)
 from .llamacpp import LlamaCppChat
 from .openai import AzureChatOpenAI, ChatOpenAI
 
@@ -10,6 +15,7 @@
     "ChatLLM",
     "EndpointChatLLM",
     "ChatOpenAI",
+    "LCAnthropicChat",
     "LCChatOpenAI",
     "LCAzureChatOpenAI",
     "LCChatMixin",
diff --git a/libs/kotaemon/kotaemon/llms/chats/langchain_based.py b/libs/kotaemon/kotaemon/llms/chats/langchain_based.py
index fca78dc99..077c3f893 100644
--- a/libs/kotaemon/kotaemon/llms/chats/langchain_based.py
+++ b/libs/kotaemon/kotaemon/llms/chats/langchain_based.py
@@ -221,3 +221,27 @@ def _get_lc_class(self):
             from langchain.chat_models import AzureChatOpenAI
 
         return AzureChatOpenAI
+
+
+class LCAnthropicChat(LCChatMixin, ChatLLM):  # type: ignore
+    def __init__(
+        self,
+        api_key: str | None = None,
+        model_name: str | None = None,
+        temperature: float = 0.7,
+        **params,
+    ):
+        super().__init__(
+            api_key=api_key,
+            model_name=model_name,
+            temperature=temperature,
+            **params,
+        )
+
+    def _get_lc_class(self):
+        try:
+            from langchain_anthropic import ChatAnthropic
+        except ImportError:
+            raise ImportError("Please install langchain-anthropic")
+
+        return ChatAnthropic

From c55572c9f2b7da4ce5bfcd0b8d29fc777585046e Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Mon, 20 May 2024 16:24:54 +0700
Subject: [PATCH 07/56] Raising error when retrieving help file

---
 libs/ktem/ktem/pages/help.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libs/ktem/ktem/pages/help.py b/libs/ktem/ktem/pages/help.py
index fd750ef54..e3438d1e6 100644
--- a/libs/ktem/ktem/pages/help.py
+++ b/libs/ktem/ktem/pages/help.py
@@ -9,6 +9,7 @@
 def get_remote_doc(url: str) -> str:
     try:
         res = requests.get(url)
+        res.raise_for_status()
         return res.text
     except Exception as e:
         print(f"Failed to fetch document from {url}: {e}")

From 5b2773cf0c348ddae66e33a9fb38561e97477912 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Mon, 20 May 2024 22:34:17 +0700
Subject: [PATCH 08/56] Allow same filename for different people if private is
 True

---
 libs/ktem/ktem/index/file/base.py      |  1 +
 libs/ktem/ktem/index/file/index.py     | 73 ++++++++++++++++++--------
 libs/ktem/ktem/index/file/pipelines.py | 12 ++++-
 3 files changed, 63 insertions(+), 23 deletions(-)

diff --git a/libs/ktem/ktem/index/file/base.py b/libs/ktem/ktem/index/file/base.py
index a489a8e51..706e59f82 100644
--- a/libs/ktem/ktem/index/file/base.py
+++ b/libs/ktem/ktem/index/file/base.py
@@ -54,6 +54,7 @@ class BaseFileIndexIndexing(BaseComponent):
     DS = Param(help="The DocStore")
     FSPath = Param(help="The file storage path")
     user_id = Param(help="The user id")
+    private = Param(False, help="Whether this is private index")
 
     def run(
         self, file_paths: str | Path | list[str | Path], *args, **kwargs
diff --git a/libs/ktem/ktem/index/file/index.py b/libs/ktem/ktem/index/file/index.py
index e3d4405d4..49bc0d657 100644
--- a/libs/ktem/ktem/index/file/index.py
+++ b/libs/ktem/ktem/index/file/index.py
@@ -4,7 +4,7 @@
 from ktem.components import filestorage_path, get_docstore, get_vectorstore
 from ktem.db.engine import engine
 from ktem.index.base import BaseIndex
-from sqlalchemy import Column, DateTime, Integer, String
+from sqlalchemy import Column, DateTime, Integer, String, UniqueConstraint
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.sql import func
 from theflow.settings import settings as flowsettings
@@ -52,27 +52,54 @@ def _setup_resources(self):
             - File storage path
         """
         Base = declarative_base()
-        Source = type(
-            "Source",
-            (Base,),
-            {
-                "__tablename__": f"index__{self.id}__source",
-                "id": Column(
-                    String,
-                    primary_key=True,
-                    default=lambda: str(uuid.uuid4()),
-                    unique=True,
-                ),
-                "name": Column(String, unique=True),
-                "path": Column(String),
-                "size": Column(Integer, default=0),
-                "text_length": Column(Integer, default=0),
-                "date_created": Column(
-                    DateTime(timezone=True), server_default=func.now()
-                ),
-                "user": Column(Integer, default=1),
-            },
-        )
+
+        if self.config.get("private", False):
+            Source = type(
+                "Source",
+                (Base,),
+                {
+                    "__tablename__": f"index__{self.id}__source",
+                    "__table_args__": (
+                        UniqueConstraint("name", "user", name="_name_user_uc"),
+                    ),
+                    "id": Column(
+                        String,
+                        primary_key=True,
+                        default=lambda: str(uuid.uuid4()),
+                        unique=True,
+                    ),
+                    "name": Column(String),
+                    "path": Column(String),
+                    "size": Column(Integer, default=0),
+                    "text_length": Column(Integer, default=0),
+                    "date_created": Column(
+                        DateTime(timezone=True), server_default=func.now()
+                    ),
+                    "user": Column(Integer, default=1),
+                },
+            )
+        else:
+            Source = type(
+                "Source",
+                (Base,),
+                {
+                    "__tablename__": f"index__{self.id}__source",
+                    "id": Column(
+                        String,
+                        primary_key=True,
+                        default=lambda: str(uuid.uuid4()),
+                        unique=True,
+                    ),
+                    "name": Column(String, unique=True),
+                    "path": Column(String),
+                    "size": Column(Integer, default=0),
+                    "text_length": Column(Integer, default=0),
+                    "date_created": Column(
+                        DateTime(timezone=True), server_default=func.now()
+                    ),
+                    "user": Column(Integer, default=1),
+                },
+            )
         Index = type(
             "IndexTable",
             (Base,),
@@ -85,6 +112,7 @@ def _setup_resources(self):
                 "user": Column(Integer, default=1),
             },
         )
+
         self._vs: BaseVectorStore = get_vectorstore(f"index_{self.id}")
         self._docstore: BaseDocumentStore = get_docstore(f"index_{self.id}")
         self._fs_path = filestorage_path / f"index_{self.id}"
@@ -368,6 +396,7 @@ def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing:
         obj.DS = self._docstore
         obj.FSPath = self._fs_path
         obj.user_id = user_id
+        obj.private = self.config.get("private", False)
 
         return obj
 
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 5359f3d13..9697904a5 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -271,6 +271,7 @@ class IndexPipeline(BaseComponent):
     DS = Param(help="The DocStore")
     FSPath = Param(help="The file storage path")
     user_id = Param(help="The user id")
+    private: bool = False
     embedding: BaseEmbeddings
 
     @Node.auto(depends_on=["Source", "Index", "embedding"])
@@ -346,8 +347,16 @@ def get_id_if_exists(self, file_path: Path) -> Optional[str]:
         Returns:
             the file id if the file is indexed, otherwise None
         """
+        if self.private:
+            cond: tuple = (
+                self.Source.name == file_path.name,
+                self.Source.user == self.user_id,
+            )
+        else:
+            cond = (self.Source.name == file_path.name,)
+
         with Session(engine) as session:
-            stmt = select(self.Source).where(self.Source.name == file_path.name)
+            stmt = select(self.Source).where(*cond)
             item = session.execute(stmt).first()
             if item:
                 return item[0].id
@@ -535,6 +544,7 @@ def route(self, file_path: Path) -> IndexPipeline:
             DS=self.DS,
             FSPath=self.FSPath,
             user_id=self.user_id,
+            private=self.private,
             embedding=self.embedding,
         )
 

From 3e80168b25164c2c2009c052fba03193257930ab Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Sat, 25 May 2024 20:24:33 +0700
Subject: [PATCH 09/56] Allow declaring extra LLM vendors

---
 libs/ktem/ktem/llms/manager.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libs/ktem/ktem/llms/manager.py b/libs/ktem/ktem/llms/manager.py
index 6baa759cb..f6adbb296 100644
--- a/libs/ktem/ktem/llms/manager.py
+++ b/libs/ktem/ktem/llms/manager.py
@@ -3,7 +3,7 @@
 from sqlalchemy import select
 from sqlalchemy.orm import Session
 from theflow.settings import settings as flowsettings
-from theflow.utils.modules import deserialize
+from theflow.utils.modules import deserialize, import_dotted_string
 
 from kotaemon.llms import ChatLLM
 
@@ -63,6 +63,9 @@ def load_vendors(self):
 
         self._vendors = [ChatOpenAI, AzureChatOpenAI, LlamaCppChat, EndpointChatLLM]
 
+        for extra_vendor in getattr(flowsettings, "KH_LLM_EXTRA_VENDORS", []):
+            self._vendors.append(import_dotted_string(extra_vendor, safe=False))
+
     def __getitem__(self, key: str) -> ChatLLM:
         """Get model by name"""
         return self._models[key]

From 7d3354f8dd12b743160d30ef629a6a3d86897c45 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Sun, 26 May 2024 01:51:06 +0700
Subject: [PATCH 10/56] Show chunks on the File page

---
 libs/kotaemon/kotaemon/agents/tools/google.py |  2 +-
 .../kotaemon/contribs/promptui/ui/chat.py     |  4 +-
 .../kotaemon/contribs/promptui/ui/pipeline.py |  4 +-
 libs/kotaemon/pyproject.toml                  |  2 +-
 libs/ktem/ktem/app.py                         |  2 +-
 libs/ktem/ktem/assets/js/main.js              | 51 +++++------
 libs/ktem/ktem/embeddings/ui.py               | 12 +--
 libs/ktem/ktem/index/file/ui.py               | 86 +++++++++++++++----
 libs/ktem/ktem/index/ui.py                    | 12 +--
 libs/ktem/ktem/llms/ui.py                     | 14 +--
 libs/ktem/ktem/pages/login.py                 |  6 +-
 libs/ktem/ktem/pages/resources/user.py        |  2 +-
 libs/ktem/ktem/pages/settings.py              |  5 +-
 libs/ktem/pyproject.toml                      |  2 +-
 14 files changed, 131 insertions(+), 73 deletions(-)

diff --git a/libs/kotaemon/kotaemon/agents/tools/google.py b/libs/kotaemon/kotaemon/agents/tools/google.py
index 80009e291..98c5eaf40 100644
--- a/libs/kotaemon/kotaemon/agents/tools/google.py
+++ b/libs/kotaemon/kotaemon/agents/tools/google.py
@@ -1,7 +1,7 @@
 from typing import AnyStr, Optional, Type
 from urllib.error import HTTPError
 
-from langchain.utilities import SerpAPIWrapper
+from langchain_community.utilities import SerpAPIWrapper
 from pydantic import BaseModel, Field
 
 from .base import BaseTool
diff --git a/libs/kotaemon/kotaemon/contribs/promptui/ui/chat.py b/libs/kotaemon/kotaemon/contribs/promptui/ui/chat.py
index 78eecb59f..b3ef35612 100644
--- a/libs/kotaemon/kotaemon/contribs/promptui/ui/chat.py
+++ b/libs/kotaemon/kotaemon/contribs/promptui/ui/chat.py
@@ -133,9 +133,7 @@ def construct_chat_ui(
                     label="Output file", show_label=True, height=100
                 )
                 export_btn = gr.Button("Export")
-                export_btn.click(
-                    func_export_to_excel, inputs=None, outputs=exported_file
-                )
+                export_btn.click(func_export_to_excel, inputs=[], outputs=exported_file)
 
         with gr.Row():
             with gr.Column():
diff --git a/libs/kotaemon/kotaemon/contribs/promptui/ui/pipeline.py b/libs/kotaemon/kotaemon/contribs/promptui/ui/pipeline.py
index 725893de4..978a751b0 100644
--- a/libs/kotaemon/kotaemon/contribs/promptui/ui/pipeline.py
+++ b/libs/kotaemon/kotaemon/contribs/promptui/ui/pipeline.py
@@ -91,7 +91,7 @@ def construct_pipeline_ui(
                 save_btn.click(func_save, inputs=params, outputs=history_dataframe)
                 load_params_btn = gr.Button("Reload params")
                 load_params_btn.click(
-                    func_load_params, inputs=None, outputs=history_dataframe
+                    func_load_params, inputs=[], outputs=history_dataframe
                 )
             history_dataframe.render()
             history_dataframe.select(
@@ -103,7 +103,7 @@ def construct_pipeline_ui(
             export_btn = gr.Button(
                 "Export (Result will be in Exported file next to Output)"
             )
-            export_btn.click(func_export, inputs=None, outputs=exported_file)
+            export_btn.click(func_export, inputs=[], outputs=exported_file)
         with gr.Row():
             with gr.Column():
                 if params:
diff --git a/libs/kotaemon/pyproject.toml b/libs/kotaemon/pyproject.toml
index 101b91b11..67467b06a 100644
--- a/libs/kotaemon/pyproject.toml
+++ b/libs/kotaemon/pyproject.toml
@@ -28,7 +28,7 @@ dependencies = [
     "theflow>=0.8.6,<0.9.0",
     "llama-index==0.9.48",
     "llama-hub>=0.0.79,<0.1.0",
-    "gradio>=4.26.0,<5",
+    "gradio>=4.31.0,<5",
     "openpyxl>=3.1.2,<3.2",
     "cookiecutter>=2.6.0,<2.7",
     "click>=8.1.7,<9",
diff --git a/libs/ktem/ktem/app.py b/libs/ktem/ktem/app.py
index 357b0d949..5fb7006c5 100644
--- a/libs/ktem/ktem/app.py
+++ b/libs/ktem/ktem/app.py
@@ -161,6 +161,7 @@ def make(self):
             css=self._css,
             title=self.app_name,
             analytics_enabled=False,
+            js=self._js,
         ) as demo:
             self.app = demo
             self.settings_state.render()
@@ -200,7 +201,6 @@ def register_events(self):
 
     def on_app_created(self):
         """Execute on app created callbacks"""
-        self.app.load(lambda: None, None, None, js=f"() => {{{self._js}}}")
         self._on_app_created()
         for value in self.__dict__.values():
             if isinstance(value, BasePage):
diff --git a/libs/ktem/ktem/assets/js/main.js b/libs/ktem/ktem/assets/js/main.js
index 9ce6933b4..6436932b2 100644
--- a/libs/ktem/ktem/assets/js/main.js
+++ b/libs/ktem/ktem/assets/js/main.js
@@ -1,30 +1,33 @@
-let main_parent = document.getElementById("chat-tab").parentNode;
+function run() {
+  let main_parent = document.getElementById("chat-tab").parentNode;
 
-main_parent.childNodes[0].classList.add("header-bar");
-main_parent.style = "padding: 0; margin: 0";
-main_parent.parentNode.style = "gap: 0";
-main_parent.parentNode.parentNode.style = "padding: 0";
+  main_parent.childNodes[0].classList.add("header-bar");
+  main_parent.style = "padding: 0; margin: 0";
+  main_parent.parentNode.style = "gap: 0";
+  main_parent.parentNode.parentNode.style = "padding: 0";
 
 
-// clpse
-globalThis.clpseFn = (id) => {
-  var obj = document.getElementById('clpse-btn-' + id);
-  obj.classList.toggle("clpse-active");
-  var content = obj.nextElementSibling;
-  if (content.style.display === "none") {
-    content.style.display = "block";
-  } else {
-    content.style.display = "none";
+  // clpse
+  globalThis.clpseFn = (id) => {
+    var obj = document.getElementById('clpse-btn-' + id);
+    obj.classList.toggle("clpse-active");
+    var content = obj.nextElementSibling;
+    if (content.style.display === "none") {
+      content.style.display = "block";
+    } else {
+      content.style.display = "none";
+    }
   }
-}
 
-// store info in local storage
-globalThis.setStorage = (key, value) => {
-    localStorage.setItem(key, JSON.stringify(value))
-}
-globalThis.getStorage = (key, value) => {
-    return JSON.parse(localStorage.getItem(key))
-}
-globalThis.removeFromStorage = (key) => {
-    localStorage.removeItem(key)
+  // store info in local storage
+  globalThis.setStorage = (key, value) => {
+      localStorage.setItem(key, value)
+  }
+  globalThis.getStorage = (key, value) => {
+    item = localStorage.getItem(key);
+    return item ? item : value;
+  }
+  globalThis.removeFromStorage = (key) => {
+      localStorage.removeItem(key)
+  }
 }
diff --git a/libs/ktem/ktem/embeddings/ui.py b/libs/ktem/ktem/embeddings/ui.py
index f8db9f1ed..1b2e549cc 100644
--- a/libs/ktem/ktem/embeddings/ui.py
+++ b/libs/ktem/ktem/embeddings/ui.py
@@ -115,7 +115,7 @@ def _on_app_created(self):
         """Called when the app is created"""
         self._app.app.load(
             self.list_embeddings,
-            inputs=None,
+            inputs=[],
             outputs=[self.emb_list],
         )
         self._app.app.load(
@@ -144,7 +144,7 @@ def on_register_events(self):
             self.create_emb,
             inputs=[self.name, self.emb_choices, self.spec, self.default],
             outputs=None,
-        ).success(self.list_embeddings, inputs=None, outputs=[self.emb_list]).success(
+        ).success(self.list_embeddings, inputs=[], outputs=[self.emb_list]).success(
             lambda: ("", None, "", False, self.spec_desc_default),
             outputs=[
                 self.name,
@@ -179,7 +179,7 @@ def on_register_events(self):
         )
         self.btn_delete.click(
             self.on_btn_delete_click,
-            inputs=None,
+            inputs=[],
             outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
             show_progress="hidden",
         )
@@ -190,7 +190,7 @@ def on_register_events(self):
             show_progress="hidden",
         ).then(
             self.list_embeddings,
-            inputs=None,
+            inputs=[],
             outputs=[self.emb_list],
         )
         self.btn_delete_no.click(
@@ -199,7 +199,7 @@ def on_register_events(self):
                 gr.update(visible=False),
                 gr.update(visible=False),
             ),
-            inputs=None,
+            inputs=[],
             outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
             show_progress="hidden",
         )
@@ -213,7 +213,7 @@ def on_register_events(self):
             show_progress="hidden",
         ).then(
             self.list_embeddings,
-            inputs=None,
+            inputs=[],
             outputs=[self.emb_list],
         )
         self.btn_close.click(
diff --git a/libs/ktem/ktem/index/file/ui.py b/libs/ktem/ktem/index/file/ui.py
index d46e0725d..9ff1cfb83 100644
--- a/libs/ktem/ktem/index/file/ui.py
+++ b/libs/ktem/ktem/index/file/ui.py
@@ -9,6 +9,7 @@
 from gradio.utils import NamedString
 from ktem.app import BasePage
 from ktem.db.engine import engine
+from ktem.utils.render import Render
 from sqlalchemy import select
 from sqlalchemy.orm import Session
 
@@ -154,17 +155,18 @@ def on_building_ui(self):
                     with gr.Column(scale=2):
                         self.selected_panel = gr.Markdown(self.selected_panel_false)
 
-                self.deselect_button = gr.Button(
-                    "Deselect",
-                    visible=False,
-                    elem_classes=["right-button"],
-                )
-                self.delete_button = gr.Button(
-                    "Delete",
-                    variant="stop",
-                    visible=False,
-                    elem_classes=["right-button"],
-                )
+                self.chunks = gr.HTML(visible=False)
+
+                with gr.Row():
+                    self.deselect_button = gr.Button(
+                        "Close",
+                        visible=False,
+                    )
+                    self.delete_button = gr.Button(
+                        "Delete",
+                        variant="stop",
+                        visible=False,
+                    )
 
     def on_subscribe_public_events(self):
         """Subscribe to the declared public event of the app"""
@@ -189,7 +191,42 @@ def on_subscribe_public_events(self):
             )
 
     def file_selected(self, file_id):
+        chunks = []
+        if file_id is not None:
+            # get the chunks
+
+            Index = self._index._resources["Index"]
+            with Session(engine) as session:
+                matches = session.execute(
+                    select(Index).where(
+                        Index.source_id == file_id,
+                        Index.relation_type == "document",
+                    )
+                )
+                doc_ids = [doc.target_id for (doc,) in matches]
+                docs = self._index._docstore.get(doc_ids)
+
+                for idx, doc in enumerate(docs):
+                    title = f"{doc.text[:50]}..." if len(doc.text) > 50 else doc.text
+                    doc_type = doc.metadata.get("type", "text")
+                    content = ""
+                    if doc_type == "text":
+                        content = doc.text
+                    elif doc_type == "table":
+                        content = Render.table(doc.text)
+                    elif doc_type == "image":
+                        content = Render.image(
+                            url=doc.metadata.get("image_origin", ""), text=doc.text
+                        )
+
+                    chunks.append(
+                        Render.collapsible(
+                            header=f"[{idx+1}/{len(docs)}] {title}",
+                            content=content,
+                        )
+                    )
         return (
+            gr.update(value="".join(chunks), visible=file_id is not None),
             gr.update(visible=file_id is not None),
             gr.update(visible=file_id is not None),
         )
@@ -241,7 +278,7 @@ def on_register_events(self):
             )
             .then(
                 fn=lambda: (None, self.selected_panel_false),
-                inputs=None,
+                inputs=[],
                 outputs=[self.selected_file_id, self.selected_panel],
                 show_progress="hidden",
             )
@@ -250,20 +287,30 @@ def on_register_events(self):
                 inputs=[self._app.user_id],
                 outputs=[self.file_list_state, self.file_list],
             )
+            .then(
+                fn=self.file_selected,
+                inputs=[self.selected_file_id],
+                outputs=[
+                    self.chunks,
+                    self.deselect_button,
+                    self.delete_button,
+                ],
+                show_progress="hidden",
+            )
         )
         for event in self._app.get_event(f"onFileIndex{self._index.id}Changed"):
             onDeleted = onDeleted.then(**event)
 
         self.deselect_button.click(
             fn=lambda: (None, self.selected_panel_false),
-            inputs=None,
+            inputs=[],
             outputs=[self.selected_file_id, self.selected_panel],
             show_progress="hidden",
-        )
-        self.selected_panel.change(
+        ).then(
             fn=self.file_selected,
             inputs=[self.selected_file_id],
             outputs=[
+                self.chunks,
                 self.deselect_button,
                 self.delete_button,
             ],
@@ -309,6 +356,15 @@ def on_register_events(self):
             inputs=[self.file_list],
             outputs=[self.selected_file_id, self.selected_panel],
             show_progress="hidden",
+        ).then(
+            fn=self.file_selected,
+            inputs=[self.selected_file_id],
+            outputs=[
+                self.chunks,
+                self.deselect_button,
+                self.delete_button,
+            ],
+            show_progress="hidden",
         )
 
     def _on_app_created(self):
diff --git a/libs/ktem/ktem/index/ui.py b/libs/ktem/ktem/index/ui.py
index 9d6b7b25e..b705887d5 100644
--- a/libs/ktem/ktem/index/ui.py
+++ b/libs/ktem/ktem/index/ui.py
@@ -95,7 +95,7 @@ def _on_app_created(self):
         """Called when the app is created"""
         self._app.app.load(
             self.list_indices,
-            inputs=None,
+            inputs=[],
             outputs=[self.index_list],
         )
         self._app.app.load(
@@ -117,7 +117,7 @@ def on_register_events(self):
             self.create_index,
             inputs=[self.name, self.index_type, self.spec],
             outputs=None,
-        ).success(self.list_indices, inputs=None, outputs=[self.index_list]).success(
+        ).success(self.list_indices, inputs=[], outputs=[self.index_list]).success(
             lambda: ("", None, "", self.spec_desc_default),
             outputs=[
                 self.name,
@@ -152,7 +152,7 @@ def on_register_events(self):
                 gr.update(visible=False),
                 gr.update(visible=True),
             ),
-            inputs=None,
+            inputs=[],
             outputs=[
                 self.btn_edit_save,
                 self.btn_delete,
@@ -168,7 +168,7 @@ def on_register_events(self):
             show_progress="hidden",
         ).then(
             self.list_indices,
-            inputs=None,
+            inputs=[],
             outputs=[self.index_list],
         )
         self.btn_delete_no.click(
@@ -178,7 +178,7 @@ def on_register_events(self):
                 gr.update(visible=True),
                 gr.update(visible=False),
             ),
-            inputs=None,
+            inputs=[],
             outputs=[
                 self.btn_edit_save,
                 self.btn_delete,
@@ -197,7 +197,7 @@ def on_register_events(self):
             show_progress="hidden",
         ).then(
             self.list_indices,
-            inputs=None,
+            inputs=[],
             outputs=[self.index_list],
         )
         self.btn_close.click(
diff --git a/libs/ktem/ktem/llms/ui.py b/libs/ktem/ktem/llms/ui.py
index 8b6f4ee5a..116982ed4 100644
--- a/libs/ktem/ktem/llms/ui.py
+++ b/libs/ktem/ktem/llms/ui.py
@@ -112,7 +112,7 @@ def _on_app_created(self):
         """Called when the app is created"""
         self._app.app.load(
             self.list_llms,
-            inputs=None,
+            inputs=[],
             outputs=[self.llm_list],
         )
         self._app.app.load(
@@ -140,8 +140,8 @@ def on_register_events(self):
         self.btn_new.click(
             self.create_llm,
             inputs=[self.name, self.llm_choices, self.spec, self.default],
-            outputs=None,
-        ).success(self.list_llms, inputs=None, outputs=[self.llm_list]).success(
+            outputs=[],
+        ).success(self.list_llms, inputs=[], outputs=[self.llm_list]).success(
             lambda: ("", None, "", False, self.spec_desc_default),
             outputs=[
                 self.name,
@@ -176,7 +176,7 @@ def on_register_events(self):
         )
         self.btn_delete.click(
             self.on_btn_delete_click,
-            inputs=None,
+            inputs=[],
             outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
             show_progress="hidden",
         )
@@ -187,7 +187,7 @@ def on_register_events(self):
             show_progress="hidden",
         ).then(
             self.list_llms,
-            inputs=None,
+            inputs=[],
             outputs=[self.llm_list],
         )
         self.btn_delete_no.click(
@@ -196,7 +196,7 @@ def on_register_events(self):
                 gr.update(visible=False),
                 gr.update(visible=False),
             ),
-            inputs=None,
+            inputs=[],
             outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
             show_progress="hidden",
         )
@@ -210,7 +210,7 @@ def on_register_events(self):
             show_progress="hidden",
         ).then(
             self.list_llms,
-            inputs=None,
+            inputs=[],
             outputs=[self.llm_list],
         )
         self.btn_close.click(
diff --git a/libs/ktem/ktem/pages/login.py b/libs/ktem/ktem/pages/login.py
index 50f505f34..9dc4839c4 100644
--- a/libs/ktem/ktem/pages/login.py
+++ b/libs/ktem/ktem/pages/login.py
@@ -7,9 +7,9 @@
 
 fetch_creds = """
 function() {
-    const username = getStorage('username')
-    const password = getStorage('password')
-    return [username, password];
+    const username = getStorage('username', '')
+    const password = getStorage('password', '')
+    return [username, password, null];
 }
 """
 
diff --git a/libs/ktem/ktem/pages/resources/user.py b/libs/ktem/ktem/pages/resources/user.py
index 2b6507521..1792d6201 100644
--- a/libs/ktem/ktem/pages/resources/user.py
+++ b/libs/ktem/ktem/pages/resources/user.py
@@ -224,7 +224,7 @@ def on_register_events(self):
                 gr.update(visible=False),
                 gr.update(visible=False),
             ),
-            inputs=None,
+            inputs=[],
             outputs=[self.btn_delete, self.btn_delete_yes, self.btn_delete_no],
             show_progress="hidden",
         )
diff --git a/libs/ktem/ktem/pages/settings.py b/libs/ktem/ktem/pages/settings.py
index 23bbfac99..03d0e895c 100644
--- a/libs/ktem/ktem/pages/settings.py
+++ b/libs/ktem/ktem/pages/settings.py
@@ -6,9 +6,10 @@
 from sqlmodel import Session, select
 
 signout_js = """
-function() {
+function(u, c, pw, pwc) {
     removeFromStorage('username');
     removeFromStorage('password');
+    return [u, c, pw, pwc];
 }
 """
 
@@ -192,7 +193,7 @@ def on_register_events(self):
             )
             onSignOutClick = self.signout.click(
                 lambda: (None, "Current user: ___", "", ""),
-                inputs=None,
+                inputs=[],
                 outputs=[
                     self._user_id,
                     self.current_name,
diff --git a/libs/ktem/pyproject.toml b/libs/ktem/pyproject.toml
index eb6ac122e..a995495c2 100644
--- a/libs/ktem/pyproject.toml
+++ b/libs/ktem/pyproject.toml
@@ -26,7 +26,7 @@ dependencies = [
     "SQLAlchemy>=2.0.29,<3",
     "sqlmodel>=0.0.16,<0.1",
     "tiktoken>=0.6.0,<1",
-    "gradio>=4.26.0,<5",
+    "gradio>=4.31.0,<5",
     "markdown>=3.6,<4",
 ]
 authors = [

From 620b81d6771594a6beb902dce532257f4421a333 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Tue, 28 May 2024 19:03:42 +0700
Subject: [PATCH 11/56] Allow elasticsearch to get more docs

---
 libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py b/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py
index 3e1b34e76..8692bc387 100644
--- a/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py
+++ b/libs/kotaemon/kotaemon/storages/docstores/elasticsearch.py
@@ -144,7 +144,7 @@ def get(self, ids: Union[List[str], str]) -> List[Document]:
         """Get document by id"""
         if not isinstance(ids, list):
             ids = [ids]
-        query_dict = {"query": {"terms": {"_id": ids}}}
+        query_dict = {"query": {"terms": {"_id": ids}}, "size": 10000}
         return self.query_raw(query_dict)
 
     def count(self) -> int:

From ad61c71f5d2f5f12cd5ddf2537546d5147751437 Mon Sep 17 00:00:00 2001
From: cin-ace <ace@cinnamon.is>
Date: Fri, 31 May 2024 13:46:50 +0700
Subject: [PATCH 12/56] Fix Cohere response (#86)

* Fix Cohere response

* Remove Adobe pdfservice from dependency

kotaemon no longer relies on pdfservice for its core functionality,
and pdfservice uses a very outdated dependency that causes conflicts.

---------

Co-authored-by: trducng <trungduc1992@gmail.com>
---
 libs/kotaemon/kotaemon/indices/rankings/cohere.py | 4 ++--
 libs/kotaemon/pyproject.toml                      | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/libs/kotaemon/kotaemon/indices/rankings/cohere.py b/libs/kotaemon/kotaemon/indices/rankings/cohere.py
index d102efdb4..a7beb61f2 100644
--- a/libs/kotaemon/kotaemon/indices/rankings/cohere.py
+++ b/libs/kotaemon/kotaemon/indices/rankings/cohere.py
@@ -29,10 +29,10 @@ def run(self, documents: list[Document], query: str) -> list[Document]:
             return compressed_docs
 
         _docs = [d.content for d in documents]
-        results = cohere_client.rerank(
+        response = cohere_client.rerank(
             model=self.model_name, query=query, documents=_docs, top_n=self.top_k
         )
-        for r in results:
+        for r in response.results:
             doc = documents[r.index]
             doc.metadata["relevance_score"] = r.relevance_score
             compressed_docs.append(doc)
diff --git a/libs/kotaemon/pyproject.toml b/libs/kotaemon/pyproject.toml
index 67467b06a..36de876ea 100644
--- a/libs/kotaemon/pyproject.toml
+++ b/libs/kotaemon/pyproject.toml
@@ -66,7 +66,6 @@ adv = [
     "unstructured[pdf]==0.13.4",
     "sentence_transformers==2.7.0",
     "elasticsearch>=8.13.0,<8.14",
-    "pdfservices-sdk @  git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements",
     "beautifulsoup4>=4.12.3,<4.13",
 ]
 dev = [

From 0708fb5b96d102a7a16077cb2746a9d5b0d2e6e6 Mon Sep 17 00:00:00 2001
From: cin-ace <ace@cinnamon.is>
Date: Mon, 3 Jun 2024 09:40:23 +0700
Subject: [PATCH 13/56] Add confidence score (#87)

* Save question answering data as a log file

* Save the original information besides the rewritten info

* Export Cohere relevance score as confidence score

* Fix style check
---
 libs/ktem/ktem/index/file/pipelines.py |   5 +-
 libs/ktem/ktem/pages/chat/__init__.py  | 127 +++++++++++++++++++++++++
 libs/ktem/ktem/reasoning/simple.py     |   4 +-
 3 files changed, 132 insertions(+), 4 deletions(-)

diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 9697904a5..9a75bf755 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -30,7 +30,7 @@
 from kotaemon.embeddings import BaseEmbeddings
 from kotaemon.indices import VectorIndexing, VectorRetrieval
 from kotaemon.indices.ingests.files import KH_DEFAULT_FILE_EXTRACTORS
-from kotaemon.indices.rankings import BaseReranking, LLMReranking
+from kotaemon.indices.rankings import BaseReranking, CohereReranking, LLMReranking
 from kotaemon.indices.splitters import BaseSplitter, TokenSplitter
 
 from .base import BaseFileIndexIndexing, BaseFileIndexRetriever
@@ -245,10 +245,11 @@ def get_pipeline(cls, user_settings, index_settings, selected):
                 )
             ],
             retrieval_mode=user_settings["retrieval_mode"],
+            reranker=CohereReranking(),
         )
         if not user_settings["use_reranking"]:
             retriever.reranker = None  # type: ignore
-        else:
+        elif isinstance(retriever.reranker, LLMReranking):
             retriever.reranker.llm = llms.get(
                 user_settings["reranking_llm"], llms.get_default()
             )
diff --git a/libs/ktem/ktem/pages/chat/__init__.py b/libs/ktem/ktem/pages/chat/__init__.py
index d9826e0db..b7f814323 100644
--- a/libs/ktem/ktem/pages/chat/__init__.py
+++ b/libs/ktem/ktem/pages/chat/__init__.py
@@ -1,8 +1,12 @@
 import asyncio
+import csv
 from copy import deepcopy
+from datetime import datetime
+from pathlib import Path
 from typing import Optional
 
 import gradio as gr
+from filelock import FileLock
 from ktem.app import BasePage
 from ktem.components import reasonings
 from ktem.db.models import Conversation, engine
@@ -27,6 +31,10 @@ def __init__(self, app):
     def on_building_ui(self):
         with gr.Row():
             self.chat_state = gr.State(STATE)
+            self.original_chat_history = gr.State([])
+            self.original_settings = gr.State({})
+            self.original_info_panel = gr.State("")
+
             with gr.Column(scale=1, elem_id="conv-settings-panel"):
                 self.chat_control = ConversationControl(self._app)
 
@@ -109,6 +117,19 @@ def on_register_events(self):
             ],
             concurrency_limit=20,
             show_progress="minimal",
+        ).success(
+            fn=self.backup_original_info,
+            inputs=[
+                self.chat_panel.chatbot,
+                self._app.settings_state,
+                self.info_panel,
+                self.original_chat_history,
+            ],
+            outputs=[
+                self.original_chat_history,
+                self.original_settings,
+                self.original_info_panel,
+            ],
         ).then(
             fn=self.update_data_source,
             inputs=[
@@ -154,6 +175,19 @@ def on_register_events(self):
             fn=self.is_liked,
             inputs=[self.chat_control.conversation_id],
             outputs=None,
+        ).success(
+            self.save_log,
+            inputs=[
+                self.chat_control.conversation_id,
+                self.chat_panel.chatbot,
+                self._app.settings_state,
+                self.info_panel,
+                self.original_chat_history,
+                self.original_settings,
+                self.original_info_panel,
+                gr.State(getattr(flowsettings, "KH_APP_DATA_DIR", "logs")),
+            ],
+            outputs=None,
         )
 
         self.chat_control.btn_new.click(
@@ -478,3 +512,96 @@ def regen_fn(
             yield chat, refs, new_state
 
         state["app"]["regen"] = False
+
+    def backup_original_info(
+        self, chat_history, settings, info_pannel, original_chat_history
+    ):
+        original_chat_history.append(chat_history[-1])
+        return original_chat_history, settings, info_pannel
+
+    def save_log(
+        self,
+        conversation_id,
+        chat_history,
+        settings,
+        info_panel,
+        original_chat_history,
+        original_settings,
+        original_info_panel,
+        log_dir,
+    ):
+        if not Path(log_dir).exists():
+            Path(log_dir).mkdir(parents=True)
+
+        lock = FileLock(Path(log_dir) / ".lock")
+        # get current date
+        today = datetime.now()
+        formatted_date = today.strftime("%d%m%Y_%H")
+
+        with Session(engine) as session:
+            statement = select(Conversation).where(Conversation.id == conversation_id)
+            result = session.exec(statement).one()
+
+            data_source = deepcopy(result.data_source)
+            likes = data_source.get("likes", [])
+            if not likes:
+                return
+
+        feedback = likes[-1][-1]
+        message_index = likes[-1][0]
+
+        current_message = chat_history[message_index[0]]
+        original_message = original_chat_history[message_index[0]]
+        is_original = all(
+            [
+                current_item == original_item
+                for current_item, original_item in zip(
+                    current_message, original_message
+                )
+            ]
+        )
+
+        dataframe = [
+            [
+                conversation_id,
+                message_index,
+                current_message[0],
+                current_message[1],
+                chat_history,
+                settings,
+                info_panel,
+                feedback,
+                is_original,
+                original_message[1],
+                original_chat_history,
+                original_settings,
+                original_info_panel,
+            ]
+        ]
+
+        with lock:
+            log_file = Path(log_dir) / f"{formatted_date}_log.csv"
+            is_log_file_exist = log_file.is_file()
+            with open(log_file, "a") as f:
+                writer = csv.writer(f)
+                # write headers
+                if not is_log_file_exist:
+                    writer.writerow(
+                        [
+                            "Conversation ID",
+                            "Message ID",
+                            "Question",
+                            "Answer",
+                            "Chat History",
+                            "Settings",
+                            "Evidences",
+                            "Feedback",
+                            "Original/ Rewritten",
+                            "Original Answer",
+                            "Original Chat History",
+                            "Original Settings",
+                            "Original Evidences",
+                        ]
+                    )
+
+                writer.writerows(dataframe)
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 150865a81..a8259fbc2 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -585,7 +585,7 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
                 Document(
                     channel="info",
                     content=Render.collapsible(
-                        header=id2docs[id].metadata["file_name"],
+                        header=f'{id2docs[id].metadata["file_name"]}- Relevance score: {id2docs[id].metadata.get("relevance_score")}',
                         content=Render.table(text),
                         open=True,
                     ),
@@ -612,7 +612,7 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
                     Document(
                         channel="info",
                         content=Render.collapsible(
-                            header=doc.metadata["file_name"],
+                            header=f'{doc.metadata["file_name"]}- Relevance score: {doc.metadata.get("relevance_score")}',
                             content=Render.table(doc.text),
                             open=True,
                         ),

From 684f1c593bf93564610357cb7bbd0b3b53c6bd96 Mon Sep 17 00:00:00 2001
From: cin-ace <ace@cinnamon.is>
Date: Thu, 6 Jun 2024 16:37:01 +0700
Subject: [PATCH 14/56] Upgrade the confidence score appearance (#90)

* Highlight the relevance score

* Round relevance score. Get key from config instead of env

* Cohere returns all scores

* Display relevance score for image
---
 .../kotaemon/indices/rankings/cohere.py        |  9 ++++-----
 libs/ktem/ktem/reasoning/simple.py             | 18 +++++++++++++++---
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/libs/kotaemon/kotaemon/indices/rankings/cohere.py b/libs/kotaemon/kotaemon/indices/rankings/cohere.py
index a7beb61f2..e92bd8a88 100644
--- a/libs/kotaemon/kotaemon/indices/rankings/cohere.py
+++ b/libs/kotaemon/kotaemon/indices/rankings/cohere.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-import os
+from decouple import config
 
 from kotaemon.base import Document
 
@@ -9,8 +9,7 @@
 
 class CohereReranking(BaseReranking):
     model_name: str = "rerank-multilingual-v2.0"
-    cohere_api_key: str = os.environ.get("COHERE_API_KEY", "")
-    top_k: int = 1
+    cohere_api_key: str = config("COHERE_API_KEY", "")
 
     def run(self, documents: list[Document], query: str) -> list[Document]:
         """Use Cohere Reranker model to re-order documents
@@ -30,11 +29,11 @@ def run(self, documents: list[Document], query: str) -> list[Document]:
 
         _docs = [d.content for d in documents]
         response = cohere_client.rerank(
-            model=self.model_name, query=query, documents=_docs, top_n=self.top_k
+            model=self.model_name, query=query, documents=_docs
         )
         for r in response.results:
             doc = documents[r.index]
-            doc.metadata["relevance_score"] = r.relevance_score
+            doc.metadata["relevance_score"] = round(r.relevance_score, 2)
             compressed_docs.append(doc)
 
         return compressed_docs
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index a8259fbc2..6339bd62a 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -585,7 +585,11 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
                 Document(
                     channel="info",
                     content=Render.collapsible(
-                        header=f'{id2docs[id].metadata["file_name"]}- Relevance score: {id2docs[id].metadata.get("relevance_score")}',
+                        header=(
+                            f'{id2docs[id].metadata["file_name"]}<br>'
+                            "<b>Relevance score:</b>"
+                            f' {id2docs[id].metadata.get("relevance_score")}'
+                        ),
                         content=Render.table(text),
                         open=True,
                     ),
@@ -599,7 +603,11 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
                     Document(
                         channel="info",
                         content=Render.collapsible(
-                            header=doc.metadata["file_name"],
+                            header=(
+                                f'{doc.metadata["file_name"]}<br>'
+                                "<b>Relevance score:</b>"
+                                f' {doc.metadata.get("relevance_score")}'
+                            ),
                             content=Render.image(
                                 url=doc.metadata["image_origin"], text=doc.text
                             ),
@@ -612,7 +620,11 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
                     Document(
                         channel="info",
                         content=Render.collapsible(
-                            header=f'{doc.metadata["file_name"]}- Relevance score: {doc.metadata.get("relevance_score")}',
+                            header=(
+                                f'{doc.metadata["file_name"]}<br>'
+                                "<b>Relevance score:</b>"
+                                f' {doc.metadata.get("relevance_score")}'
+                            ),
                             content=Render.table(doc.text),
                             open=True,
                         ),

From ce1f9d6590eab61f46fd0a79496d684e5443d105 Mon Sep 17 00:00:00 2001
From: Linh Nguyen <70562198+linhnguyen-cinnamon@users.noreply.github.com>
Date: Fri, 7 Jun 2024 13:57:47 +0900
Subject: [PATCH 15/56] Remove columns and rows in Excel loader which contain
 all NaN (#91)

* remove columns and rows that contain only NaN values

* back to multiple joiner options

* Fix style

---------

Co-authored-by: linhnguyen-cinnamon <cinmc0019@CINMC0019-LinhNguyen.local>
Co-authored-by: trducng <trungduc1992@gmail.com>
---
 libs/kotaemon/kotaemon/loaders/excel_loader.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libs/kotaemon/kotaemon/loaders/excel_loader.py b/libs/kotaemon/kotaemon/loaders/excel_loader.py
index d903aea84..f8950cf00 100644
--- a/libs/kotaemon/kotaemon/loaders/excel_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/excel_loader.py
@@ -82,6 +82,9 @@ def load_data(
             sheet = []
             if include_sheetname:
                 sheet.append([key])
+            dfs[key] = dfs[key].dropna(axis=0, how="all")
+            dfs[key] = dfs[key].dropna(axis=0, how="all")
+            dfs[key].fillna("", inplace=True)
             sheet.extend(dfs[key].values.astype(str).tolist())
             df_sheets.append(sheet)
 

From 3b8ce6ab0dcfd61ecf19030b01c195cd752b10ca Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Fri, 7 Jun 2024 11:56:02 +0700
Subject: [PATCH 16/56] Track retriever state

---
 .../loaders/azureai_document_intelligence_loader.py   | 11 +++++++----
 libs/ktem/ktem/reasoning/simple.py                    |  7 ++++---
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py b/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
index b3585ee4c..204c786fd 100644
--- a/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
@@ -143,11 +143,14 @@ def load_data(
             page_number = figure_desc["boundingRegions"][0]["pageNumber"]
             page_width = result.pages[page_number - 1]["width"]
             page_height = result.pages[page_number - 1]["height"]
+            polygon = figure_desc["boundingRegions"][0]["polygon"]
+            xs = [polygon[i] for i in range(0, len(polygon), 2)]
+            ys = [polygon[i] for i in range(1, len(polygon), 2)]
             bbox = [
-                figure_desc["boundingRegions"][0]["polygon"][0] / page_width,
-                figure_desc["boundingRegions"][0]["polygon"][1] / page_height,
-                figure_desc["boundingRegions"][0]["polygon"][4] / page_width,
-                figure_desc["boundingRegions"][0]["polygon"][5] / page_height,
+                min(xs) / page_width,
+                min(ys) / page_height,
+                max(xs) / page_width,
+                max(ys) / page_height,
             ]
             img = crop_image(file_path, bbox, page_number - 1)
 
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 6339bd62a..7382c3fcc 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -381,7 +381,7 @@ def stream(  # type: ignore
         # retrieve the citation
         citation = None
         if evidence and self.enable_citation:
-            citation = self.citation_pipeline.invoke(
+            citation = self.citation_pipeline(
                 context=evidence, question=question
             )
 
@@ -499,8 +499,9 @@ def retrieve(
             query = message
 
         docs, doc_ids = [], []
-        for retriever in self.retrievers:
-            for doc in retriever(text=query):
+        for idx, retriever in enumerate(self.retrievers):
+            retriever_node = self._prepare_child(retriever, f"retriever_{idx}")
+            for doc in retriever_node(text=query):
                 if doc.doc_id not in doc_ids:
                     docs.append(doc)
                     doc_ids.append(doc.doc_id)

From f279447a47319d075cffc7f40f0c83e67b118437 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Sat, 8 Jun 2024 00:44:43 +0000
Subject: [PATCH 17/56] Bump llama-index version 0.10

---
 libs/kotaemon/kotaemon/base/schema.py                     | 4 ++--
 libs/kotaemon/kotaemon/indices/base.py                    | 4 ++--
 libs/kotaemon/kotaemon/indices/extractors/doc_parsers.py  | 4 ++--
 libs/kotaemon/kotaemon/indices/ingests/files.py           | 4 ++--
 libs/kotaemon/kotaemon/indices/splitters/__init__.py      | 4 ++--
 libs/kotaemon/kotaemon/loaders/adobe_loader.py            | 2 +-
 libs/kotaemon/kotaemon/loaders/base.py                    | 2 +-
 libs/kotaemon/kotaemon/loaders/composite_loader.py        | 2 +-
 libs/kotaemon/kotaemon/loaders/docx_loader.py             | 2 +-
 libs/kotaemon/kotaemon/loaders/excel_loader.py            | 2 +-
 libs/kotaemon/kotaemon/loaders/html_loader.py             | 2 +-
 libs/kotaemon/kotaemon/loaders/mathpix_loader.py          | 2 +-
 libs/kotaemon/kotaemon/loaders/ocr_loader.py              | 2 +-
 libs/kotaemon/kotaemon/loaders/unstructured_loader.py     | 2 +-
 libs/kotaemon/kotaemon/storages/vectorstores/base.py      | 8 ++++----
 libs/kotaemon/kotaemon/storages/vectorstores/in_memory.py | 4 ++--
 .../kotaemon/storages/vectorstores/simple_file.py         | 4 ++--
 libs/kotaemon/pyproject.toml                              | 2 +-
 libs/kotaemon/tests/test_reader.py                        | 2 +-
 libs/kotaemon/tests/test_splitter.py                      | 2 +-
 libs/ktem/ktem/index/file/pipelines.py                    | 8 ++++----
 libs/ktem/ktem/reasoning/simple.py                        | 4 +---
 libs/ktem/pyproject.toml                                  | 1 +
 23 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/libs/kotaemon/kotaemon/base/schema.py b/libs/kotaemon/kotaemon/base/schema.py
index a153ed30f..10fcb888e 100644
--- a/libs/kotaemon/kotaemon/base/schema.py
+++ b/libs/kotaemon/kotaemon/base/schema.py
@@ -5,8 +5,8 @@
 from langchain.schema.messages import AIMessage as LCAIMessage
 from langchain.schema.messages import HumanMessage as LCHumanMessage
 from langchain.schema.messages import SystemMessage as LCSystemMessage
-from llama_index.bridge.pydantic import Field
-from llama_index.schema import Document as BaseDocument
+from llama_index.core.bridge.pydantic import Field
+from llama_index.core.schema import Document as BaseDocument
 
 if TYPE_CHECKING:
     from haystack.schema import Document as HaystackDocument
diff --git a/libs/kotaemon/kotaemon/indices/base.py b/libs/kotaemon/kotaemon/indices/base.py
index 938be6695..a20ca6b2d 100644
--- a/libs/kotaemon/kotaemon/indices/base.py
+++ b/libs/kotaemon/kotaemon/indices/base.py
@@ -3,7 +3,7 @@
 from abc import abstractmethod
 from typing import Any, Type
 
-from llama_index.node_parser.interface import NodeParser
+from llama_index.core.node_parser.interface import NodeParser
 
 from kotaemon.base import BaseComponent, Document, RetrievedDocument
 
@@ -32,7 +32,7 @@ class LlamaIndexDocTransformerMixin:
     Example:
         class TokenSplitter(LlamaIndexMixin, BaseSplitter):
             def _get_li_class(self):
-                from llama_index.text_splitter import TokenTextSplitter
+                from llama_index.core.text_splitter import TokenTextSplitter
                 return TokenTextSplitter
 
     To use this mixin, please:
diff --git a/libs/kotaemon/kotaemon/indices/extractors/doc_parsers.py b/libs/kotaemon/kotaemon/indices/extractors/doc_parsers.py
index 7dad52801..a57743b71 100644
--- a/libs/kotaemon/kotaemon/indices/extractors/doc_parsers.py
+++ b/libs/kotaemon/kotaemon/indices/extractors/doc_parsers.py
@@ -15,7 +15,7 @@ def __init__(
         super().__init__(llm=llm, nodes=nodes, **params)
 
     def _get_li_class(self):
-        from llama_index.extractors import TitleExtractor
+        from llama_index.core.extractors import TitleExtractor
 
         return TitleExtractor
 
@@ -30,6 +30,6 @@ def __init__(
         super().__init__(llm=llm, summaries=summaries, **params)
 
     def _get_li_class(self):
-        from llama_index.extractors import SummaryExtractor
+        from llama_index.core.extractors import SummaryExtractor
 
         return SummaryExtractor
diff --git a/libs/kotaemon/kotaemon/indices/ingests/files.py b/libs/kotaemon/kotaemon/indices/ingests/files.py
index 3eb536110..eaf723cbf 100644
--- a/libs/kotaemon/kotaemon/indices/ingests/files.py
+++ b/libs/kotaemon/kotaemon/indices/ingests/files.py
@@ -1,8 +1,8 @@
 from pathlib import Path
 from typing import Type
 
-from llama_index.readers import PDFReader
-from llama_index.readers.base import BaseReader
+from llama_index.core.readers.base import BaseReader
+from llama_index.readers.file import PDFReader
 
 from kotaemon.base import BaseComponent, Document, Param
 from kotaemon.indices.extractors import BaseDocParser
diff --git a/libs/kotaemon/kotaemon/indices/splitters/__init__.py b/libs/kotaemon/kotaemon/indices/splitters/__init__.py
index 16a31fe2c..d9eb65f62 100644
--- a/libs/kotaemon/kotaemon/indices/splitters/__init__.py
+++ b/libs/kotaemon/kotaemon/indices/splitters/__init__.py
@@ -23,7 +23,7 @@ def __init__(
         )
 
     def _get_li_class(self):
-        from llama_index.text_splitter import TokenTextSplitter
+        from llama_index.core.text_splitter import TokenTextSplitter
 
         return TokenTextSplitter
 
@@ -44,6 +44,6 @@ def __init__(
         )
 
     def _get_li_class(self):
-        from llama_index.node_parser import SentenceWindowNodeParser
+        from llama_index.core.node_parser import SentenceWindowNodeParser
 
         return SentenceWindowNodeParser
diff --git a/libs/kotaemon/kotaemon/loaders/adobe_loader.py b/libs/kotaemon/kotaemon/loaders/adobe_loader.py
index 09a802c37..9ea278f92 100644
--- a/libs/kotaemon/kotaemon/loaders/adobe_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/adobe_loader.py
@@ -6,7 +6,7 @@
 from typing import Any, Dict, List, Optional
 
 from decouple import config
-from llama_index.readers.base import BaseReader
+from llama_index.core.readers.base import BaseReader
 
 from kotaemon.base import Document
 
diff --git a/libs/kotaemon/kotaemon/loaders/base.py b/libs/kotaemon/kotaemon/loaders/base.py
index 52bef490f..eff70c8be 100644
--- a/libs/kotaemon/kotaemon/loaders/base.py
+++ b/libs/kotaemon/kotaemon/loaders/base.py
@@ -4,7 +4,7 @@
 from kotaemon.base import BaseComponent, Document
 
 if TYPE_CHECKING:
-    from llama_index.readers.base import BaseReader as LIBaseReader
+    from llama_index.core.readers.base import BaseReader as LIBaseReader
 
 
 class BaseReader(BaseComponent):
diff --git a/libs/kotaemon/kotaemon/loaders/composite_loader.py b/libs/kotaemon/kotaemon/loaders/composite_loader.py
index 9d35e26e9..4090aef95 100644
--- a/libs/kotaemon/kotaemon/loaders/composite_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/composite_loader.py
@@ -1,6 +1,6 @@
 from typing import Callable, List, Optional, Type
 
-from llama_index.readers.base import BaseReader as LIBaseReader
+from llama_index.core.readers.base import BaseReader as LIBaseReader
 
 from .base import BaseReader, LIReaderMixin
 
diff --git a/libs/kotaemon/kotaemon/loaders/docx_loader.py b/libs/kotaemon/kotaemon/loaders/docx_loader.py
index dcec53984..ef0ae6527 100644
--- a/libs/kotaemon/kotaemon/loaders/docx_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/docx_loader.py
@@ -3,7 +3,7 @@
 from typing import List, Optional
 
 import pandas as pd
-from llama_index.readers.base import BaseReader
+from llama_index.core.readers.base import BaseReader
 
 from kotaemon.base import Document
 
diff --git a/libs/kotaemon/kotaemon/loaders/excel_loader.py b/libs/kotaemon/kotaemon/loaders/excel_loader.py
index f8950cf00..83909299d 100644
--- a/libs/kotaemon/kotaemon/loaders/excel_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/excel_loader.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 from typing import Any, List, Optional, Union
 
-from llama_index.readers.base import BaseReader
+from llama_index.core.readers.base import BaseReader
 
 from kotaemon.base import Document
 
diff --git a/libs/kotaemon/kotaemon/loaders/html_loader.py b/libs/kotaemon/kotaemon/loaders/html_loader.py
index c939c8a60..552c29255 100644
--- a/libs/kotaemon/kotaemon/loaders/html_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/html_loader.py
@@ -2,7 +2,7 @@
 from pathlib import Path
 from typing import Optional
 
-from llama_index.readers.base import BaseReader
+from llama_index.core.readers.base import BaseReader
 
 from kotaemon.base import Document
 
diff --git a/libs/kotaemon/kotaemon/loaders/mathpix_loader.py b/libs/kotaemon/kotaemon/loaders/mathpix_loader.py
index 19f1001d4..d07b06941 100644
--- a/libs/kotaemon/kotaemon/loaders/mathpix_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/mathpix_loader.py
@@ -6,7 +6,7 @@
 
 import requests
 from langchain.utils import get_from_dict_or_env
-from llama_index.readers.base import BaseReader
+from llama_index.core.readers.base import BaseReader
 
 from kotaemon.base import Document
 
diff --git a/libs/kotaemon/kotaemon/loaders/ocr_loader.py b/libs/kotaemon/kotaemon/loaders/ocr_loader.py
index bb1ac5dca..e67f0a51f 100644
--- a/libs/kotaemon/kotaemon/loaders/ocr_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/ocr_loader.py
@@ -5,7 +5,7 @@
 from uuid import uuid4
 
 import requests
-from llama_index.readers.base import BaseReader
+from llama_index.core.readers.base import BaseReader
 from tenacity import after_log, retry, stop_after_attempt, wait_fixed, wait_random
 
 from kotaemon.base import Document
diff --git a/libs/kotaemon/kotaemon/loaders/unstructured_loader.py b/libs/kotaemon/kotaemon/loaders/unstructured_loader.py
index 82f3255ad..e8f8e30a1 100644
--- a/libs/kotaemon/kotaemon/loaders/unstructured_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/unstructured_loader.py
@@ -12,7 +12,7 @@
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 
-from llama_index.readers.base import BaseReader
+from llama_index.core.readers.base import BaseReader
 
 from kotaemon.base import Document
 
diff --git a/libs/kotaemon/kotaemon/storages/vectorstores/base.py b/libs/kotaemon/kotaemon/storages/vectorstores/base.py
index 0e762a9ee..e6f25187b 100644
--- a/libs/kotaemon/kotaemon/storages/vectorstores/base.py
+++ b/libs/kotaemon/kotaemon/storages/vectorstores/base.py
@@ -3,10 +3,10 @@
 from abc import ABC, abstractmethod
 from typing import Any, Optional
 
-from llama_index.schema import NodeRelationship, RelatedNodeInfo
-from llama_index.vector_stores.types import BasePydanticVectorStore
-from llama_index.vector_stores.types import VectorStore as LIVectorStore
-from llama_index.vector_stores.types import VectorStoreQuery
+from llama_index.core.schema import NodeRelationship, RelatedNodeInfo
+from llama_index.core.vector_stores.types import BasePydanticVectorStore
+from llama_index.core.vector_stores.types import VectorStore as LIVectorStore
+from llama_index.core.vector_stores.types import VectorStoreQuery
 
 from kotaemon.base import DocumentWithEmbedding
 
diff --git a/libs/kotaemon/kotaemon/storages/vectorstores/in_memory.py b/libs/kotaemon/kotaemon/storages/vectorstores/in_memory.py
index a8ba7e187..9e01766bf 100644
--- a/libs/kotaemon/kotaemon/storages/vectorstores/in_memory.py
+++ b/libs/kotaemon/kotaemon/storages/vectorstores/in_memory.py
@@ -2,8 +2,8 @@
 from typing import Any, Optional, Type
 
 import fsspec
-from llama_index.vector_stores import SimpleVectorStore as LISimpleVectorStore
-from llama_index.vector_stores.simple import SimpleVectorStoreData
+from llama_index.core.vector_stores import SimpleVectorStore as LISimpleVectorStore
+from llama_index.core.vector_stores.simple import SimpleVectorStoreData
 
 from .base import LlamaIndexVectorStore
 
diff --git a/libs/kotaemon/kotaemon/storages/vectorstores/simple_file.py b/libs/kotaemon/kotaemon/storages/vectorstores/simple_file.py
index 043c66e2a..b96b67c0e 100644
--- a/libs/kotaemon/kotaemon/storages/vectorstores/simple_file.py
+++ b/libs/kotaemon/kotaemon/storages/vectorstores/simple_file.py
@@ -3,8 +3,8 @@
 from typing import Any, Optional, Type
 
 import fsspec
-from llama_index.vector_stores import SimpleVectorStore as LISimpleVectorStore
-from llama_index.vector_stores.simple import SimpleVectorStoreData
+from llama_index.core.vector_stores import SimpleVectorStore as LISimpleVectorStore
+from llama_index.core.vector_stores.simple import SimpleVectorStoreData
 
 from kotaemon.base import DocumentWithEmbedding
 
diff --git a/libs/kotaemon/pyproject.toml b/libs/kotaemon/pyproject.toml
index 36de876ea..6f48079fb 100644
--- a/libs/kotaemon/pyproject.toml
+++ b/libs/kotaemon/pyproject.toml
@@ -26,7 +26,7 @@ dependencies = [
     "langchain-openai>=0.1.4,<0.2.0",
     "openai>=1.23.6,<2",
     "theflow>=0.8.6,<0.9.0",
-    "llama-index==0.9.48",
+    "llama-index>=0.10.40,<0.11.0",
     "llama-hub>=0.0.79,<0.1.0",
     "gradio>=4.31.0,<5",
     "openpyxl>=3.1.2,<3.2",
diff --git a/libs/kotaemon/tests/test_reader.py b/libs/kotaemon/tests/test_reader.py
index 0cdfa5180..24c2dc0e8 100644
--- a/libs/kotaemon/tests/test_reader.py
+++ b/libs/kotaemon/tests/test_reader.py
@@ -2,7 +2,7 @@
 from unittest.mock import patch
 
 from langchain.schema import Document as LangchainDocument
-from llama_index.node_parser import SimpleNodeParser
+from llama_index.core.node_parser import SimpleNodeParser
 
 from kotaemon.base import Document
 from kotaemon.loaders import (
diff --git a/libs/kotaemon/tests/test_splitter.py b/libs/kotaemon/tests/test_splitter.py
index 71e63ee91..75e9bc3e2 100644
--- a/libs/kotaemon/tests/test_splitter.py
+++ b/libs/kotaemon/tests/test_splitter.py
@@ -1,4 +1,4 @@
-from llama_index.schema import NodeRelationship
+from llama_index.core.schema import NodeRelationship
 
 from kotaemon.base import Document
 from kotaemon.indices.splitters import TokenSplitter
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 9a75bf755..59f638d50 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -12,15 +12,15 @@
 from ktem.db.models import engine
 from ktem.embeddings.manager import embedding_models_manager
 from ktem.llms.manager import llms
-from llama_index.readers.base import BaseReader
-from llama_index.readers.file.base import default_file_metadata_func
-from llama_index.vector_stores import (
+from llama_index.core.readers.base import BaseReader
+from llama_index.core.readers.file.base import default_file_metadata_func
+from llama_index.core.vector_stores import (
     FilterCondition,
     FilterOperator,
     MetadataFilter,
     MetadataFilters,
 )
-from llama_index.vector_stores.types import VectorStoreQueryMode
+from llama_index.core.vector_stores.types import VectorStoreQueryMode
 from sqlalchemy import delete, select
 from sqlalchemy.orm import Session
 from theflow.settings import settings
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 7382c3fcc..436e4b6e3 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -381,9 +381,7 @@ def stream(  # type: ignore
         # retrieve the citation
         citation = None
         if evidence and self.enable_citation:
-            citation = self.citation_pipeline(
-                context=evidence, question=question
-            )
+            citation = self.citation_pipeline(context=evidence, question=question)
 
         answer = Document(text=output, metadata={"citation": citation})
 
diff --git a/libs/ktem/pyproject.toml b/libs/ktem/pyproject.toml
index a995495c2..b2094c6b4 100644
--- a/libs/ktem/pyproject.toml
+++ b/libs/ktem/pyproject.toml
@@ -20,6 +20,7 @@ requires-python = ">= 3.10"
 description = "RAG-based Question and Answering Application"
 dependencies = [
     "click>=8.1.7,<9",
+    "llama-index-vector-stores-chroma>=0.1.9",
     "platformdirs>=4.2.1,<5",
     "pluggy>=1.5.0,<2",
     "python-decouple>=3.8,<4",

From baabcbcbdc0f6e348bf999d0af825b75fe673af2 Mon Sep 17 00:00:00 2001
From: cin-jacky <101088014+jacky0218@users.noreply.github.com>
Date: Wed, 12 Jun 2024 12:33:47 +0800
Subject: [PATCH 18/56] feat/save-azuredi-mhtml-to-markdown (#93)

* feat/save-azuredi-mhtml-to-markdown

* fix: replace os.path with pathlib and use theflow.settings

* refactor: base on pre-commit

* chore: move the func of saving content markdown above removed_spans

---------

Co-authored-by: jacky0218 <jacky0218@github.com>
---
 libs/kotaemon/kotaemon/indices/vectorindex.py | 10 +++
 .../azureai_document_intelligence_loader.py   |  9 +++
 libs/kotaemon/kotaemon/loaders/html_loader.py | 13 ++++
 libs/ktem/ktem/index/file/ui.py               | 72 +++++++++++++++++++
 4 files changed, 104 insertions(+)

diff --git a/libs/kotaemon/kotaemon/indices/vectorindex.py b/libs/kotaemon/kotaemon/indices/vectorindex.py
index 7eddc8700..80534151c 100644
--- a/libs/kotaemon/kotaemon/indices/vectorindex.py
+++ b/libs/kotaemon/kotaemon/indices/vectorindex.py
@@ -1,8 +1,11 @@
 from __future__ import annotations
 
 import uuid
+from pathlib import Path
 from typing import Optional, Sequence, cast
 
+from theflow.settings import settings as flowsettings
+
 from kotaemon.base import BaseComponent, Document, RetrievedDocument
 from kotaemon.embeddings import BaseEmbeddings
 from kotaemon.storages import BaseDocumentStore, BaseVectorStore
@@ -23,6 +26,7 @@ class VectorIndexing(BaseIndexing):
         - List of texts
     """
 
+    cache_dir: Optional[str] = getattr(flowsettings, "KH_CHUNKS_OUTPUT_DIR", None)
     vector_store: BaseVectorStore
     doc_store: Optional[BaseDocumentStore] = None
     embedding: BaseEmbeddings
@@ -69,6 +73,12 @@ def run(self, text: str | list[str] | Document | list[Document]):
         if self.doc_store:
             print("Adding documents to doc store")
             self.doc_store.add(input_)
+        # save the chunks content into markdown format
+        if self.cache_dir is not None:
+            file_name = Path(input_[0].metadata["file_name"])
+            for i in range(len(input_)):
+                with open(Path(self.cache_dir) / f"{file_name.stem}_{i}.md", "w") as f:
+                    f.write(input_[i].text)
 
 
 class VectorRetrieval(BaseRetrieval):
diff --git a/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py b/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
index 204c786fd..85ecf1460 100644
--- a/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
@@ -95,6 +95,10 @@ class AzureAIDocumentIntelligenceLoader(BaseReader):
             "location to extract figures."
         ),
     )
+    cache_dir: str = Param(
+        None,
+        help="Directory to cache the downloaded files. Default is None",
+    )
 
     @Param.auto(depends_on=["endpoint", "credential"])
     def client_(self):
@@ -118,6 +122,7 @@ def load_data(
     ) -> list[Document]:
         """Extract the input file, allowing multi-modal extraction"""
         metadata = extra_info or {}
+        file_name = Path(file_path)
         with open(file_path, "rb") as fi:
             poller = self.client_.begin_analyze_document(
                 self.model,
@@ -211,6 +216,10 @@ def load_data(
                 )
             )
             removed_spans += table_desc["spans"]
+        # save the text content into markdown format
+        if self.cache_dir is not None:
+            with open(Path(self.cache_dir) / f"{file_name.stem}.md", "w") as f:
+                f.write(text_content)
 
         removed_spans = sorted(removed_spans, key=lambda x: x["offset"], reverse=True)
         for span in removed_spans:
diff --git a/libs/kotaemon/kotaemon/loaders/html_loader.py b/libs/kotaemon/kotaemon/loaders/html_loader.py
index 552c29255..555efe50f 100644
--- a/libs/kotaemon/kotaemon/loaders/html_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/html_loader.py
@@ -3,6 +3,7 @@
 from typing import Optional
 
 from llama_index.core.readers.base import BaseReader
+from theflow.settings import settings as flowsettings
 
 from kotaemon.base import Document
 
@@ -78,6 +79,9 @@ class MhtmlReader(BaseReader):
 
     def __init__(
         self,
+        cache_dir: Optional[str] = getattr(
+            flowsettings, "KH_MARKDOWN_OUTPUT_DIR", None
+        ),
         open_encoding: Optional[str] = None,
         bs_kwargs: Optional[dict] = None,
         get_text_separator: str = "",
@@ -86,6 +90,7 @@ def __init__(
         to pass to the BeautifulSoup object.
 
         Args:
+            cache_dir: Directory where the page is saved in markdown format.
             file_path: Path to file to load.
             open_encoding: The encoding to use when opening the file.
             bs_kwargs: Any kwargs to pass to the BeautifulSoup object.
@@ -100,6 +105,7 @@ def __init__(
                 "`pip install beautifulsoup4`"
             )
 
+        self.cache_dir = cache_dir
         self.open_encoding = open_encoding
         if bs_kwargs is None:
             bs_kwargs = {"features": "lxml"}
@@ -116,6 +122,7 @@ def load_data(
         extra_info = extra_info or {}
         metadata: dict = extra_info
         page = []
+        file_name = Path(file_path)
         with open(file_path, "r", encoding=self.open_encoding) as f:
             message = email.message_from_string(f.read())
             parts = message.get_payload()
@@ -144,5 +151,11 @@ def load_data(
                     text = "\n\n".join(lines)
                     if text:
                         page.append(text)
+        # save the page into markdown format
+        print(self.cache_dir)
+        if self.cache_dir is not None:
+            print(Path(self.cache_dir) / f"{file_name.stem}.md")
+            with open(Path(self.cache_dir) / f"{file_name.stem}.md", "w") as f:
+                f.write(page[0])
 
         return [Document(text="\n\n".join(page), metadata=metadata)]
diff --git a/libs/ktem/ktem/index/file/ui.py b/libs/ktem/ktem/index/file/ui.py
index 9ff1cfb83..16cadb788 100644
--- a/libs/ktem/ktem/index/file/ui.py
+++ b/libs/ktem/ktem/index/file/ui.py
@@ -1,5 +1,6 @@
 import os
 import tempfile
+import zipfile
 from pathlib import Path
 from typing import Generator
 
@@ -12,6 +13,7 @@
 from ktem.utils.render import Render
 from sqlalchemy import select
 from sqlalchemy.orm import Session
+from theflow.settings import settings as flowsettings
 
 
 class File(gr.File):
@@ -167,6 +169,15 @@ def on_building_ui(self):
                         variant="stop",
                         visible=False,
                     )
+                with gr.Row():
+                    self.download_all_button = gr.DownloadButton(
+                        "Download all files",
+                        visible=True,
+                    )
+                    self.download_single_button = gr.DownloadButton(
+                        "Download file",
+                        visible=False,
+                    )
 
     def on_subscribe_public_events(self):
         """Subscribe to the declared public event of the app"""
@@ -229,6 +240,7 @@ def file_selected(self, file_id):
             gr.update(value="".join(chunks), visible=file_id is not None),
             gr.update(visible=file_id is not None),
             gr.update(visible=file_id is not None),
+            gr.update(visible=file_id is not None),
         )
 
     def delete_event(self, file_id):
@@ -268,6 +280,49 @@ def delete_no_event(self):
             gr.update(visible=False),
         )
 
+    def download_single_file(self, file_id):
+        with Session(engine) as session:
+            source = session.execute(
+                select(self._index._resources["Source"]).where(
+                    self._index._resources["Source"].id == file_id
+                )
+            ).first()
+        if source:
+            target_file_name = Path(source[0].name)
+        zip_files = []
+        for file_name in os.listdir(flowsettings.KH_CHUNKS_OUTPUT_DIR):
+            if target_file_name.stem in file_name:
+                zip_files.append(
+                    os.path.join(flowsettings.KH_CHUNKS_OUTPUT_DIR, file_name)
+                )
+        for file_name in os.listdir(flowsettings.KH_MARKDOWN_OUTPUT_DIR):
+            if target_file_name.stem in file_name:
+                zip_files.append(
+                    os.path.join(flowsettings.KH_MARKDOWN_OUTPUT_DIR, file_name)
+                )
+        zip_file_path = os.path.join(
+            flowsettings.KH_ZIP_OUTPUT_DIR, target_file_name.stem
+        )
+        with zipfile.ZipFile(f"{zip_file_path}.zip", "w") as zipMe:
+            for file in zip_files:
+                zipMe.write(file, arcname=os.path.basename(file))
+        return gr.DownloadButton(label="Download pressed", value=f"{zip_file_path}.zip")
+
+    def download_all_files(self):
+        zip_files = []
+        for file_name in os.listdir(flowsettings.KH_CHUNKS_OUTPUT_DIR):
+            zip_files.append(os.path.join(flowsettings.KH_CHUNKS_OUTPUT_DIR, file_name))
+        for file_name in os.listdir(flowsettings.KH_MARKDOWN_OUTPUT_DIR):
+            zip_files.append(
+                os.path.join(flowsettings.KH_MARKDOWN_OUTPUT_DIR, file_name)
+            )
+        zip_file_path = os.path.join(flowsettings.KH_ZIP_OUTPUT_DIR, "all")
+        with zipfile.ZipFile(f"{zip_file_path}.zip", "w") as zipMe:
+            for file in zip_files:
+                arcname = Path(file)
+                zipMe.write(file, arcname=arcname.name)
+        return gr.DownloadButton(label="Download pressed", value=f"{zip_file_path}.zip")
+
     def on_register_events(self):
         """Register all events to the app"""
         onDeleted = (
@@ -294,6 +349,7 @@ def on_register_events(self):
                     self.chunks,
                     self.deselect_button,
                     self.delete_button,
+                    self.download_single_button,
                 ],
                 show_progress="hidden",
             )
@@ -313,10 +369,25 @@ def on_register_events(self):
                 self.chunks,
                 self.deselect_button,
                 self.delete_button,
+                self.download_single_button,
             ],
             show_progress="hidden",
         )
 
+        self.download_all_button.click(
+            fn=self.download_all_files,
+            inputs=[],
+            outputs=self.download_all_button,
+            show_progress="hidden",
+        )
+
+        self.download_single_button.click(
+            fn=self.download_single_file,
+            inputs=[self.selected_file_id],
+            outputs=self.download_single_button,
+            show_progress="hidden",
+        )
+
         onUploaded = self.upload_button.click(
             fn=lambda: gr.update(visible=True),
             outputs=[self.upload_progress_panel],
@@ -363,6 +434,7 @@ def on_register_events(self):
                 self.chunks,
                 self.deselect_button,
                 self.delete_button,
+                self.download_single_button,
             ],
             show_progress="hidden",
         )

From e5e85ea3832da121e4cd72c570fbfc747b5224d3 Mon Sep 17 00:00:00 2001
From: cin-jacky <101088014+jacky0218@users.noreply.github.com>
Date: Wed, 12 Jun 2024 17:19:04 +0800
Subject: [PATCH 19/56] fix: losing first chunk (#94)

* fix: losing first chunk.

* fix: update the method of preventing losing chunks

---------

Co-authored-by: jacky0218 <jacky0218@github.com>
---
 libs/kotaemon/kotaemon/indices/vectorindex.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/libs/kotaemon/kotaemon/indices/vectorindex.py b/libs/kotaemon/kotaemon/indices/vectorindex.py
index 80534151c..c191ae78b 100644
--- a/libs/kotaemon/kotaemon/indices/vectorindex.py
+++ b/libs/kotaemon/kotaemon/indices/vectorindex.py
@@ -30,6 +30,7 @@ class VectorIndexing(BaseIndexing):
     vector_store: BaseVectorStore
     doc_store: Optional[BaseDocumentStore] = None
     embedding: BaseEmbeddings
+    count_: int = 0
 
     def to_retrieval_pipeline(self, *args, **kwargs):
         """Convert the indexing pipeline to a retrieval pipeline"""
@@ -74,11 +75,15 @@ def run(self, text: str | list[str] | Document | list[Document]):
             print("Adding documents to doc store")
             self.doc_store.add(input_)
         # save the chunks content into markdown format
-        if self.cache_dir is not None:
+        if self.cache_dir:
             file_name = Path(input_[0].metadata["file_name"])
             for i in range(len(input_)):
-                with open(Path(self.cache_dir) / f"{file_name.stem}_{i}.md", "w") as f:
+                print(Path(self.cache_dir) / f"{file_name.stem}_{self.count_+i}.md")
+                with open(
+                    Path(self.cache_dir) / f"{file_name.stem}_{self.count_+i}.md", "w"
+                ) as f:
                     f.write(input_[i].text)
+            self.count_ += len(input_)
 
 
 class VectorRetrieval(BaseRetrieval):

From fd1e5acacad530ef1f31cd59e521adc48e1f4e3a Mon Sep 17 00:00:00 2001
From: cin-jacky <101088014+jacky0218@users.noreply.github.com>
Date: Fri, 14 Jun 2024 06:52:38 +0800
Subject: [PATCH 20/56] fix: adding the base64 image in markdown (#95)

---
 libs/kotaemon/kotaemon/indices/vectorindex.py | 21 +++++++++++++++++--
 libs/kotaemon/kotaemon/loaders/utils/gpt4v.py |  3 ++-
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/libs/kotaemon/kotaemon/indices/vectorindex.py b/libs/kotaemon/kotaemon/indices/vectorindex.py
index c191ae78b..457d42558 100644
--- a/libs/kotaemon/kotaemon/indices/vectorindex.py
+++ b/libs/kotaemon/kotaemon/indices/vectorindex.py
@@ -78,11 +78,28 @@ def run(self, text: str | list[str] | Document | list[Document]):
         if self.cache_dir:
             file_name = Path(input_[0].metadata["file_name"])
             for i in range(len(input_)):
-                print(Path(self.cache_dir) / f"{file_name.stem}_{self.count_+i}.md")
+                markdown_content = ""
+                if "page_label" in input_[i].metadata:
+                    page_label = str(input_[i].metadata["page_label"])
+                    markdown_content += f"Page label: {page_label}"
+                if "file_name" in input_[i].metadata:
+                    filename = input_[i].metadata["file_name"]
+                    markdown_content += f"\nFile name: {filename}"
+                if "section" in input_[i].metadata:
+                    section = input_[i].metadata["section"]
+                    markdown_content += f"\nSection: {section}"
+                if "type" in input_[i].metadata:
+                    if input_[i].metadata["type"] == "image":
+                        image_origin = input_[i].metadata["image_origin"]
+                        image_origin = f'<p><img src="{image_origin}"></p>'
+                        markdown_content += f"\nImage origin: {image_origin}"
+                if input_[i].text:
+                    markdown_content += f"\ntext:\n{input_[i].text}"
+
                 with open(
                     Path(self.cache_dir) / f"{file_name.stem}_{self.count_+i}.md", "w"
                 ) as f:
-                    f.write(input_[i].text)
+                    f.write(markdown_content)
             self.count_ += len(input_)
 
 
diff --git a/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py b/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
index 1e219d660..214774253 100644
--- a/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
+++ b/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
@@ -38,7 +38,8 @@ def generate_gpt4v(
         response = requests.post(endpoint, headers=headers, json=payload)
         output = response.json()
         output = output["choices"][0]["message"]["content"]
-    except Exception:
+    except Exception as e:
+        print(e)
         output = ""
     return output
 

From 7dd26f5e60724eca081f73ff20d24a3734b67328 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Fri, 14 Jun 2024 05:56:06 +0700
Subject: [PATCH 21/56] feat: more chunk info on UI

---
 libs/kotaemon/kotaemon/loaders/utils/gpt4v.py |  8 ++++++--
 libs/ktem/ktem/index/file/ui.py               |  9 ++++++++-
 libs/ktem/ktem/reasoning/simple.py            | 20 ++++++++++++++-----
 3 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py b/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
index 214774253..a260078c3 100644
--- a/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
+++ b/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
@@ -1,9 +1,12 @@
 import json
+import logging
 from typing import Any, List
 
 import requests
 from decouple import config
 
+logger = logging.getLogger(__name__)
+
 
 def generate_gpt4v(
     endpoint: str, images: str | List[str], prompt: str, max_tokens: int = 512
@@ -39,7 +42,7 @@ def generate_gpt4v(
         output = response.json()
         output = output["choices"][0]["message"]["content"]
     except Exception as e:
-        print(e)
+        logger.error(f"Error generating gpt4v {e}")
         output = ""
     return output
 
@@ -92,6 +95,7 @@ def stream_gpt4v(
                 if len(line["choices"]):
                     output += line["choices"][0]["delta"].get("content", "")
                     yield line["choices"][0]["delta"].get("content", "")
-    except Exception:
+    except Exception as e:
+        logger.error(f"Error streaming gpt4v {e}")
         output = ""
     return output
diff --git a/libs/ktem/ktem/index/file/ui.py b/libs/ktem/ktem/index/file/ui.py
index 16cadb788..8e41329ed 100644
--- a/libs/ktem/ktem/index/file/ui.py
+++ b/libs/ktem/ktem/index/file/ui.py
@@ -216,6 +216,9 @@ def file_selected(self, file_id):
                 )
                 doc_ids = [doc.target_id for (doc,) in matches]
                 docs = self._index._docstore.get(doc_ids)
+                docs = sorted(
+                    docs, key=lambda x: x.metadata.get("page_label", float("inf"))
+                )
 
                 for idx, doc in enumerate(docs):
                     title = f"{doc.text[:50]}..." if len(doc.text) > 50 else doc.text
@@ -230,9 +233,13 @@ def file_selected(self, file_id):
                             url=doc.metadata.get("image_origin", ""), text=doc.text
                         )
 
+                    header_prefix = f"[{idx+1}/{len(docs)}]"
+                    if doc.metadata.get("page_label"):
+                        header_prefix += f" [Page {doc.metadata['page_label']}]"
+
                     chunks.append(
                         Render.collapsible(
-                            header=f"[{idx+1}/{len(docs)}] {title}",
+                            header=f"{header_prefix} {title}",
                             content=content,
                         )
                     )
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 436e4b6e3..885ddd08a 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -35,6 +35,16 @@
 EVIDENCE_MODE_FIGURE = 3
 
 
+def get_header(doc: Document):
+    """Get the header for the document"""
+    header = ""
+    if "page_label" in doc.metadata:
+        header += f" [Page {doc.metadata['page_label']}]"
+
+    header += f" {doc.metadata.get('file_name', '<Unknown>')}"
+    return header.strip()
+
+
 class PrepareEvidencePipeline(BaseComponent):
     """Prepare the evidence text from the list of retrieved documents
 
@@ -511,7 +521,7 @@ def retrieve(
                     Document(
                         channel="info",
                         content=Render.collapsible(
-                            header=doc.metadata["file_name"],
+                            header=get_header(doc),
                             content=Render.image(
                                 url=doc.metadata["image_origin"], text=doc.text
                             ),
@@ -524,7 +534,7 @@ def retrieve(
                     Document(
                         channel="info",
                         content=Render.collapsible(
-                            header=doc.metadata["file_name"],
+                            header=get_header(doc),
                             content=Render.table(doc.text),
                             open=True,
                         ),
@@ -585,7 +595,7 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
                     channel="info",
                     content=Render.collapsible(
                         header=(
-                            f'{id2docs[id].metadata["file_name"]}<br>'
+                            f"{get_header(id2docs[id])}<br>"
                             "<b>Relevance score:</b>"
                             f' {id2docs[id].metadata.get("relevance_score")}'
                         ),
@@ -603,7 +613,7 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
                         channel="info",
                         content=Render.collapsible(
                             header=(
-                                f'{doc.metadata["file_name"]}<br>'
+                                f"{get_header(doc)}<br>"
                                 "<b>Relevance score:</b>"
                                 f' {doc.metadata.get("relevance_score")}'
                             ),
@@ -620,7 +630,7 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
                         channel="info",
                         content=Render.collapsible(
                             header=(
-                                f'{doc.metadata["file_name"]}<br>'
+                                f"{get_header(doc)}<br>"
                                 "<b>Relevance score:</b>"
                                 f' {doc.metadata.get("relevance_score")}'
                             ),

From bf61f1b12a88900da68360496f7040dd2b2ce936 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Fri, 14 Jun 2024 06:40:39 +0700
Subject: [PATCH 22/56] fix: error when reindexing files

---
 libs/ktem/ktem/index/file/pipelines.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 59f638d50..79442e5e1 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -426,8 +426,11 @@ def delete_file(self, file_id: str):
                     ds_ids.append(each[0].target_id)
                 session.delete(each[0])
             session.commit()
-        self.VS.delete(vs_ids)
-        self.DS.delete(ds_ids)
+
+        if vs_ids:
+            self.VS.delete(vs_ids)
+        if ds_ids:
+            self.DS.delete(ds_ids)
 
     def run(self, file_path: str | Path, reindex: bool, **kwargs) -> str:
         """Index the file and return the file id"""

From 761873872bbc762bd96d5a60bf0699a2ba04400c Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Sun, 16 Jun 2024 10:15:24 +0700
Subject: [PATCH 23/56] refactor: allow more information exception trace when
 using gpt4v

---
 libs/kotaemon/kotaemon/loaders/utils/gpt4v.py | 13 ++++++++-----
 libs/ktem/ktem/index/file/pipelines.py        |  2 +-
 libs/ktem/ktem/pages/chat/control.py          |  1 +
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py b/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
index a260078c3..535dc5dbf 100644
--- a/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
+++ b/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
@@ -37,13 +37,16 @@ def generate_gpt4v(
         "max_tokens": max_tokens,
     }
 
+    response = requests.post(endpoint, headers=headers, json=payload)
+
     try:
-        response = requests.post(endpoint, headers=headers, json=payload)
-        output = response.json()
-        output = output["choices"][0]["message"]["content"]
+        response.raise_for_status()
     except Exception as e:
-        logger.error(f"Error generating gpt4v {e}")
-        output = ""
+        logger.exception(f"Error generating gpt4v: {response.text}; error {e}")
+        return ""
+
+    output = response.json()
+    output = output["choices"][0]["message"]["content"]
     return output
 
 
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 79442e5e1..6669d0e13 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -607,7 +607,7 @@ def stream(
                     channel="index",
                 )
             except Exception as e:
-                logger.error(e)
+                logger.exception(e)
                 file_ids.append(None)
                 errors.append(str(e))
                 yield Document(
diff --git a/libs/ktem/ktem/pages/chat/control.py b/libs/ktem/ktem/pages/chat/control.py
index 5e369b53f..fc9a84502 100644
--- a/libs/ktem/ktem/pages/chat/control.py
+++ b/libs/ktem/ktem/pages/chat/control.py
@@ -185,6 +185,7 @@ def rename_conv(self, conversation_id, new_name, user_id):
             session.commit()
 
         history = self.load_chat_history(user_id)
+        gr.Info("Conversation renamed.")
         return gr.update(choices=history), conversation_id
 
     def _on_app_created(self):

From 8ac48f6d9a4a63097b1c86f028f12523a36c5891 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Sun, 16 Jun 2024 10:16:31 +0700
Subject: [PATCH 24/56] feat: add excel reader that treats each worksheet as a
 document

---
 libs/kotaemon/kotaemon/loaders/__init__.py    |  3 +-
 .../kotaemon/kotaemon/loaders/excel_loader.py | 88 +++++++++++++++++++
 2 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/libs/kotaemon/kotaemon/loaders/__init__.py b/libs/kotaemon/kotaemon/loaders/__init__.py
index 134012640..705c2720e 100644
--- a/libs/kotaemon/kotaemon/loaders/__init__.py
+++ b/libs/kotaemon/kotaemon/loaders/__init__.py
@@ -3,7 +3,7 @@
 from .base import AutoReader, BaseReader
 from .composite_loader import DirectoryReader
 from .docx_loader import DocxReader
-from .excel_loader import PandasExcelReader
+from .excel_loader import ExcelReader, PandasExcelReader
 from .html_loader import HtmlReader, MhtmlReader
 from .mathpix_loader import MathpixPDFReader
 from .ocr_loader import ImageReader, OCRReader
@@ -15,6 +15,7 @@
     "AzureAIDocumentIntelligenceLoader",
     "BaseReader",
     "PandasExcelReader",
+    "ExcelReader",
     "MathpixPDFReader",
     "ImageReader",
     "OCRReader",
diff --git a/libs/kotaemon/kotaemon/loaders/excel_loader.py b/libs/kotaemon/kotaemon/loaders/excel_loader.py
index 83909299d..67d00435e 100644
--- a/libs/kotaemon/kotaemon/loaders/excel_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/excel_loader.py
@@ -102,3 +102,91 @@ def load_data(
         ]
 
         return output
+
+
+class ExcelReader(BaseReader):
+    r"""Spreadsheet exporter respecting multiple worksheets
+
+    Reads worksheets from an Excel file using the Pandas `read_excel` function.
+    If special parameters are required, use the `pandas_config` dict.
+
+    Args:
+
+        pandas_config (dict): Options for the `pandas.read_excel` function call.
+            Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html
+            for more information. Set to empty dict by default,
+            this means defaults will be used.
+
+    """
+
+    def __init__(
+        self,
+        *args: Any,
+        pandas_config: Optional[dict] = None,
+        row_joiner: str = "\n",
+        col_joiner: str = " ",
+        **kwargs: Any,
+    ) -> None:
+        """Init params."""
+        super().__init__(*args, **kwargs)
+        self._pandas_config = pandas_config or {}
+        self._row_joiner = row_joiner if row_joiner else "\n"
+        self._col_joiner = col_joiner if col_joiner else " "
+
+    def load_data(
+        self,
+        file: Path,
+        include_sheetname: bool = True,
+        sheet_name: Optional[Union[str, int, list]] = None,
+        extra_info: Optional[dict] = None,
+        **kwargs,
+    ) -> List[Document]:
+        """Parse the file and return one document per selected worksheet.
+
+        Args:
+            file (Path): The path to the Excel file to read.
+            include_sheetname (bool): Whether to include the sheet name in the output.
+            sheet_name (Union[str, int, list, None]): The sheet(s) to read from,
+                default is None which reads all sheets.
+
+        Returns:
+            List[Document]: A list of `Document` objects, one per worksheet,
+                each containing the joined cell values of that worksheet.
+        """
+
+        try:
+            import pandas as pd
+        except ImportError:
+            raise ImportError(
+                "install pandas using `pip3 install pandas` to use this loader"
+            )
+
+        if sheet_name is not None:
+            sheet_name = (
+                [sheet_name] if not isinstance(sheet_name, list) else sheet_name
+            )
+
+        # clean up input
+        file = Path(file)
+        extra_info = extra_info or {}
+
+        dfs = pd.read_excel(file, sheet_name=sheet_name, **self._pandas_config)
+        sheet_names = dfs.keys()
+        output = []
+
+        for idx, key in enumerate(sheet_names):
+            dfs[key] = dfs[key].dropna(axis=0, how="all")
+            dfs[key] = dfs[key].dropna(axis=0, how="all")
+            dfs[key] = dfs[key].astype("object")
+            dfs[key].fillna("", inplace=True)
+
+            rows = dfs[key].values.astype(str).tolist()
+            content = self._row_joiner.join(
+                self._col_joiner.join(row).strip() for row in rows
+            ).strip()
+            if include_sheetname:
+                content = f"(Sheet {key} of file {file.name})\n{content}"
+            metadata = {"page_label": idx + 1, "sheet_name": key, **extra_info}
+            output.append(Document(text=content, metadata=metadata))
+
+        return output

From 91ebdca8c38679b785394daddab0d528ab17fcf0 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Tue, 18 Jun 2024 11:53:05 +0700
Subject: [PATCH 25/56] Persist loader information when indexing file

---
 libs/ktem/ktem/embeddings/manager.py   |  2 +-
 libs/ktem/ktem/index/file/index.py     |  7 ++++++-
 libs/ktem/ktem/index/file/pipelines.py | 29 ++++++++++++++++++--------
 libs/ktem/ktem/index/file/ui.py        | 13 +++++++++++-
 libs/ktem/ktem/llms/manager.py         |  2 +-
 5 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/libs/ktem/ktem/embeddings/manager.py b/libs/ktem/ktem/embeddings/manager.py
index dde04f498..4948aeb5b 100644
--- a/libs/ktem/ktem/embeddings/manager.py
+++ b/libs/ktem/ktem/embeddings/manager.py
@@ -36,7 +36,7 @@ def __init__(self):
 
     def load(self):
         """Load the model pool from database"""
-        self._models, self._info, self._defaut = {}, {}, ""
+        self._models, self._info, self._default = {}, {}, ""
         with Session(engine) as sess:
             stmt = select(EmbeddingTable)
             items = sess.execute(stmt)
diff --git a/libs/ktem/ktem/index/file/index.py b/libs/ktem/ktem/index/file/index.py
index 49bc0d657..84b665114 100644
--- a/libs/ktem/ktem/index/file/index.py
+++ b/libs/ktem/ktem/index/file/index.py
@@ -4,8 +4,9 @@
 from ktem.components import filestorage_path, get_docstore, get_vectorstore
 from ktem.db.engine import engine
 from ktem.index.base import BaseIndex
-from sqlalchemy import Column, DateTime, Integer, String, UniqueConstraint
+from sqlalchemy import JSON, Column, DateTime, Integer, String, UniqueConstraint
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.ext.mutable import MutableDict
 from sqlalchemy.sql import func
 from theflow.settings import settings as flowsettings
 from theflow.utils.modules import import_dotted_string
@@ -76,6 +77,10 @@ def _setup_resources(self):
                         DateTime(timezone=True), server_default=func.now()
                     ),
                     "user": Column(Integer, default=1),
+                    "note": Column(
+                        MutableDict.as_mutable(JSON),  # type: ignore
+                        default={},
+                    ),
                 },
             )
         else:
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 6669d0e13..0e0a67136 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -393,17 +393,28 @@ def store_file(self, file_path: Path) -> str:
     def finish(self, file_id: str, file_path: Path) -> str:
         """Finish the indexing"""
         with Session(engine) as session:
-            stmt = select(self.Index.target_id).where(self.Index.source_id == file_id)
-            doc_ids = [_[0] for _ in session.execute(stmt)]
+            stmt = select(self.Source).where(self.Source.id == file_id)
+            result = session.execute(stmt).first()
+            if not result:
+                return file_id
+
+            item = result[0]
+
+            # populate the text length
+            doc_ids_stmt = select(self.Index.target_id).where(
+                self.Index.source_id == file_id,
+                self.Index.relation_type == "document",
+            )
+            doc_ids = [_[0] for _ in session.execute(doc_ids_stmt)]
             if doc_ids:
                 docs = self.DS.get(doc_ids)
-                stmt = select(self.Source).where(self.Source.id == file_id)
-                result = session.execute(stmt).first()
-                if result:
-                    item = result[0]
-                    item.text_length = sum([len(doc.text) for doc in docs])
-                    session.add(item)
-                session.commit()
+                item.text_length = sum([len(doc.text) for doc in docs])
+
+            # populate the note
+            item.note["loader"] = self.get_from_path("loader").__class__.__name__
+
+            session.add(item)
+            session.commit()
 
         return file_id
 
diff --git a/libs/ktem/ktem/index/file/ui.py b/libs/ktem/ktem/index/file/ui.py
index 8e41329ed..bfcad7810 100644
--- a/libs/ktem/ktem/index/file/ui.py
+++ b/libs/ktem/ktem/index/file/ui.py
@@ -148,8 +148,16 @@ def on_building_ui(self):
                 gr.Markdown("## File List")
                 self.file_list_state = gr.State(value=None)
                 self.file_list = gr.DataFrame(
-                    headers=["id", "name", "size", "text_length", "date_created"],
+                    headers=[
+                        "id",
+                        "name",
+                        "size",
+                        "text_length",
+                        "loader",
+                        "date_created",
+                    ],
                     interactive=False,
+                    wrap=True,
                 )
 
                 with gr.Row() as self.selection_info:
@@ -597,6 +605,7 @@ def list_file(self, user_id):
                         "name": "-",
                         "size": "-",
                         "text_length": "-",
+                        "loader": "-",
                         "date_created": "-",
                     }
                 ]
@@ -613,6 +622,7 @@ def list_file(self, user_id):
                     "name": each[0].name,
                     "size": each[0].size,
                     "text_length": each[0].text_length,
+                    "loader": each[0].note.get("loader", "-"),
                     "date_created": each[0].date_created.strftime("%Y-%m-%d %H:%M:%S"),
                 }
                 for each in session.execute(statement).all()
@@ -628,6 +638,7 @@ def list_file(self, user_id):
                         "name": "-",
                         "size": "-",
                         "text_length": "-",
+                        "loader": "-",
                         "date_created": "-",
                     }
                 ]
diff --git a/libs/ktem/ktem/llms/manager.py b/libs/ktem/ktem/llms/manager.py
index f6adbb296..e688d363e 100644
--- a/libs/ktem/ktem/llms/manager.py
+++ b/libs/ktem/ktem/llms/manager.py
@@ -38,7 +38,7 @@ def __init__(self):
 
     def load(self):
         """Load the model pool from database"""
-        self._models, self._info, self._defaut = {}, {}, ""
+        self._models, self._info, self._default = {}, {}, ""
         with Session(engine) as session:
             stmt = select(LLMTable)
             items = session.execute(stmt)

From 868a0d8d727d1f5b18642cb77de6be6dddc8387e Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Tue, 18 Jun 2024 16:03:58 +0700
Subject: [PATCH 26/56] feat: allow hiding unneeded setting panels

---
 libs/ktem/ktem/pages/settings.py | 15 +++++++--------
 libs/ktem/ktem/settings.py       |  3 +++
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/libs/ktem/ktem/pages/settings.py b/libs/ktem/ktem/pages/settings.py
index 03d0e895c..9b1cea971 100644
--- a/libs/ktem/ktem/pages/settings.py
+++ b/libs/ktem/ktem/pages/settings.py
@@ -104,12 +104,11 @@ def on_building_ui(self):
         if self._app.f_user_management:
             with gr.Tab("Users"):
                 self.user_tab()
-        with gr.Tab("General"):
-            self.app_tab()
-        with gr.Tab("Document Indices"):
-            self.index_tab()
-        with gr.Tab("Reasoning Pipelines"):
-            self.reasoning_tab()
+
+        self.app_tab()
+        self.index_tab()
+        self.reasoning_tab()
+
         self.setting_save_btn = gr.Button(
             "Save changes", variant="primary", scale=1, elem_classes=["right-button"]
         )
@@ -249,7 +248,7 @@ def change_password(self, user_id, password, password_confirm):
         return "", ""
 
     def app_tab(self):
-        with gr.Tab("General application settings", visible=self._render_app_tab):
+        with gr.Tab("General", visible=self._render_app_tab):
             for n, si in self._default_settings.application.settings.items():
                 obj = render_setting_item(si, si.value)
                 self._components[f"application.{n}"] = obj
@@ -261,7 +260,7 @@ def index_tab(self):
         #         obj = render_setting_item(si, si.value)
         #         self._components[f"index.{n}"] = obj
 
-        with gr.Tab("Index settings", visible=self._render_index_tab):
+        with gr.Tab("Document Indices", visible=self._render_index_tab):
             for pn, sig in self._default_settings.index.options.items():
                 with gr.Tab(f"Index {pn}"):
                     for n, si in sig.settings.items():
diff --git a/libs/ktem/ktem/settings.py b/libs/ktem/ktem/settings.py
index dec1d4c61..2665e4f02 100644
--- a/libs/ktem/ktem/settings.py
+++ b/libs/ktem/ktem/settings.py
@@ -55,6 +55,9 @@ def get_setting_item(self, path: str) -> SettingItem:
         option = self.options[option_id]
         return option.get_setting_item(sub_path)
 
+    def __bool__(self):
+        return bool(self.settings) or bool(self.options)
+
 
 class SettingReasoningGroup(BaseSettingGroup):
     def _get_options(self) -> dict:

From c5415078d5658006091ee8aefa37247201afdab2 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Tue, 18 Jun 2024 16:51:49 +0700
Subject: [PATCH 27/56] feat: allow specific timezone when creating
 conversation

---
 libs/ktem/ktem/db/base_models.py   | 6 +++++-
 libs/ktem/ktem/index/file/index.py | 4 ++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/libs/ktem/ktem/db/base_models.py b/libs/ktem/ktem/db/base_models.py
index 36349fba7..f33d17ff0 100644
--- a/libs/ktem/ktem/db/base_models.py
+++ b/libs/ktem/ktem/db/base_models.py
@@ -1,9 +1,11 @@
 import datetime
 import uuid
 from typing import Optional
+from zoneinfo import ZoneInfo
 
 from sqlalchemy import JSON, Column
 from sqlmodel import Field, SQLModel
+from theflow.settings import settings as flowsettings
 
 
 class BaseConversation(SQLModel):
@@ -24,7 +26,9 @@ class BaseConversation(SQLModel):
         default_factory=lambda: uuid.uuid4().hex, primary_key=True, index=True
     )
     name: str = Field(
-        default_factory=lambda: datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
+        default_factory=lambda: datetime.datetime.now(
+            ZoneInfo(getattr(flowsettings, "TIME_ZONE", "UTC"))
+        ).strftime("%Y-%m-%d %H:%M:%S")
     )
     user: int = Field(default=0)  # For now we only have one user
 
diff --git a/libs/ktem/ktem/index/file/index.py b/libs/ktem/ktem/index/file/index.py
index 84b665114..24d32ef3d 100644
--- a/libs/ktem/ktem/index/file/index.py
+++ b/libs/ktem/ktem/index/file/index.py
@@ -103,6 +103,10 @@ def _setup_resources(self):
                         DateTime(timezone=True), server_default=func.now()
                     ),
                     "user": Column(Integer, default=1),
+                    "note": Column(
+                        MutableDict.as_mutable(JSON),  # type: ignore
+                        default={},
+                    ),
                 },
             )
         Index = type(

From 428bbe218847c52d4e26f7e3dea8f74b0e89acec Mon Sep 17 00:00:00 2001
From: cin-ace <ace@cinnamon.is>
Date: Tue, 18 Jun 2024 17:15:06 +0700
Subject: [PATCH 28/56] feat: add more confidence score (#96)

* Allow a list of rerankers

* Export llm reranking score instead of filter with boolean

* Get logprobs from LLMs

* Rename cohere reranking score

* Call 2 rerankers at once

* Run QA pipeline for each chunk to get qa_score

* Display more relevance scores

* Define another LLMScoring instead of editing the original one

* Export logprobs instead of probs

* Call LLMScoring

* Get qa_score only in the final answer
---
 libs/kotaemon/kotaemon/base/schema.py         |   1 +
 .../kotaemon/indices/rankings/__init__.py     |   3 +-
 .../kotaemon/indices/rankings/cohere.py       |   2 +-
 .../kotaemon/indices/rankings/llm_scoring.py  |  53 +++++++++
 libs/kotaemon/kotaemon/indices/vectorindex.py |   2 +-
 libs/kotaemon/kotaemon/llms/chats/openai.py   |  31 ++++-
 libs/kotaemon/kotaemon/loaders/utils/gpt4v.py |  19 +++-
 libs/ktem/ktem/index/file/pipelines.py        |  28 +++--
 libs/ktem/ktem/reasoning/react.py             |   2 +-
 libs/ktem/ktem/reasoning/rewoo.py             |   2 +-
 libs/ktem/ktem/reasoning/simple.py            | 106 ++++++++++++++----
 11 files changed, 208 insertions(+), 41 deletions(-)
 create mode 100644 libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py

diff --git a/libs/kotaemon/kotaemon/base/schema.py b/libs/kotaemon/kotaemon/base/schema.py
index 10fcb888e..499e505d7 100644
--- a/libs/kotaemon/kotaemon/base/schema.py
+++ b/libs/kotaemon/kotaemon/base/schema.py
@@ -140,6 +140,7 @@ class LLMInterface(AIMessage):
     total_cost: float = 0
     logits: list[list[float]] = Field(default_factory=list)
     messages: list[AIMessage] = Field(default_factory=list)
+    logprobs: list[float] = []
 
 
 class ExtractorOutput(Document):
diff --git a/libs/kotaemon/kotaemon/indices/rankings/__init__.py b/libs/kotaemon/kotaemon/indices/rankings/__init__.py
index ccd99bbe7..9de04d8d8 100644
--- a/libs/kotaemon/kotaemon/indices/rankings/__init__.py
+++ b/libs/kotaemon/kotaemon/indices/rankings/__init__.py
@@ -1,5 +1,6 @@
 from .base import BaseReranking
 from .cohere import CohereReranking
 from .llm import LLMReranking
+from .llm_scoring import LLMScoring
 
-__all__ = ["CohereReranking", "LLMReranking", "BaseReranking"]
+__all__ = ["CohereReranking", "LLMReranking", "LLMScoring", "BaseReranking"]
diff --git a/libs/kotaemon/kotaemon/indices/rankings/cohere.py b/libs/kotaemon/kotaemon/indices/rankings/cohere.py
index e92bd8a88..d22be9a8a 100644
--- a/libs/kotaemon/kotaemon/indices/rankings/cohere.py
+++ b/libs/kotaemon/kotaemon/indices/rankings/cohere.py
@@ -33,7 +33,7 @@ def run(self, documents: list[Document], query: str) -> list[Document]:
         )
         for r in response.results:
             doc = documents[r.index]
-            doc.metadata["relevance_score"] = round(r.relevance_score, 2)
+            doc.metadata["cohere_reranking_score"] = round(r.relevance_score, 2)
             compressed_docs.append(doc)
 
         return compressed_docs
diff --git a/libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py b/libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py
new file mode 100644
index 000000000..e575f9476
--- /dev/null
+++ b/libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py
@@ -0,0 +1,69 @@
+from __future__ import annotations
+
+from concurrent.futures import ThreadPoolExecutor
+
+import numpy as np
+from langchain.output_parsers.boolean import BooleanOutputParser
+
+from kotaemon.base import Document
+
+from .llm import LLMReranking
+
+
+class LLMScoring(LLMReranking):
+    """Attach an LLM-based relevance score to each document."""
+
+    def run(
+        self,
+        documents: list[Document],
+        query: str,
+    ) -> list[Document]:
+        """Score each document against the query and return the top ones.
+
+        Every document gets ``metadata["llm_reranking_score"]`` set in place.
+        The score is derived from the log-probabilities of the LLM reply.
+
+        Args:
+            documents: documents to score; their metadata is mutated.
+            query: the question each document is judged against.
+
+        Returns:
+            The first ``self.top_k`` documents, in their original order.
+        """
+        output_parser = BooleanOutputParser()
+
+        if self.concurrent:
+            with ThreadPoolExecutor() as executor:
+                futures = []
+                for doc in documents:
+                    _prompt = self.prompt_template.populate(
+                        question=query, context=doc.get_content()
+                    )
+                    # hand the prompt to submit() directly: a lambda closing
+                    # over ``_prompt`` may run after the loop has rebound it
+                    # and would then score the wrong document
+                    futures.append(executor.submit(self.llm, _prompt))
+
+                results = [future.result() for future in futures]
+        else:
+            results = []
+            for doc in documents:
+                _prompt = self.prompt_template.populate(
+                    question=query, context=doc.get_content()
+                )
+                results.append(self.llm(_prompt))
+
+        for result, doc in zip(results, documents):
+            # exp(mean logprob) is the geometric mean of the token probabilities
+            # NOTE(review): an empty ``logprobs`` list yields NaN here
+            score = np.exp(np.average(result.logprobs))
+            include_doc = output_parser.parse(result.text)
+            if include_doc:
+                doc.metadata["llm_reranking_score"] = round(score, 2)
+            else:
+                # reply parsed as negative: invert the confidence
+                doc.metadata["llm_reranking_score"] = round(1 - score, 2)
+
+        # NOTE(review): nothing is filtered out above, so the result is always
+        # the leading ``top_k`` documents; confirm the truncation is intended
+        return documents[: self.top_k]
diff --git a/libs/kotaemon/kotaemon/indices/vectorindex.py b/libs/kotaemon/kotaemon/indices/vectorindex.py
index 457d42558..571320866 100644
--- a/libs/kotaemon/kotaemon/indices/vectorindex.py
+++ b/libs/kotaemon/kotaemon/indices/vectorindex.py
@@ -175,7 +175,7 @@ def run(
             ]
 
         # use additional reranker to re-order the document list
-        if self.rerankers:
+        if self.rerankers and text:
             for reranker in self.rerankers:
                 result = reranker(documents=result, query=text)
 
diff --git a/libs/kotaemon/kotaemon/llms/chats/openai.py b/libs/kotaemon/kotaemon/llms/chats/openai.py
index b12567d9c..e8d4783f8 100644
--- a/libs/kotaemon/kotaemon/llms/chats/openai.py
+++ b/libs/kotaemon/kotaemon/llms/chats/openai.py
@@ -159,6 +159,15 @@ def prepare_output(self, resp: dict) -> LLMInterface:
             additional_kwargs["tool_calls"] = resp["choices"][0]["message"][
                 "tool_calls"
             ]
+
+        if resp["choices"][0].get("logprobs") is None:
+            logprobs = []
+        else:
+            logprobs = [
+                logprob["logprob"]
+                for logprob in resp["choices"][0]["logprobs"].get("content", [])
+            ]
+
         output = LLMInterface(
             candidates=[(_["message"]["content"] or "") for _ in resp["choices"]],
             content=resp["choices"][0]["message"]["content"] or "",
@@ -170,6 +179,7 @@ def prepare_output(self, resp: dict) -> LLMInterface:
                 AIMessage(content=(_["message"]["content"]) or "")
                 for _ in resp["choices"]
             ],
+            logprobs=logprobs,
         )
 
         return output
@@ -216,11 +226,24 @@ def stream(
             client, messages=input_messages, stream=True, **kwargs
         )
 
-        for chunk in resp:
-            if not chunk.choices:
+        for c in resp:
+            chunk = c.dict()
+            if not chunk["choices"]:
                 continue
-            if chunk.choices[0].delta.content is not None:
-                yield LLMInterface(content=chunk.choices[0].delta.content)
+            if chunk["choices"][0]["delta"]["content"] is not None:
+                if chunk["choices"][0].get("logprobs") is None:
+                    logprobs = []
+                else:
+                    logprobs = [
+                        logprob["logprob"]
+                        for logprob in chunk["choices"][0]["logprobs"].get(
+                            "content", []
+                        )
+                    ]
+
+                yield LLMInterface(
+                    content=chunk["choices"][0]["delta"]["content"], logprobs=logprobs
+                )
 
     async def astream(
         self, messages: str | BaseMessage | list[BaseMessage], *args, **kwargs
diff --git a/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py b/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
index 535dc5dbf..5aa7495cc 100644
--- a/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
+++ b/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
@@ -78,11 +78,13 @@ def stream_gpt4v(
         ],
         "max_tokens": max_tokens,
         "stream": True,
+        "logprobs": True,
     }
     try:
         response = requests.post(endpoint, headers=headers, json=payload, stream=True)
         assert response.status_code == 200, str(response.content)
         output = ""
+        logprobs = []
         for line in response.iter_lines():
             if line:
                 if line.startswith(b"\xef\xbb\xbf"):
@@ -96,9 +98,22 @@ def stream_gpt4v(
                 except Exception:
                     break
                 if len(line["choices"]):
+                    if line["choices"][0].get("logprobs") is None:
+                        _logprobs = []
+                    else:
+                        _logprobs = [
+                            logprob["logprob"]
+                            for logprob in line["choices"][0]["logprobs"].get(
+                                "content", []
+                            )
+                        ]
+
                     output += line["choices"][0]["delta"].get("content", "")
-                    yield line["choices"][0]["delta"].get("content", "")
+                    logprobs += _logprobs
+                    yield line["choices"][0]["delta"].get("content", ""), _logprobs
+
     except Exception as e:
         logger.error(f"Error streaming gpt4v {e}")
         output = ""
-    return output
+
+    return output, logprobs
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 0e0a67136..df2503203 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -7,7 +7,7 @@
 from functools import lru_cache
 from hashlib import sha256
 from pathlib import Path
-from typing import Generator, Optional
+from typing import Generator, Optional, Sequence
 
 from ktem.db.models import engine
 from ktem.embeddings.manager import embedding_models_manager
@@ -30,7 +30,12 @@
 from kotaemon.embeddings import BaseEmbeddings
 from kotaemon.indices import VectorIndexing, VectorRetrieval
 from kotaemon.indices.ingests.files import KH_DEFAULT_FILE_EXTRACTORS
-from kotaemon.indices.rankings import BaseReranking, CohereReranking, LLMReranking
+from kotaemon.indices.rankings import (
+    BaseReranking,
+    CohereReranking,
+    LLMReranking,
+    LLMScoring,
+)
 from kotaemon.indices.splitters import BaseSplitter, TokenSplitter
 
 from .base import BaseFileIndexIndexing, BaseFileIndexRetriever
@@ -75,7 +80,7 @@ class DocumentRetrievalPipeline(BaseFileIndexRetriever):
     """
 
     embedding: BaseEmbeddings
-    reranker: BaseReranking = LLMReranking.withx()
+    rerankers: Sequence[BaseReranking] = [LLMReranking.withx()]
     get_extra_table: bool = False
     mmr: bool = False
     top_k: int = 5
@@ -88,6 +93,7 @@ def vector_retrieval(self) -> VectorRetrieval:
             vector_store=self.VS,
             doc_store=self.DS,
             retrieval_mode=self.retrieval_mode,  # type: ignore
+            rerankers=self.rerankers,
         )
 
     def run(
@@ -136,8 +142,6 @@ def run(
 
         # rerank
         docs = self.vector_retrieval(text=text, top_k=self.top_k, **retrieval_kwargs)
-        if docs and self.get_from_path("reranker"):
-            docs = self.reranker(docs, query=text)
 
         if not self.get_extra_table:
             return docs
@@ -245,14 +249,16 @@ def get_pipeline(cls, user_settings, index_settings, selected):
                 )
             ],
             retrieval_mode=user_settings["retrieval_mode"],
-            reranker=CohereReranking(),
+            rerankers=[LLMScoring(), CohereReranking()],
         )
         if not user_settings["use_reranking"]:
-            retriever.reranker = None  # type: ignore
-        elif isinstance(retriever.reranker, LLMReranking):
-            retriever.reranker.llm = llms.get(
-                user_settings["reranking_llm"], llms.get_default()
-            )
+            retriever.rerankers = []  # type: ignore
+        else:
+            for reranker in retriever.rerankers:
+                if isinstance(reranker, LLMReranking):
+                    reranker.llm = llms.get(
+                        user_settings["reranking_llm"], llms.get_default()
+                    )
 
         kwargs = {".doc_ids": selected}
         retriever.set_run(kwargs, temp=True)
diff --git a/libs/ktem/ktem/reasoning/react.py b/libs/ktem/ktem/reasoning/react.py
index 9f9202332..21d8089fc 100644
--- a/libs/ktem/ktem/reasoning/react.py
+++ b/libs/ktem/ktem/reasoning/react.py
@@ -97,7 +97,7 @@ def prepare_evidence(self, docs, trim_len: int = 4000):
                     )
 
             print("Retrieved #{}: {}".format(_id, retrieved_content[:100]))
-            print("Score", retrieved_item.metadata.get("relevance_score", None))
+            print("Score", retrieved_item.metadata.get("cohere_reranking_score", None))
 
         # trim context by trim_len
         if evidence:
diff --git a/libs/ktem/ktem/reasoning/rewoo.py b/libs/ktem/ktem/reasoning/rewoo.py
index 87210f725..a3b3ed2ba 100644
--- a/libs/ktem/ktem/reasoning/rewoo.py
+++ b/libs/ktem/ktem/reasoning/rewoo.py
@@ -135,7 +135,7 @@ def prepare_evidence(self, docs, trim_len: int = 3000):
                     )
 
             print("Retrieved #{}: {}".format(_id, retrieved_content))
-            print("Score", retrieved_item.metadata.get("relevance_score", None))
+            print("Score", retrieved_item.metadata.get("cohere_reranking_score", None))
 
         # trim context by trim_len
         if evidence:
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 885ddd08a..37b6c5cf4 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -6,6 +6,7 @@
 from functools import partial
 from typing import Generator
 
+import numpy as np
 import tiktoken
 from ktem.llms.manager import llms
 from ktem.utils.render import Render
@@ -45,6 +46,10 @@ def get_header(doc: Document):
     return header.strip()
 
 
+def is_close(val1, val2, tolerance=1e-9):
+    return abs(val1 - val2) <= tolerance
+
+
 class PrepareEvidencePipeline(BaseComponent):
     """Prepare the evidence text from the list of retrieved documents
 
@@ -119,10 +124,6 @@ def run(self, docs: list[RetrievedDocument]) -> Document:
                         + " \n<br>"
                     )
 
-            print("Retrieved #{}: {}".format(_id, retrieved_content))
-            print(retrieved_item.metadata)
-            print("Score", retrieved_item.metadata.get("relevance_score", None))
-
         if evidence_mode != EVIDENCE_MODE_FIGURE:
             # trim context by trim_len
             print("len (original)", len(evidence))
@@ -131,8 +132,6 @@ def run(self, docs: list[RetrievedDocument]) -> Document:
                 evidence = texts[0].text
                 print("len (trimmed)", len(evidence))
 
-        print(f"PrepareEvidence with input {docs}\nOutput: {evidence}\n")
-
         return Document(content=(evidence_mode, evidence))
 
 
@@ -364,9 +363,13 @@ def stream(  # type: ignore
         prompt, images = self.get_prompt(question, evidence, evidence_mode)
 
         output = ""
+        logprobs = []
         if evidence_mode == EVIDENCE_MODE_FIGURE:
-            for text in stream_gpt4v(self.vlm_endpoint, images, prompt, max_tokens=768):
+            for text, _logprobs in stream_gpt4v(
+                self.vlm_endpoint, images, prompt, max_tokens=768
+            ):
                 output += text
+                logprobs += _logprobs
                 yield Document(channel="chat", content=text)
         else:
             messages = []
@@ -380,9 +383,10 @@ def stream(  # type: ignore
             try:
                 # try streaming first
                 print("Trying LLM streaming")
-                for text in self.llm.stream(messages):
-                    output += text.text
-                    yield Document(channel="chat", content=text.text)
+                for out_msg in self.llm.stream(messages):
+                    output += out_msg.text
+                    logprobs += out_msg.logprobs
+                    yield Document(channel="chat", content=out_msg.text)
             except NotImplementedError:
                 print("Streaming is not supported, falling back to normal processing")
                 output = self.llm(messages).text
@@ -393,7 +397,13 @@ def stream(  # type: ignore
         if evidence and self.enable_citation:
             citation = self.citation_pipeline(context=evidence, question=question)
 
-        answer = Document(text=output, metadata={"citation": citation})
+        answer = Document(
+            text=output,
+            metadata={
+                "citation": citation,
+                "qa_score": round(np.exp(np.average(logprobs)), 2),
+            },
+        )
 
         return answer
 
@@ -590,14 +600,35 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
                 if idx < len(ss) - 1:
                     text += id2docs[id].text[span["end"] : ss[idx + 1]["start"]]
             text += id2docs[id].text[ss[-1]["end"] :]
+            if is_close(id2docs[id].score, -1.0):
+                text_search_str = " default from full-text search<br>"
+            else:
+                text_search_str = "<br>"
+
+            if (
+                id2docs[id].metadata.get("llm_reranking_score") is None
+                or id2docs[id].metadata.get("cohere_reranking_score") is None
+            ):
+                cloned_chunk_str = (
+                    "<b>Cloned chunk for a table. No reranking score</b><br>"
+                )
+            else:
+                cloned_chunk_str = ""
+
             with_citation.append(
                 Document(
                     channel="info",
                     content=Render.collapsible(
                         header=(
                             f"{get_header(id2docs[id])}<br>"
-                            "<b>Relevance score:</b>"
-                            f' {id2docs[id].metadata.get("relevance_score")}'
+                            "<b>Vectorstore score:</b>"
+                            f" {round(id2docs[id].score, 2)}"
+                            f"{text_search_str}"
+                            f"{cloned_chunk_str}"
+                            "<b>LLM reranking score:</b>"
+                            f' {id2docs[id].metadata.get("llm_reranking_score")}<br>'
+                            "<b>Cohere reranking score:</b>"
+                            f' {id2docs[id].metadata.get("cohere_reranking_score")}<br>'
                         ),
                         content=Render.table(text),
                         open=True,
@@ -607,6 +638,20 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
 
         for id_ in list(not_detected):
             doc = id2docs[id_]
+            if is_close(doc.score, -1.0):
+                text_search_str = " default from full-text search<br>"
+            else:
+                text_search_str = "<br>"
+
+            if (
+                doc.metadata.get("llm_reranking_score") is None
+                or doc.metadata.get("cohere_reranking_score") is None
+            ):
+                cloned_chunk_str = (
+                    "<b>Cloned chunk for a table. No reranking score</b><br>"
+                )
+            else:
+                cloned_chunk_str = ""
             if doc.metadata.get("type", "") == "image":
                 without_citation.append(
                     Document(
@@ -614,8 +659,14 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
                         content=Render.collapsible(
                             header=(
                                 f"{get_header(doc)}<br>"
-                                "<b>Relevance score:</b>"
-                                f' {doc.metadata.get("relevance_score")}'
+                                "<b>Vectorstore score:</b>"
+                                f" {round(doc.score, 2)}"
+                                f"{text_search_str}"
+                                f"{cloned_chunk_str}"
+                                "<b>LLM reranking score:</b>"
+                                f' {doc.metadata.get("llm_reranking_score")}<br>'
+                                "<b>Cohere reranking score:</b>"
+                                f' {doc.metadata.get("cohere_reranking_score")}<br>'
                             ),
                             content=Render.image(
                                 url=doc.metadata["image_origin"], text=doc.text
@@ -631,8 +682,14 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
                         content=Render.collapsible(
                             header=(
                                 f"{get_header(doc)}<br>"
-                                "<b>Relevance score:</b>"
-                                f' {doc.metadata.get("relevance_score")}'
+                                "<b>Vectorstore score:</b>"
+                                f" {round(doc.score, 2)}"
+                                f"{text_search_str}"
+                                f"{cloned_chunk_str}"
+                                "<b>LLM reranking score:</b>"
+                                f' {doc.metadata.get("llm_reranking_score")}<br>'
+                                "<b>Cohere reranking score:</b>"
+                                f' {doc.metadata.get("cohere_reranking_score")}<br>'
                             ),
                             content=Render.table(doc.text),
                             open=True,
@@ -708,7 +765,7 @@ def stream(  # type: ignore
         # show the evidence
         with_citation, without_citation = self.prepare_citations(answer, docs)
         if not with_citation and not without_citation:
-            yield Document(channel="info", content="No evidence found.\n")
+            yield Document(channel="info", content="<h5><b>No evidence found.</b></h5>")
         else:
             yield Document(channel="info", content=None)
             for _ in with_citation:
@@ -716,10 +773,21 @@ def stream(  # type: ignore
             if without_citation:
                 yield Document(
                     channel="info",
-                    content="Retrieved segments without matching evidence:\n",
+                    content=(
+                        "<h5><b>Retrieved segments without matching evidence:"
+                        "</b></h5><br>"
+                    ),
                 )
                 for _ in without_citation:
                     yield _
+        yield Document(
+            channel="info",
+            content=(
+                "<h5><b>Question answering</b></h5><br>"
+                "<b>Question answering confidence:</b> "
+                f"{answer.metadata.get('qa_score')}"
+            ),
+        )
 
         return answer
 

From 1e26358981985265d3c93e392b21f29a126f1182 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Tue, 18 Jun 2024 18:26:17 +0700
Subject: [PATCH 29/56] feat: replace text length with token in file list

---
 docs/pages/app/index/file.md           | 12 ++++++------
 libs/ktem/ktem/index/file/index.py     |  2 --
 libs/ktem/ktem/index/file/pipelines.py | 15 ++++++++++++---
 libs/ktem/ktem/index/file/ui.py        |  8 ++++----
 libs/ktem/ktem/reasoning/simple.py     |  3 +++
 5 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/docs/pages/app/index/file.md b/docs/pages/app/index/file.md
index 2d479c52a..e3bb344fc 100644
--- a/docs/pages/app/index/file.md
+++ b/docs/pages/app/index/file.md
@@ -107,9 +107,9 @@ string rather than a string.
 
 ## Software infrastructure
 
-| Infra            | Access        | Schema                                                                                                                                                                                                                                                                                             | Ref                                                        |
-| ---------------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------- |
-| SQL table Source | self.\_Source | - id (int): id of the source (auto)<br>- name (str): the name of the file<br>- path (str): the path of the file<br>- size (int): the file size in bytes<br>- text_length (int): the number of characters in the file (default 0)<br>- date_created (datetime): the time the file is created (auto) | This is SQLALchemy ORM class. Can consult                  |
-| SQL table Index  | self.\_Index  | - id (int): id of the index entry (auto)<br>- source_id (int): the id of a file in the Source table<br>- target_id: the id of the segment in docstore or vector store<br>- relation_type (str): if the link is "document" or "vector"                                                              | This is SQLAlchemy ORM class                               |
-| Vector store     | self.\_VS     | - self.\_VS.add: add the list of embeddings to the vector store (optionally associate metadata and ids)<br>- self.\_VS.delete: delete vector entries based on ids<br>- self.\_VS.query: get embeddings based on embeddings.                                                                        | kotaemon > storages > vectorstores > BaseVectorStore       |
-| Doc store        | self.\_DS     | - self.\_DS.add: add the segments to document stores<br>- self.\_DS.get: get the segments based on id<br>- self.\_DS.get_all: get all segments<br>- self.\_DS.delete: delete segments based on id                                                                                                  | kotaemon > storages > docstores > base > BaseDocumentStore |
+| Infra            | Access        | Schema                                                                                                                                                                                                                                                                                      | Ref                                                        |
+| ---------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------- |
+| SQL table Source | self.\_Source | - id (int): id of the source (auto)<br>- name (str): the name of the file<br>- path (str): the path of the file<br>- size (int): the file size in bytes<br>- note (dict): allow extra optional information about the file<br>- date_created (datetime): the time the file is created (auto) | This is SQLAlchemy ORM class. Can consult                  |
+| SQL table Index  | self.\_Index  | - id (int): id of the index entry (auto)<br>- source_id (int): the id of a file in the Source table<br>- target_id: the id of the segment in docstore or vector store<br>- relation_type (str): if the link is "document" or "vector"                                                       | This is SQLAlchemy ORM class                               |
+| Vector store     | self.\_VS     | - self.\_VS.add: add the list of embeddings to the vector store (optionally associate metadata and ids)<br>- self.\_VS.delete: delete vector entries based on ids<br>- self.\_VS.query: get embeddings based on embeddings.                                                                 | kotaemon > storages > vectorstores > BaseVectorStore       |
+| Doc store        | self.\_DS     | - self.\_DS.add: add the segments to document stores<br>- self.\_DS.get: get the segments based on id<br>- self.\_DS.get_all: get all segments<br>- self.\_DS.delete: delete segments based on id                                                                                           | kotaemon > storages > docstores > base > BaseDocumentStore |
diff --git a/libs/ktem/ktem/index/file/index.py b/libs/ktem/ktem/index/file/index.py
index 24d32ef3d..3212edbd4 100644
--- a/libs/ktem/ktem/index/file/index.py
+++ b/libs/ktem/ktem/index/file/index.py
@@ -72,7 +72,6 @@ def _setup_resources(self):
                     "name": Column(String),
                     "path": Column(String),
                     "size": Column(Integer, default=0),
-                    "text_length": Column(Integer, default=0),
                     "date_created": Column(
                         DateTime(timezone=True), server_default=func.now()
                     ),
@@ -98,7 +97,6 @@ def _setup_resources(self):
                     "name": Column(String, unique=True),
                     "path": Column(String),
                     "size": Column(Integer, default=0),
-                    "text_length": Column(Integer, default=0),
                     "date_created": Column(
                         DateTime(timezone=True), server_default=func.now()
                     ),
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index df2503203..29288b9dd 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -9,6 +9,7 @@
 from pathlib import Path
 from typing import Generator, Optional, Sequence
 
+import tiktoken
 from ktem.db.models import engine
 from ktem.embeddings.manager import embedding_models_manager
 from ktem.llms.manager import llms
@@ -65,6 +66,9 @@ def dev_settings():
     return file_extractors, chunk_size, chunk_overlap
 
 
+_default_token_func = tiktoken.encoding_for_model("gpt-3.5-turbo").encode
+
+
 class DocumentRetrievalPipeline(BaseFileIndexRetriever):
     """Retrieve relevant document
 
@@ -406,15 +410,16 @@ def finish(self, file_id: str, file_path: Path) -> str:
 
             item = result[0]
 
-            # populate the text length
+            # populate the number of tokens
             doc_ids_stmt = select(self.Index.target_id).where(
                 self.Index.source_id == file_id,
                 self.Index.relation_type == "document",
             )
             doc_ids = [_[0] for _ in session.execute(doc_ids_stmt)]
-            if doc_ids:
+            token_func = self.get_token_func()
+            if doc_ids and token_func:
                 docs = self.DS.get(doc_ids)
-                item.text_length = sum([len(doc.text) for doc in docs])
+                item.note["tokens"] = sum([len(token_func(doc.text)) for doc in docs])
 
             # populate the note
             item.note["loader"] = self.get_from_path("loader").__class__.__name__
@@ -424,6 +429,10 @@ def finish(self, file_id: str, file_path: Path) -> str:
 
         return file_id
 
+    def get_token_func(self):
+        """Return the callable that splits text into tokens for token counting"""
+        return _default_token_func
+
     def delete_file(self, file_id: str):
         """Delete a file from the db, including its chunks in docstore and vectorstore
 
diff --git a/libs/ktem/ktem/index/file/ui.py b/libs/ktem/ktem/index/file/ui.py
index bfcad7810..3ddc9ceda 100644
--- a/libs/ktem/ktem/index/file/ui.py
+++ b/libs/ktem/ktem/index/file/ui.py
@@ -152,7 +152,7 @@ def on_building_ui(self):
                         "id",
                         "name",
                         "size",
-                        "text_length",
+                        "tokens",
                         "loader",
                         "date_created",
                     ],
@@ -604,7 +604,7 @@ def list_file(self, user_id):
                         "id": "-",
                         "name": "-",
                         "size": "-",
-                        "text_length": "-",
+                        "tokens": "-",
                         "loader": "-",
                         "date_created": "-",
                     }
@@ -621,7 +621,7 @@ def list_file(self, user_id):
                     "id": each[0].id,
                     "name": each[0].name,
                     "size": each[0].size,
-                    "text_length": each[0].text_length,
+                    "tokens": each[0].note.get("tokens", "-"),
                     "loader": each[0].note.get("loader", "-"),
                     "date_created": each[0].date_created.strftime("%Y-%m-%d %H:%M:%S"),
                 }
@@ -637,7 +637,7 @@ def list_file(self, user_id):
                         "id": "-",
                         "name": "-",
                         "size": "-",
-                        "text_length": "-",
+                        "tokens": "-",
                         "loader": "-",
                         "date_created": "-",
                     }
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 37b6c5cf4..56318b388 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -50,6 +50,9 @@ def is_close(val1, val2, tolerance=1e-9):
     return abs(val1 - val2) <= tolerance
 
 
+_default_token_func = tiktoken.encoding_for_model("gpt-3.5-turbo").encode
+
+
 class PrepareEvidencePipeline(BaseComponent):
     """Prepare the evidence text from the list of retrieved documents
 

From 8cd8758ab08dd301e2036bed8d557574b2d2f2e8 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Tue, 18 Jun 2024 19:51:23 +0700
Subject: [PATCH 30/56] ui: show index name instead of id in the settings

---
 libs/ktem/ktem/pages/settings.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libs/ktem/ktem/pages/settings.py b/libs/ktem/ktem/pages/settings.py
index 9b1cea971..a4c786212 100644
--- a/libs/ktem/ktem/pages/settings.py
+++ b/libs/ktem/ktem/pages/settings.py
@@ -260,9 +260,11 @@ def index_tab(self):
         #         obj = render_setting_item(si, si.value)
         #         self._components[f"index.{n}"] = obj
 
+        id2name = {k: v.name for k, v in self._app.index_manager.info().items()}
         with gr.Tab("Document Indices", visible=self._render_index_tab):
             for pn, sig in self._default_settings.index.options.items():
-                with gr.Tab(f"Index {pn}"):
+                name = id2name.get(pn, f"<id {pn}>")
+                with gr.Tab(name):
                     for n, si in sig.settings.items():
                         obj = render_setting_item(si, si.value)
                         self._components[f"index.options.{pn}.{n}"] = obj

From 11bc6d6a8ac188702aaf09a06ee66bdc6db93154 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Tue, 18 Jun 2024 19:56:20 +0700
Subject: [PATCH 31/56] feat(ai): restrict the vision temperature

---
 libs/kotaemon/kotaemon/loaders/utils/gpt4v.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py b/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
index 5aa7495cc..1afbdfba9 100644
--- a/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
+++ b/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
@@ -35,6 +35,7 @@ def generate_gpt4v(
             }
         ],
         "max_tokens": max_tokens,
+        "temperature": 0,
     }
 
     response = requests.post(endpoint, headers=headers, json=payload)
@@ -79,6 +80,7 @@ def stream_gpt4v(
         "max_tokens": max_tokens,
         "stream": True,
         "logprobs": True,
+        "temperature": 0,
     }
     try:
         response = requests.post(endpoint, headers=headers, json=payload, stream=True)

From aea24dee43edadec747ef72aeeeceec704c3e032 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Tue, 18 Jun 2024 20:17:41 +0700
Subject: [PATCH 32/56] fix(ui): remove the misleading message about
 non-retrieved evidences

---
 libs/ktem/ktem/reasoning/simple.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 56318b388..14a5357ac 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -732,12 +732,6 @@ async def ainvoke(  # type: ignore
             for _ in with_citation:
                 self.report_output(_)
             if without_citation:
-                self.report_output(
-                    Document(
-                        channel="info",
-                        content="Retrieved segments without matching evidence:\n",
-                    )
-                )
                 for _ in without_citation:
                     self.report_output(_)
 
@@ -774,13 +768,6 @@ def stream(  # type: ignore
             for _ in with_citation:
                 yield _
             if without_citation:
-                yield Document(
-                    channel="info",
-                    content=(
-                        "<h5><b>Retrieved segments without matching evidence:"
-                        "</b></h5><br>"
-                    ),
-                )
                 for _ in without_citation:
                     yield _
         yield Document(

From aa18272fc9745cb620fb43858a4412c7989e99be Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Tue, 18 Jun 2024 22:32:43 +0700
Subject: [PATCH 33/56] feat(ui): show the reasoning name and description in
 the reasoning setting page

---
 libs/ktem/ktem/pages/settings.py   | 8 +++++++-
 libs/ktem/ktem/reasoning/react.py  | 7 ++++++-
 libs/ktem/ktem/reasoning/rewoo.py  | 6 +++++-
 libs/ktem/ktem/reasoning/simple.py | 6 +++++-
 4 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/libs/ktem/ktem/pages/settings.py b/libs/ktem/ktem/pages/settings.py
index a4c786212..59875eee2 100644
--- a/libs/ktem/ktem/pages/settings.py
+++ b/libs/ktem/ktem/pages/settings.py
@@ -2,6 +2,7 @@
 
 import gradio as gr
 from ktem.app import BasePage
+from ktem.components import reasonings
 from ktem.db.models import Settings, User, engine
 from sqlmodel import Session, select
 
@@ -291,7 +292,12 @@ def reasoning_tab(self):
                     visible=idx == 0,
                     elem_id=pn,
                 ) as self._reasoning_mode[pn]:
-                    gr.Markdown("**Name**: Description")
+                    reasoning = reasonings.get(pn, None)
+                    if reasoning is None:
+                        gr.Markdown("**Name**: Description")
+                    else:
+                        info = reasoning.get_info()
+                        gr.Markdown(f"**{info['name']}**: {info['description']}")
                     for n, si in sig.settings.items():
                         obj = render_setting_item(si, si.value)
                         self._components[f"reasoning.options.{pn}.{n}"] = obj
diff --git a/libs/ktem/ktem/reasoning/react.py b/libs/ktem/ktem/reasoning/react.py
index 21d8089fc..d9be6b4bb 100644
--- a/libs/ktem/ktem/reasoning/react.py
+++ b/libs/ktem/ktem/reasoning/react.py
@@ -325,5 +325,10 @@ def get_info(cls) -> dict:
         return {
             "id": "ReAct",
             "name": "ReAct Agent",
-            "description": "Implementing ReAct paradigm",
+            "description": (
+                "Implementing ReAct paradigm: https://arxiv.org/abs/2210.03629. "
+                "ReAct agent answers the user's request by iteratively formulating "
+                "plan and executing it. The agent can use multiple tools to gather "
+                "information and generate the final answer."
+            ),
         }
diff --git a/libs/ktem/ktem/reasoning/rewoo.py b/libs/ktem/ktem/reasoning/rewoo.py
index a3b3ed2ba..a86ae8dd5 100644
--- a/libs/ktem/ktem/reasoning/rewoo.py
+++ b/libs/ktem/ktem/reasoning/rewoo.py
@@ -457,6 +457,10 @@ def get_info(cls) -> dict:
             "id": "ReWOO",
             "name": "ReWOO Agent",
             "description": (
-                "Implementing ReWOO paradigm " "https://arxiv.org/pdf/2305.18323.pdf"
+                "Implementing ReWOO paradigm: https://arxiv.org/abs/2305.18323. "
+                "The ReWOO agent makes a step by step plan in the first stage, "
+                "then solves each step in the second stage. The agent can use "
+                "external tools to help in the reasoning process. Once all stages "
+                "are completed, the agent will summarize the answer."
             ),
         }
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 14a5357ac..a174157c9 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -877,5 +877,9 @@ def get_info(cls) -> dict:
         return {
             "id": "simple",
             "name": "Simple QA",
-            "description": "Simple QA pipeline",
+            "description": (
+                "Simple RAG-based question answering pipeline. This pipeline can "
+                "perform both keyword search and similarity search to retrieve the "
+                "context. After that it includes that context to generate the answer."
+            ),
         }

From 55ac272e8076a588ed191444aca7d466385e2d02 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Tue, 18 Jun 2024 22:58:48 +0700
Subject: [PATCH 34/56] feat(ui): show version on the main window

---
 libs/ktem/ktem/app.py            | 2 ++
 libs/ktem/ktem/assets/js/main.js | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/libs/ktem/ktem/app.py b/libs/ktem/ktem/app.py
index 5fb7006c5..991327cee 100644
--- a/libs/ktem/ktem/app.py
+++ b/libs/ktem/ktem/app.py
@@ -36,6 +36,7 @@ class BaseApp:
     def __init__(self):
         self.dev_mode = getattr(settings, "KH_MODE", "") == "dev"
         self.app_name = getattr(settings, "KH_APP_NAME", "Kotaemon")
+        self.app_version = getattr(settings, "KH_APP_VERSION", "")
         self.f_user_management = getattr(settings, "KH_FEATURE_USER_MANAGEMENT", False)
         self._theme = gr.Theme.from_hub("lone17/kotaemon")
 
@@ -44,6 +45,7 @@ def __init__(self):
             self._css = fi.read()
         with (dir_assets / "js" / "main.js").open() as fi:
             self._js = fi.read()
+            self._js = self._js.replace("KH_APP_VERSION", self.app_version)
         self._favicon = str(dir_assets / "img" / "favicon.svg")
 
         self.default_settings = SettingGroup(
diff --git a/libs/ktem/ktem/assets/js/main.js b/libs/ktem/ktem/assets/js/main.js
index 6436932b2..a441b8a34 100644
--- a/libs/ktem/ktem/assets/js/main.js
+++ b/libs/ktem/ktem/assets/js/main.js
@@ -6,6 +6,10 @@ function run() {
   main_parent.parentNode.style = "gap: 0";
   main_parent.parentNode.parentNode.style = "padding: 0";
 
+  const version_node = document.createElement("p");
+  version_node.innerHTML = "version: KH_APP_VERSION";
+  version_node.style = "position: fixed; top: 10px; right: 10px;";
+  main_parent.appendChild(version_node);
 
   // clpse
   globalThis.clpseFn = (id) => {

From 3f5fd6f0b76b6ad57a4cce7f50bafcbc5a88b545 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Wed, 19 Jun 2024 00:27:07 +0700
Subject: [PATCH 35/56] feat(ui): show default llm name in the setting page

---
 libs/ktem/ktem/index/file/pipelines.py |  1 +
 libs/ktem/ktem/pages/settings.py       | 55 ++++++++++++++++++++++++++
 libs/ktem/ktem/reasoning/react.py      |  1 +
 libs/ktem/ktem/reasoning/rewoo.py      |  2 +
 libs/ktem/ktem/reasoning/simple.py     |  1 +
 libs/ktem/ktem/settings.py             |  1 +
 6 files changed, 61 insertions(+)

diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 29288b9dd..22e9a1d2a 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -197,6 +197,7 @@ def get_user_settings(cls) -> dict:
                 "value": reranking_llm,
                 "component": "dropdown",
                 "choices": reranking_llm_choices,
+                "special_type": "llm",
             },
             "separate_embedding": {
                 "name": "Use separate embedding",
diff --git a/libs/ktem/ktem/pages/settings.py b/libs/ktem/ktem/pages/settings.py
index 59875eee2..72803dd63 100644
--- a/libs/ktem/ktem/pages/settings.py
+++ b/libs/ktem/ktem/pages/settings.py
@@ -74,6 +74,10 @@ def __init__(self, app):
         self._components = {}
         self._reasoning_mode = {}
 
+        # store llms and embeddings components
+        self._llms = []
+        self._embeddings = []
+
         # render application page if there are application settings
         self._render_app_tab = False
         if self._default_settings.application.settings:
@@ -253,6 +257,10 @@ def app_tab(self):
             for n, si in self._default_settings.application.settings.items():
                 obj = render_setting_item(si, si.value)
                 self._components[f"application.{n}"] = obj
+                if si.special_type == "llm":
+                    self._llms.append(obj)
+                if si.special_type == "embedding":
+                    self._embeddings.append(obj)
 
     def index_tab(self):
         # TODO: double check if we need general
@@ -269,6 +277,10 @@ def index_tab(self):
                     for n, si in sig.settings.items():
                         obj = render_setting_item(si, si.value)
                         self._components[f"index.options.{pn}.{n}"] = obj
+                        if si.special_type == "llm":
+                            self._llms.append(obj)
+                        if si.special_type == "embedding":
+                            self._embeddings.append(obj)
 
     def reasoning_tab(self):
         with gr.Tab("Reasoning settings", visible=self._render_reasoning_tab):
@@ -278,6 +290,10 @@ def reasoning_tab(self):
                         continue
                     obj = render_setting_item(si, si.value)
                     self._components[f"reasoning.{n}"] = obj
+                    if si.special_type == "llm":
+                        self._llms.append(obj)
+                    if si.special_type == "embedding":
+                        self._embeddings.append(obj)
 
             gr.Markdown("### Reasoning-specific settings")
             self._components["reasoning.use"] = render_setting_item(
@@ -301,6 +317,10 @@ def reasoning_tab(self):
                     for n, si in sig.settings.items():
                         obj = render_setting_item(si, si.value)
                         self._components[f"reasoning.options.{pn}.{n}"] = obj
+                        if si.special_type == "llm":
+                            self._llms.append(obj)
+                        if si.special_type == "embedding":
+                            self._embeddings.append(obj)
 
     def change_reasoning_mode(self, value):
         output = []
@@ -368,3 +388,38 @@ def _on_app_created(self):
                 outputs=[self._settings_state] + self.components(),
                 show_progress="hidden",
             )
+
+        def update_llms():
+            from ktem.llms.manager import llms
+
+            if llms._default:
+                llm_choices = [(f"{llms._default} (default)", "")]
+            else:
+                llm_choices = [("(random)", "")]
+            llm_choices += [(_, _) for _ in llms.options().keys()]
+            return gr.update(choices=llm_choices)
+
+        def update_embeddings():
+            from ktem.embeddings.manager import embedding_models_manager
+
+            if embedding_models_manager._default:
+                emb_choices = [(f"{embedding_models_manager._default} (default)", "")]
+            else:
+                emb_choices = [("(random)", "")]
+            emb_choices += [(_, _) for _ in embedding_models_manager.options().keys()]
+            return gr.update(choices=emb_choices)
+
+        for llm in self._llms:
+            self._app.app.load(
+                update_llms,
+                inputs=[],
+                outputs=[llm],
+                show_progress="hidden",
+            )
+        for emb in self._embeddings:
+            self._app.app.load(
+                update_embeddings,
+                inputs=[],
+                outputs=[emb],
+                show_progress="hidden",
+            )
diff --git a/libs/ktem/ktem/reasoning/react.py b/libs/ktem/ktem/reasoning/react.py
index d9be6b4bb..c93827451 100644
--- a/libs/ktem/ktem/reasoning/react.py
+++ b/libs/ktem/ktem/reasoning/react.py
@@ -298,6 +298,7 @@ def get_user_settings(cls) -> dict:
                 "value": llm,
                 "component": "dropdown",
                 "choices": llm_choices,
+                "special_type": "llm",
                 "info": (
                     "The language model to use for generating the answer. If None, "
                     "the application default language model will be used."
diff --git a/libs/ktem/ktem/reasoning/rewoo.py b/libs/ktem/ktem/reasoning/rewoo.py
index a86ae8dd5..7b66c8b1d 100644
--- a/libs/ktem/ktem/reasoning/rewoo.py
+++ b/libs/ktem/ktem/reasoning/rewoo.py
@@ -413,6 +413,7 @@ def get_user_settings(cls) -> dict:
                 "value": llm,
                 "component": "dropdown",
                 "choices": llm_choices,
+                "special_type": "llm",
                 "info": (
                     "The language model to use for planning. "
                     "This model will generate a plan based on the "
@@ -424,6 +425,7 @@ def get_user_settings(cls) -> dict:
                 "value": llm,
                 "component": "dropdown",
                 "choices": llm_choices,
+                "special_type": "llm",
                 "info": (
                     "The language model to use for solving. "
                     "This model will generate the answer based on the "
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index a174157c9..ad8ef4a3a 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -837,6 +837,7 @@ def get_user_settings(cls) -> dict:
                 "value": llm,
                 "component": "dropdown",
                 "choices": choices,
+                "special_type": "llm",
                 "info": (
                     "The language model to use for generating the answer. If None, "
                     "the application default language model will be used."
diff --git a/libs/ktem/ktem/settings.py b/libs/ktem/ktem/settings.py
index 2665e4f02..89f5c6518 100644
--- a/libs/ktem/ktem/settings.py
+++ b/libs/ktem/ktem/settings.py
@@ -19,6 +19,7 @@ class SettingItem(BaseModel):
     choices: list = Field(default_factory=list)
     metadata: dict = Field(default_factory=dict)
     component: str = "text"
+    special_type: str = ""
 
 
 class BaseSettingGroup(BaseModel):

From ad26dff4fb9f420b031de82c620bfe6cac2749f2 Mon Sep 17 00:00:00 2001
From: cin-ace <ace@cinnamon.is>
Date: Fri, 21 Jun 2024 08:22:16 +0700
Subject: [PATCH 36/56] fix(conf): append the result of doc in llm_scoring
 (#97)

---
 libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py b/libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py
index e575f9476..0ee5b23ac 100644
--- a/libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py
+++ b/libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py
@@ -45,6 +45,7 @@ def run(
                 doc.metadata["llm_reranking_score"] = round(score, 2)
             else:
                 doc.metadata["llm_reranking_score"] = round(1 - score, 2)
+            filtered_docs.append(doc)
 
         # prevent returning empty result
         if len(filtered_docs) == 0:

From 54a4e9ce0fc44ba938f82e852f0d8b0310599ec6 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Sat, 22 Jun 2024 03:36:41 +0000
Subject: [PATCH 37/56] fix: constrain the maximum number of images

---
 libs/kotaemon/kotaemon/loaders/utils/gpt4v.py | 22 +++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py b/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
index 1afbdfba9..e43059ca5 100644
--- a/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
+++ b/libs/kotaemon/kotaemon/loaders/utils/gpt4v.py
@@ -9,7 +9,11 @@
 
 
 def generate_gpt4v(
-    endpoint: str, images: str | List[str], prompt: str, max_tokens: int = 512
+    endpoint: str,
+    images: str | List[str],
+    prompt: str,
+    max_tokens: int = 512,
+    max_images: int = 10,
 ) -> str:
     # OpenAI API Key
     api_key = config("AZURE_OPENAI_API_KEY", default="")
@@ -30,7 +34,7 @@ def generate_gpt4v(
                         "type": "image_url",
                         "image_url": {"url": image},
                     }
-                    for image in images
+                    for image in images[:max_images]
                 ],
             }
         ],
@@ -38,6 +42,9 @@ def generate_gpt4v(
         "temperature": 0,
     }
 
+    if len(images) > max_images:
+        print(f"Truncated to {max_images} images (original {len(images)} images)")
+
     response = requests.post(endpoint, headers=headers, json=payload)
 
     try:
@@ -52,7 +59,11 @@ def generate_gpt4v(
 
 
 def stream_gpt4v(
-    endpoint: str, images: str | List[str], prompt: str, max_tokens: int = 512
+    endpoint: str,
+    images: str | List[str],
+    prompt: str,
+    max_tokens: int = 512,
+    max_images: int = 10,
 ) -> Any:
     # OpenAI API Key
     api_key = config("AZURE_OPENAI_API_KEY", default="")
@@ -73,7 +84,7 @@ def stream_gpt4v(
                         "type": "image_url",
                         "image_url": {"url": image},
                     }
-                    for image in images
+                    for image in images[:max_images]
                 ],
             }
         ],
@@ -82,6 +93,8 @@ def stream_gpt4v(
         "logprobs": True,
         "temperature": 0,
     }
+    if len(images) > max_images:
+        print(f"Truncated to {max_images} images (original {len(images)} images")
     try:
         response = requests.post(endpoint, headers=headers, json=payload, stream=True)
         assert response.status_code == 200, str(response.content)
@@ -116,6 +129,7 @@ def stream_gpt4v(
 
     except Exception as e:
         logger.error(f"Error streaming gpt4v {e}")
+        logprobs = []
         output = ""
 
     return output, logprobs

From ff0f8529694a68a33d80b66ec6c9e0bf35e8e15e Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Sat, 22 Jun 2024 17:47:35 +0700
Subject: [PATCH 38/56] feat(ui): allow filtering files by name in file list page

---
 libs/ktem/ktem/index/file/ui.py | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/libs/ktem/ktem/index/file/ui.py b/libs/ktem/ktem/index/file/ui.py
index 3ddc9ceda..d97e316a9 100644
--- a/libs/ktem/ktem/index/file/ui.py
+++ b/libs/ktem/ktem/index/file/ui.py
@@ -146,6 +146,14 @@ def on_building_ui(self):
                     )
 
                 gr.Markdown("## File List")
+                self.filter = gr.Textbox(
+                    value="",
+                    label="Filter by name:",
+                    info=(
+                        "(1) Case-insensitive. "
+                        "(2) Search with empty string to show all files."
+                    ),
+                )
                 self.file_list_state = gr.State(value=None)
                 self.file_list = gr.DataFrame(
                     headers=[
@@ -354,7 +362,7 @@ def on_register_events(self):
             )
             .then(
                 fn=self.list_file,
-                inputs=[self._app.user_id],
+                inputs=[self._app.user_id, self.filter],
                 outputs=[self.file_list_state, self.file_list],
             )
             .then(
@@ -420,7 +428,7 @@ def on_register_events(self):
 
         uploadedEvent = onUploaded.then(
             fn=self.list_file,
-            inputs=[self._app.user_id],
+            inputs=[self._app.user_id, self.filter],
             outputs=[self.file_list_state, self.file_list],
             concurrency_limit=20,
         )
@@ -454,11 +462,18 @@ def on_register_events(self):
             show_progress="hidden",
         )
 
+        self.filter.submit(
+            fn=self.list_file,
+            inputs=[self._app.user_id, self.filter],
+            outputs=[self.file_list_state, self.file_list],
+            show_progress="hidden",
+        )
+
     def _on_app_created(self):
         """Called when the app is created"""
         self._app.app.load(
             self.list_file,
-            inputs=[self._app.user_id],
+            inputs=[self._app.user_id, self.filter],
             outputs=[self.file_list_state, self.file_list],
         )
 
@@ -595,7 +610,7 @@ def index_files_from_dir(
 
         yield from self.index_fn(files, reindex, settings, user_id)
 
-    def list_file(self, user_id):
+    def list_file(self, user_id, name_pattern=""):
         if user_id is None:
             # not signed in
             return [], pd.DataFrame.from_records(
@@ -616,6 +631,8 @@ def list_file(self, user_id):
             statement = select(Source)
             if self._index.config.get("private", False):
                 statement = statement.where(Source.user == user_id)
+            if name_pattern:
+                statement = statement.where(Source.name.ilike(f"%{name_pattern}%"))
             results = [
                 {
                     "id": each[0].id,
@@ -644,6 +661,7 @@ def list_file(self, user_id):
                 ]
             )
 
+        print(f"{len(results)=}, {len(file_list)=}")
         return results, file_list
 
     def interact_file_list(self, list_files, ev: gr.SelectData):

From ceebbb2a78d8c71dd5178ec161358be1cd07c65e Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Wed, 3 Jul 2024 17:43:55 +0700
Subject: [PATCH 39/56] Fix exceeding token length error for OpenAI embeddings
 by chunking then averaging (#99)

* Average embeddings in case the text exceeds max size

* Add docstring
---
 libs/kotaemon/kotaemon/embeddings/openai.py | 68 ++++++++++++++++++---
 1 file changed, 59 insertions(+), 9 deletions(-)

diff --git a/libs/kotaemon/kotaemon/embeddings/openai.py b/libs/kotaemon/kotaemon/embeddings/openai.py
index 74655dcdd..a5111ba1a 100644
--- a/libs/kotaemon/kotaemon/embeddings/openai.py
+++ b/libs/kotaemon/kotaemon/embeddings/openai.py
@@ -1,5 +1,8 @@
+from itertools import islice
 from typing import Optional
 
+import numpy as np
+import tiktoken
 from theflow.utils.modules import import_dotted_string
 
 from kotaemon.base import Param
@@ -7,6 +10,24 @@
 from .base import BaseEmbeddings, Document, DocumentWithEmbedding
 
 
+def split_text_by_chunk_size(text: str, chunk_size: int) -> list[list[int]]:
+    """Split the text into chunks of a given size
+
+    Args:
+        text: text to split
+        chunk_size: size of each chunk
+
+    Returns:
+        list of chunks (as tokens)
+    """
+    encoding = tiktoken.get_encoding("cl100k_base")
+    tokens = iter(encoding.encode(text))
+    result = []
+    while chunk := list(islice(tokens, chunk_size)):
+        result.append(chunk)
+    return result
+
+
 class BaseOpenAIEmbeddings(BaseEmbeddings):
     """Base interface for OpenAI embedding model, using the openai library.
 
@@ -32,6 +53,9 @@ class BaseOpenAIEmbeddings(BaseEmbeddings):
             "Only supported in `text-embedding-3` and later models."
         ),
     )
+    context_length: Optional[int] = Param(
+        8191, help="The maximum context length of the embedding model"
+    )
 
     @Param.auto(depends_on=["max_retries"])
     def max_retries_(self):
@@ -56,16 +80,42 @@ def openai_response(self, client, **kwargs):
     def invoke(
         self, text: str | list[str] | Document | list[Document], *args, **kwargs
     ) -> list[DocumentWithEmbedding]:
-        input_ = self.prepare_input(text)
+        input_doc = self.prepare_input(text)
         client = self.prepare_client(async_version=False)
-        resp = self.openai_response(
-            client, input=[_.text if _.text else " " for _ in input_], **kwargs
-        ).dict()
-        output_ = sorted(resp["data"], key=lambda x: x["index"])
-        return [
-            DocumentWithEmbedding(embedding=o["embedding"], content=i)
-            for i, o in zip(input_, output_)
-        ]
+
+        input_: list[str | list[int]] = []
+        splitted_indices = {}
+        for idx, text in enumerate(input_doc):
+            if self.context_length:
+                chunks = split_text_by_chunk_size(text.text, self.context_length)
+                splitted_indices[idx] = (len(input_), len(input_) + len(chunks))
+                input_.extend(chunks)
+            else:
+                splitted_indices[idx] = (len(input_), len(input_) + 1)
+                input_.append(text.text)
+
+        resp = self.openai_response(client, input=input_, **kwargs).dict()
+        output_ = list(sorted(resp["data"], key=lambda x: x["index"]))
+
+        output = []
+        for idx, doc in enumerate(input_doc):
+            embs = output_[splitted_indices[idx][0] : splitted_indices[idx][1]]
+            if len(embs) == 1:
+                output.append(
+                    DocumentWithEmbedding(embedding=embs[0]["embedding"], content=doc)
+                )
+                continue
+
+            chunk_lens = [
+                len(_)
+                for _ in input_[splitted_indices[idx][0] : splitted_indices[idx][1]]
+            ]
+            vs: list[list[float]] = [_["embedding"] for _ in embs]
+            emb = np.average(vs, axis=0, weights=chunk_lens)
+            emb = emb / np.linalg.norm(emb)
+            output.append(DocumentWithEmbedding(embedding=emb.tolist(), content=doc))
+
+        return output
 
     async def ainvoke(
         self, text: str | list[str] | Document | list[Document], *args, **kwargs

From d04dc2f75d94a410a6cb4aa8002c6dacdcbe3b1e Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Thu, 4 Jul 2024 12:25:05 +0700
Subject: [PATCH 40/56] fix: Allow empty string when calling embedding

---
 libs/kotaemon/kotaemon/embeddings/openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/kotaemon/kotaemon/embeddings/openai.py b/libs/kotaemon/kotaemon/embeddings/openai.py
index a5111ba1a..fb8ca43bd 100644
--- a/libs/kotaemon/kotaemon/embeddings/openai.py
+++ b/libs/kotaemon/kotaemon/embeddings/openai.py
@@ -87,7 +87,7 @@ def invoke(
         splitted_indices = {}
         for idx, text in enumerate(input_doc):
             if self.context_length:
-                chunks = split_text_by_chunk_size(text.text, self.context_length)
+                chunks = split_text_by_chunk_size(text.text or " ", self.context_length)
                 splitted_indices[idx] = (len(input_), len(input_) + len(chunks))
                 input_.extend(chunks)
             else:

From 9e2fe4afc9259733c2bec05d79fed48107b84df8 Mon Sep 17 00:00:00 2001
From: cin-ace <ace@cinnamon.is>
Date: Sun, 7 Jul 2024 21:59:59 +0700
Subject: [PATCH 41/56] fix: update trulens LLM ranking score for retrieval
 confidence, improve citation (#98)

* Round scores only when displaying them, not when they are computed

* Add LLMTrulens reranking model

* Use LLMTrulensScoring in pipeline

* fix: update UI display for trulens score

---------

Co-authored-by: taprosoft <tadashi@cinnamon.is>
---
 .../kotaemon/indices/rankings/__init__.py     |   9 +-
 .../kotaemon/indices/rankings/cohere.py       |   2 +-
 .../kotaemon/indices/rankings/llm_scoring.py  |   4 +-
 .../kotaemon/indices/rankings/llm_trulens.py  | 155 ++++++++++++
 libs/ktem/ktem/index/file/pipelines.py        |   4 +-
 libs/ktem/ktem/reasoning/simple.py            | 227 +++++++++---------
 6 files changed, 285 insertions(+), 116 deletions(-)
 create mode 100644 libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py

diff --git a/libs/kotaemon/kotaemon/indices/rankings/__init__.py b/libs/kotaemon/kotaemon/indices/rankings/__init__.py
index 9de04d8d8..84b8765b4 100644
--- a/libs/kotaemon/kotaemon/indices/rankings/__init__.py
+++ b/libs/kotaemon/kotaemon/indices/rankings/__init__.py
@@ -2,5 +2,12 @@
 from .cohere import CohereReranking
 from .llm import LLMReranking
 from .llm_scoring import LLMScoring
+from .llm_trulens import LLMTrulensScoring
 
-__all__ = ["CohereReranking", "LLMReranking", "LLMScoring", "BaseReranking"]
+__all__ = [
+    "CohereReranking",
+    "LLMReranking",
+    "LLMScoring",
+    "BaseReranking",
+    "LLMTrulensScoring",
+]
diff --git a/libs/kotaemon/kotaemon/indices/rankings/cohere.py b/libs/kotaemon/kotaemon/indices/rankings/cohere.py
index d22be9a8a..4f5866ac5 100644
--- a/libs/kotaemon/kotaemon/indices/rankings/cohere.py
+++ b/libs/kotaemon/kotaemon/indices/rankings/cohere.py
@@ -33,7 +33,7 @@ def run(self, documents: list[Document], query: str) -> list[Document]:
         )
         for r in response.results:
             doc = documents[r.index]
-            doc.metadata["cohere_reranking_score"] = round(r.relevance_score, 2)
+            doc.metadata["cohere_reranking_score"] = r.relevance_score
             compressed_docs.append(doc)
 
         return compressed_docs
diff --git a/libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py b/libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py
index 0ee5b23ac..b4f510533 100644
--- a/libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py
+++ b/libs/kotaemon/kotaemon/indices/rankings/llm_scoring.py
@@ -42,9 +42,9 @@ def run(
             score = np.exp(np.average(result.logprobs))
             include_doc = output_parser.parse(result.text)
             if include_doc:
-                doc.metadata["llm_reranking_score"] = round(score, 2)
+                doc.metadata["llm_reranking_score"] = score
             else:
-                doc.metadata["llm_reranking_score"] = round(1 - score, 2)
+                doc.metadata["llm_reranking_score"] = 1 - score
             filtered_docs.append(doc)
 
         # prevent returning empty result
diff --git a/libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py b/libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py
new file mode 100644
index 000000000..1fa4dc45f
--- /dev/null
+++ b/libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py
@@ -0,0 +1,155 @@
+from __future__ import annotations
+
+import re
+from concurrent.futures import ThreadPoolExecutor
+
+from kotaemon.base import Document, HumanMessage, SystemMessage
+from kotaemon.llms import BaseLLM, PromptTemplate
+
+from .llm import LLMReranking
+
+SYSTEM_PROMPT_TEMPLATE = PromptTemplate(
+    """You are a RELEVANCE grader; providing the relevance of the given CONTEXT to the given QUESTION.
+        Respond only as a number from 0 to 10 where 0 is the least relevant and 10 is the most relevant.
+
+        A few additional scoring guidelines:
+
+        - Long CONTEXTS should score equally well as short CONTEXTS.
+
+        - RELEVANCE score should increase as the CONTEXTS provides more RELEVANT context to the QUESTION.
+
+        - RELEVANCE score should increase as the CONTEXTS provides RELEVANT context to more parts of the QUESTION.
+
+        - CONTEXT that is RELEVANT to some of the QUESTION should score of 2, 3 or 4. Higher score indicates more RELEVANCE.
+
+        - CONTEXT that is RELEVANT to most of the QUESTION should get a score of 5, 6, 7 or 8. Higher score indicates more RELEVANCE.
+
+        - CONTEXT that is RELEVANT to the entire QUESTION should get a score of 9 or 10. Higher score indicates more RELEVANCE.
+
+        - CONTEXT must be relevant and helpful for answering the entire QUESTION to get a score of 10.
+
+        - Never elaborate."""  # noqa: E501
+)
+
+USER_PROMPT_TEMPLATE = PromptTemplate(
+    """QUESTION: {question}
+
+        CONTEXT: {context}
+
+        RELEVANCE: """
+)  # noqa
+
+PATTERN_INTEGER: re.Pattern = re.compile(r"([+-]?[1-9][0-9]*|0)")
+"""Regex that matches integers."""
+
+
+def validate_rating(rating) -> int:
+    """Validate a rating is between 0 and 10."""
+
+    if not 0 <= rating <= 10:
+        raise ValueError("Rating must be between 0 and 10")
+
+    return rating
+
+
+def re_0_10_rating(s: str) -> int:
+    """Extract a 0-10 rating from a string.
+
+    If the string does not match an integer or matches an integer outside the
+    0-10 range, raises an error instead. If multiple numbers are found within
+    the expected 0-10 range, the smallest is returned.
+
+    Args:
+        s: String to extract rating from.
+
+    Returns:
+        int: Extracted rating.
+
+    Raises:
+        ParseError: If no integers between 0 and 10 are found in the string.
+    """
+
+    matches = PATTERN_INTEGER.findall(s)
+    if not matches:
+        raise AssertionError
+
+    vals = set()
+    for match in matches:
+        try:
+            vals.add(validate_rating(int(match)))
+        except ValueError:
+            pass
+
+    if not vals:
+        raise AssertionError
+
+    # Min to handle cases like "The rating is 8 out of 10."
+    return min(vals)
+
+
+class LLMTrulensScoring(LLMReranking):
+    llm: BaseLLM
+    system_prompt_template: PromptTemplate = SYSTEM_PROMPT_TEMPLATE
+    user_prompt_template: PromptTemplate = USER_PROMPT_TEMPLATE
+    top_k: int = 3
+    concurrent: bool = True
+    normalize: float = 10
+
+    def run(
+        self,
+        documents: list[Document],
+        query: str,
+    ) -> list[Document]:
+        """Filter down documents based on their relevance to the query."""
+        filtered_docs = []
+
+        documents = sorted(documents, key=lambda doc: doc.get_content())
+        if self.concurrent:
+            with ThreadPoolExecutor() as executor:
+                futures = []
+                for doc in documents:
+                    messages = []
+                    messages.append(
+                        SystemMessage(self.system_prompt_template.populate())
+                    )
+                    messages.append(
+                        HumanMessage(
+                            self.user_prompt_template.populate(
+                                question=query, context=doc.get_content()
+                            )
+                        )
+                    )
+                    futures.append(executor.submit(lambda: self.llm(messages).text))
+
+                results = [future.result() for future in futures]
+        else:
+            results = []
+            for doc in documents:
+                messages = []
+                messages.append(SystemMessage(self.system_prompt_template.populate()))
+                messages.append(
+                    SystemMessage(
+                        self.user_prompt_template.populate(
+                            question=query, context=doc.get_content()
+                        )
+                    )
+                )
+                results.append(self.llm(messages).text)
+
+        # use Boolean parser to extract relevancy output from LLM
+        results = [
+            (r_idx, float(re_0_10_rating(result)) / self.normalize)
+            for r_idx, result in enumerate(results)
+        ]
+        results.sort(key=lambda x: x[1], reverse=True)
+
+        for r_idx, score in results:
+            doc = documents[r_idx]
+            doc.metadata["llm_trulens_score"] = score
+            filtered_docs.append(doc)
+
+        # prevent returning empty result
+        if len(filtered_docs) == 0:
+            filtered_docs = documents[: self.top_k]
+
+        return filtered_docs
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 22e9a1d2a..afb655a8c 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -35,7 +35,7 @@
     BaseReranking,
     CohereReranking,
     LLMReranking,
-    LLMScoring,
+    LLMTrulensScoring,
 )
 from kotaemon.indices.splitters import BaseSplitter, TokenSplitter
 
@@ -254,7 +254,7 @@ def get_pipeline(cls, user_settings, index_settings, selected):
                 )
             ],
             retrieval_mode=user_settings["retrieval_mode"],
-            rerankers=[LLMScoring(), CohereReranking()],
+            rerankers=[CohereReranking(), LLMTrulensScoring()],
         )
         if not user_settings["use_reranking"]:
             retriever.rerankers = []  # type: ignore
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index ad8ef4a3a..2e9e00cb7 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -3,6 +3,7 @@
 import logging
 import re
 from collections import defaultdict
+from difflib import SequenceMatcher
 from functools import partial
 from typing import Generator
 
@@ -50,6 +51,26 @@ def is_close(val1, val2, tolerance=1e-9):
     return abs(val1 - val2) <= tolerance
 
 
+def find_text(search_span, context):
+    sentence_list = search_span.split("\n")
+    matches = []
+    # don't search for small text
+    if len(search_span) > 5:
+        for sentence in sentence_list:
+            match = SequenceMatcher(
+                None, sentence, context, autojunk=False
+            ).find_longest_match()
+            if match.size > len(search_span) * 0.6:
+                matches.append((match.b, match.b + match.size))
+                print(
+                    "search",
+                    search_span,
+                    "matched",
+                    context[match.b : match.b + match.size],
+                )
+    return matches
+
+
 _default_token_func = tiktoken.encoding_for_model("gpt-3.5-turbo").encode
 
 
@@ -139,9 +160,9 @@ def run(self, docs: list[RetrievedDocument]) -> Document:
 
 
 DEFAULT_QA_TEXT_PROMPT = (
-    "Use the following pieces of context to answer the question at the end. "
+    "Use the following pieces of context to answer the question at the end in detail with clear explanation. "  # noqa: E501
     "If you don't know the answer, just say that you don't know, don't try to "
-    "make up an answer. Keep the answer as concise as possible. Give answer in "
+    "make up an answer. Give answer in "
     "{lang}.\n\n"
     "{context}\n"
     "Question: {question}\n"
@@ -150,14 +171,14 @@ def run(self, docs: list[RetrievedDocument]) -> Document:
 
 DEFAULT_QA_TABLE_PROMPT = (
     "List all rows (row number) from the table context that related to the question, "
-    "then provide detail answer with clear explanation and citations. "
+    "then provide detail answer with clear explanation. "
     "If you don't know the answer, just say that you don't know, "
     "don't try to make up an answer. Give answer in {lang}.\n\n"
     "Context:\n"
     "{context}\n"
     "Question: {question}\n"
     "Helpful Answer:"
-)
+)  # noqa
 
 DEFAULT_QA_CHATBOT_PROMPT = (
     "Pick the most suitable chatbot scenarios to answer the question at the end, "
@@ -168,7 +189,7 @@ def run(self, docs: list[RetrievedDocument]) -> Document:
     "{context}\n"
     "Question: {question}\n"
     "Answer:"
-)
+)  # noqa
 
 DEFAULT_QA_FIGURE_PROMPT = (
     "Use the given context: texts, tables, and figures below to answer the question. "
@@ -178,7 +199,7 @@ def run(self, docs: list[RetrievedDocument]) -> Document:
     "{context}\n"
     "Question: {question}\n"
     "Answer: "
-)
+)  # noqa
 
 DEFAULT_REWRITE_PROMPT = (
     "Given the following question, rephrase and expand it "
@@ -187,7 +208,7 @@ def run(self, docs: list[RetrievedDocument]) -> Document:
     "Give answer in {lang}\n"
     "Original question: {question}\n"
     "Rephrased question: "
-)
+)  # noqa
 
 
 class AnswerWithContextPipeline(BaseComponent):
@@ -400,12 +421,14 @@ def stream(  # type: ignore
         if evidence and self.enable_citation:
             citation = self.citation_pipeline(context=evidence, question=question)
 
+        if logprobs:
+            qa_score = np.exp(np.average(logprobs))
+        else:
+            qa_score = None
+
         answer = Document(
             text=output,
-            metadata={
-                "citation": citation,
-                "qa_score": round(np.exp(np.average(logprobs)), 2),
-            },
+            metadata={"citation": citation, "qa_score": qa_score},
         )
 
         return answer
@@ -556,6 +579,47 @@ def retrieve(
 
         return docs, info
 
+    def _format_retrieval_score_and_doc(
+        self,
+        doc: Document,
+        rendered_doc_content: str,
+        open_collapsible: bool = False,
+    ) -> str:
+        """Format the retrieval score and the document"""
+        # score from doc_store (Elasticsearch)
+        if is_close(doc.score, -1.0):
+            text_search_str = " default from full-text search<br>"
+        else:
+            text_search_str = "<br>"
+
+        vectorstore_score = round(doc.score, 2)
+        llm_reranking_score = (
+            round(doc.metadata["llm_trulens_score"], 2)
+            if doc.metadata.get("llm_trulens_score") is not None
+            else None
+        )
+        cohere_reranking_score = (
+            round(doc.metadata["cohere_reranking_score"], 2)
+            if doc.metadata.get("cohere_reranking_score")
+            else None
+        )
+        item_type_prefix = doc.metadata.get("type", "")
+        item_type_prefix = item_type_prefix.capitalize()
+        if item_type_prefix:
+            item_type_prefix += " from "
+
+        return Render.collapsible(
+            header=(f"{item_type_prefix}{get_header(doc)} [{llm_reranking_score}]"),
+            content="<b>Vectorstore score:</b>"
+            f" {vectorstore_score}"
+            f"{text_search_str}"
+            "<b>LLM reranking score:</b>"
+            f" {llm_reranking_score}<br>"
+            "<b>Cohere reranking score:</b>"
+            f" {cohere_reranking_score}<br>" + rendered_doc_content,
+            open=open_collapsible,
+        )
+
     def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document]]:
         """Prepare the citations to show on the UI"""
         with_citation, without_citation = [], []
@@ -565,116 +629,63 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
             for fact_with_evidence in answer.metadata["citation"].answer:
                 for quote in fact_with_evidence.substring_quote:
                     for doc in docs:
-                        start_idx = doc.text.find(quote)
-                        if start_idx == -1:
-                            continue
-
-                        end_idx = start_idx + len(quote)
-
-                        current_idx = start_idx
-                        if "|" not in doc.text[start_idx:end_idx]:
-                            spans[doc.doc_id].append(
-                                {"start": start_idx, "end": end_idx}
-                            )
-                        else:
-                            while doc.text[current_idx:end_idx].find("|") != -1:
-                                match_idx = doc.text[current_idx:end_idx].find("|")
+                        matches = find_text(quote, doc.text)
+
+                        for start, end in matches:
+                            if "|" not in doc.text[start:end]:
                                 spans[doc.doc_id].append(
                                     {
-                                        "start": current_idx,
-                                        "end": current_idx + match_idx,
+                                        "start": start,
+                                        "end": end,
                                     }
                                 )
-                                current_idx += match_idx + 2
-                                if current_idx > end_idx:
-                                    break
-                        break
 
         id2docs = {doc.doc_id: doc for doc in docs}
         not_detected = set(id2docs.keys()) - set(spans.keys())
-        for id, ss in spans.items():
+
+        # render highlight spans
+        for _id, ss in spans.items():
             if not ss:
-                not_detected.add(id)
+                not_detected.add(_id)
                 continue
+            cur_doc = id2docs[_id]
             ss = sorted(ss, key=lambda x: x["start"])
-            text = id2docs[id].text[: ss[0]["start"]]
+            text = cur_doc.text[: ss[0]["start"]]
             for idx, span in enumerate(ss):
-                text += Render.highlight(id2docs[id].text[span["start"] : span["end"]])
+                text += Render.highlight(cur_doc.text[span["start"] : span["end"]])
                 if idx < len(ss) - 1:
-                    text += id2docs[id].text[span["end"] : ss[idx + 1]["start"]]
-            text += id2docs[id].text[ss[-1]["end"] :]
-            if is_close(id2docs[id].score, -1.0):
-                text_search_str = " default from full-text search<br>"
-            else:
-                text_search_str = "<br>"
-
-            if (
-                id2docs[id].metadata.get("llm_reranking_score") is None
-                or id2docs[id].metadata.get("cohere_reranking_score") is None
-            ):
-                cloned_chunk_str = (
-                    "<b>Cloned chunk for a table. No reranking score</b><br>"
-                )
-            else:
-                cloned_chunk_str = ""
-
+                    text += cur_doc.text[span["end"] : ss[idx + 1]["start"]]
+            text += cur_doc.text[ss[-1]["end"] :]
+            # add to display list
             with_citation.append(
                 Document(
                     channel="info",
-                    content=Render.collapsible(
-                        header=(
-                            f"{get_header(id2docs[id])}<br>"
-                            "<b>Vectorstore score:</b>"
-                            f" {round(id2docs[id].score, 2)}"
-                            f"{text_search_str}"
-                            f"{cloned_chunk_str}"
-                            "<b>LLM reranking score:</b>"
-                            f' {id2docs[id].metadata.get("llm_reranking_score")}<br>'
-                            "<b>Cohere reranking score:</b>"
-                            f' {id2docs[id].metadata.get("cohere_reranking_score")}<br>'
-                        ),
-                        content=Render.table(text),
-                        open=True,
+                    content=self._format_retrieval_score_and_doc(
+                        cur_doc,
+                        Render.table(text),
+                        open_collapsible=True,
                     ),
                 )
             )
+        print("Got {} cited docs".format(len(with_citation)))
 
-        for id_ in list(not_detected):
-            doc = id2docs[id_]
-            if is_close(doc.score, -1.0):
-                text_search_str = " default from full-text search<br>"
-            else:
-                text_search_str = "<br>"
+        sorted_not_detected_items_with_scores = [
+            (id_, id2docs[id_].metadata.get("llm_trulens_score", 0.0))
+            for id_ in not_detected
+        ]
+        sorted_not_detected_items_with_scores.sort(key=lambda x: x[1], reverse=True)
 
-            if (
-                doc.metadata.get("llm_reranking_score") is None
-                or doc.metadata.get("cohere_reranking_score") is None
-            ):
-                cloned_chunk_str = (
-                    "<b>Cloned chunk for a table. No reranking score</b><br>"
-                )
-            else:
-                cloned_chunk_str = ""
+        for id_, _ in sorted_not_detected_items_with_scores:
+            doc = id2docs[id_]
             if doc.metadata.get("type", "") == "image":
                 without_citation.append(
                     Document(
                         channel="info",
-                        content=Render.collapsible(
-                            header=(
-                                f"{get_header(doc)}<br>"
-                                "<b>Vectorstore score:</b>"
-                                f" {round(doc.score, 2)}"
-                                f"{text_search_str}"
-                                f"{cloned_chunk_str}"
-                                "<b>LLM reranking score:</b>"
-                                f' {doc.metadata.get("llm_reranking_score")}<br>'
-                                "<b>Cohere reranking score:</b>"
-                                f' {doc.metadata.get("cohere_reranking_score")}<br>'
-                            ),
-                            content=Render.image(
+                        content=self._format_retrieval_score_and_doc(
+                            doc,
+                            Render.image(
                                 url=doc.metadata["image_origin"], text=doc.text
                             ),
-                            open=True,
                         ),
                     )
                 )
@@ -682,20 +693,8 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
                 without_citation.append(
                     Document(
                         channel="info",
-                        content=Render.collapsible(
-                            header=(
-                                f"{get_header(doc)}<br>"
-                                "<b>Vectorstore score:</b>"
-                                f" {round(doc.score, 2)}"
-                                f"{text_search_str}"
-                                f"{cloned_chunk_str}"
-                                "<b>LLM reranking score:</b>"
-                                f' {doc.metadata.get("llm_reranking_score")}<br>'
-                                "<b>Cohere reranking score:</b>"
-                                f' {doc.metadata.get("cohere_reranking_score")}<br>'
-                            ),
-                            content=Render.table(doc.text),
-                            open=True,
+                        content=self._format_retrieval_score_and_doc(
+                            doc, Render.table(doc.text)
                         ),
                     )
                 )
@@ -744,6 +743,8 @@ def stream(  # type: ignore
         if self.use_rewrite:
             message = self.rewrite_pipeline(question=message).text
 
+        print(f"Rewritten message (use_rewrite={self.use_rewrite}): {message}")
+        print(f"Retrievers {self.retrievers}")
         # should populate the context
         docs, infos = self.retrieve(message, history)
         for _ in infos:
@@ -770,12 +771,18 @@ def stream(  # type: ignore
             if without_citation:
                 for _ in without_citation:
                     yield _
+
+        qa_score = (
+            round(answer.metadata["qa_score"], 2)
+            if answer.metadata.get("qa_score")
+            else None
+        )
         yield Document(
             channel="info",
             content=(
                 "<h5><b>Question answering</b></h5><br>"
                 "<b>Question answering confidence:</b> "
-                f"{answer.metadata.get('qa_score')}"
+                f"{qa_score}"
             ),
         )
 

From fd54bbd89623646bc4619828ed09c11a5d1bd508 Mon Sep 17 00:00:00 2001
From: cin-ace <ace@cinnamon.is>
Date: Mon, 8 Jul 2024 11:58:00 +0700
Subject: [PATCH 42/56] feat: add question decomposition & few-shot rewrite
 pipeline (#89)

* Create few-shot query-rewriting. Run and display the result in info_panel

* Fix style check

* Put the functions to separate modules

* Add zero-shot question decomposition

* Fix fewshot rewriting

* Add default few-shot examples

* Fix decompose question

* Fix importing rewriting pipelines

* fix: update decompose logic in fullQA pipeline

---------

Co-authored-by: taprosoft <tadashi@cinnamon.is>
---
 libs/ktem/ktem/index/file/pipelines.py        |    3 +-
 .../reasoning/prompt_optimization/__init__.py |    9 +
 .../prompt_optimization/decompose_question.py |   79 +
 .../fewshot_rewrite_question.py               |  100 +
 .../rephrase_question_train.json              | 9090 +++++++++++++++++
 .../prompt_optimization/rewrite_question.py   |   37 +
 libs/ktem/ktem/reasoning/simple.py            |  246 +-
 7 files changed, 9514 insertions(+), 50 deletions(-)
 create mode 100644 libs/ktem/ktem/reasoning/prompt_optimization/__init__.py
 create mode 100644 libs/ktem/ktem/reasoning/prompt_optimization/decompose_question.py
 create mode 100644 libs/ktem/ktem/reasoning/prompt_optimization/fewshot_rewrite_question.py
 create mode 100644 libs/ktem/ktem/reasoning/prompt_optimization/rephrase_question_train.json
 create mode 100644 libs/ktem/ktem/reasoning/prompt_optimization/rewrite_question.py

diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index afb655a8c..50839e6b4 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -113,6 +113,7 @@ def run(
             text: the text to retrieve similar documents
             doc_ids: list of document ids to constraint the retrieval
         """
+        print("searching in doc_ids", doc_ids)
         if not doc_ids:
             logger.info(f"Skip retrieval because of no selected files: {self}")
             return []
@@ -266,7 +267,7 @@ def get_pipeline(cls, user_settings, index_settings, selected):
                     )
 
         kwargs = {".doc_ids": selected}
-        retriever.set_run(kwargs, temp=True)
+        retriever.set_run(kwargs, temp=False)
         return retriever
 
 
diff --git a/libs/ktem/ktem/reasoning/prompt_optimization/__init__.py b/libs/ktem/ktem/reasoning/prompt_optimization/__init__.py
new file mode 100644
index 000000000..29d8bf962
--- /dev/null
+++ b/libs/ktem/ktem/reasoning/prompt_optimization/__init__.py
@@ -0,0 +1,9 @@
+from .decompose_question import DecomposeQuestionPipeline
+from .fewshot_rewrite_question import FewshotRewriteQuestionPipeline
+from .rewrite_question import RewriteQuestionPipeline
+
+__all__ = [
+    "DecomposeQuestionPipeline",
+    "FewshotRewriteQuestionPipeline",
+    "RewriteQuestionPipeline",
+]
diff --git a/libs/ktem/ktem/reasoning/prompt_optimization/decompose_question.py b/libs/ktem/ktem/reasoning/prompt_optimization/decompose_question.py
new file mode 100644
index 000000000..7fdc4730b
--- /dev/null
+++ b/libs/ktem/ktem/reasoning/prompt_optimization/decompose_question.py
@@ -0,0 +1,79 @@
+import logging
+
+from ktem.llms.manager import llms
+from ktem.reasoning.prompt_optimization.rewrite_question import RewriteQuestionPipeline
+from pydantic import BaseModel, Field
+
+from kotaemon.base import Document, HumanMessage, Node, SystemMessage
+from kotaemon.llms import ChatLLM
+
+logger = logging.getLogger(__name__)
+
+
+class SubQuery(BaseModel):  # tool schema; the docstring is its description
+    """Search over a database of insurance rulebooks or financial reports"""
+
+    sub_query: str = Field(  # one decomposed sub-question, filled in by the LLM
+        ...,  # required: no default value
+        description="A very specific query against the database.",
+    )
+
+
+class DecomposeQuestionPipeline(RewriteQuestionPipeline):
+    """Decompose a complex user question into multiple sub-questions
+
+    Args:
+        llm: the language model to rewrite question
+        lang: the language of the answer. Currently support English and Japanese
+    """
+
+    llm: ChatLLM = Node(
+        default_callback=lambda _: llms.get("openai-gpt4-turbo", llms.get_default())
+    )
+    DECOMPOSE_SYSTEM_PROMPT_TEMPLATE = (
+        "You are an expert at converting user complex questions into sub questions. "
+        "Perform query decomposition using provided function_call. "
+        "Given a user question, break it down into the most specific sub"
+        " questions you can (at most 3) "
+        "which will help you answer the original question. "
+        "Each sub question should be about a single concept/fact/idea. "
+        "If there are acronyms or words you are not familiar with, "
+        "do not try to rephrase them."
+    )
+    prompt_template: str = DECOMPOSE_SYSTEM_PROMPT_TEMPLATE
+
+    def create_prompt(self, question):
+        """Build the chat messages and tool-calling kwargs for `question`."""
+        schema = SubQuery.model_json_schema()
+        function = {
+            "name": schema["title"],
+            "description": schema["description"],
+            "parameters": schema,
+        }
+        llm_kwargs = {
+            "tools": [{"type": "function", "function": function}],
+            "tool_choice": "auto",
+        }
+        messages = [
+            SystemMessage(content=self.prompt_template),
+            HumanMessage(content=question),
+        ]
+        return messages, llm_kwargs
+
+    def run(self, question: str) -> list:  # type: ignore
+        """Return one Document per sub-question produced via LLM tool calls.
+
+        An empty list is returned when the LLM makes no tool call.
+        """
+        messages, llm_kwargs = self.create_prompt(question)
+        result = self.llm(messages, **llm_kwargs)
+        tool_calls = result.additional_kwargs.get("tool_calls") or []
+        # "arguments" is expected to be a JSON string matching the SubQuery schema
+        return [
+            Document(
+                content=SubQuery.model_validate_json(
+                    call["function"]["arguments"]
+                ).sub_query
+            )
+            for call in tool_calls
+        ]
diff --git a/libs/ktem/ktem/reasoning/prompt_optimization/fewshot_rewrite_question.py b/libs/ktem/ktem/reasoning/prompt_optimization/fewshot_rewrite_question.py
new file mode 100644
index 000000000..a90cc94cb
--- /dev/null
+++ b/libs/ktem/ktem/reasoning/prompt_optimization/fewshot_rewrite_question.py
@@ -0,0 +1,100 @@
+import json
+import uuid
+from pathlib import Path
+
+from ktem.components import get_docstore, get_vectorstore
+from ktem.llms.manager import llms
+from ktem.reasoning.prompt_optimization.rewrite_question import (
+    DEFAULT_REWRITE_PROMPT,
+    RewriteQuestionPipeline,
+)
+from theflow.settings import settings as flowsettings
+
+from kotaemon.base import AIMessage, Document, HumanMessage, Node, SystemMessage
+from kotaemon.embeddings import BaseEmbeddings
+from kotaemon.llms import ChatLLM
+from kotaemon.storages import BaseDocumentStore, BaseVectorStore
+
+
+class FewshotRewriteQuestionPipeline(RewriteQuestionPipeline):
+    """Rewrite the user question, guided by retrieved few-shot examples
+
+    Args:
+        llm: the language model to rewrite question
+        rewrite_template: the prompt template for llm to paraphrase a text input
+        lang: the language of the answer. Currently support English and Japanese
+        embedding: the embedding model to encode the question
+        vector_store: the vector store to store the encoded question
+        doc_store: the document store to store the original question
+        k: the number of examples to retrieve for rewriting
+    """
+
+    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
+    rewrite_template: str = DEFAULT_REWRITE_PROMPT
+    lang: str = "English"
+    embedding: BaseEmbeddings
+    vector_store: BaseVectorStore
+    doc_store: BaseDocumentStore
+    k: int = getattr(flowsettings, "N_PROMPT_OPT_EXAMPLES", 3)
+
+    def add_documents(self, examples, batch_size: int = 50):
+        """Embed the examples and add them to the vector and document stores.
+
+        Args:
+            examples: dicts with an "input" key; each dict is kept as metadata
+            batch_size: number of examples embedded and stored per batch
+        """
+        print("Adding fewshot examples for rewriting")
+        documents = [
+            Document(text=example["input"], id_=str(uuid.uuid4()), metadata=example)
+            for example in examples
+        ]
+        for i in range(0, len(documents), batch_size):
+            batch = documents[i : i + batch_size]
+            self.vector_store.add(
+                embeddings=self.embedding(batch), ids=[doc.doc_id for doc in batch]
+            )
+            self.doc_store.add(batch)
+
+    @classmethod
+    def get_pipeline(
+        cls,
+        embedding,
+        example_path=Path(__file__).parent / "rephrase_question_train.json",
+        collection_name: str = "fewshot_rewrite_examples",
+    ):
+        """Create the pipeline, indexing the examples if both stores are empty."""
+        vector_store = get_vectorstore(collection_name)
+        doc_store = get_docstore(collection_name)
+        pipeline = cls(
+            embedding=embedding, vector_store=vector_store, doc_store=doc_store
+        )
+        if vector_store.count() or doc_store.count():
+            return pipeline
+
+        # read_text closes the file; UTF-8 is pinned for the JSON examples
+        examples = json.loads(Path(example_path).read_text(encoding="utf-8"))
+        pipeline.add_documents(examples)
+        return pipeline
+
+    def run(self, question: str) -> Document:  # type: ignore
+        """Rewrite `question` using the k nearest examples as few-shot turns.
+
+        Each retrieved example is replayed as a user/assistant exchange before
+        the final user message, which carries the actual question. The return
+        value is whatever `self.llm` produces for those messages.
+        """
+        emb = self.embedding(question)[0].embedding
+        _, _, ids = self.vector_store.query(embedding=emb, top_k=self.k)
+        examples = self.doc_store.get(ids)
+        messages = [SystemMessage(content="You are a helpful assistant")]
+        for example in examples:
+            # one few-shot turn: the templated input, then its stored rewrite
+            prompt = self.rewrite_template.format(
+                question=example.metadata["input"], lang=self.lang
+            )
+            messages.append(HumanMessage(content=prompt))
+            messages.append(AIMessage(content=example.metadata["output"]))
+        prompt = self.rewrite_template.format(question=question, lang=self.lang)
+        messages.append(HumanMessage(content=prompt))
+        return self.llm(messages)
diff --git a/libs/ktem/ktem/reasoning/prompt_optimization/rephrase_question_train.json b/libs/ktem/ktem/reasoning/prompt_optimization/rephrase_question_train.json
new file mode 100644
index 000000000..44f9dd7a1
--- /dev/null
+++ b/libs/ktem/ktem/reasoning/prompt_optimization/rephrase_question_train.json
@@ -0,0 +1,9090 @@
+[
+  {
+    "input": "What was the percentage increase in General and Administrative expenses in the year 2018 compared to the previous year?",
+    "output": "What was the increase in the General and administrative in 2018?"
+  },
+  {
+    "input": "What was the specific percentage increase in the General and Administrative expenses during the year 2018?",
+    "output": "What was the increase in the General and administrative in 2018?"
+  },
+  {
+    "input": "What was the decrease in the Other net expense in 2019?",
+    "output": "What was the specific amount of the decrease in the Other net expense in the financial year of 2019?"
+  },
+  {
+    "input": "What was the decrease in the Other net expense in 2019?",
+    "output": "What was the specific decrease amount experienced in the Other net expense during the year 2019?"
+  },
+  {
+    "input": "What was the increase in interest income?",
+    "output": "What was the exact amount by which the interest income increased?"
+  },
+  {
+    "input": "What was the increase in interest income?",
+    "output": "What was the exact amount of the increase in interest income?"
+  },
+  {
+    "input": "What was the average Other expense, net for 2018 and 2019?",
+    "output": "What was the average net Other expense for the years 2018 and 2019 combined?"
+  },
+  {
+    "input": "In which year was Other expense, net less than 5,000 thousands?",
+    "output": "In what particular year did the amount of Other expense, net fall below 5,000 thousands?"
+  },
+  {
+    "input": "What were the income tax expenses in the years 2019, 2018, and 2017, respectively?",
+    "output": "What was the income tax expense in 2019,2018 and 2017 respectively?"
+  },
+  {
+    "input": "\"What were the income tax expenses for the years 2019, 2018, and 2017?\"",
+    "output": "What was the income tax expense in 2019,2018 and 2017 respectively?"
+  },
+  {
+    "input": "How much money did the company have in unremitted earnings for the year 2019?",
+    "output": "What was the company's unremitted earnings in 2019?"
+  },
+  {
+    "input": "How much unremitted earnings did the company have in 2019, without releasing or distributing them?",
+    "output": "What was the company's unremitted earnings in 2019?"
+  },
+  {
+    "input": "Which year witnessed interest and penalties that amounted to less than 20 thousand?",
+    "output": "In which year was Interest and penalties less than 20 thousand?"
+  },
+  {
+    "input": "What is the net carrying amount in 2019?",
+    "output": "What is the net carrying amount for the year 2019? Please provide details to assist in providing a more accurate response."
+  },
+  {
+    "input": "What is the net carrying amount in 2019?",
+    "output": "What is the net carrying amount for the year 2019? Can you provide more details or context about the specific asset, liability or financial element in consideration?"
+  },
+  {
+    "input": "What is the change in the debt discount, net of amortization from December 31, 2019 to December 31, 2018?",
+    "output": "What was the net change in the debt discount's value, after taking into account amortization, between December 31, 2019 and December 31, 2018?"
+  },
+  {
+    "input": "What is the change in Net carrying amount from December 31, 2019 to December 31, 2018?",
+    "output": "What is the difference in the net carrying amount between December 31, 2019 and December 31, 2018?"
+  },
+  {
+    "input": "What is the change in Furniture and equipment from December 31, 2019 to December 31, 2018?",
+    "output": "What is the net difference in the value of furniture and equipment between December 31, 2019 and December 31, 2018?"
+  },
+  {
+    "input": "What is the change in Furniture and equipment from December 31, 2019 to December 31, 2018?",
+    "output": "What was the net change in the value of Furniture and equipment between December 31, 2019 and December 31, 2018?"
+  },
+  {
+    "input": "What is the change in System hardware from December 31, 2019 to December 31, 2018?",
+    "output": "What are the differences in the hardware of the System between December 31, 2019, and December 31, 2018?"
+  },
+  {
+    "input": "What was the exact percentage increase in the cost of revenue between 2017 and 2018?",
+    "output": "What was the increase in the cost of revenue in 2018?"
+  },
+  {
+    "input": "What was the percentage increase in the total amount spent on expenses directly related to generating revenue in 2018 compared to the previous year?",
+    "output": "What was the increase in the cost of revenue in 2018?"
+  },
+  {
+    "input": "In which year was cost of revenue less than 40,000 thousands?",
+    "output": "In what specific year did the cost of revenue fall below the amount of 40,000 thousands?"
+  },
+  {
+    "input": "If a company used its earnings to finance its domestic operations, what are the potential outcomes or consequences of this decision?",
+    "output": "What would happen if earnings were used to fund domestic operations?"
+  },
+  {
+    "input": "What was the change in Cash, cash equivalents, and restricted cash at the beginning of the period, comparing December 31, 2019, to December 31, 2018?",
+    "output": "What is the change in Cash, cash equivalents and restricted cash at beginning of period from December 31, 2019 to December 31, 2018?"
+  },
+  {
+    "input": "What is the net difference in Cash provided by operating activities between December 31, 2019 and December 31, 2018?",
+    "output": "What is the change in Cash provided by operating activities from December 31, 2019 to December 31, 2018?"
+  },
+  {
+    "input": "What is the change in Cash used in investing activities from December 31, 2019 to December 31, 2018?",
+    "output": "What is the difference in the amount of Cash used in investing activities between December 31, 2019, and December 31, 2018?"
+  },
+  {
+    "input": "What was the percentage change in the Sales and marketing expense from 2017 to 2018?",
+    "output": "What was the increase in the Sales and marketing expense in 2018 compared to 2017?"
+  },
+  {
+    "input": "In which year was Sales and marketing expenses less than 50,000 thousands?",
+    "output": "In what specific year did the total expenses for sales and marketing fall below 50,000 thousands (50 million) currency units?"
+  },
+  {
+    "input": "In which year was General and administrative expenses less than 50,000 thousands?",
+    "output": "In which specific year did the amount for General and Administrative expenses fall below 50,000 thousands (50 million) dollars?"
+  },
+  {
+    "input": "What is the average Carrying Value for the period December 31, 2019 to December 31, 2018?",
+    "output": "What is the average Carrying Value from December 31, 2018 to December 31, 2019?"
+  },
+  {
+    "input": "In which specific year did the fair value of the 2022 Notes measure below 200,000 thousands?",
+    "output": "In which year was the 2022 Notes fair value less than 200,000 thousands?"
+  },
+  {
+    "input": "In the year when the fair value of the 2022 Notes was below 200,000 thousands, what specific year was it?",
+    "output": "In which year was the 2022 Notes fair value less than 200,000 thousands?"
+  },
+  {
+    "input": "What was the 2022 Notes cap price?",
+    "output": "\"What was the cap price for the 2022 Notes? Please provide any additional details that could assist in retrieving the information accurately.\""
+  },
+  {
+    "input": "What were the average expenses for sales and marketing in the years 2018 and 2019?",
+    "output": "What was the average Sales and marketing expenses for 2018 and 2019?"
+  },
+  {
+    "input": "What were the average expenses for Sales and Marketing in 2018 and 2019, respectively?",
+    "output": "What was the average Sales and marketing expenses for 2018 and 2019?"
+  },
+  {
+    "input": "In which year was Sales and marketing expenses less than 90,000 thousands?",
+    "output": "When was the year that the sales and marketing expenses were recorded to be under 90,000 thousands?"
+  },
+  {
+    "input": "In which year was Sales and marketing expenses less than 90,000 thousands?",
+    "output": "In which specific year did the total expenses for sales and marketing amount to a figure less than 90,000 thousands?"
+  },
+  {
+    "input": "What was the exact amount of gross profit recorded in the fiscal year of 2018?",
+    "output": "What was the Gross profit in 2018?"
+  },
+  {
+    "input": "What was the specific value of the gross profit for the year 2018?",
+    "output": "What was the Gross profit in 2018?"
+  },
+  {
+    "input": "In what specific year did the revenue fall below one hundred thousand thousands (100,000,000)?",
+    "output": "In which year was revenue less than 100,000 thousands?"
+  },
+  {
+    "input": "\"In which specific year did the revenue fall below 100,000 thousands (100 million)?\"",
+    "output": "In which year was revenue less than 100,000 thousands?"
+  },
+  {
+    "input": "What is the difference in the Gross Profit between the fiscal years 2018 and 2019?",
+    "output": "What is the change in the Gross Profit from 2018 to 2019?"
+  },
+  {
+    "input": "What were the capitalized software development costs for the company in the years ending on December 31, 2017?",
+    "output": "What was the company capitalized software development costs during the years ended December 31, 2017?"
+  },
+  {
+    "input": "What does the Adjusted EBITDA represent?",
+    "output": "What is the meaning and purpose of the term Adjusted EBITDA?"
+  },
+  {
+    "input": "What does the Adjusted EBITDA represent?",
+    "output": "What is the meaning and significance of Adjusted EBITDA?"
+  },
+  {
+    "input": "\"What were the federal statutory income tax rates in 2019, 2018, and 2017?\"",
+    "output": "What was the Income tax at federal statutory rate in 2019, 2018 and 2017?"
+  },
+  {
+    "input": "What were the federal statutory income tax rates in 2019, 2018, and 2017?",
+    "output": "What was the Income tax at federal statutory rate in 2019, 2018 and 2017?"
+  },
+  {
+    "input": "In what year did the State income tax expense, taking into account the federal tax effect, fall below negative one thousand thousands?",
+    "output": "In which year was State income tax expense, net of federal tax effect less than (1,000) thousands?"
+  },
+  {
+    "input": "What were the amounts of the Other expense, net in 2018 and 2017?",
+    "output": "What was the Other expense, net in 2018 and 2017 respectively?"
+  },
+  {
+    "input": "What is the difference in the amount of money owed from customers (net accounts receivable) between December 31, 2019, and December 31, 2018?",
+    "output": "What is the change in Net accounts receivable from December 31, 2019 to December 31, 2018?"
+  },
+  {
+    "input": "What are the respective federal income tax expense at statutory rates in 2018 and 2019?",
+    "output": "What were the federal income tax expenses at statutory rates for the years 2018 and 2019?"
+  },
+  {
+    "input": "What were the specific state income tax rates, after accounting for federal benefit deductions, for the years 2017 and 2018?",
+    "output": "What are the respective state income taxes, net of federal benefit in 2017 and 2018?"
+  },
+  {
+    "input": "What were the state income tax rates, after accounting for federal benefit, for both the years 2017 and 2018?",
+    "output": "What are the respective state income taxes, net of federal benefit in 2017 and 2018?"
+  },
+  {
+    "input": "What are the respective service revenue at March 31 and June 30, 2018?",
+    "output": "What were the service revenues as of March 31 and June 30, 2018?"
+  },
+  {
+    "input": "What are the respective service revenue at June 30 and September 30, 2018?",
+    "output": "What is the service revenue as of June 30 and September 30, 2018?"
+  },
+  {
+    "input": "What are the respective service revenue at June 30 and September 30, 2018?",
+    "output": "What is the amount of service revenue reported on the financial statements for June 30 and September 30, 2018?"
+  },
+  {
+    "input": "What percentage of the total revenue generated in North America in 2019 can be attributed to the non-core segment?",
+    "output": "What is the value of the revenue from the non-core segment from North America as a percentage of the total revenue earned in North America in 2019?"
+  },
+  {
+    "input": "What is the average amount of net cash generated from operating activities for the years 2018 and 2019?",
+    "output": "What is the average net cash provided by operating activities in 2018 and 2019?"
+  },
+  {
+    "input": "What is the amount of cash-based equity compensation expense included in network operations expenses in both 2018 and 2017?",
+    "output": "What are the respective on-cash equity-based compensation expense included in network operations expenses in 2018 and 2017?"
+  },
+  {
+    "input": "What are the respective service revenue in 2018 and 2019?",
+    "output": "What was the amount of service revenue in the year 2018 and 2019 respectively?"
+  },
+  {
+    "input": "What is the total amount of interest and principal payments required for the debt relating to the 2022 Notes?",
+    "output": "What is the value of the interest and principal payment obligations included in the debt under 2022 Notes?"
+  },
+  {
+    "input": "What is the total amount of both interest and principal that needs to be paid for the debt included in the 2022 Notes?",
+    "output": "What is the value of the interest and principal payment obligations included in the debt under 2022 Notes?"
+  },
+  {
+    "input": "What is the total amount of debt that the company needs to repay within the next 3 years?",
+    "output": "What is the company's total debt due within 3 years?"
+  },
+  {
+    "input": "What is the aggregate amount of debt that the company needs to repay within the next three years?",
+    "output": "What is the company's total debt due within 3 years?"
+  },
+  {
+    "input": "What is the total amount of finance lease obligations that the company needs to repay within the next 3 years?",
+    "output": "What is the company's total finance lease obligations due within 3 years?"
+  },
+  {
+    "input": "What were the specific values of system infrastructure in the years 2018 and 2019?",
+    "output": "What are the respective values of system infrastructure in 2018 and 2019?"
+  },
+  {
+    "input": "What were the values of system infrastructure in 2018 and 2019?",
+    "output": "What are the respective values of system infrastructure in 2018 and 2019?"
+  },
+  {
+    "input": "What is the percentage increase or decrease in the value of network equipment from 2018 to 2019?",
+    "output": "What is the percentage change in the value of network equipment between 2018 and 2019?"
+  },
+  {
+    "input": "What is the percentage change in the value of leasehold improvements between 2018 and 2019?",
+    "output": "What is the precise percentage increase or decrease in the value of leasehold improvements from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in the value of system infrastructure between 2018 and 2019?",
+    "output": "What is the percentage difference in the value of system infrastructure from 2018 to 2019?"
+  },
+  {
+    "input": "What were the pre-tax domestic incomes for 2018 and 2019?",
+    "output": "What are the respective domestic income before income taxes in 2018 and 2019?"
+  },
+  {
+    "input": "What was the mean pre-tax domestic income for individuals in the years 2017 and 2018?",
+    "output": "What is the average domestic income before income taxes in 2017 and 2018?"
+  },
+  {
+    "input": "What were the average pre-tax domestic incomes in the years 2017 and 2018?",
+    "output": "What is the average domestic income before income taxes in 2017 and 2018?"
+  },
+  {
+    "input": "What was the average pre-tax domestic income for households in 2018 and 2019?",
+    "output": "What is the average domestic income before income taxes in 2018 and 2019?"
+  },
+  {
+    "input": "What is the average number of basic weighted average common shares in 2018 and 2019?",
+    "output": "What is the mean value of the basic weighted average common shares for the years 2018 and 2019?"
+  },
+  {
+    "input": "What is the average number of basic weighted average common shares in 2018 and 2019?",
+    "output": "What is the average number of basic weighted average common shares for the years 2018 and 2019 combined?"
+  },
+  {
+    "input": "What was the average impact on stock dilution from stock options in 2018 and 2019?",
+    "output": "What is the average dilutive effect of stock options in 2018 and 2019?"
+  },
+  {
+    "input": "What was the average impact of stock options on dilution for the years 2018 and 2019?",
+    "output": "What is the average dilutive effect of stock options in 2018 and 2019?"
+  },
+  {
+    "input": "What does the cost of operations represent?",
+    "output": "What is the meaning or significance of the term \"cost of operations\"?"
+  },
+  {
+    "input": "How many quarters had operating revenues that was below $2,000 million?",
+    "output": "How many quarters had operating revenues below $2,000 million? Can you provide a breakdown of the number of quarters where the operating revenues were below $2,000 million?"
+  },
+  {
+    "input": "What was the exact change in Operating revenues from the Three Months Ended March to the Three Months Ended June?",
+    "output": "What was the change in Operating revenues between Three Months Ended March and June?"
+  },
+  {
+    "input": "What was the difference in operating revenues for the Three Months Ended March and the Three Months Ended June?",
+    "output": "What was the change in Operating revenues between Three Months Ended March and June?"
+  },
+  {
+    "input": "What was the amount of net property and equipment in 2015?",
+    "output": "What was the exact monetary value of net property and equipment in the year 2015?"
+  },
+  {
+    "input": "What was the amount of net property and equipment in 2015?",
+    "output": "What was the specific amount of net property and equipment recorded on the financial statements for the year 2015?"
+  },
+  {
+    "input": "What was the value of redeemable noncontrolling interests in the year 2019?",
+    "output": "What were the Redeemable noncontrolling interests in 2019?"
+  },
+  {
+    "input": "What was the value of the redeemable noncontrolling interests in 2019?",
+    "output": "What were the Redeemable noncontrolling interests in 2019?"
+  },
+  {
+    "input": "What was the percent increase or decrease in the equity of American Tower Corporation from 2015 to 2016 in the United States?",
+    "output": "What was the percentage change in Total American Tower Corporation equity between 2015 and 2016?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the equity of American Tower Corporation from 2015 to 2016?",
+    "output": "What was the percentage change in Total American Tower Corporation equity between 2015 and 2016?"
+  },
+  {
+    "input": "What was the difference in the overall value of assets from 2017 to 2018?",
+    "output": "What was the change in total assets between 2017 and 2018?"
+  },
+  {
+    "input": "What was the total amount received from the sales of senior notes in the year 2019 after deducting any expenses or fees?",
+    "output": "What were the net Proceeds from issuance of senior notes in 2019?"
+  },
+  {
+    "input": "What was the total amount of money received from the repayment of credit facilities in 2018?",
+    "output": "What were the net Proceeds from (repayments of) credit facilities in 2018?"
+  },
+  {
+    "input": "\"What was the total amount received or repaid through credit facilities in 2018?\"",
+    "output": "What were the net Proceeds from (repayments of) credit facilities in 2018?"
+  },
+  {
+    "input": "What was the value of Equipment in 2018?",
+    "output": "What was the total monetary worth or market value of Equipment in the year 2018?"
+  },
+  {
+    "input": "What was the value of Equipment in 2018?",
+    "output": "What was the monetary worth of the Equipment in the year 2018?"
+  },
+  {
+    "input": "What was the exact monetary worth of buildings and any related improvements in the year 2019?",
+    "output": "What was the value of buildings and improvements in 2019?"
+  },
+  {
+    "input": "What was the difference in the value of land and improvements from 2018 to 2019?",
+    "output": "What was the change in Land and improvements between 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the net value of Property and equipment between the years 2018 and 2019?",
+    "output": "What was the percentage change in Property and equipment, net between 2018 and 2019?"
+  },
+  {
+    "input": "What are the components that make up redevelopment capital expenditures?",
+    "output": "What does Redevelopment capital expenditures include?"
+  },
+  {
+    "input": "What actions are authorized by the company's Buyback Programs?",
+    "output": "What do the company's Buyback Programs authorize the company to do?"
+  },
+  {
+    "input": "What was the change in the total number of shares purchased between November and December?",
+    "output": "How much did the total number of shares purchased increase or decrease from November to December?"
+  },
+  {
+    "input": "What was the total value of the Property, Plant, and Equipment (PPE) in 2016?",
+    "output": "What was the gross PPE in 2016?"
+  },
+  {
+    "input": "What was the total value of personal protective equipment (PPE) in terms of sales or revenue for the year 2016?",
+    "output": "What was the gross PPE in 2016?"
+  },
+  {
+    "input": "For how many years has the total value of gross intangibles been above $15,000 million?",
+    "output": "How many years did gross intangibles exceed $15,000 million?"
+  },
+  {
+    "input": "How many consecutive years did the value of gross intangibles surpass $15,000 million?",
+    "output": "How many years did gross intangibles exceed $15,000 million?"
+  },
+  {
+    "input": "What was the percentage change in gross goodwill between 2018 and 2019?",
+    "output": "What was the percentage difference in the amount of gross goodwill from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in gross goodwill between 2018 and 2019?",
+    "output": "What was the percentage difference in gross goodwill from 2018 to 2019?"
+  },
+  {
+    "input": "\"What was the overall amount paid in lease payments for the operating lease agreement?\"",
+    "output": "What were the total lease payments for operating lease?"
+  },
+  {
+    "input": "What was the sum total of all lease payments made for an operating lease?",
+    "output": "What were the total lease payments for operating lease?"
+  },
+  {
+    "input": "\"What was the total amount of operating lease expenses incurred during the fiscal years 2020, 2021, and 2022?\"",
+    "output": "What was the sum of operating lease in fiscal years 2020-2022?"
+  },
+  {
+    "input": "What is the proportion of non-current lease liability in relation to the total lease liability, expressed as a percentage?",
+    "output": "What is non-current lease liability as a percentage of Total lease liability?"
+  },
+  {
+    "input": "Which years did the company allocate a valuation allowance?",
+    "output": "In which years did the company provide a valuation allowance?"
+  },
+  {
+    "input": "For how many years, starting from January 1, was the balance consistently higher than $100 million?",
+    "output": "How many years was the balance as of January 1 above $100 million?"
+  },
+  {
+    "input": "How many years, starting from January 1, had a balance above $100 million?",
+    "output": "How many years was the balance as of January 1 above $100 million?"
+  },
+  {
+    "input": "What specific details about the company's taxes can be found in their state tax returns?",
+    "output": "What do the company's state tax returns reflect?"
+  },
+  {
+    "input": "In 2017, how many instances were there where the deferred Income tax benefit (provision) exceeded $50 million?",
+    "output": "How many of the deferred Income tax benefit (provision) were above $50 million in 2017?"
+  },
+  {
+    "input": "How many instances occurred in 2017 where the deferred income tax benefit (provision) exceeded $50 million?",
+    "output": "How many of the deferred Income tax benefit (provision) were above $50 million in 2017?"
+  },
+  {
+    "input": "In 2019, what is the number of income tax benefits (provisions) that exceeded $(4 million)?",
+    "output": "How many of the current Income tax benefit (provision) were above $(4 million) in 2019?"
+  },
+  {
+    "input": "In 2019, what is the total number of income tax benefits (provisions) that exceeded $(4 million)?",
+    "output": "How many of the current Income tax benefit (provision) were above $(4 million) in 2019?"
+  },
+  {
+    "input": "What was the change in the net cash provided by (used for) by investing activities between 2017 and 2018?",
+    "output": "What was the difference in the net cash provided by (used for) investing activities from 2017 to 2018?"
+  },
+  {
+    "input": "What was the percentage change in diluted net income per share attributable to common stockholders of American Tower Corporation between the Three Months Ended in March and June?",
+    "output": "What was the change in Diluted net income per share attributable to American Tower Corporation common stockholders between Three Months Ended  March and June?"
+  },
+  {
+    "input": "What was the specific difference in the value of Prepaids and other current assets between the fiscal years 2018 and 2019?",
+    "output": "What was the change in Prepaids and other current assets between 2018 and 2019?"
+  },
+  {
+    "input": "What was the difference in the amount of Prepaids and other current assets reported in the financial statements between the years 2018 and 2019?",
+    "output": "What was the change in Prepaids and other current assets between 2018 and 2019?"
+  },
+  {
+    "input": "What was the total amount of property and real estate taxes that were accumulated or owed in 2018?",
+    "output": "What was the Accrued property and real estate taxes in 2018?"
+  },
+  {
+    "input": "What was the change in Other accrued expenses between 2018 and 2019?",
+    "output": "What was the difference in the amount of Other accrued expenses recorded in 2018 compared to 2019?"
+  },
+  {
+    "input": "What was the change in Other accrued expenses between 2018 and 2019?",
+    "output": "What was the difference in the amount of Other accrued expenses recorded between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the total amount of Net operating loss carryforwards recorded for the financial year 2019?",
+    "output": "What were the Net operating loss carryforwards in 2019?"
+  },
+  {
+    "input": "What were the amounts of the liability decreases included in the company's revisions in estimates for both 2019 and 2018?",
+    "output": "How much were the decreases to the liability included in the revisions in estimates by the company in 2019 and 2018 respectively?"
+  },
+  {
+    "input": "What was the change in balance as of December 31 between 2018 and 2019?",
+    "output": "What was the difference in the balance as of December 31st, 2018 and December 31st, 2019?"
+  },
+  {
+    "input": "What was the value of the Eure-et-Loir interests as of December 31, 2019?",
+    "output": "What was the monetary worth of the Eure-et-Loir interests on the specific date of December 31, 2019?"
+  },
+  {
+    "input": "How many of the non-current liabilities components in 2018 were above $500 million?",
+    "output": "How many components of non-current liabilities in the year 2018 exceeded $500 million in value?"
+  },
+  {
+    "input": "How many of the non-current liabilities components in 2018 were above $500 million?",
+    "output": "In 2018, how many components of non-current liabilities exceeded $500 million in value?"
+  },
+  {
+    "input": "How many years has the expense for stock-based compensation for Total exceeded $100 million?",
+    "output": "How many years did Total stock-based compensation expense exceed $100 million?"
+  },
+  {
+    "input": "For how many years has the expense for Stock-based compensation - Services been higher than $1 million?",
+    "output": "How many years did Stock-based compensation expense - Services exceed $1 million?"
+  },
+  {
+    "input": "What differentiates the characteristics of Federal and State NOLs from 2025 to 2029?",
+    "output": "What is the difference between Federal and State NOLs in the period 2025 to 2029?"
+  },
+  {
+    "input": "What differentiates Federal and State NOLs in the 2025-2029 timeframe, and how do they vary during this period?",
+    "output": "What is the difference between Federal and State NOLs in the period 2025 to 2029?"
+  },
+  {
+    "input": "What was the approximate percentage of revenue in the company's property segments were attributable to their communication sites in 2018?",
+    "output": "What was the estimated percentage of the company's total revenue in 2018 that came from their property segments, specifically from their communication sites?"
+  },
+  {
+    "input": "What was the approximate percentage of revenue in the company's property segments were attributable to their communication sites in 2018?",
+    "output": "What percentage of the company's total revenue in 2018 can be attributed to their communication sites within their property segments?"
+  },
+  {
+    "input": "What percentage of the total revenue in the United States in 2019 can be attributed to property segments?",
+    "output": "How many percent of total revenue in 2019 was accounted for by property segments in the U.S.?"
+  },
+  {
+    "input": "How many percent of total revenue in 2018 was accounted for by property segments in Asia?",
+    "output": "What was the percentage of the total revenue in 2018 contributed by the property segments specifically in Asia?"
+  },
+  {
+    "input": "What were the intangible assets related to the Tower and network location specifically in the year 2019?",
+    "output": "What were the Tower and network location intangible assets in 2019?"
+  },
+  {
+    "input": "What were the intangible assets related to the tower and network locations observed in 2019?",
+    "output": "What were the Tower and network location intangible assets in 2019?"
+  },
+  {
+    "input": "How many years were the total impairment charges above $200 million?",
+    "output": "How many years did the total impairment charges exceed or surpass $200 million?"
+  },
+  {
+    "input": "How many years were the total impairment charges above $200 million?",
+    "output": "For how many years did the total impairment charges exceed $200 million?"
+  },
+  {
+    "input": "What was the percentage change in Total impairment charges between 2018 and 2019?",
+    "output": "What percentage represents the difference in Total impairment charges between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in Total impairment charges between 2018 and 2019?",
+    "output": "What is the percentage difference in Total impairment charges from 2018 to 2019?"
+  },
+  {
+    "input": "For how long has the sum of all assets reached or surpassed $40,000 million?",
+    "output": "How many years did the total assets exceed $40,000 million?"
+  },
+  {
+    "input": "What was the balance as at January 1, 2019?",
+    "output": "What was the amount of money in the account on January 1, 2019?"
+  },
+  {
+    "input": "What was the difference in account balance on January 1st, comparing 2018 to 2019?",
+    "output": "What was the change in balance as of January 1 between 2018 and 2019?"
+  },
+  {
+    "input": "What was the difference in the account balance between January 1, 2018 and January 1, 2019?",
+    "output": "What was the change in balance as of January 1 between 2018 and 2019?"
+  },
+  {
+    "input": "What is the difference in the percentage increase of current year for 2017 and 2018?",
+    "output": "What was the change in current year increases between 2017 and 2018?"
+  },
+  {
+    "input": "What was the difference in the yearly increases of current year between 2017 and 2018?",
+    "output": "What was the change in current year increases between 2017 and 2018?"
+  },
+  {
+    "input": "What was the total amount spent on depreciation in the year 2019?",
+    "output": "What was the depreciation cost in 2019?"
+  },
+  {
+    "input": "What was the change in depreciation between 2018 and 2019?",
+    "output": "What was the difference in the amount of depreciation incurred in 2018 compared to 2019?"
+  },
+  {
+    "input": "What was the gross amount of accumulated depreciation at the beginning of 2017 and how did it change by the beginning of 2018?",
+    "output": "What was the change in Gross amount of accumulated depreciation at beginning between 2017 and 2018?"
+  },
+  {
+    "input": "Which specific years have the statute of limitations expired on certain unrecognized tax benefits?",
+    "output": "Which years did the statute of limitations on certain unrecognized tax benefits lapse?"
+  },
+  {
+    "input": "In 2019, what was the total value of receivables that had not yet been invoiced to customers?",
+    "output": "What were the unbilled receivables in 2019?"
+  },
+  {
+    "input": "What was the change in Unbilled receivables between 2018 and 2019?",
+    "output": "What was the net difference in Unbilled receivables from 2018 to 2019?"
+  },
+  {
+    "input": "What was the change in the investments into Nippon Yttrium Co., Ltd (\"NYC\") between 2018 and 2019?",
+    "output": "\"What was the difference in the amount of money invested in Nippon Yttrium Co., Ltd (\"NYC\") from 2018 to 2019?\""
+  },
+  {
+    "input": "What was the change in the investments into Nippon Yttrium Co., Ltd (\"NYC\") between 2018 and 2019?",
+    "output": "What was the difference in the amount of investments made into Nippon Yttrium Co., Ltd (\"NYC\") during the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in total investments between 2018 and 2019?",
+    "output": "What was the exact percentage difference in the total amount of investments from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in Pro forma revenues from 2017 to 2018?",
+    "output": "What was the change in Pro forma revenues between 2017 and 2018?"
+  },
+  {
+    "input": "How many years did net income exceed $100,000 thousand?",
+    "output": "For how many consecutive years has the net income surpassed $100,000 thousand?"
+  },
+  {
+    "input": "How many years did net income exceed $100,000 thousand?",
+    "output": "For how many consecutive years has the net income been greater than $100,000 thousand?"
+  },
+  {
+    "input": "What was the specific financial difference in adjustments made to the defined benefit post-retirement plan between the years 2017 and 2019?",
+    "output": "What was the change in Defined benefit post-retirement plan adjustments between 2017 and 2019?"
+  },
+  {
+    "input": "What was the difference in the adjustments made to Defined Benefit post-retirement plans between the years 2017 and 2019?",
+    "output": "What was the change in Defined benefit post-retirement plan adjustments between 2017 and 2019?"
+  },
+  {
+    "input": "What were the total sales revenues in Hong Kong in the year 2018?",
+    "output": "What was the net sales in Hong Kong in 2018?"
+  },
+  {
+    "input": "What was the exact amount of net sales generated in Singapore during the year 2017?",
+    "output": "What was the net sales in Singapore in 2017?"
+  },
+  {
+    "input": "What was the change in net sales in Mexico between 2017 and 2018?",
+    "output": "What was the difference in net sales in Mexico from 2017 to 2018 and how does it impact the overall sales performance in the country?"
+  },
+  {
+    "input": "In which specific years did the cumulative net sales in every region surpass one million dollars?",
+    "output": "Which years did the total net sales in all regions exceed $1,000,000 thousand?"
+  },
+  {
+    "input": "What was the percentage change in the net sales from Other Countries between 2018 and 2019?",
+    "output": "What was the percentage difference in net sales from Other Countries in 2019 compared to 2018?"
+  },
+  {
+    "input": "What was the difference in the amount of Amortization of debt issuance costs between the years 2017 and 2018?",
+    "output": "What was the change in the Amortization of debt issuance costs between 2017 and 2018?"
+  },
+  {
+    "input": "For how many years did the interest expense on capital leases surpass $200 thousand?",
+    "output": "How many years did Interest expense on capital leases exceed $200 thousand?"
+  },
+  {
+    "input": "By what percentage did the total interest expense increase or decrease from 2018 to 2019?",
+    "output": "What was the percentage change in the total interest expense between 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the total interest expense from 2018 to 2019?",
+    "output": "What was the percentage change in the total interest expense between 2018 and 2019?"
+  },
+  {
+    "input": "What was the total net actuarial loss or gain specifically associated with Other Benefits for the year 2018?",
+    "output": "What was the Net actuarial loss (gain) for Other Benefits in 2018?"
+  },
+  {
+    "input": "What was the change in net sales between 2017 and 2019?",
+    "output": "What is the difference in net sales from 2017 to 2019?"
+  },
+  {
+    "input": "For how many years was the cost of sales higher than $800,000 thousand?",
+    "output": "How many years did cost of sales exceed $800,000 thousand?"
+  },
+  {
+    "input": "For how many years has the cost of sales been higher than $800,000 thousand?",
+    "output": "How many years did cost of sales exceed $800,000 thousand?"
+  },
+  {
+    "input": "What was the precise net value of property, plant, and equipment in Japan specifically for the year 2019?",
+    "output": "What was the net amount of property, plant and equipment in Japan in 2019?"
+  },
+  {
+    "input": "What was the net amount of property, plant and equipment in Thailand in 2018?",
+    "output": "What was the exact net value of property, plant, and equipment in Thailand during the year 2018?"
+  },
+  {
+    "input": "How many years did Total net property, plant and equipment from Non-United States regions exceed $400,000 thousand?",
+    "output": "How many years was the total value of property, plant, and equipment from regions outside the United States greater than $400,000 thousand?"
+  },
+  {
+    "input": "What was the change in the net property, plant and equipment in China between 2018 and 2019?",
+    "output": "What was the exact difference in the net value of property, plant, and equipment in China during the period from 2018 to 2019?"
+  },
+  {
+    "input": "What was the net interest expense in 2019?",
+    "output": "What was the amount of interest expenses incurred in 2019, after subtracting any interest income earned during the same year?"
+  },
+  {
+    "input": "What was the change in the Loss on early extinguishment of debt between 2018 and 2019?",
+    "output": "What was the difference in the Loss on early extinguishment of debt from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the net interest expense from 2018 to 2019?",
+    "output": "What was the percentage change in the net interest expense between 2018 and 2019?"
+  },
+  {
+    "input": "What percentage increase or decrease occurred in the net interest expense from 2018 to 2019?",
+    "output": "What was the percentage change in the net interest expense between 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in the Returns reserves between 2018 and 2019?",
+    "output": "What was the difference in the amount allocated to Returns reserves between the fiscal years 2018 and 2019?"
+  },
+  {
+    "input": "How much did the Additions for tax positions change from 2018 to 2019 in the current year?",
+    "output": "What was the change in the Additions for tax positions of the current year between 2018 and 2019?"
+  },
+  {
+    "input": "What was the reflection of Pension and other post-retirement benefits?",
+    "output": "What did Pension and other post-retirement benefits reflect?"
+  },
+  {
+    "input": "What were the interest obligations that had payments due more than 5 years?",
+    "output": "What were the interest obligations that had payments due for a period longer than 5 years?"
+  },
+  {
+    "input": "What was the precise difference in the total amount between Employee separation liability and Restructuring liability?",
+    "output": "What was the difference in the total between Employee separation liability and Restructuring liability?"
+  },
+  {
+    "input": "What is the discrepancy in the total amount between Employee Separation Liability and Restructuring Liability?",
+    "output": "What was the difference in the total between Employee separation liability and Restructuring liability?"
+  },
+  {
+    "input": "What was the distinction in the payment amounts expected in Year 1 for interest obligations compared to operating lease obligations?",
+    "output": "What was the difference between the payments due by Year 1 between Interest obligations and operating lease obligations?"
+  },
+  {
+    "input": "What were the contrasting payment amounts between Year 1 for interest obligations and operating lease obligations?",
+    "output": "What was the difference between the payments due by Year 1 between Interest obligations and operating lease obligations?"
+  },
+  {
+    "input": "What is the percentage of total debt obligations in relation to the overall contractual obligations?",
+    "output": "What were the total debt obligations as a percentage of the total contractual obligations?"
+  },
+  {
+    "input": "What was the amount of cash generated or used in financing activities during the year 2017?",
+    "output": "What was the Net cash provided by (used in) financing activities in 2017?"
+  },
+  {
+    "input": "What was the percentage change in the amount of cash, cash equivalents, and restricted cash from 2018 to 2019?",
+    "output": "What was the percentage change in the Net increase in cash, cash equivalents, and restricted cash between 2018 and 2019?"
+  },
+  {
+    "input": "How much of a discount did Customer Advances receive?",
+    "output": "What was the amount of discount for Customer Advances?"
+  },
+  {
+    "input": "What was the total amount of discount provided for Customer Advances?",
+    "output": "What was the amount of discount for Customer Advances?"
+  },
+  {
+    "input": "What was the net gain or loss from acquisitions in the year 2018?",
+    "output": "What was the Acquisition (gain) loss in 2018?"
+  },
+  {
+    "input": "What were the restructuring charges in 2017?",
+    "output": "\"What were the specific charges related to restructuring expenses incurred during the year 2017?\""
+  },
+  {
+    "input": "What were the restructuring charges in 2017?",
+    "output": "What were the specific charges related to the corporate restructuring that occurred in 2017?"
+  },
+  {
+    "input": "For how many years has the income tax expense from continuing operations exceeded $10,000 thousand?",
+    "output": "How many years did Total current income tax expense from continuing operations exceed $10,000 thousand?"
+  },
+  {
+    "input": "What was the difference in foreign income tax expense between 2017 and 2018?",
+    "output": "What was the change in current foreign income tax expense between 2017 and 2018?"
+  },
+  {
+    "input": "What was the difference in the amount of foreign income tax expense recorded for the years 2017 and 2018?",
+    "output": "What was the change in current foreign income tax expense between 2017 and 2018?"
+  },
+  {
+    "input": "What was the percentage change in the deferred federal income tax expense between 2018 and 2019?",
+    "output": "What was the percentage increase or decrease in the amount of deferred federal income tax expense from 2018 to 2019?"
+  },
+  {
+    "input": "What were the restructuring charges in 2019?",
+    "output": "What were the specific charges incurred as part of the restructuring efforts in the year 2019?"
+  },
+  {
+    "input": "What was the change in Personnel reduction costs between 2017 and 2018?",
+    "output": "What was the difference in the costs associated with reducing personnel between the years 2017 and 2018?"
+  },
+  {
+    "input": "What is the percentage change in the costs associated with relocation and exit from 2018 to 2019?",
+    "output": "What was the percentage change in the Relocation and exit costs between 2018 and 2019?"
+  },
+  {
+    "input": "What is the percentage difference in the Relocation and exit costs from 2018 to 2019?",
+    "output": "What was the percentage change in the Relocation and exit costs between 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in Foreign income between 2017 and 2018?",
+    "output": "What was the difference in the amount of income from foreign sources recorded for the years 2017 and 2018?"
+  },
+  {
+    "input": "What were the net sales in 2019?",
+    "output": "What was the total value of sales generated in 2019?"
+  },
+  {
+    "input": "What were the net sales in 2019?",
+    "output": "What was the total amount of revenue generated from sales in the year 2019?"
+  },
+  {
+    "input": "\"What was the gross margin percentage for the quarter ending on September 30th?\"",
+    "output": "What was the gross margin for the Sep-30 quarter?"
+  },
+  {
+    "input": "\"What was the exact gross margin percentage for the quarter ending on September 30th?\"",
+    "output": "What was the gross margin for the Sep-30 quarter?"
+  },
+  {
+    "input": "\"For which specific time periods did the Gross Margin exceed $120,000 thousand?\"",
+    "output": "Which quarters ended did the Gross Margin exceed $120,000 thousand?"
+  },
+  {
+    "input": "What was the total value of contract assets in 2019?",
+    "output": "What were the amount of contract assets in 2019?"
+  },
+  {
+    "input": "What were the specific values of contract assets recorded in the year 2019?",
+    "output": "What were the amount of contract assets in 2019?"
+  },
+  {
+    "input": "What was the difference in the amount of prepaid expenses between the fiscal years 2018 and 2019?",
+    "output": "What was the change in prepaid expenses between 2018 and 2019?"
+  },
+  {
+    "input": "What was the difference in the amount of prepaid expenses between the years 2018 and 2019?",
+    "output": "What was the change in prepaid expenses between 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage difference in the variance between Software Solutions and Data and Analytics?",
+    "output": "What was the difference in the percent variance between Software Solutions and Data and Analytics?"
+  },
+  {
+    "input": "What was the reported amount of net trade receivables in 2017?",
+    "output": "What were the net trade receivables as reported in 2017?"
+  },
+  {
+    "input": "What was the adjustmentments for ASC 606 adoption for net computer software?",
+    "output": "What adjustments were made for the adoption of ASC 606 in relation to net computer software? Please provide a detailed explanation of the adjustments made for net computer software under ASC 606."
+  },
+  {
+    "input": "What was the adjustmentments for ASC 606 adoption for net computer software?",
+    "output": "What were the adjustments made for the adoption of ASC 606 regarding net computer software?"
+  },
+  {
+    "input": "In which specific years did the total value of Deferred contract costs surpass $30 million?",
+    "output": "Which years did Deferred contract costs exceed $30 million?"
+  },
+  {
+    "input": "What was the change in cash equivalents between 2018 and 2019?",
+    "output": "What was the difference in the amount of cash equivalents reported for the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in cash equivalents between 2018 and 2019?",
+    "output": "What was the difference in the amount of cash equivalents held between the year 2018 and the year 2019?"
+  },
+  {
+    "input": "What was the change in Cash between 2018 and 2019?",
+    "output": "What was the difference in the Cash amount reported in the financial statements of the company between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the amount of Equity method investments in 2019?",
+    "output": "How much money was invested through the equity method in 2019?"
+  },
+  {
+    "input": "Which years does the table provide information for the EBITDA by segment?",
+    "output": "For which years does the table contain EBITDA information segmented by different categories?"
+  },
+  {
+    "input": "Which years does the table provide information for the EBITDA by segment?",
+    "output": "For which specific years does the table provide EBITDA data segmented by different categories?"
+  },
+  {
+    "input": "What was the total value of EBITDA (Earnings Before Interest, Taxes, Depreciation, and Amortization) for the year 2018?",
+    "output": "What was the sum of the EBITDA in 2018?"
+  },
+  {
+    "input": "\"What was the specific monetary balance amount for Software Solutions in the year 2017?\"",
+    "output": "What was the balance in 2017 for Software Solutions?"
+  },
+  {
+    "input": "What was the specific balance amount, in USD, for Software Solutions in the year 2017?",
+    "output": "What was the balance in 2017 for Software Solutions?"
+  },
+  {
+    "input": "What was the specific amount of federal income tax expense incurred in the fiscal year of 2019?",
+    "output": "What was the current federal income tax expense in 2019?"
+  },
+  {
+    "input": "Which years does the table provide information for depreciation and amortization by segment?",
+    "output": "For which specific periods does the table offer data regarding depreciation and amortization broken down by segment?"
+  },
+  {
+    "input": "What was the value of the total assets before the implementation of ASC 606 revenue recognition standards?",
+    "output": "What were the total assets without the adoption of ASC 606?"
+  },
+  {
+    "input": "What was the sum of all assets before the implementation of ASC 606?",
+    "output": "What were the total assets without the adoption of ASC 606?"
+  },
+  {
+    "input": "How did the adoption of ASC 606 affect the difference between Total Assets and Total Liabilities?",
+    "output": "What was the difference the effect of ASC 606 Adoption between Total Assets and Total Liabilities?"
+  },
+  {
+    "input": "What is the impact of ASC 606 Adoption on the difference between Total Assets and Total Liabilities?",
+    "output": "What was the difference the effect of ASC 606 Adoption between Total Assets and Total Liabilities?"
+  },
+  {
+    "input": "What was the percentage change in cash flows from operating activities in 2019 compared to 2018?",
+    "output": "What was the cash flows provided by operating activities in 2019 as a percentage of the cash flow in 2018?"
+  },
+  {
+    "input": "For how many consecutive years did the Net increase in cash and cash equivalents remain positive?",
+    "output": "How many years was the Net  increase in cash and cash equivalents positive?"
+  },
+  {
+    "input": "What was the EBITDA difference specifically recorded between the Software Solutions division and the Data and Analytics division?",
+    "output": "What was the difference in the EBITDA between Software Solutions and Data and Analytics?"
+  },
+  {
+    "input": "What was the exact difference in EBITDA (earnings before interest, taxes, depreciation, and amortization) between Software Solutions and Data and Analytics?",
+    "output": "What was the difference in the EBITDA between Software Solutions and Data and Analytics?"
+  },
+  {
+    "input": "What specific expenses were included in the category of operating expenses for Corporate and Other?",
+    "output": "What did operating expenses for Corporate and Other include?"
+  },
+  {
+    "input": "What was the discrepancy or variance between the overall value of assets and the specific intangible asset of goodwill in the context of data and analytics?",
+    "output": "What was the difference between the total assets and goodwill from data and analytics?"
+  },
+  {
+    "input": "What was the difference between Operating expenses and Revenues from Software Solutions?",
+    "output": "What differentiates Operating expenses and Revenues from Software Solutions? Please provide a detailed explanation comparing the two."
+  },
+  {
+    "input": "What was the percentage change in the Net earnings margin between 2017 and 2019?",
+    "output": "What was the percentage difference in the Net earnings margin from 2017 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in the Net earnings margin between 2017 and 2019?",
+    "output": "What was the exact percentage difference in the net earnings margin from 2017 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in revenues between 2018 and 2019?",
+    "output": "What was the percentage increase or decrease in revenues from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in revenues between 2018 and 2019?",
+    "output": "What was the percentage difference in the amount of money earned in revenue from 2018 to 2019?"
+  },
+  {
+    "input": "What were the specific prepaid expenses incurred during the year 2018?",
+    "output": "What were the prepaid expenses in 2018?"
+  },
+  {
+    "input": "What were the specific categories of Other current assets in the financial statements for the year 2019?",
+    "output": "What were the Other current assets in 2019?"
+  },
+  {
+    "input": "What was the change in contract assets between 2018 and 2019?",
+    "output": "What was the difference in the amount of contract assets from 2018 to 2019?"
+  },
+  {
+    "input": "How many years did prepaid expenses exceed $40.0 million?",
+    "output": "For how many years was the amount of prepaid expenses higher than $40.0 million?"
+  },
+  {
+    "input": "How many years did the operating margin exceed 20.0%?",
+    "output": "For how many years was the operating margin greater than 20.0%?"
+  },
+  {
+    "input": "What was the amount of Corporate Services in 2018?",
+    "output": "What was the specific amount of funds allocated to Corporate Services during the year 2018?"
+  },
+  {
+    "input": "What was the amount of Corporate Services in 2018?",
+    "output": "How much did Corporate Services amount to in the year 2018?"
+  },
+  {
+    "input": "What specific factors or elements were taken into consideration when calculating the debt?",
+    "output": "What did the calculation for Debt include?"
+  },
+  {
+    "input": "What was the difference between Total Debt and Total Interest on Debt?",
+    "output": "What is the distinction between the total amount of debt and the total interest incurred on that debt?"
+  },
+  {
+    "input": "How many contracts with a value exceeding $100 million were made during the period of 2021-2022?",
+    "output": "For the period 2021-2022, how many contractual obligations exceeded $100 million?"
+  },
+  {
+    "input": "What is the dollar difference between the actual cost and the estimated cost for software solutions?",
+    "output": "What was the dollar variance for software solutions?"
+  },
+  {
+    "input": "What was the specific revenue generated from the field of Data and Analytics during the year 2017?",
+    "output": "What was revenue from Data and Analytics in 2017?"
+  },
+  {
+    "input": "For how many consecutive years has revenue from Data and Analytics surpassed $150 million?",
+    "output": "How many years did revenue from Data and Analytics exceed $150 million?"
+  },
+  {
+    "input": "What was the average amount of money earned between the years 2017 and 2018?",
+    "output": "What was the average total revenue between 2017 and 2018?"
+  },
+  {
+    "input": "What was the average revenue from Corporate and Other between 2017 and 2018?",
+    "output": "What was the average revenue generated from the Corporate and Other category during the two-year period encompassing 2017 and 2018?"
+  },
+  {
+    "input": "What was the change in purchased software between 2018 and 2019?",
+    "output": "What was the difference in the amount of software that was bought between the years 2018 and 2019?"
+  },
+  {
+    "input": "For how many years has the internally developed software generated revenue exceeding $800 million?",
+    "output": "How many years did internally developed software exceed $800 million?"
+  },
+  {
+    "input": "For how long did internally developed software generate revenue exceeding $800 million?",
+    "output": "How many years did internally developed software exceed $800 million?"
+  },
+  {
+    "input": "What was the percentage change in net computer software between 2018 and 2019?",
+    "output": "What was the percentage increase or decrease in net computer software from 2018 to 2019?"
+  },
+  {
+    "input": "How many years did Percentage of gross lease receivables exceed 2.0%?",
+    "output": "For how many years was the percentage of gross lease receivables above 2.0%?"
+  },
+  {
+    "input": "How many years did Percentage of gross lease receivables exceed 2.0%?",
+    "output": "For how long has the percentage of gross lease receivables been above 2.0%?"
+  },
+  {
+    "input": "What was the percentage change in Allowance for doubtful accounts between 2018 and 2019?",
+    "output": "What is the percentage difference in the allowance for doubtful accounts from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in Allowance for doubtful accounts between 2018 and 2019?",
+    "output": "What was the percentage increase or decrease in the Allowance for doubtful accounts from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in revenue from the Europe, Middle East, and Africa (EMEA) region between 2017 and 2018?",
+    "output": "What was the change for revenue from EMEA between 2017 and 2018?"
+  },
+  {
+    "input": "What was the percentage change in revenue from the Europe, Middle East, and Africa (EMEA) region from 2017 to 2018?",
+    "output": "What was the change for revenue from EMEA between 2017 and 2018?"
+  },
+  {
+    "input": "Why would actual maturities differ from the contractual maturities?",
+    "output": "Why do the actual time periods until maturity sometimes vary from the specified contractual time periods?"
+  },
+  {
+    "input": "What is the distinction in amortized cost between debt investments held for less than 1 year compared to those held for a period ranging from 1 year to 5 years?",
+    "output": "What was the difference between the amortized cost of debt investments that were within 1 year and after 1 year through 5 years?"
+  },
+  {
+    "input": "What was the total amount of money allocated to the gradual write-off of acquired intangible assets throughout the year 2018?",
+    "output": "What was the total amortization of purchased intangible assets in 2018?"
+  },
+  {
+    "input": "What was the difference in the variance in dollars between Available-for-sale debt investments and net Other gains (losses)?",
+    "output": "What was the variance in dollars between the variance in Available-for-sale debt investments and the variance in net Other gains (losses)?"
+  },
+  {
+    "input": "What was the percentage change in the net other gains (losses) between 2017 and 2018?",
+    "output": "What was the percent change in the net amount of other gains (losses) from 2017 to 2018?"
+  },
+  {
+    "input": "What were the dividends in 2018?",
+    "output": "What were the dividend payments made in the year 2018?"
+  },
+  {
+    "input": "What were the dividends in 2018?",
+    "output": "What was the total amount of dividends paid out in the year 2018?"
+  },
+  {
+    "input": "What units are being utilized in the table and what purpose do they serve in the given context?",
+    "output": "What are the units used in the table?"
+  },
+  {
+    "input": "What was the difference in the balance at the start of the fiscal year in 2017 compared to 2018?",
+    "output": "What was the change in Balance at beginning of fiscal year between 2017 and 2018?"
+  },
+  {
+    "input": "What was the difference in the balance at the start of the fiscal year from 2017 to 2018?",
+    "output": "What was the change in Balance at beginning of fiscal year between 2017 and 2018?"
+  },
+  {
+    "input": "How does the company typically recommend analyzing their liquidity and capital resources for future periods?",
+    "output": "How does the company believe that their liquidity and capital resources in future periods should be analyzed?"
+  },
+  {
+    "input": "What differentiates operating leases with durations of less than 1 year from those lasting 1 to 3 years?",
+    "output": "What was the difference in operating leases between those that were less than 1 year and 1 to 3 years?"
+  },
+  {
+    "input": "How did the characteristics of operating leases differ between those with a duration of less than 1 year and those lasting from 1 to 3 years?",
+    "output": "What was the difference in operating leases between those that were less than 1 year and 1 to 3 years?"
+  },
+  {
+    "input": "What percentage of the total contractual obligations is represented by the senior notes?",
+    "output": "What was the total senior notes as a percentage of total contractual obligations?"
+  },
+  {
+    "input": "What is the period that has the greatest Operating leases?",
+    "output": "What is the time frame during which Operating leases are most prevalent?"
+  },
+  {
+    "input": "How does the company manage and minimize the potential financial loss related to credit risk in relation to derivatives?",
+    "output": "How does the company mitigate credit risk associated with derivatives?"
+  },
+  {
+    "input": "Which years does the table provide information for the company's outstanding derivatives?",
+    "output": "For which specific years does the table contain information regarding the outstanding derivatives of the company?"
+  },
+  {
+    "input": "Which years does the table provide information for the company's outstanding derivatives?",
+    "output": "For which specific years does the table present information about the company's exceptional derivatives?"
+  },
+  {
+    "input": "What was the change in the Net investment hedging instruments between 2018 and 2019?",
+    "output": "What was the difference in the amount of net investment hedging instruments between 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in the Net investment hedging instruments between 2018 and 2019?",
+    "output": "What was the difference in the amount of Net investment hedging instruments between 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in the total amount of outstanding derivatives between 2018 and 2019?",
+    "output": "What was the percentage difference in the total value of outstanding derivatives from 2018 to 2019?"
+  },
+  {
+    "input": "How much was the total amount spent on Purchased Considerations specifically for BroadSoft?",
+    "output": "What was the amount of Purchased Considerations for BroadSoft?"
+  },
+  {
+    "input": "What were the differences in terms of Purchase Consideration specifically between Springpath and Broadsoft? Please provide additional details and insights to help understand this comparison better.",
+    "output": "What was the difference in Purchase Consideration between Springpath and Broadsoft?"
+  },
+  {
+    "input": "What was the specific variance in terms of Purchase Consideration when comparing Springpath and Broadsoft?",
+    "output": "What was the difference in Purchase Consideration between Springpath and Broadsoft?"
+  },
+  {
+    "input": "What was the amount of unearned income specifically derived from lease receivables in the year 2019?",
+    "output": "What was unearned income from lease receivables in 2019?"
+  },
+  {
+    "input": "\"What was the amount of unearned income generated specifically from lease receivables during the year 2019?\"",
+    "output": "What was unearned income from lease receivables in 2019?"
+  },
+  {
+    "input": "What was the difference in the reported total between current and noncurrent financing receivables?",
+    "output": "How much greater or lesser was the total reported amount of financing receivables between current and noncurrent categories?"
+  },
+  {
+    "input": "What were the inclusions of foreign exchange transactions and other related activities?",
+    "output": "What did foreign exchange and other include?"
+  },
+  {
+    "input": "What was the difference, expressed as a percentage, in the balance of financing receivables at the end of fiscal year 2018 compared to the balance at the end of fiscal year 2019?",
+    "output": "What was the percentage change in the balance at the end of fiscal year for financing receivables between 2018 and 2019?"
+  },
+  {
+    "input": "What percentage increase or decrease occurred in the balance of financing receivables at the end of the fiscal year from 2018 to 2019?",
+    "output": "What was the percentage change in the balance at the end of fiscal year for financing receivables between 2018 and 2019?"
+  },
+  {
+    "input": "How many years did Revenue exceed $50,000 million?",
+    "output": "How many consecutive years has the revenue exceeded $50,000 million?"
+  },
+  {
+    "input": "How many years did Revenue exceed $50,000 million?",
+    "output": "For how many complete years has the revenue consistently surpassed the $50,000 million mark?"
+  },
+  {
+    "input": "What is the difference in the amount of Goodwill between Duo and Luxtera and how does it impact their overall value?",
+    "output": "What was the difference in Goodwill between Duo and Luxtera?"
+  },
+  {
+    "input": "What were the variations in the level of Goodwill between Duo and Luxtera?",
+    "output": "What was the difference in Goodwill between Duo and Luxtera?"
+  },
+  {
+    "input": "What was the discrepancy in the value of purchased intangible assets between Luxtera and other companies?",
+    "output": "What was the difference in Purchased intangible assets between Luxtera and Others?"
+  },
+  {
+    "input": "What were the variations in the value of purchased intangible assets specifically between Luxtera and other companies/entities?",
+    "output": "What was the difference in Purchased intangible assets between Luxtera and Others?"
+  },
+  {
+    "input": "What is the acquisition with the highest Purchase Consideration?",
+    "output": "What is the highest purchase consideration acquisition?"
+  },
+  {
+    "input": "What was the total value of technology products purchased before accounting for any deductions or expenses?",
+    "output": "What was the amount of gross purchased technology?"
+  },
+  {
+    "input": "What is the net value difference between Technology and Customer relationships?",
+    "output": "What was the difference in the net values between Technology and Customer relationships?"
+  },
+  {
+    "input": "What was the numerical difference in the net values specifically attributed to Technology and Customer relationships in relation to a certain context?",
+    "output": "What was the difference in the net values between Technology and Customer relationships?"
+  },
+  {
+    "input": "Which years does the table provide information for the company's activity related to their product warranty liability?",
+    "output": "For which specific years does the table display data regarding the company's product warranty liability?"
+  },
+  {
+    "input": "Which years does the table provide information for the company's activity related to their product warranty liability?",
+    "output": "For which years does the table displayed provide information about the company's activity in terms of their product warranty liability?"
+  },
+  {
+    "input": "What were the Acquisitions and divestitures in 2019?",
+    "output": "What were the acquisitions and divestitures that took place in the year 2019?"
+  },
+  {
+    "input": "What was the difference in the amount allocated for warranty provisions issued in 2017 compared to 2018?",
+    "output": "What was the change in Provisions for warranties issued between 2017 and 2018?"
+  },
+  {
+    "input": "What was the difference in the amount allocated for warranties issued between the years 2017 and 2018?",
+    "output": "What was the change in Provisions for warranties issued between 2017 and 2018?"
+  },
+  {
+    "input": "For which specific years does the table offer data on cash and cash equivalents as well as investments?",
+    "output": "Which years does the table provide information for  cash and cash equivalents and investments?"
+  },
+  {
+    "input": "What was the change in the value of available-for-sale debt investments, indicating whether there was an increase or decrease?",
+    "output": "What was the increase (decrease) in available-for-sale debt investments?"
+  },
+  {
+    "input": "In 2019, how did the distinction between cash and cash equivalents and available-for-sale debt investments manifest and how were they different from each other?",
+    "output": "What was the difference between cash and cash equivalents and Available-for-sale debt investments in 2019?"
+  },
+  {
+    "input": "What differentiates cash and cash equivalents from available-for-sale debt investments in the financial records for the year 2019?",
+    "output": "What was the difference between cash and cash equivalents and Available-for-sale debt investments in 2019?"
+  },
+  {
+    "input": "What was the change in Interest expense between 2017 and 2018?",
+    "output": "What is the difference in the amount of interest expense incurred from 2017 to 2018?"
+  },
+  {
+    "input": "What was the change in Interest expense between 2017 and 2018?",
+    "output": "What was the difference in the amount of money paid for interest in 2017 compared to 2018?"
+  },
+  {
+    "input": "What was the difference in the amount of income earned from interest between the years 2018 and 2019?",
+    "output": "What was the change in Interest income between 2018 and 2019?"
+  },
+  {
+    "input": "What is the amount of federal net operating loss carryforwards that the company had for income tax purposes in the year 2019?",
+    "output": "What was the company's federal net operating loss carryforwards for income tax purposes in 2019?"
+  },
+  {
+    "input": "What was the amount of the company's federal net operating loss carryforwards specifically for income tax purposes in the year 2019?",
+    "output": "What was the company's federal net operating loss carryforwards for income tax purposes in 2019?"
+  },
+  {
+    "input": "What was the percentage change in total deferred tax assets between 2018 and 2019?",
+    "output": "What is the percentage increase or decrease in the total amount of deferred tax assets from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in total deferred tax assets between 2018 and 2019?",
+    "output": "What was the percentage increase or decrease in the total amount of deferred tax assets from 2018 to 2019?"
+  },
+  {
+    "input": "In fiscal year 2019, what was the total amount of net interest expense recognized by the company?",
+    "output": "How much net interest expense did the company recognize in fiscal year 2019?"
+  },
+  {
+    "input": "How much interest expense, after deducting interest income, did the company record as a net amount during fiscal year 2019?",
+    "output": "How much net interest expense did the company recognize in fiscal year 2019?"
+  },
+  {
+    "input": "What were the Additions for tax positions of prior years in 2019?",
+    "output": "What were the additions made to the tax positions of previous years in the year 2019?"
+  },
+  {
+    "input": "What was the percentage change in net income between 2018 and 2019?",
+    "output": "What was the percentage difference in net income from 2018 to 2019 and how can it be calculated?"
+  },
+  {
+    "input": "Why has the direct effect of foreign currency fluctuations on revenue not been material?",
+    "output": "Why has the direct impact of changes in foreign currency exchange rates on our revenue not had a significant financial effect so far? Please provide an explanation for the lack of material impact despite fluctuations in currency values."
+  },
+  {
+    "input": "Why has the direct effect of foreign currency fluctuations on revenue not been material?",
+    "output": "Why has the impact of foreign currency fluctuations on revenue not been significant enough to have a material effect?"
+  },
+  {
+    "input": "What was the difference in the fair value of forward contracts that were sold between the years 2018 and 2019?",
+    "output": "What was the change in the fair value of sold forward contracts between 2018 and 2019?"
+  },
+  {
+    "input": "Why does the company prioritize inventory and supply chain management as a specific area of focus?",
+    "output": "Why is inventory and supply chain management an area of focus for the company?"
+  },
+  {
+    "input": "Why does the company prioritize inventory and supply chain management as a focal point for its operations?",
+    "output": "Why is inventory and supply chain management an area of focus for the company?"
+  },
+  {
+    "input": "What was the total sum of purchase commitments made in the year 2018?",
+    "output": "What were the total purchase commitments in 2018?"
+  },
+  {
+    "input": "What was the change in purchase commitments that were less than 1 year between 2018 and 2019?",
+    "output": "What was the difference in purchase commitments of less than 1 year between 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in purchase commitments that were less than 1 year between 2018 and 2019?",
+    "output": "What was the difference in purchase commitments shorter than one year between the year 2018 and the year 2019?"
+  },
+  {
+    "input": "For how long did contracts ranging from 1 to 3 years surpass a cumulative worth of $700 million?",
+    "output": "How many years did commitments that were 1 to 3 years exceed $700 million?"
+  },
+  {
+    "input": "What was the percentage change in product revenue for the APJC region from 2017 to 2018?",
+    "output": "What was the change in the product revenue from APJC between 2017 and 2018?"
+  },
+  {
+    "input": "What was the change in the product revenue from Americas between 2017 and 2018?",
+    "output": "What was the difference in the amount of revenue generated from products in the Americas region between the years 2017 and 2018?"
+  },
+  {
+    "input": "What was the numerical difference in the total value of net property and equipment in International regions between the years 2017 and 2018?",
+    "output": "What was the change in net property and equipment from International regions between 2017 and 2018?"
+  },
+  {
+    "input": "What is the percentage increase or decrease in the total net property and equipment from 2018 to 2019?",
+    "output": "What was the percentage change in the total net property and equipment between 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in revenue generated from security services between 2017 and 2018?",
+    "output": "What was the change in revenue from security between 2017 and 2018?"
+  },
+  {
+    "input": "What was the total amount paid for the shares during the period from April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?",
+    "output": "What was the total payment for the shares from April 28, 2019 to May 25, 2019 and from May 26, 2019 to June 22, 2019?"
+  },
+  {
+    "input": "What was the total amount paid for the shares during the period from April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?",
+    "output": "What was the overall payment for the shares between April 28, 2019 and May 25, 2019, as well as between May 26, 2019 and June 22, 2019?"
+  },
+  {
+    "input": "What was the average total amount paid for the shares during the period from April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?",
+    "output": "What was the average total payment for shares from April 28, 2019 to June 22, 2019, and how does it compare between the periods April 28, 2019 to May 25, 2019 and May 26, 2019 to June 22, 2019?"
+  },
+  {
+    "input": "What was the difference in the balance of Americas between the years 2018 and 2019?",
+    "output": "What was the change in balance from Americas between 2018 and 2019?"
+  },
+  {
+    "input": "What was the difference in America's balance between 2018 and 2019 and how did it change?",
+    "output": "What was the change in balance from Americas between 2018 and 2019?"
+  },
+  {
+    "input": "Which years does the table provide information for the company's available-for-sale debt investments and equity investments?",
+    "output": "For which time period does the table provide data on the available-for-sale debt investments and equity investments of the company?"
+  },
+  {
+    "input": "\"What were the total provisions, also known as benefits, provided in this context?",
+    "output": "What were the total provisions (benefits)?"
+  },
+  {
+    "input": "What was the difference between total provisions (benefits) and net recoveries (write-offs)?",
+    "output": "What differentiates total provisions, also referred to as benefits, from net recoveries, which are write-offs?"
+  },
+  {
+    "input": "What was the allowance for credit loss for financed service contracts as a percentage of total allowance for credit loss in 2018?",
+    "output": "In 2018, what was the proportion or ratio of the allowance for credit loss specifically assigned to financed service contracts compared to the total allowance for credit loss?"
+  },
+  {
+    "input": "What was the allowance for credit loss for financed service contracts as a percentage of total allowance for credit loss in 2018?",
+    "output": "What was the percentage of the total allowance for credit loss in 2018 that was allocated specifically for financed service contracts?"
+  },
+  {
+    "input": "What is the percentage difference in Loss from continuing operations before income tax between the years 2018 and 2019?",
+    "output": "What is the percentage change in the Loss from continuing operations before income tax from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage difference in the Loss from continuing operations before income tax between the years 2018 and 2019?",
+    "output": "What is the percentage change in the Loss from continuing operations before income tax from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in the income tax benefit from 2018 to 2019?",
+    "output": "What is the percentage difference in the income tax benefit received between 2018 and 2019?"
+  },
+  {
+    "input": "In which year did the net loss for the year exceed the net loss of any other year?",
+    "output": "In which year is there a greater net loss for the year?"
+  },
+  {
+    "input": "What does the net deferred tax liabilities include?",
+    "output": "What is included in the calculation of net deferred tax liabilities?"
+  },
+  {
+    "input": "What is the exact percentage change in the net deferred tax liabilities from 2018 to 2019?",
+    "output": "What is the percentage change in the net deferred tax liabilities from 2018 to 2019?"
+  },
+  {
+    "input": "What was the parent entity's current ratio in 2019?",
+    "output": "What is the current ratio of the parent entity in 2019?"
+  },
+  {
+    "input": "What was the parent entity's current ratio in 2019? Please provide the numerical value.",
+    "output": "What is the current ratio of the parent entity in 2019?"
+  },
+  {
+    "input": "What was the debts to assets ratio of the parent entity in the year 2018? Can you provide the specific ratio value or percentage?",
+    "output": "What is the debts to assets ratio of the parent entity in 2018?"
+  },
+  {
+    "input": "What is the debt-to-assets ratio of the parent company for the year 2018?",
+    "output": "What is the debts to assets ratio of the parent entity in 2018?"
+  },
+  {
+    "input": "What is the percentage change in the net cash generated or used for investing activities?",
+    "output": "What is the percentage change in the net cash provided from investing activities?"
+  },
+  {
+    "input": "What is the change in the net cash provided from operating activities from 2018 to 2019?",
+    "output": "What is the difference between the net cash provided from operating activities in 2018 and 2019?"
+  },
+  {
+    "input": "What is the change in the net cash provided from operating activities from 2018 to 2019?",
+    "output": "What is the difference in the amount of net cash provided from operating activities between 2018 and 2019?"
+  },
+  {
+    "input": "What is the change in Net change in cash and cash equivalent from 2018 to 2019?",
+    "output": "What is the difference in the amount of cash and cash equivalents between 2018 and 2019?"
+  },
+  {
+    "input": "What is the change in Net change in cash and cash equivalent from 2018 to 2019?",
+    "output": "What is the difference in the net change in cash and cash equivalents between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the value of current assets from 2018 to 2019?",
+    "output": "What is the percentage change in current assets from 2018 to 2019?"
+  },
+  {
+    "input": "What is the current ratio in the year 2019 and what factors should be considered while determining it?",
+    "output": "What is the current ratio in 2019?"
+  },
+  {
+    "input": "What was the debts to assets ratio for the year 2019?",
+    "output": "What is the debts to assets ratio in 2019?"
+  },
+  {
+    "input": "\"What was the debts to assets ratio for the year 2019?\"",
+    "output": "What is the debts to assets ratio in 2019?"
+  },
+  {
+    "input": "What is the purpose and significance of trial commissions, and how are they defined?",
+    "output": "What are trial commissions?"
+  },
+  {
+    "input": "\"What is the purpose and definition of trial commissions?\"",
+    "output": "What are trial commissions?"
+  },
+  {
+    "input": "\"How does the Group calculate or determine its total income or financial proceeds?\"",
+    "output": "How does the Group determine its revenue?"
+  },
+  {
+    "input": "\"How does the Group calculate or establish its revenue? Provide details about the methods or criteria used for determining the Group's revenue.\"",
+    "output": "How does the Group determine its revenue?"
+  },
+  {
+    "input": "What is the percentage difference in the amount of upfront fees charged from 2018 to 2019?",
+    "output": "What is the percentage change in upfront fees from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage difference in upfront fees between 2018 and 2019?",
+    "output": "What is the percentage change in upfront fees from 2018 to 2019?"
+  },
+  {
+    "input": "What is the change in the Business combination reserve between 2018 and 2019?",
+    "output": "What is the monetary difference in the Business combination reserve from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in the total reserves from 2018 to 2019?",
+    "output": "By what percentage did the total reserves increase or decrease from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage increase or decrease in the trail commission asset comparing the values from 2018 and 2019?",
+    "output": "What is the percentage change in the current trail commission asset from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in the cash receipts from 2018 to 2019?",
+    "output": "What is the percentage increase or decrease in the cash receipts comparing the amounts in 2018 and 2019?"
+  },
+  {
+    "input": "What are the payroll tax rates for both 2019 and 2018?",
+    "output": "What is the payroll tax for 2019 and 2018 respectively?"
+  },
+  {
+    "input": "\"What was the amount of post-tax loss incurred from discontinued operations in the year 2019?\"",
+    "output": "What is the Post-tax loss of  discontinued operations in 2019?"
+  },
+  {
+    "input": "\"In which year did the revenue increase compared to other years?\"",
+    "output": "In which year is there a higher revenue?"
+  },
+  {
+    "input": "In which specific year did the revenue increase, resulting in a higher amount compared to previous years?",
+    "output": "In which year is there a higher revenue?"
+  },
+  {
+    "input": "What is the percentage difference in the amount of lease liabilities between 2018 and 2019?",
+    "output": "What is the percentage change in the current lease liabilities from 2018 to 2019?"
+  },
+  {
+    "input": "What is the exact percentage difference in the lease liabilities as of 2018 compared to 2019?",
+    "output": "What is the percentage change in the current lease liabilities from 2018 to 2019?"
+  },
+  {
+    "input": "What method does the Company use to determine and calculate the amount for its provision?",
+    "output": "How does the Company calculate its provision?"
+  },
+  {
+    "input": "How does the Company determine the amount of money set aside for provisions and what factors are taken into consideration during the calculation process?",
+    "output": "How does the Company calculate its provision?"
+  },
+  {
+    "input": "What is the percentage change in the contract assets from 2018 to 2019?",
+    "output": "What is the percentage change in the value of the contract assets from the year 2018 to the year 2019?"
+  },
+  {
+    "input": "In which year does the number of outstanding shares have a higher value at the start of the period as compared to other years?",
+    "output": "In which year is the number of outstanding shares at the beginning of the period higher?"
+  },
+  {
+    "input": "What is the pre-tax discount rate that was utilized to determine the value of projected cash flows for the year 2019?",
+    "output": "What is the pre-tax discount rate applied to cash flow projections in 2019?"
+  },
+  {
+    "input": "What was the pre-tax discount rate used to calculate cash flow projections in 2019?",
+    "output": "What is the pre-tax discount rate applied to cash flow projections in 2019?"
+  },
+  {
+    "input": "In which year is the health CGU higher?",
+    "output": "\"In which specific year does the health Comprehensive Grand Unit (CGU) exhibit a higher value compared to other years?\""
+  },
+  {
+    "input": "In which year is the car CGU higher?",
+    "output": "\"In which specific year does the car CGU (Car Gross Utilization) exhibit a higher value compared to other years? Please provide the year.\""
+  },
+  {
+    "input": "What is the post-employment benefits in 2018?",
+    "output": "What were the post-employment benefits in 2018, and can you provide more details about them?"
+  },
+  {
+    "input": "What were the share-based payment transactions and their impact in 2019?",
+    "output": "What is the share-based payments in 2019?"
+  },
+  {
+    "input": "What were the share-based payments in 2019 and could you provide further details on this topic?",
+    "output": "What is the share-based payments in 2019?"
+  },
+  {
+    "input": "What is the percentage change in the post-employment benefits from 2018 to 2019?",
+    "output": "What is the percentage difference in post-employment benefits between the years 2018 and 2019?"
+  },
+  {
+    "input": "What specific expenses or expenditure categories are included in the unallocated corporate costs for the current year?",
+    "output": "What do the unalloacated corporate costs include in the current year?"
+  },
+  {
+    "input": "What is the exact percentage change in revenue in Australia from the year 2018 to the year 2019?",
+    "output": "What is the percentage change in the revenue in Australia from 2018 to 2019?"
+  },
+  {
+    "input": "What is the revenue percentage change experienced in Asia between 2018 and 2019?",
+    "output": "What is the percentage change in the revenue in Asia from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in revenue in the Asian region from 2018 to 2019?",
+    "output": "What is the percentage change in the revenue in Asia from 2018 to 2019?"
+  },
+  {
+    "input": "What was the total number of outstanding stock options for VMware in 2017?",
+    "output": "What was the outstanding number of shares for VMware stock options in 2017?"
+  },
+  {
+    "input": "What was the total number of shares allotted for VMware stock options in the year 2017?",
+    "output": "What was the outstanding number of shares for VMware stock options in 2017?"
+  },
+  {
+    "input": "What was the number of granted shares from Pivotal Stock Options in 2018?",
+    "output": "\"How many shares were granted by Pivotal Stock Options during the year 2018?\""
+  },
+  {
+    "input": "For how long did the Weighted-Average Exercise Price (per share) for VMware stock options remain above $60.00?",
+    "output": "How many years did the outstanding Weighted-Average Exercise Price (per share) for VMware stock options exceed $60.00?"
+  },
+  {
+    "input": "What was the difference in the overall amount of money earned by the company in 2018 compared to 2019?",
+    "output": "What was the change in total revenue between 2018 and 2019?"
+  },
+  {
+    "input": "What was the difference in total revenue between the years 2018 and 2019?",
+    "output": "What was the change in total revenue between 2018 and 2019?"
+  },
+  {
+    "input": "How many years did total revenue exceed $500 million?",
+    "output": "For how many consecutive years has the total revenue been greater than $500 million?"
+  },
+  {
+    "input": "What was the percentage change in net income between 2019 and 2020?",
+    "output": "What was the percentage difference in net income from 2019 to 2020, and how can we calculate it accurately?"
+  },
+  {
+    "input": "\"What was the variation in the increase in goodwill associated with mergers and acquisitions from 2019 to 2020?\"",
+    "output": "What was the change in the Increase in goodwill related to business combinations between 2019 and 2020?"
+  },
+  {
+    "input": "What was the difference in balance at the start of 2019 and 2020?",
+    "output": "What was the change in balance at the beginning of the year between 2019 and 2020?"
+  },
+  {
+    "input": "What was the difference in balance at the beginning of 2019 and 2020?",
+    "output": "What was the change in balance at the beginning of the year between 2019 and 2020?"
+  },
+  {
+    "input": "What did the Senior Notes consists of?",
+    "output": "What were the contents or components of the Senior Notes?"
+  },
+  {
+    "input": "What was the difference between total Senior Notes and total Notes payable to Dell?",
+    "output": "What is the distinction between the combined value of Senior Notes and the combined value of Notes payable to Dell?"
+  },
+  {
+    "input": "What differentiates the total amount of Term Loans from the total amount of Future Lease Commitments?",
+    "output": "What was the difference between total Term Loans and total Future Lease Commitments?"
+  },
+  {
+    "input": "What differentiates the total amount of Term Loans from the total value of Future Lease Commitments? Elaborate on the distinctions between these two financial concepts.",
+    "output": "What was the difference between total Term Loans and total Future Lease Commitments?"
+  },
+  {
+    "input": "Which specific time periods are covered in the table containing data on long-lived assets categorized by geographic area? These assets primarily consist of net property and equipment.",
+    "output": "Which years does the table include information for long-lived assets by geographic area, which primarily include property and equipment, net?"
+  },
+  {
+    "input": "How long has the outstanding balance owed to related parties been surpassing $100 million?",
+    "output": "How many years did current amounts due to related parties exceed $100 million?"
+  },
+  {
+    "input": "What was the cumulative amount of sales invoices acknowledged and recorded as revenue throughout the entire fiscal year of 2020?",
+    "output": "What was the total billings recognized during the year ended 2020?"
+  },
+  {
+    "input": "\"What was the exact amount of billings that were recorded and acknowledged as revenue throughout the entirety of the year that concluded in 2020?\"",
+    "output": "What was the total billings recognized during the year ended 2020?"
+  },
+  {
+    "input": "What was the change in Unearned software maintenance revenue between 2019 and 2020?",
+    "output": "\"What was the difference in Unearned software maintenance revenue from 2019 to 2020 and how did it change?\""
+  },
+  {
+    "input": "What was the change in Unearned software maintenance revenue between 2019 and 2020?",
+    "output": "What was the difference in Unearned software maintenance revenue from 2019 to 2020?"
+  },
+  {
+    "input": "What is the percentage of unearned revenue in the total revenue for the years 2019 and 2020?",
+    "output": "What was the percentage of total unearned revenue between 2019 and 2020?"
+  },
+  {
+    "input": "\"What was the percentage of unearned revenue as a proportion of the total revenue generated in the years 2019 and 2020?\"",
+    "output": "What was the percentage of total unearned revenue between 2019 and 2020?"
+  },
+  {
+    "input": "What was the average price per share in 2018, taking into account the weight assigned to each share?",
+    "output": "What was the Weighted-average price per share in 2018?"
+  },
+  {
+    "input": "For how many years did the repurchase of Class A common stock surpass a cumulative value of $10,000 million?",
+    "output": "How many years did Class A common stock repurchased exceed $10,000 million?"
+  },
+  {
+    "input": "For how many years did the repurchase of Class A common stock amount to more than $10 billion?",
+    "output": "How many years did Class A common stock repurchased exceed $10,000 million?"
+  },
+  {
+    "input": "For which years does the table provide details regarding the exclusion of weighted-average common share equivalents of Class A common stock from the calculations of diluted net income per share?",
+    "output": "Which years does the table provide information for the weighted-average common share equivalents of Class A common stock that were excluded from the diluted net income per share calculations?"
+  },
+  {
+    "input": "What were the employee stock options for the year 2019?",
+    "output": "What was the Employee stock options in 2019?"
+  },
+  {
+    "input": "What were the employee stock options offered in 2019?",
+    "output": "What was the Employee stock options in 2019?"
+  },
+  {
+    "input": "How does Dell engage in the procurement of products and services from other companies?",
+    "output": "How does Dell purchase products and services from the company?"
+  },
+  {
+    "input": "What was the percentage change in the internal-use revenue between 2019 and 2020?",
+    "output": "What is the percentage difference in internal-use revenue from 2019 to 2020?"
+  },
+  {
+    "input": "Which years does the table provide information for accrued expenses and other?",
+    "output": "For which specific years does the provided table offer information related to accrued expenses and other similar items?"
+  },
+  {
+    "input": "Which years does the table provide information for accrued expenses and other?",
+    "output": "For which specific years does the table contain data regarding accrued expenses and other items?"
+  },
+  {
+    "input": "What was the difference in the total amount of expenses that accumulated over time between the years 2019 and 2020?",
+    "output": "What was the change in the total accrued expenses between 2019 and 2020?"
+  },
+  {
+    "input": "What is the difference in the total amount of expenses that accumulated over time between 2019 and 2020?",
+    "output": "What was the change in the total accrued expenses between 2019 and 2020?"
+  },
+  {
+    "input": "In 2019, what were the specific support and administrative costs incurred by Dell's subsidiary?",
+    "output": "What was the Dell subsidiary support and administrative costs in 2019?"
+  },
+  {
+    "input": "What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?",
+    "output": "What was the difference in the amount spent on purchasing and leasing products as well as purchasing services from 2018 to 2019?"
+  },
+  {
+    "input": "What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?",
+    "output": "What was the difference in the amount spent on purchasing and leasing products and purchasing services from 2018 to 2019?"
+  },
+  {
+    "input": "For how many consecutive years did Dell subsidiary incur support and administrative costs exceeding $200 million?",
+    "output": "How many years did Dell subsidiary support and administrative costs exceed $200 million?"
+  },
+  {
+    "input": "What was the percentage change in the Dell subsidiary support and administrative costs between 2019 and 2020?",
+    "output": "What percentage increase or decrease was observed in the support and administrative costs of Dell's subsidiary from 2019 to 2020?"
+  },
+  {
+    "input": "What was the significance or meaning of the aggregate intrinsic value?",
+    "output": "What did the aggregate intrinsic value represent?"
+  },
+  {
+    "input": "What differentiates outstanding options that are both exercisable and vested as well as expected to vest?",
+    "output": "What was the difference between outstanding options that were exercisable and vested and expected to vest?"
+  },
+  {
+    "input": "What was the specific amount allocated for the federal income tax provision in the year 2019?",
+    "output": "What was the current federal income tax provision in 2019?"
+  },
+  {
+    "input": "What was the specific provision for federal income tax in 2019?",
+    "output": "What was the current federal income tax provision in 2019?"
+  },
+  {
+    "input": "What was the change in the current federal income tax provision between 2018 and 2019?",
+    "output": "What was the difference in the amount allocated for federal income tax in 2018 compared to 2019?"
+  },
+  {
+    "input": "What was the percentage change in the Total income tax provision between 2018 and 2019?",
+    "output": "What was the precise percentage increase or decrease in the Total income tax provision from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in the Total income tax provision between 2018 and 2019?",
+    "output": "What is the percentage difference in the Total income tax provision from 2018 to 2019?"
+  },
+  {
+    "input": "What was the specific monetary value of Restricted cash categorized under other current assets in the financial records for the year 2019?",
+    "output": "What was the amount of Restricted cash within other current assets in 2019?"
+  },
+  {
+    "input": "What were the specific additions made to tax positions related to the current year of 2020 in the field of taxation?",
+    "output": "What were the additions to tax positions related to current year in 2020?"
+  },
+  {
+    "input": "What categories are encompassed within the scope of purchases and leases of products, and what does this also include in terms of purchases?",
+    "output": "What did Purchases and leases of products and purchases of services include?"
+  },
+  {
+    "input": "What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?",
+    "output": "What was the year-on-year difference in the amounts spent on purchasing and leasing products and purchasing services between 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in Purchases and leases of products and purchases of services between 2018 and 2019?",
+    "output": "What was the difference in the amount spent on purchasing and leasing products, as well as purchasing services, from 2018 to 2019?"
+  },
+  {
+    "input": "For how long did the subsidiary of Dell exceed $150 million in annual support and administrative costs?",
+    "output": "How many years did Dell subsidiary support and administrative costs exceed $150 million?"
+  },
+  {
+    "input": "What specific expenses or liabilities are encompassed within the current balances owed to affiliated individuals or entities?",
+    "output": "What did the current amounts due to related parties include?"
+  },
+  {
+    "input": "What specific items or transactions are included in the present outstanding balances owed to related parties?",
+    "output": "What did the current amounts due to related parties include?"
+  },
+  {
+    "input": "For how long has the current outstanding amount owed by related parties been over $1,000 million?",
+    "output": "How many years did current amount due from related parties exceed $1,000 million?"
+  },
+  {
+    "input": "For how long has the present outstanding balance owed by affiliated organizations been over $1,000 million?",
+    "output": "How many years did current amount due from related parties exceed $1,000 million?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the net current amount due from related parties from 2019 to 2020?",
+    "output": "What was the percentage change in the net current amount due from related parities between 2019 and 2020?"
+  },
+  {
+    "input": "What was the change in Additions to intangible assets between 2019 and 2020?",
+    "output": "What was the difference in the amount of intangible assets added between the years 2019 and 2020?"
+  },
+  {
+    "input": "What is the total amount of lease liabilities for operating leases recorded on the current financial statements?",
+    "output": "What were the current lease liabilities for operating leases?"
+  },
+  {
+    "input": "What is the total amount of lease liabilities for operating leases as of the present moment?",
+    "output": "What were the current lease liabilities for operating leases?"
+  },
+  {
+    "input": "How many lease liabilities for operating leases exceeded $500 million?",
+    "output": "How many operating lease liabilities were there that exceeded $500 million in amount?"
+  },
+  {
+    "input": "What differentiates current lease liabilities from non-current lease liabilities in the context of operating leases?",
+    "output": "What was the difference between current and non-current lease liabilities for operating leases?"
+  },
+  {
+    "input": "What differentiates current lease liabilities from non-current lease liabilities for operating leases?",
+    "output": "What was the difference between current and non-current lease liabilities for operating leases?"
+  },
+  {
+    "input": "What was the discrepancy in the aggregate lease obligations between operating leases and finance leases?",
+    "output": "What was the difference in total lease liabilities between operating leases and finance leases?"
+  },
+  {
+    "input": "Which years does the table provide information for unearned revenue?",
+    "output": "For which specific years does the table contain information regarding unearned revenue?"
+  },
+  {
+    "input": "Which years does the table provide information for unearned revenue?",
+    "output": "\"For which specific years does the table provide data on unearned revenue?\""
+  },
+  {
+    "input": "What was the percentage change in Unearned subscription and SaaS revenue from 2019 to 2020?",
+    "output": "What was the change in Unearned subscription and SaaS revenue between 2019 and 2020?"
+  },
+  {
+    "input": "What was the foreign income in 2018?",
+    "output": "What was the total amount of income received from foreign sources during the year 2018?"
+  },
+  {
+    "input": "What is the percentage difference in the total income prior to income tax between the years 2019 and 2020?",
+    "output": "What was the percentage change in the total income before income tax between 2019 and 2020?"
+  },
+  {
+    "input": "What was the total value of construction projects underway in 2020 globally or within a specific country/region?",
+    "output": "What was the amount of construction in progress in 2020?"
+  },
+  {
+    "input": "What was the difference in the total value of property and equipment from 2019 to 2020?",
+    "output": "What was the change in Total property and equipment between 2019 and 2020?"
+  },
+  {
+    "input": "What was the change in the Unrealized losses on available-for-sale securities between 2017 and 2018?",
+    "output": "What was the difference in the amount of Unrealized losses on available-for-sale securities from 2017 to 2018?"
+  },
+  {
+    "input": "What was the percentage change in the Reclassification of realized transactions, net of taxes between 2018 and 2019?",
+    "output": "What percentage represents the difference in the Reclassification of realized transactions, net of taxes between the years 2018 and 2019?"
+  },
+  {
+    "input": "What impact did the alteration in the balance of net accounts receivable have on the financial situation?",
+    "output": "What was the effect of change in the net accounts receivable?"
+  },
+  {
+    "input": "What is the ratio of long-term deferred tax liability to accrued liabilities?",
+    "output": "What was the Long-term deferred tax liability as a ratio of Accrued liabilities?"
+  },
+  {
+    "input": "How did the Other interest expense change between the years 2017 and 2018?",
+    "output": "What was the change in Other interest expense between 2017 and 2018?"
+  },
+  {
+    "input": "What was the difference between the reported net sales and the reported cost of sales in terms of amount?",
+    "output": "What was the difference in amount as reported between net sales and cost of sales?"
+  },
+  {
+    "input": "What was the difference between Net income from continuing operations and Income before income taxes?",
+    "output": "What is the distinction between net income from continuing operations and income before income taxes? Please provide a comprehensive response, retaining all the details provided in the original question. Keep the question brief, succinct, and in the same language as the original."
+  },
+  {
+    "input": "What was the impact of stock options and RSUs on dilution in 2019?",
+    "output": "What was the Dilutive effect of stock options and RSUs in 2019?"
+  },
+  {
+    "input": "How many consecutive years did the balance at the end of each year surpass $2 million?",
+    "output": "How many years did the balance at end of the year exceed $2 million?"
+  },
+  {
+    "input": "For how many years was the balance at the end of each year above $2 million?",
+    "output": "How many years did the balance at end of the year exceed $2 million?"
+  },
+  {
+    "input": "How many years did Intercompany prepaid tax asset amortization exceed $7 million?",
+    "output": "For how many years was the amortization of Intercompany prepaid tax asset greater than $7 million?"
+  },
+  {
+    "input": "What was the service cost in 2018?",
+    "output": "What was the cost of the service provided in 2018?"
+  },
+  {
+    "input": "What was the service cost in 2018?",
+    "output": "What was the cost of the service in the year 2018 specifically?"
+  },
+  {
+    "input": "What was the total amount of interest incurred in the year 2019?",
+    "output": "What was the interest cost in 2019?"
+  },
+  {
+    "input": "What was the total expense incurred as interest in the year 2019?",
+    "output": "What was the interest cost in 2019?"
+  },
+  {
+    "input": "What was the percentage difference in the Discount rate from 2018 to 2019?",
+    "output": "What was the change in the Discount rate between 2018 and 2019?"
+  },
+  {
+    "input": "What was the specific difference in the Discount rate from the year 2018 to the year 2019?",
+    "output": "What was the change in the Discount rate between 2018 and 2019?"
+  },
+  {
+    "input": "Why could the sums fail to equal the overall total?",
+    "output": "Why might amounts may not add to the total?"
+  },
+  {
+    "input": "\"What was the numerical distinction between the total amount obtained from sales after deducting all applicable costs, and the total revenue generated from sales before deducting any expenses or costs?\"",
+    "output": "What was the difference between the total net sales and gross profit?"
+  },
+  {
+    "input": "What is the calculated percentage change in the operating income from the third quarter to the fourth quarter?",
+    "output": "What was the percentage change in the Operating income between the third and fourth quarter?"
+  },
+  {
+    "input": "What was the change in the cost of sales between 2017 and 2018?",
+    "output": "How much did the cost of sales change from 2017 to 2018?"
+  },
+  {
+    "input": "What was the percentage change in the Net deferred tax asset between 2018 and 2019?",
+    "output": "What was the percentage increase or decrease in the amount of Net deferred tax asset from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in the Net deferred tax asset between 2018 and 2019?",
+    "output": "What was the percentage increase or decrease in the Net deferred tax asset from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage difference in the total balance from 2018 to 2019? Calculate using the formula for percentage change.",
+    "output": "What was the percentage change in the total balance between 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the total balance from 2018 to 2019?",
+    "output": "What was the percentage change in the total balance between 2018 and 2019?"
+  },
+  {
+    "input": "For how long, in terms of number of years, has the cost of sales exceeded 40% of net sales?",
+    "output": "How many years did cost of sales of net sales exceed 40%?"
+  },
+  {
+    "input": "What is the percentage increase or decrease in operating income as a proportion of net sales from 2018 to 2019?",
+    "output": "What was the percentage change in operating income of net sales between 2018 and 2019?"
+  },
+  {
+    "input": "What was the exact percentage increase or decrease in operating income of net sales from 2018 to 2019?",
+    "output": "What was the percentage change in operating income of net sales between 2018 and 2019?"
+  },
+  {
+    "input": "How many years have there been increases in tax positions related to the current year that exceeded $30 million?",
+    "output": "How many years did Increases related to current year tax positions exceed $30 million?"
+  },
+  {
+    "input": "What was the percentage change in the Ending balance between 2018 and 2019?",
+    "output": "What was the percentage difference in the Ending balance from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in the Ending balance between 2018 and 2019?",
+    "output": "What is the percent increase or decrease in the Ending balance from 2018 to 2019?"
+  },
+  {
+    "input": "According to what were the plan benefits provided, and to what were they in accordance with?",
+    "output": "What were plan benefits provided in accordance with?"
+  },
+  {
+    "input": "In accordance with what, were the plan benefits provided? Can you expand on the specific benefits mentioned in the plan?",
+    "output": "What were plan benefits provided in accordance with?"
+  },
+  {
+    "input": "What were the total service costs incurred during the year 2018?",
+    "output": "What were the service costs in 2018?"
+  },
+  {
+    "input": "What were the specific costs associated with services provided in 2018?",
+    "output": "What were the service costs in 2018?"
+  },
+  {
+    "input": "What were the total interest expenses incurred in the year 2017?",
+    "output": "What were the interest costs in 2017?"
+  },
+  {
+    "input": "What was the total amount spent on interest expenses in the year 2017?",
+    "output": "What were the interest costs in 2017?"
+  },
+  {
+    "input": "What was the difference in the amount of Amortization of actuarial loss between 2018 and 2019?",
+    "output": "What was the change in the Amortization of actuarial loss between 2018 and 2019?"
+  },
+  {
+    "input": "What was the difference in the amount allocated for the gradual reduction of actuarial loss in the financial statements between the years 2018 and 2019?",
+    "output": "What was the change in the Amortization of actuarial loss between 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in the net pension period cost between 2018 and 2019?",
+    "output": "What was the percentage increase or decrease in the net pension period cost from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in the net pension period cost between 2018 and 2019?",
+    "output": "What was the percentage difference in the net pension period cost from 2018 to 2019?"
+  },
+  {
+    "input": "How was the lower of cost and net realizable value determined as the valuation method for inventories? Please provide a detailed explanation of the process.",
+    "output": "How were inventories valued at the lower of cost and net realizable value?"
+  },
+  {
+    "input": "What was the quantity or volume of raw materials used in the year 2018?",
+    "output": "What was the amount of raw materials in 2018?"
+  },
+  {
+    "input": "For how long has the value of Finished goods remained above $200 million?",
+    "output": "How many years did the amount of Finished goods exceed $200 million?"
+  },
+  {
+    "input": "What was the total value of both Core and developed technology assets combined?",
+    "output": "What was the gross amount of Core and developed technology assets?"
+  },
+  {
+    "input": "What was the value assigned to each share granted on a specific date at March 31, 2017, taking into account their respective weights?",
+    "output": "What was the Weighted Average Grant Date Fair Value for nonvested shares at March 31, 2017?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the number of shares that have not yet vested from 2018 to 2019?",
+    "output": "What was the percentage change in the number of nonvested shares between 2018 and 2019?"
+  },
+  {
+    "input": "What was the cumulative intrinsic value of options and stock appreciation rights (SARs) exercised throughout the fiscal years ending on March 31, 2019?",
+    "output": "What was the total intrinsic value of options and SARs exercised during the years ended March 31, 2019?"
+  },
+  {
+    "input": "In 2019, how many options and SAR (stock appreciation rights) shares were available for exercise?",
+    "output": "What was the number of option and SAR shares exercisable in 2019?"
+  },
+  {
+    "input": "Which years does the table provide the number of outstanding shares for?",
+    "output": "Which specific years are mentioned in the table that provides the information regarding the number of outstanding shares?"
+  },
+  {
+    "input": "What was the difference in the Weighted Average Exercise Price per Share for outstanding shares between the years 2017 and 2018?",
+    "output": "What was the change in the Weighted Average Exercise Price per Share for outstanding shares between 2017 and 2018?"
+  },
+  {
+    "input": "\"What was the calculated percentage difference in the quantity of outstanding shares from 2018 to 2019?\"",
+    "output": "What was the percentage change in the number of outstanding shares between 2018 and 2019?"
+  },
+  {
+    "input": "For which specific years does the table provide information on Identifiable long-lived assets (property, plant, and equipment net of accumulated amortization) within different geographic areas?",
+    "output": "Which years does the table provide information for Identifiable long-lived assets (consisting of property, plant and equipment net of accumulated amortization) by geographic area?"
+  },
+  {
+    "input": "Which geographic areas are included in the table that provides information on identifiable long-lived assets, specifically property, plant, and equipment (net of accumulated amortization) for different years?",
+    "output": "Which years does the table provide information for Identifiable long-lived assets (consisting of property, plant and equipment net of accumulated amortization) by geographic area?"
+  },
+  {
+    "input": "What was the total value of assets held in different countries during the year 2019?",
+    "output": "What was the amount of assets in Various other countries in 2019?"
+  },
+  {
+    "input": "What was the total value of assets in various countries in 2019?",
+    "output": "What was the amount of assets in Various other countries in 2019?"
+  },
+  {
+    "input": "What percentage increase or decrease occurred in the total value of long-lived assets from 2018 to 2019?",
+    "output": "What was the percentage change in total long-lived assets between 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in the Deferred expense for State between 2018 and 2019?",
+    "output": "What is the difference in the amount of Deferred expense for the State from 2018 to 2019?"
+  },
+  {
+    "input": "What was the change in the Deferred expense for State between 2018 and 2019?",
+    "output": "What was the difference in the amount of Deferred expense for State between the years 2018 and 2019?"
+  },
+  {
+    "input": "What is the cumulative balance of all time deposits?",
+    "output": "What was the total balance time deposits?"
+  },
+  {
+    "input": "What is the aggregate amount of money held in time deposits at present?",
+    "output": "What was the total balance time deposits?"
+  },
+  {
+    "input": "What was the specific balance of Inventories on April 1, 2018, as recorded in the financial records or statements?",
+    "output": "What was the balance of Inventories on April 1, 2018?"
+  },
+  {
+    "input": "What was the adjustment made in ASC 606 for other current assets? Please provide details regarding how this adjustment affected the accounting treatment and reporting of other current assets under ASC 606.",
+    "output": "What was the adjustment from ASC 606 for other current assets?"
+  },
+  {
+    "input": "How many liabilities had a balance on March 31, 2018 that exceeded $300 million?",
+    "output": "How many liabilities, as of March 31, 2018, had a balance surpassing $300 million?"
+  },
+  {
+    "input": "What was the percentage change in Other assets specifically attributed to the adjustments made?",
+    "output": "What was the percentage change in Other assets due to the adjustments?"
+  },
+  {
+    "input": "What were the net sales in 2016?",
+    "output": "What was the total amount of sales generated in the year 2016?"
+  },
+  {
+    "input": "What was the difference in net sales from 2016 to 2017, and how can this information assist in better answering the question?",
+    "output": "What was the change in net sales between 2016 and 2017?"
+  },
+  {
+    "input": "What was the difference in net sales revenue from 2016 to 2017?",
+    "output": "What was the change in net sales between 2016 and 2017?"
+  },
+  {
+    "input": "Which years did assets from Thailand exceed $200 million?",
+    "output": "Which specific years did the total value of assets originating from Thailand surpass $200 million?"
+  },
+  {
+    "input": "Which years did assets from Thailand exceed $200 million?",
+    "output": "In which specific years did the total value of assets originating from Thailand surpass $200 million?"
+  },
+  {
+    "input": "What was the percentage change in the total long-lived assets between 2018 and 2019?",
+    "output": "What was the percentage difference in the total value of long-lived assets from 2018 to 2019, indicating whether it increased or decreased?"
+  },
+  {
+    "input": "In which years was the total annual compensation calculated in?",
+    "output": "What are the specific years for which the total annual compensation was calculated?"
+  },
+  {
+    "input": "Which geographic locations are taken into account for calculating the overall current tax expense as mentioned in the table?",
+    "output": "What are the geographic locations in the table considered when calculating the total current tax expense?"
+  },
+  {
+    "input": "What is the highest recorded tax expense in Germany and in which specific year did it occur?",
+    "output": "In which year was the current tax expense in Germany the largest?"
+  },
+  {
+    "input": "What is the highest recorded tax expense in Germany, and in which specific year was it observed?",
+    "output": "In which year was the current tax expense in Germany the largest?"
+  },
+  {
+    "input": "What was the amount of non-current assets in APJ in 2019?",
+    "output": "What was the specific value of non-current assets within the Asia-Pacific-Japan (APJ) region during the year 2019?"
+  },
+  {
+    "input": "During which specific years were the calculations of Non-Current Assets by Region conducted?",
+    "output": "In which years were the Non-Current Assets by Region calculated?"
+  },
+  {
+    "input": "In which year was the amount in Rest of Americas larger?",
+    "output": "In what specific year did the Rest of Americas region experience a higher amount compared to previous years?"
+  },
+  {
+    "input": "What was the numerical difference in the quantity of Rest of Americas in 2019 compared to 2018?",
+    "output": "What was the change in the amount in Rest of Americas in 2019 from 2018?"
+  },
+  {
+    "input": "What was the difference in the quantity of Rest of Americas in 2019 compared to 2018?",
+    "output": "What was the change in the amount in Rest of Americas in 2019 from 2018?"
+  },
+  {
+    "input": "How was the Total expense for the share-based payment plans of Executive Board members determined?",
+    "output": "How was the total expense for the share-based payment plans determined specifically for the Executive Board members?"
+  },
+  {
+    "input": "During which specific years is the Total Expense for Share-Based Payment given or provided?",
+    "output": "In which years is the Total Expense for Share-Based Payment provided?"
+  },
+  {
+    "input": "For which specific years is the Total Expense for Share-Based Payment given?",
+    "output": "In which years is the Total Expense for Share-Based Payment provided?"
+  },
+  {
+    "input": "What is the specific value or quantity for the EMEA region in 2019?",
+    "output": "What is the amount for EMEA in 2019?"
+  },
+  {
+    "input": "What is the amount for APJ in 2018?",
+    "output": "What was the total amount of APJ in the year 2018?"
+  },
+  {
+    "input": "When did Germany have the highest amount?",
+    "output": "In which year was the amount for Germany the largest?"
+  },
+  {
+    "input": "When did Germany experience its highest amount?",
+    "output": "In which year was the amount for Germany the largest?"
+  },
+  {
+    "input": "What is the specific monetary figure spent on employee benefits in the year 2019?",
+    "output": "What is the amount of employee benefits expenses in 2019?"
+  },
+  {
+    "input": "What was the percentage difference in the amount spent on pensions in 2019 compared to 2018?",
+    "output": "What was the percentage change in Pension expenses in 2019 from 2018?"
+  },
+  {
+    "input": "What was the total cost of services offered in the year 2019?",
+    "output": "What was the Cost of services in 2019?"
+  },
+  {
+    "input": "What was the specific cost of services rendered in the year 2019?",
+    "output": "What was the Cost of services in 2019?"
+  },
+  {
+    "input": "\"When did the Cost of services reach its highest value?\"",
+    "output": "In which year was Cost of services largest?"
+  },
+  {
+    "input": "In which specific year did the Cost of services reach its maximum value, considering all available data?",
+    "output": "In which year was Cost of services largest?"
+  },
+  {
+    "input": "What was the change in issued capital in 2019 from 2018?",
+    "output": "What was the net change in the amount of issued capital between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in issued capital in 2019 from 2018?",
+    "output": "What was the difference in the amount of issued capital between 2019 and 2018? How much did the issued capital change from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage of unused tax losses in 2019 that can be attributed to state tax loss carryforwards in the United States?",
+    "output": "How much of unused tax losses relate to U.S. state tax loss carryforwards in 2019?"
+  },
+  {
+    "input": "In which year was the amount Expiring after the following year the largest?",
+    "output": "In what year did the highest amount of expenses expire after the subsequent year?"
+  },
+  {
+    "input": "What is the meaning and scope of the rights associated with Bill McDermott?",
+    "output": "What do the rights for Bill McDermott refer to? "
+  },
+  {
+    "input": "What specific types of entitlements are included in the table when referring to annual pension entitlements?",
+    "output": "What kind of entitlements are annual pension entitlements as displayed in the table?"
+  },
+  {
+    "input": "Which specific years are the annual pension entitlements fully and legally secured to an individual?",
+    "output": "In which years are the annual pension entitlements vested?"
+  },
+  {
+    "input": "What was the profit before tax in 2019?",
+    "output": "What was the pre-tax profit for the fiscal year of 2019?"
+  },
+  {
+    "input": "What was the profit before tax in 2019?",
+    "output": "What was the pre-tax profit for the year 2019?"
+  },
+  {
+    "input": "\"What was the specific tax rate applicable in 2018 for individuals or businesses in a particular country?\"",
+    "output": "What was the applicable tax rate in 2018?"
+  },
+  {
+    "input": "What was the percentage change in Other in 2019 from 2018?",
+    "output": "What was the percentage difference in the category labeled \"Other\" between the years 2018 and 2019?"
+  },
+  {
+    "input": "What market is the information specifically targeting and representing for Airtel?",
+    "output": "Which market does the information reflect for Airtel?"
+  },
+  {
+    "input": "What is the percentage increase in the number of mobile customers for AIS over a specific period?",
+    "output": "What is the % growth of mobile customers for AIS?"
+  },
+  {
+    "input": "Which associate had the best % growth in mobile customers?",
+    "output": "\"Which associate experienced the highest percentage growth in the number of mobile customers over a specific period of time?\""
+  },
+  {
+    "input": "What specific components were encompassed within the exceptional items during the fiscal year 2018?",
+    "output": "What was included in the exceptional items in FY2018?"
+  },
+  {
+    "input": "What specific items were considered exceptional in the financial year 2018?",
+    "output": "What was included in the exceptional items in FY2018?"
+  },
+  {
+    "input": "What was the amount of cash generated from operations after accounting for capital expenditures and operating expenses in the specified year?",
+    "output": "What was the free cash flow for the year?"
+  },
+  {
+    "input": "What is the % change in EBITDA margin from 2018 to 2019?",
+    "output": "What is the percentage difference in EBITDA margin between 2018 and 2019?"
+  },
+  {
+    "input": "What is the % change in EBITDA margin from 2018 to 2019?",
+    "output": "What is the percentage change in EBITDA margin from the year 2018 to the year 2019?"
+  },
+  {
+    "input": "What is the absolute difference in net profit between 2018 and 2019?",
+    "output": "What is the change in net profit from 2018 to 2019 in absolute numbers?"
+  },
+  {
+    "input": "What does the line item labeled as 'Others' provide information about in a given context or dataset?",
+    "output": "What information does the line item 'Others' relate to?"
+  },
+  {
+    "input": "What is the average total amount owed in debt for the duration of the two-year period?",
+    "output": "What is the average gross debt across the 2 years?"
+  },
+  {
+    "input": "Why does this net deferred gain balance exist?",
+    "output": "Why is there a net deferred gain balance?"
+  },
+  {
+    "input": "What is the reason for the continued presence of a net deferred gain balance after Singtel completed the sale of its entire 100% stake in NLT to NetLink NBN Trust in July 2017?",
+    "output": "Why is there still a balance of net deferred gain since Singtel sold its 100% interest in NLT to NetLink NBN Trust in July 2017?"
+  },
+  {
+    "input": "What was the year when the net deferred gain balance reached its peak?",
+    "output": "In which year was the net deferred gain balance the highest?"
+  },
+  {
+    "input": "What is the subject matter or main focus mentioned in note 22?",
+    "output": "What is the topic of note 22?"
+  },
+  {
+    "input": "What is included in the \"Others\" category in the table?",
+    "output": "What does the line item \"Others\" in the table encompass?"
+  },
+  {
+    "input": "What is the Singtel's largest joint venture in terms of the proportion of the Group's ownership?",
+    "output": "Which is the largest joint venture of Singtel, in terms of the proportion of Group's ownership?"
+  },
+  {
+    "input": "Do the non-audit services offered by KPMG LLP have an impact on their independence in conducting audits?",
+    "output": "Does the non-audit services provided by KPMG LLP affect their independence?"
+  },
+  {
+    "input": "What is included in the compensation for key management personnel?",
+    "output": "What does key management personnel compensation comprise?"
+  },
+  {
+    "input": "What is included in the compensation package for key management personnel?",
+    "output": "What does key management personnel compensation comprise?"
+  },
+  {
+    "input": "What are the components included in directors' remuneration?",
+    "output": "What does directors' remuneration comprise of?"
+  },
+  {
+    "input": "To what topic does the content of note 18.1 correspond?",
+    "output": "What is the subject matter of note 18.1?"
+  },
+  {
+    "input": "What percentage of the company's derivative financial liabilities are classified as non-current liabilities disclosed in their financial statements?",
+    "output": "How many % of the company's derivative financial liabilities are being disclosed as non-current?"
+  },
+  {
+    "input": "What is Note 21 about?",
+    "output": "What is the topic of note 21?"
+  },
+  {
+    "input": "Can you please provide details on the terms and conditions of the advances made by the shareholders?",
+    "output": "What are the terms of the shareholders' advances?"
+  },
+  {
+    "input": "\"What are the specific terms and conditions associated with the advances provided to shareholders?\"",
+    "output": "What are the terms of the shareholders' advances?"
+  },
+  {
+    "input": "How many factors need to be considered when calculating the balance for subsidiaries?",
+    "output": "How many factors are involved in calculating the balance for subsidiaries?"
+  },
+  {
+    "input": "\"What is typically included in the category of selling and administrative costs?\"",
+    "output": "What does selling and administrative costs include?"
+  },
+  {
+    "input": "How many distinct categories of operating expenses exist?",
+    "output": "How many different type of operating expenses are there?"
+  },
+  {
+    "input": "What is the mean value of the three highest subcategories within operating expenses for the year 2019?",
+    "output": "What is the average of the top 3 operating expenses subcategories in 2019?"
+  },
+  {
+    "input": "What is the average value of the three highest subcategories of operating expenses in the year 2019?",
+    "output": "What is the average of the top 3 operating expenses subcategories in 2019?"
+  },
+  {
+    "input": "What are the projected maximum future payouts for Barry Litwin and Thomas Clark under the NEO plan of 2019?",
+    "output": "What is the maximum estimated future payouts under the 2019 NEO plan for Barry Litwin and Thomas Clark?"
+  },
+  {
+    "input": "What is the maximum estimated future payout amount under the 2019 NEO plan specifically designated for Barry Litwin and Thomas Clark?",
+    "output": "What is the maximum estimated future payouts under the 2019 NEO plan for Barry Litwin and Thomas Clark?"
+  },
+  {
+    "input": "What are the projected highest expected future payouts under the 2019 NEO plan for both Manoj Shetty and Lawrence Reinhold?",
+    "output": "What is the maximum estimated future payouts under the 2019 NEO plan for Manoj Shetty and Lawrence Reinhold?"
+  },
+  {
+    "input": "What are the audit fees incurred by the company in 2018 and 2019?",
+    "output": "What were the audit fees that the company had to pay in the years 2018 and 2019?"
+  },
+  {
+    "input": "What is the percentage difference in the total amount of fees paid by the company for all other expenses between the years 2018 and 2019?",
+    "output": "What is the percentage change in all other fees incurred by the company between 2018 and 2019?"
+  },
+  {
+    "input": "What percentage of common stock does Barry Litwin and Robert D. Rosenthal collectively own?",
+    "output": "What is the percent of common stock owned by Barry Litwin and Robert D. Rosenthal?"
+  },
+  {
+    "input": "What is the total compensation received by Robert D. Rosenthal and Chad M. Lindbloom respectively during fiscal 2019?",
+    "output": "What were the individual total compensations received by Robert D. Rosenthal and Chad M. Lindbloom in fiscal year 2019?"
+  },
+  {
+    "input": "What is the total compensation received by Paul S. Pearlman and Lawrence Reinhold respectively during fiscal 2019?",
+    "output": "What were the individual total compensations earned by Paul S. Pearlman and Lawrence Reinhold in fiscal year 2019?"
+  },
+  {
+    "input": "What is the definition and significance of stock awards?",
+    "output": "What does stock awards refer to?"
+  },
+  {
+    "input": "What are the Thomas Clark's accelerated vesting of stock options and unvested performance restricted stock units respectively?",
+    "output": "Can you please provide specific details about the accelerated vesting of stock options of Thomas Clark? Additionally, what are the details regarding the unvested performance restricted stock units held by Thomas Clark?"
+  },
+  {
+    "input": "What is the meaning or definition of the \"other\" description in the allowance for sales return in the year 2017?",
+    "output": "What does the \"other\" description in allowance for sales return in 2017 refer to?"
+  },
+  {
+    "input": "What is the cumulative amount of allowances for sales returns write-offs from 2017 to 2019?",
+    "output": "What is the total allowances for sales returns write-offs between 2017 to 2019?"
+  },
+  {
+    "input": "What is the total amount of allowances accounted for sales returns and write-offs from 2017 to 2019?",
+    "output": "What is the total allowances for sales returns write-offs between 2017 to 2019?"
+  },
+  {
+    "input": "What is the percentage difference in the allowance for deferred tax assets at the end of the period in 2018 compared to 2019?",
+    "output": "What is the percentage change in the allowance for deferred tax assets at the end of period between 2018 and 2019?"
+  },
+  {
+    "input": "What are Manoj Shetty's respective accelerated vesting of stock options and unvested performance restricted stock units respectively?",
+    "output": "What is the accelerated vesting schedule for Manoj Shetty's stock options? Additionally, how many unvested performance restricted stock units does he currently have?"
+  },
+  {
+    "input": "What are Manoj Shetty's respective accelerated vesting of stock options and unvested performance restricted stock units respectively?",
+    "output": "What is the accelerated vesting timeline for Manoj Shetty's stock options? Additionally, what is the status of his unvested performance restricted stock units?"
+  },
+  {
+    "input": "What is the difference in consolidated gross profits comparing 2019 to 2018, and also 2018 to 2017?",
+    "output": "What is the change in consolidated gross profits between 2019 vs 2018 and 2018 vs 2017 respectively?"
+  },
+  {
+    "input": "What is the total consolidated net sales in 2019 and 2018?",
+    "output": "What were the combined net sales for the fiscal years of 2019 and 2018?"
+  },
+  {
+    "input": "What is the total consolidated net sales in 2017 and 2018",
+    "output": "What were the combined net sales for the years 2017 and 2018?"
+  },
+  {
+    "input": "What is the total consolidated net sales in 2017 and 2018",
+    "output": "What were the combined net sales for 2017 and 2018?"
+  },
+  {
+    "input": "What is the change in consolidated gross profit between 2017 and 2018?",
+    "output": "What is the difference in consolidated gross profit for the company between the years 2017 and 2018, and how does the value change during this period?"
+  },
+  {
+    "input": "What is the change in consolidated gross profit between 2017 and 2018?",
+    "output": "What is the specific difference in consolidated gross profit when comparing the financial years of 2017 and 2018?"
+  },
+  {
+    "input": "What was the percentage of the total population or total something specific (specify) from Singapore in the year 2019?",
+    "output": "What was the percentage of total from Singapore in 2019?"
+  },
+  {
+    "input": "What was the average amount of money sent from Singapore in the years 2018 and 2019?",
+    "output": "What was the average amount from Singapore in 2018 and 2019?"
+  },
+  {
+    "input": "What was the average amount of money, in Singapore currency, that was received from Singapore in the years 2018 and 2019 combined?",
+    "output": "What was the average amount from Singapore in 2018 and 2019?"
+  },
+  {
+    "input": "How is the fair value of a financial instrument of a company determined and defined?",
+    "output": "How is the fair value of the company's financial instrument defined?"
+  },
+  {
+    "input": "In what specific year did the interest rate swap fall below the threshold of 1,000 thousands?",
+    "output": "In which year was interest rate swap less than 1,000 thousands?"
+  },
+  {
+    "input": "In which specific year did the interest rate swap fall below one million in thousands?",
+    "output": "In which year was interest rate swap less than 1,000 thousands?"
+  },
+  {
+    "input": "What are the three levels of subjectivity and could you provide more information to assist in providing a comprehensive answer?",
+    "output": "What are the 3 levels of subjectivity?"
+  },
+  {
+    "input": "What are the 3 distinct levels of subjectivity and how can they be described or defined?",
+    "output": "What are the 3 levels of subjectivity?"
+  },
+  {
+    "input": "\"What were the additional features or updates introduced in the year 2019? Similarly, what were the new additions or improvements made in 2018?\"",
+    "output": "What were the other additions in 2019 and 2018 respectively?"
+  },
+  {
+    "input": "\"What are the average number of other additions made in 2018 and 2019?\"",
+    "output": "What is the average other additions for 2018 and 2019?"
+  },
+  {
+    "input": "\"What is the average number of non-inclusive additions made in the years 2018 and 2019 combined?\"",
+    "output": "What is the average other additions for 2018 and 2019?"
+  },
+  {
+    "input": "In what specific year did the transfer of developed technology from IPR&D exceed an amount greater than 4,000 thousands?",
+    "output": "In which year was Transfers to developed technology from IPR&D greater than 4,000 thousands?"
+  },
+  {
+    "input": "What were the average Selling, General, and Administrative expenses for the financial years ending on December 31, 2019 and 2018?",
+    "output": "What is the average Selling, general and administrative for the Years Ended December 31, 2019 to 2018?"
+  },
+  {
+    "input": "\"What is the average amount spent on Selling, general and administrative expenses for the Years Ended December 31, 2019 and 2018?\"",
+    "output": "What is the average Selling, general and administrative for the Years Ended December 31, 2019 to 2018?"
+  },
+  {
+    "input": "What was the percentage decrease in the Selling, general and administrative expenses during the year 2019?",
+    "output": "What was the decrease in Selling, general and administrative in 2019?"
+  },
+  {
+    "input": "What was the number of shares granted in 2019?",
+    "output": "How many shares were awarded in the year 2019?"
+  },
+  {
+    "input": "Define research and development activities.",
+    "output": "What are research and development activities?"
+  },
+  {
+    "input": "What was the average amount spent on net revenue for the years ending on December 31, 2019 and 2018?",
+    "output": "What is the average Cost of net revenue, for the Years Ended December 31, 2019 to 2018?"
+  },
+  {
+    "input": "What is the average Cost of net revenue for the period spanning from December 31, 2018, to December 31, 2019?",
+    "output": "What is the average Cost of net revenue, for the Years Ended December 31, 2019 to 2018?"
+  },
+  {
+    "input": "In which year was Restructuring expense 0 thousands?",
+    "output": "In which specific year did the Restructuring expense amount to zero thousands (thousands referring to the unit) without any incurred costs?"
+  },
+  {
+    "input": "In which year was Restructuring expense 0 thousands?",
+    "output": "What was the specific year when the Restructuring expense amounted to 0 thousands?"
+  },
+  {
+    "input": "\"What were the transfers of developed technology from IPR&D in the years 2019 and 2018 respectively?\"",
+    "output": "What were the Transfers to developed technology from IPR&D in 2019 and 2018 respectively?"
+  },
+  {
+    "input": "What was the change in the Transfers to developed technology from IPR&D from 2018 to 2019?",
+    "output": "What was the difference in the amount of transfers to developed technology from the Intangible Property, Research, and Development (IPR&D) category between the years 2018 and 2019?"
+  },
+  {
+    "input": "In which year was Transfers to developed technology from IPR&D negative?",
+    "output": "In what specific year did Transfers to developed technology from IPR&D show a negative value?"
+  },
+  {
+    "input": "What is the average Short-term restricted cash for 2018 and 2019?",
+    "output": "What is the average amount of Short-term restricted cash for the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the average Net cash provided by operating activities from December 31, 2018 to December 31, 2019?",
+    "output": "What is the average Net cash provided by operating activities for the year ended December 31, 2019 to 2018?"
+  },
+  {
+    "input": "What is the average amount of cash generated from operating activities, received or used, for the period spanning from December 31, 2018 to December 31, 2019?",
+    "output": "What is the average Net cash provided by operating activities for the year ended December 31, 2019 to 2018?"
+  },
+  {
+    "input": "What was the respective Net cash used in investing activities in 2019 and 2018?",
+    "output": "What were the net amounts of cash used in investing activities for the years 2019 and 2018 respectively?"
+  },
+  {
+    "input": "What was the respective Net cash used in investing activities in 2019 and 2018?",
+    "output": "\"What was the amount of net cash utilized in investing activities during the years 2019 and 2018?\""
+  },
+  {
+    "input": "What was the net value of long-term debt in 2019 after deducting any related expenses or liabilities?",
+    "output": "What was the Net carrying amount of long-term debt in 2019?"
+  },
+  {
+    "input": "What was the specific amount of long-term debt after deducting all necessary expenses from the total amount in 2018?",
+    "output": "What was the Net carrying amount of long-term debt in 2018?"
+  },
+  {
+    "input": "What was the specific amount of long-term debt, after deducting any reductions or write-offs, recorded on the financial statements for the year 2018?",
+    "output": "What was the Net carrying amount of long-term debt in 2018?"
+  },
+  {
+    "input": "What is the mean value of the unamortized debt discount for the fiscal years 2018 and 2019?",
+    "output": "What is the average Unamortized debt discount for 2018 and 2019?"
+  },
+  {
+    "input": "When was the year when the net carrying amount of long-term debt was less than 210,000 thousands?",
+    "output": "In which year was Net carrying amount of long-term debt less than 210,000 thousands?"
+  },
+  {
+    "input": "What is the average working capital for the period from December 31, 2018 to December 31, 2019?",
+    "output": "What is the average Working capital for December 31, 2019 to 2018?"
+  },
+  {
+    "input": "What is the average Cash and cash equivalents for December 31, 2019 to 2018?",
+    "output": "What is the average amount of Cash and cash equivalents for the period from December 31, 2018 to December 31, 2019?"
+  },
+  {
+    "input": "What is the average Cash and cash equivalents for December 31, 2019 to 2018?",
+    "output": "What is the average amount of Cash and cash equivalents as of December 31 for the years 2019 and 2018 combined?"
+  },
+  {
+    "input": "What is the eligibility of Performance-based restricted stock to vest?",
+    "output": "What criteria must be met for Performance-based restricted stock to become vested and eligible for acquisition?"
+  },
+  {
+    "input": "What were the adjustments in 2018?",
+    "output": "\"What specific adjustments, if any, occurred during the year 2018?\""
+  },
+  {
+    "input": "When was the year when the ending balance fell below $240,000,000?",
+    "output": "In which year was the ending balance less than 240,000 thousands?"
+  },
+  {
+    "input": "What was the total amount of expenses incurred during the period from January 1 to December 31, 2018, after accounting for all deductions and offsets?",
+    "output": "What was the net expense in the year ended December 31, 2018?"
+  },
+  {
+    "input": "What was the total amount of expenditures for the financial year that concluded on December 31, 2018?",
+    "output": "What was the net expense in the year ended December 31, 2018?"
+  },
+  {
+    "input": "What was the total amount of expenses incurred during the period from January 1 to December 31, 2019?",
+    "output": "What was the net expense in the year ended December 31, 2019?"
+  },
+  {
+    "input": "What was the total expenditure for the period that concluded on December 31, 2019?",
+    "output": "What was the net expense in the year ended December 31, 2019?"
+  },
+  {
+    "input": "What was the percentage increase in the cost of services in 2019?",
+    "output": "How much did Cost of services increase in 2019?"
+  },
+  {
+    "input": "What was the percentage increase in the cost of services in the year 2019 compared to the previous year?",
+    "output": "How much did Cost of services increase in 2019?"
+  },
+  {
+    "input": "What was the Less Capital expenditures (including capitalized software) in 2019?",
+    "output": "What was the total amount spent on Less Capital Expenditures, including expenditure on capitalized software, during the year 2019?"
+  },
+  {
+    "input": "What was the Less Capital expenditures (including capitalized software) in 2019?",
+    "output": "\"What was the total amount of capital expenditures, including capitalized software, that occurred in the year 2019?\""
+  },
+  {
+    "input": "What was the free cash flow in 2019?",
+    "output": "What was the amount of free cash flow generated in the year 2019?"
+  },
+  {
+    "input": "What was the average free cash flow for 2018 and 2019?",
+    "output": "What was the average amount of free cash flow generated for the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in the Less Capital expenditures from 2018 to 2019?",
+    "output": "What was the percentage difference in the amount spent on capital expenditures between 2018 and 2019?"
+  },
+  {
+    "input": "What was the exact total value of the assets owned by a particular entity/company during the year 2019?",
+    "output": "What was the Total assets in 2019?"
+  },
+  {
+    "input": "What was the total value of assets in the year 2019?",
+    "output": "What was the Total assets in 2019?"
+  },
+  {
+    "input": "What was the average amount of operating income recorded during the period from 2015 to 2019?",
+    "output": "What was the average operating income for 2015-2019?"
+  },
+  {
+    "input": "What was the change in the Per common share – basic from 2018 to 2019?",
+    "output": "What was the exact change in the Per common share – basic value from 2018 to 2019? Please provide the specific numerical difference between the two years."
+  },
+  {
+    "input": "What were the amounts of Service and other revenues for the years ended December 31, 2019 and 2018?",
+    "output": "How much did Service and other revenues amounted for   years ended December 31, 2019 and 2018 respectively?"
+  },
+  {
+    "input": "What were the respective amounts of Service and other revenues for the years ended December 31, 2019 and 2018?",
+    "output": "How much did Service and other revenues amounted for   years ended December 31, 2019 and 2018 respectively?"
+  },
+  {
+    "input": "\"What were the respective revenue amounts generated from Wireless equipment for the years ending December 31, 2019 and 2018?\"",
+    "output": "How much did Wireless equipment revenues amounted for   years ended December 31, 2019 and 2018 respectively?"
+  },
+  {
+    "input": "What was the total amount of revenue generated from Wireless equipment during the years ending December 31, 2019 and 2018?",
+    "output": "How much did Wireless equipment revenues amounted for   years ended December 31, 2019 and 2018 respectively?"
+  },
+  {
+    "input": "What was the percentage change in the value of Small and Medium Businesses from 2018 to 2019?",
+    "output": "What is the change in Small and Medium Business value from 2018 to 2019?"
+  },
+  {
+    "input": "What is the Consolidated Net Income in 2019?",
+    "output": "What is the Consolidated Net Income for the year 2019?"
+  },
+  {
+    "input": "What is the Consolidated Net Income in 2019?",
+    "output": "What is the exact amount of Consolidated Net Income recorded in the fiscal year of 2019?"
+  },
+  {
+    "input": "What is the difference in Consolidated Adjusted EBITDA between 2018 and 2019?",
+    "output": "What is the change in Consolidated Adjusted EBITDA from 2018 to 2019?"
+  },
+  {
+    "input": "What was the net change in accumulated other comprehensive income for the year 2016?",
+    "output": "What was the net increase to accumulated other comprehensive income in 2016?"
+  },
+  {
+    "input": "What is the difference in the amount of projected employee benefits that a company is obligated to pay, as of 2018 compared to 2019?",
+    "output": "What is the change in the projected benefit obligation from 2018 to 2019?"
+  },
+  {
+    "input": "\"What was the specific percentage of income tax that individuals had to pay during the tax year of 2018?\"",
+    "output": "What was the effective income tax rate for the period 2018?"
+  },
+  {
+    "input": "What is the percentage difference in the effective income tax rate between 2018 and 2019?",
+    "output": "What is the change in Effective income tax rate from 2018 to 2019?"
+  },
+  {
+    "input": "What specific amount, included in the year 2018, would have a positive impact on the effective income tax rate?",
+    "output": "What was the amount that would favorably affect the effective income tax rate if included in 2018?"
+  },
+  {
+    "input": "What specific amount, included in 2017, would positively impact the effective income tax rate?",
+    "output": "What was the amount that would favorably affect the effective income tax rate if included in 2017?"
+  },
+  {
+    "input": "What specific amount, if included in the 2017 earnings, would have a positive impact on the effective income tax rate?",
+    "output": "What was the amount that would favorably affect the effective income tax rate if included in 2017?"
+  },
+  {
+    "input": "How did the Additions based on tax positions related to the current year change from 2018 to 2019?",
+    "output": "What was the change in the Additions based on tax positions related to the current year from 2018 to 2019?"
+  },
+  {
+    "input": "What was the average Additions for tax positions of prior years for 2017-2019?",
+    "output": "What were the average additions made for tax positions in the previous years, specifically from 2017 to 2019?"
+  },
+  {
+    "input": "What was the total long-term debt?",
+    "output": "\"What was the precise amount of long-term debt accrued by the entity in question, taking into account all outstanding obligations over an extended period of time?\""
+  },
+  {
+    "input": "What was the value of the long-term debt that was payable within less than 1 year?",
+    "output": "What was the long-term debt less than 1 year?"
+  },
+  {
+    "input": "What was the exact amount of the finance lease obligation in its entirety?",
+    "output": "What was the total finance lease obligation?"
+  },
+  {
+    "input": "What is the difference between the long-term debt due less than 1 year and 1 to 3 years?",
+    "output": "What distinguishes long-term debt due within less than one year from long-term debt due within one to three years?"
+  },
+  {
+    "input": "What is the difference between the long-term debt due less than 1 year and 1 to 3 years?",
+    "output": "What differentiates long-term debt with a maturity of less than 1 year from debt with a maturity ranging from 1 to 3 years?"
+  },
+  {
+    "input": "By what percentage did Consumer's total operating revenues increase in the year 2019?",
+    "output": "How much did Consumer’s total operating revenues increase in 2019?"
+  },
+  {
+    "input": "What was the percentage increase in Consumer's total operating revenues in 2019 compared to the previous year?",
+    "output": "How much did Consumer’s total operating revenues increase in 2019?"
+  },
+  {
+    "input": "What was the percentage increase in Service Revenue for the year 2019 compared to the previous year?",
+    "output": "How much did Service Revenue increase in 2019?"
+  },
+  {
+    "input": "What is the percentage change in revenue from wireless equipment sales between 2018 and 2019?",
+    "output": "What is the change in Wireless equipment revenue from 2018 to 2019?"
+  },
+  {
+    "input": "By what percentage did the consolidated revenue increase from 2018 to 2019?",
+    "output": "What was the increase in the consolidated revenue from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage increase/decrease in consumer revenue from 2018 to 2019?",
+    "output": "What was the change in the consumer revenue from 2018 to 2019?"
+  },
+  {
+    "input": "What is the source of financing for the firm's capital spending requirements?",
+    "output": "How is the capital spending requirements for the firm financed?"
+  },
+  {
+    "input": "How does the firm obtain financing for its capital spending needs?",
+    "output": "How is the capital spending requirements for the firm financed?"
+  },
+  {
+    "input": "What was the average cash flow used in investing activities for 2018 and 2019?",
+    "output": "What was the average amount of cash used in investing activities during the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in cash flow used in financing activities from 2018 to 2019?",
+    "output": "What was the percentage change in cash flow utilized for financing activities between the fiscal years 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in cash flow used in financing activities from 2018 to 2019?",
+    "output": "What was the exact percentage of increase or decrease in cash flow used for financing activities between 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage difference in the cost of services between 2018 and 2019?",
+    "output": "What was the change in the cost of services from 2018 to 2019?"
+  },
+  {
+    "input": "What were the adopted standards on January 1, 2018?",
+    "output": "Which Standards were Adopted on January 1, 2018?"
+  },
+  {
+    "input": "What is the change in Accumulated other comprehensive income from December 31, 2017 to January 1, 2018?",
+    "output": "What is the difference in the amount of Accumulated Other Comprehensive Income between December 31, 2017 and January 1, 2018?"
+  },
+  {
+    "input": "What is the net change in Noncontrolling interests between December 31, 2017, and January 1, 2018?",
+    "output": "What is the change in Noncontrolling interests from December 31, 2017 to January 1, 2018?"
+  },
+  {
+    "input": "What was the difference in Noncontrolling interests' value between December 31, 2017, and January 1, 2018?",
+    "output": "What is the change in Noncontrolling interests from December 31, 2017 to January 1, 2018?"
+  },
+  {
+    "input": "What was the ratio of the amortized cost due within one year or less to the fair value for the same period?",
+    "output": "What was the amortized cost that was due in one year or less as a ratio of the fair value for the same period?"
+  },
+  {
+    "input": "What was the ratio between the amortized cost to be paid within one year or less and the fair value for the same period?",
+    "output": "What was the amortized cost that was due in one year or less as a ratio of the fair value for the same period?"
+  },
+  {
+    "input": "What is the specific difference between the amortized cost and fair value, at the end of five years and at the end of ten years?",
+    "output": "What was the difference between the amortized cost and fair value that was due after five years through ten years?"
+  },
+  {
+    "input": "Which specific years can be found in the table that offer additional details pertaining to the stock options of the company?",
+    "output": "Which years does the table provide Additional information related to the company's stock options?"
+  },
+  {
+    "input": "What was the specific intrinsic value associated with exercises in the year 2019?",
+    "output": "What was the intrinsic value of exercises in 2019?"
+  },
+  {
+    "input": "What were the Proceeds received from exercises in 2018?",
+    "output": "What was the total amount of money received from exercises held in 2018?"
+  },
+  {
+    "input": "What were the Proceeds received from exercises in 2018?",
+    "output": "What was the total amount of money received from exercises in the year 2018?"
+  },
+  {
+    "input": "What was the gross income in 2017, prior to deducting income taxes?",
+    "output": "What was the total income before income taxes in 2017?"
+  },
+  {
+    "input": "What was the change in foreign income before income taxes between 2017 and 2018?",
+    "output": "What was the difference in the amount of income from foreign sources prior to income taxes, comparing the years 2017 and 2018? Please provide the change in figures for better understanding."
+  },
+  {
+    "input": "What was the specific difference in foreign income, without considering income taxes, from 2018 to 2019?",
+    "output": "What was the change in foreign income before income taxes between 2018 and 2019?"
+  },
+  {
+    "input": "What were the short-term investment options available in the year 2018?",
+    "output": "What were the short-term investments in 2018?"
+  },
+  {
+    "input": "What were the types of short-term investments that were prevalent in 2018?",
+    "output": "What were the short-term investments in 2018?"
+  },
+  {
+    "input": "What years does the table provide information for total assets?",
+    "output": "For which time period does the table provide data on total assets?"
+  },
+  {
+    "input": "What years does the table provide information for total assets?",
+    "output": "For which years does the table present data regarding the total assets?"
+  },
+  {
+    "input": "What was the total debt in 2015?",
+    "output": "What amount of debt, in terms of total outstanding obligations, was recorded in the year 2015?"
+  },
+  {
+    "input": "What was the change in working capital between 2015 and 2016?",
+    "output": "What was the difference in the amount of working capital between 2015 and 2016?"
+  },
+  {
+    "input": "By what percentage did the Total stockholders' equity change from 2018 to 2019?",
+    "output": "What was the percentage change in Total stockholders' equity between 2018 and 2019?"
+  },
+  {
+    "input": "Which years does the table provide information for the funded status of the company's postretirement health care and other defined benefit plans?",
+    "output": "For which specific years does the provided table present data relating to the funded status of the company's postretirement health care and other defined benefit plans?"
+  },
+  {
+    "input": "What was the specific amount of fair value attributed to the plan assets during the year 2019?",
+    "output": "What was the fair value of plan assets in 2019?"
+  },
+  {
+    "input": "What was the fair value measurements using Level 2 for Corporate Bonds?",
+    "output": "What are the fair value measurements using Level 2 for Corporate Bonds?"
+  },
+  {
+    "input": "What was the combined value of cash, cash equivalents, and short-term investments?",
+    "output": "What were the total cash, cash equivalents and short-term investments?"
+  },
+  {
+    "input": "What was the sum of all cash, cash equivalents, and short-term investments?",
+    "output": "What were the total cash, cash equivalents and short-term investments?"
+  },
+  {
+    "input": "What was the difference in the fair value for U.S. Treasury and government debt securities between Level 1 and Level 2?",
+    "output": "What was the discrepancy in the fair value of U.S. Treasury and government debt securities when classified as Level 1 and Level 2 assets?"
+  },
+  {
+    "input": "What was the fair value of Level 2 Total cash, cash equivalents and short-term investments as a percentage of the total cash, cash equivalents and short-term investments?",
+    "output": "What is the fair value, expressed as a percentage, of Level 2 Total cash, cash equivalents, and short-term investments in relation to the overall total of cash, cash equivalents, and short-term investments?"
+  },
+  {
+    "input": "What were the distinctions between the levels of Total cash, cash equivalents, and short-term investments in terms of Level 1 and Level 2?",
+    "output": "What was the difference between the Total cash, cash equivalents and short-term investments for Level 1 and Level 2?"
+  },
+  {
+    "input": "What were the disparities in the amounts of Total Cash, Cash Equivalents, and Short-Term Investments between Level 1 and Level 2?",
+    "output": "What was the difference between the Total cash, cash equivalents and short-term investments for Level 1 and Level 2?"
+  },
+  {
+    "input": "On what basis was the calculation for days inventory outstanding determined?",
+    "output": "What was days inventory outstanding based on?"
+  },
+  {
+    "input": "What was the Days sales outstanding for three months ended april 2019?",
+    "output": "\"What was the specific value of Days Sales Outstanding (DSO) specifically for the three-month period ending in April 2019?\""
+  },
+  {
+    "input": "What was the change in day sales outstanding between 2018 and 2019?",
+    "output": "What was the difference in the number of days it took to collect sales revenue between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in day sales outstanding between 2018 and 2019?",
+    "output": "What was the difference in the number of days it took to collect sales revenue between 2018 and 2019?"
+  },
+  {
+    "input": "For how long was the days inventory outstanding higher than 20 days ?",
+    "output": "How many years did days inventory outstanding exceed 20 days?"
+  },
+  {
+    "input": "What was the total amount spent on purchases in 2018?",
+    "output": "What was the Aggregate purchase price in 2018?"
+  },
+  {
+    "input": "What was the difference in the total purchase price for all items combined in 2018 and 2019?",
+    "output": "What was the change in the Aggregate purchase price between 2018 and 2019?"
+  },
+  {
+    "input": "What was the amount of finished goods in 2019?",
+    "output": "What was the specific quantity of fully completed and ready-for-sale products in the year 2019?"
+  },
+  {
+    "input": "What was the exact quantity of inventories during the year 2018?",
+    "output": "What was the amount of inventories in 2018?"
+  },
+  {
+    "input": "What was the change in finished goods between 2018 and 2019?",
+    "output": "How did the amount of finished goods change from 2018 to 2019?"
+  },
+  {
+    "input": "What were the additions made to tax positions related to the current year in 2019, and how do they impact the overall financial situation?",
+    "output": "What were the Additions based on tax positions related to the current year in 2019?"
+  },
+  {
+    "input": "What was the change in the Balance at beginning of period between 2017 and 2018?",
+    "output": "What was the difference in the starting balance from the beginning of the period in 2017 compared to 2018?"
+  },
+  {
+    "input": "What is the overall difference in the total amount of Additions for tax positions of prior years from 2017 to 2019?",
+    "output": "What was the total change in Additions for tax positions of prior years between 2017 and 2019?"
+  },
+  {
+    "input": "What was the difference in the total expenses recorded between 2018 and 2019?",
+    "output": "What was the change in expense accrued during the period between 2018 and 2019?"
+  },
+  {
+    "input": "What was the difference in the amount of expenses recorded from 2018 to 2019?",
+    "output": "What was the change in expense accrued during the period between 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in the Warranty costs incurred between 2018 and 2019?",
+    "output": "What was the difference in warranty costs incurred between 2018 and 2019, expressed as a percentage change?"
+  },
+  {
+    "input": "What were the total assets in 2018?",
+    "output": "What was the total value of assets in the year 2018?"
+  },
+  {
+    "input": "What was the percentage change in  Total deferred commissions between 2018 and 2019?",
+    "output": "What was the percentage change in the total amount of deferred commissions from 2018 to 2019?"
+  },
+  {
+    "input": "What was the final balance at the end of the specified period in the year 2018?",
+    "output": "What was the balance at the end of period in 2018?"
+  },
+  {
+    "input": "What was the final balance at the end of period in 2018? Please provide the balance from the specified period in 2018.",
+    "output": "What was the balance at the end of period in 2018?"
+  },
+  {
+    "input": "What was the change in the balance at beginning of period between 2018 and 2019?",
+    "output": "What was the difference in the balance at the start of the period between the years 2018 and 2019?"
+  },
+  {
+    "input": "For how many years did the ending balance surpass $3,500 million?",
+    "output": "How many years did balance at end of period exceed $3,500 million?"
+  },
+  {
+    "input": "How many years did interest income exceed $50 million?",
+    "output": "For how many years was the interest income greater than $50 million?"
+  },
+  {
+    "input": "How many years did interest income exceed $50 million?",
+    "output": "How many years was the total interest income greater than $50 million?"
+  },
+  {
+    "input": "What was the percentage change in net revenues between 2018 and 2019?",
+    "output": "What was the exact percentage difference in net revenues from 2018 to 2019, and how can I calculate it accurately?"
+  },
+  {
+    "input": "Which years does the table provide information for net property and equipment?",
+    "output": "For which specific years is information regarding net property and equipment provided in the table?"
+  },
+  {
+    "input": "Which years does the table provide information for net property and equipment?",
+    "output": "For which specific years does the table present data regarding net property and equipment?"
+  },
+  {
+    "input": "What was the total revenue generated from operational activities during the year 2017?",
+    "output": "What was the income from operations in 2017?"
+  },
+  {
+    "input": "What was the percentage change in the net income between 2018 and 2019?",
+    "output": "What was the precise percentage increase or decrease in the net income from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in the net income between 2018 and 2019?",
+    "output": "What is the percentage difference in the net income from 2018 to 2019?"
+  },
+  {
+    "input": "How much money was in circulation in the form of physical cash in the year 2019?",
+    "output": "What was the amount of cash in 2019?"
+  },
+  {
+    "input": "In 2019, how much cash was there?",
+    "output": "What was the amount of cash in 2019?"
+  },
+  {
+    "input": "What was the specific value or quantity of cash equivalents recorded in the financial records for the year 2018?",
+    "output": "What was the amount of cash equivalents in 2018?"
+  },
+  {
+    "input": "Which years does the table provide information for property and equipment information for geographic areas based on the physical location of the assets?",
+    "output": "For which specific years does the table provide information related to property and equipment in geographic areas, considering the assets' physical locations?"
+  },
+  {
+    "input": "Which years does the table provide information for property and equipment information for geographic areas based on the physical location of the assets?",
+    "output": "For which years does the table present property and equipment information? This information is specifically related to geographic areas, and is based on the physical location of the assets."
+  },
+  {
+    "input": "What was the total combined value of property and equipment in the United States during the year 2019?",
+    "output": "What was the amount of property and equipment in U.S. in 2019?"
+  },
+  {
+    "input": "How many years did International property and equipment exceed $150 million?",
+    "output": "For how many consecutive years has the value of International property and equipment been greater than $150 million?"
+  },
+  {
+    "input": "What was the financed unearned services revenue in 2019?",
+    "output": "\"What was the amount of unearned revenue from financed services in 2019?\""
+  },
+  {
+    "input": "What was the financed unearned services revenue in 2019?",
+    "output": "What was the specific amount of unearned services revenue that was financed during the year 2019?"
+  },
+  {
+    "input": "For how many years has the amount of financed unearned services revenue been greater than $100 million?",
+    "output": "How many years did financed unearned services revenue exceed $100 million?"
+  },
+  {
+    "input": "For how many consecutive years has the unearned services revenue in financed exceeded $100 million?",
+    "output": "How many years did financed unearned services revenue exceed $100 million?"
+  },
+  {
+    "input": "What was the change in deferred services revenue between 2018 and 2019?",
+    "output": "What was the difference in the amount of deferred services revenue recorded in 2018 and 2019?"
+  },
+  {
+    "input": "What was the sum of the net revenues from the last two quarters?",
+    "output": "What is the total net revenue for the most recent two quarters?"
+  },
+  {
+    "input": "What was the change in the Dividends per share declared between 2018 and 2019?",
+    "output": "What was the difference in the amount of Dividends per share declared between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in the Dividends per share declared between 2018 and 2019?",
+    "output": "What was the difference in the amount of dividends per share declared for the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in the Dividend payments allocated to retained earnings (accumulated deficit) between 2017 and 2018?",
+    "output": "What was the difference in the amounts of dividends allocated to retained earnings (accumulated deficit) from 2017 to 2018?"
+  },
+  {
+    "input": "What was the change in the Dividend payments allocated to retained earnings (accumulated deficit) between 2017 and 2018?",
+    "output": "What was the variation in the amount of dividends transferred to retained earnings (accumulated deficit) from 2017 to 2018?"
+  },
+  {
+    "input": "What factors contributed to the rise in the cash inflows generated from day-to-day business operations during the year 2019?",
+    "output": "What caused the increase in the cash flow from operating activities in 2019?"
+  },
+  {
+    "input": "What is the percentage change in the Free Cash Flow, considering both an increase and decrease in value?",
+    "output": "What is the increase / (decrease) in the Free Cash Flow?"
+  },
+  {
+    "input": "What is the percentage change in Free Cash Flow?",
+    "output": "What is the increase / (decrease) in the Free Cash Flow?"
+  },
+  {
+    "input": "What are the definitions of underlying operating profit and underlying operating margin, and how do they differ from each other?",
+    "output": "What is the definition of the Underlying operating profit and underlying operating margin?"
+  },
+  {
+    "input": "What does the underlying effective tax rate measure reflects?",
+    "output": "What is the definition and significance of the underlying effective tax rate and what does it reflect in terms of a company's financial situation or performance?"
+  },
+  {
+    "input": "What does the underlying effective tax rate measure reflects?",
+    "output": "What is the meaning of the underlying effective tax rate measure and what does it reflect?"
+  },
+  {
+    "input": "What is the methodology used to calculate the effective tax rate?",
+    "output": "How is the underlying effective tax rate calculated?"
+  },
+  {
+    "input": "What is the exact difference in the underlying effective tax rate between 2018 and 2019?",
+    "output": "What is the Underlying effective tax rate change from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in the underlying effective tax rate between 2018 and 2019?",
+    "output": "What is the Underlying effective tax rate change from 2018 to 2019?"
+  },
+  {
+    "input": "By what percentage did the Biogas production increase from the year 2018 to 2019?",
+    "output": "What is the increase in the Biogas from 2018 to 2019?"
+  },
+  {
+    "input": "What is the average total energy?",
+    "output": "What is the average amount of energy, when considering all types of energy combined?"
+  },
+  {
+    "input": "\"What is the percentage change in the price of Fuel Oils from 2018 to 2019? Please specify if it is an increase or decrease.\"",
+    "output": "What is the percentage increase / (decrease) in Fuel Oils from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in the price of Fuel Oils between 2018 and 2019?",
+    "output": "What is the percentage increase / (decrease) in Fuel Oils from 2018 to 2019?"
+  },
+  {
+    "input": "What method is used to calculate ROIC (Return on Invested Capital)? Please provide a detailed explanation of the calculation process.",
+    "output": "How is ROIC calculated?"
+  },
+  {
+    "input": "What is the difference in the diluted combined average number of share units (in millions) between 2018 and 2019?",
+    "output": "What is the change in the Diluted combined average number of share units (millions of units) from 2018 to 2019?"
+  },
+  {
+    "input": "What is the difference in the diluted combined average number of share units (measured in millions) between 2018 and 2019?",
+    "output": "What is the change in the Diluted combined average number of share units (millions of units) from 2018 to 2019?"
+  },
+  {
+    "input": "What was the specific price quotation for investments on 31 December 2018?",
+    "output": "What was the price quotation for investments as of 31 December 2018?"
+  },
+  {
+    "input": "What was the average value of investments made by listed companies in the year 2019?",
+    "output": "What is the average amount of investments of listed companies in 2019?"
+  },
+  {
+    "input": "What was the average investment amount of listed companies in 2019?",
+    "output": "What is the average amount of investments of listed companies in 2019?"
+  },
+  {
+    "input": "What was the average percentage of ownership or voting rights held by shareholders in listed companies during the year 2019?",
+    "output": "What is the average Percentage of ownership or voting rights of listed companies in 2019?"
+  },
+  {
+    "input": "What is the average Unsecured other loans?",
+    "output": "What is the average amount of unsecured loans that fall under the category of \"other loans\"?"
+  },
+  {
+    "input": "What is the average value or rating of Integrated Device Manufacturers (IDMs)?",
+    "output": "What is the average of Integrated device manufacturers?"
+  },
+  {
+    "input": "What is the average performance or profitability of Integrated device manufacturers?",
+    "output": "What is the average of Integrated device manufacturers?"
+  },
+  {
+    "input": "What is the increase / (decrease) in the Integrated device manufacturers from 2017 to 2018?",
+    "output": "What is the net change, either increase or decrease, in the number of Integrated device manufacturers from 2017 to 2018?"
+  },
+  {
+    "input": "What is the average amount of compensation received by Directors?",
+    "output": "What is the average Directors’ compensation?"
+  },
+  {
+    "input": "What is the percentage change in compensation for Directors from the year 2018 to 2019?",
+    "output": "What is the increase/ (decrease) in Directors’ compensation from 2018 to 2019?"
+  },
+  {
+    "input": "What calculation method is employed to determine the total sum of the Company's shares?",
+    "output": "What method is used to calculate the aggregate amount of the Company’s share?"
+  },
+  {
+    "input": "What specific method is employed to calculate the total combined value of the shares held by the Company?",
+    "output": "What method is used to calculate the aggregate amount of the Company’s share?"
+  },
+  {
+    "input": "What is the average revenue generated from the sale of goods and services from December 31, 2018 to December 31, 2019?",
+    "output": "What are the average Sales of goods and services for December 31, 2018 to 2019?"
+  },
+  {
+    "input": "What is the increase/ (decrease) in Sales of goods and services for December 31, 2018 to 2019?",
+    "output": "What is the percentage change in the sales of goods and services when comparing December 31, 2018, to December 31, 2019?"
+  },
+  {
+    "input": "What is the percentage change in the sales of goods and services from January 1, 2018, to December 31, 2018?",
+    "output": "What is the increase/ (decrease) in Sales of goods and services for January 1, 2018 to December 31, 2018?"
+  },
+  {
+    "input": "What is the percentage change in the sales of goods and services from January 1, 2018 to December 31, 2018?",
+    "output": "What is the increase/ (decrease) in Sales of goods and services for January 1, 2018 to December 31, 2018?"
+  },
+  {
+    "input": "\"What is the average amount of lease obligation for the period Less than 1 Year and 1-3 Years? Please provide the average lease obligations separately for both time periods.\"",
+    "output": "What is the average lease obligation for the period Less than 1 Year and 1-3 Years?"
+  },
+  {
+    "input": "What does Purchase obligations incorporate?",
+    "output": "What is included in purchase obligations?"
+  },
+  {
+    "input": "What is the criteria for an employee to receive additional bonuses?",
+    "output": "What are the specific requirements or factors that determine if an employee is eligible to receive extra bonuses in addition to their regular compensation?"
+  },
+  {
+    "input": "What is the difference in Time Deposits: Non-Trading Purpose between Carrying Amount and Fair Amount?",
+    "output": "What is the distinction between the carrying amount and fair amount of Time Deposits used for non-trading purposes?"
+  },
+  {
+    "input": "What is the difference between Bonds: Non-Trading Purpose Carrying Amount and Fair Amount?",
+    "output": "What distinguishes Bonds: Non-Trading Purpose Carrying Amount from Fair Amount in terms of their meaning, significance, and financial implications?"
+  },
+  {
+    "input": "What is the average quantity of supplies and spare parts in general?",
+    "output": "What was the average Supplies and spare parts?"
+  },
+  {
+    "input": "What was the average quantity of supplies and spare parts used?",
+    "output": "What was the average Supplies and spare parts?"
+  },
+  {
+    "input": "What was the percentage increase / (decrease) in the Finished goods from 2018 to 2019?",
+    "output": "What was the percentage change in the amount of Finished goods from the year 2018 to 2019, indicating whether it increased or decreased?"
+  },
+  {
+    "input": "What was the percentage increase / (decrease) in the Finished goods from 2018 to 2019?",
+    "output": "What was the percentage change in the value of Finished goods from 2018 to 2019, indicating whether it increased or decreased?"
+  },
+  {
+    "input": "What is the percentage change in the value of financial assets at fair value through profit or loss between 2018 and 2019?",
+    "output": "What is the increase / (decrease) in the Financial assets at fair value through profit or loss from 2018 to 2019?"
+  },
+  {
+    "input": "What is the increase / (decrease) in the Short-term loans from 2018 to 2019?",
+    "output": "What was the percentage change in Short-term loans from 2018 to 2019?"
+  },
+  {
+    "input": "What is the increase / (decrease) in the Short-term loans from 2018 to 2019?",
+    "output": "What was the percentage change in Short-term loans between 2018 and 2019?"
+  },
+  {
+    "input": "What measures does the company implement to effectively manage, maintain, or adapt its capital structure in response to changes in the business environment?",
+    "output": "What steps does the company take To maintain or adjust the capital structure?"
+  },
+  {
+    "input": "What is the change in the total liabilities, expressed as a percentage, between 2018 and 2019?",
+    "output": "What is the increase / (decrease) in the Total liabilities from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in Total Capital from 2018 to 2019?",
+    "output": "What is the percentage increase / (decrease) of Total Capital from 2018 to 2019?"
+  },
+  {
+    "input": "How do non-GAAP measures assist investors in making informed investment decisions and assessing a company's financial performance?",
+    "output": "How do these non-GAAP measures aid investors?"
+  },
+  {
+    "input": "How much would diluted earnings per share for fiscal year 2018 be without the net charge (benefit) related to the enactment of the TCJA?",
+    "output": "What would be the diluted earnings per share for fiscal year 2018 if we exclude the net charge (benefit) associated with the enactment of the Tax Cuts and Jobs Act (TCJA)?"
+  },
+  {
+    "input": "How much would diluted earnings per share for fiscal year 2018 be without the net charge (benefit) related to the enactment of the TCJA?",
+    "output": "What would be the diluted earnings per share for fiscal year 2018, excluding the impact of the net charge (benefit) resulting from the implementation of the Tax Cuts and Jobs Act (TCJA)?"
+  },
+  {
+    "input": "What was the average revenue over the 3 year period from 2017 to 2019?",
+    "output": "What was the average revenue from 2017 to 2019, calculated over the span of three years?"
+  },
+  {
+    "input": "What factors contributed to the change in the federal statutory rate during fiscal year 2017?",
+    "output": "Why did the federal statutory rate in fiscal year 2017? "
+  },
+  {
+    "input": "How many items accounted for the difference between income taxes computed at the US federal statutory rate and the company's effective rate?",
+    "output": "How many specific items contributed to the variance between the income taxes calculated using the official US federal statutory rate and the actual rate paid by the company?"
+  },
+  {
+    "input": "What was the average federal statutory rate over the 3 year period from 2017 to 2019? ",
+    "output": "What was the average federal statutory rate from 2017 to 2019?"
+  },
+  {
+    "input": "What was the average effective rate for the years 2017, 2018, and 2019?",
+    "output": "What was the average effective rate over the 3 year period from 2017 to 2019? "
+  },
+  {
+    "input": "How much were the top 3 components of property and equipment as a % of the total at cost, property and equipment for 2019?",
+    "output": "What were the percentages for the top 3 components of property and equipment in relation to the total at cost, property, and equipment in 2019?"
+  },
+  {
+    "input": "What percentage of the overall revenue in 2019 was not generated by the commercial cloud revenue?",
+    "output": "How much of the total revenue in 2019 did not come from commercial cloud revenue?"
+  },
+  {
+    "input": "Which were the bottom 2 revenue items for 2017?",
+    "output": "\"What were the two lowest revenue-generating items in 2017?\""
+  },
+  {
+    "input": "What was the average EPS (earnings per share) for the years 2017, 2018, and 2019?",
+    "output": "What was the average basic earnings per share over the 3 year period from 2017 to 2019?"
+  },
+  {
+    "input": "\"Which specific locations does the company have both operating leases and finance leases in place?\"",
+    "output": "Which places does the company have operating and finance leases?"
+  },
+  {
+    "input": "What is the topic or content covered in note 15?",
+    "output": "What is note 15 about?"
+  },
+  {
+    "input": "\"What is the ascending order of the years when sorted by their total cost of finance leases?\"",
+    "output": "What are the years sorted by total finance lease cost, in ascending order?"
+  },
+  {
+    "input": "How many different items are included when calculating operating income (loss)?",
+    "output": "How many items are there for operating income (loss)?"
+  },
+  {
+    "input": "How many different items are included in the calculation of operating income (loss)?",
+    "output": "How many items are there for operating income (loss)?"
+  },
+  {
+    "input": "How is the allocation of amortization and depreciation determined and distributed?",
+    "output": "How is the portion of amortization and depreciation allocated?"
+  },
+  {
+    "input": "How is the allocation of the portion of amortization and depreciation determined and distributed?",
+    "output": "How is the portion of amortization and depreciation allocated?"
+  },
+  {
+    "input": "How many countries have Long-lived assets classified based on the controlling statutory company's location, excluding financial instruments and tax assets? Also, can you identify the countries where the assets amount to over 10% of the total assets?",
+    "output": "How many countries have Long-lived assets, excluding financial instruments and tax assets, classified by the location of the controlling statutory company and with countriesover 10% of the total assets?"
+  },
+  {
+    "input": "What was the percentage change in the total long-lived assets from 2017 to 2018?",
+    "output": "What was the precise percentage difference in the value of total long-lived assets between the years 2017 and 2018?"
+  },
+  {
+    "input": "What was the percentage change in the total long-lived assets from 2017 to 2018?",
+    "output": "What was the percentage increase or decrease in the value of the total long-lived assets between the years 2017 and 2018?"
+  },
+  {
+    "input": "What is the definition and significance of purchase commitments?",
+    "output": "What do the purchase commitments represent?"
+  },
+  {
+    "input": "What is the meaning and significance of purchase commitments?",
+    "output": "What do the purchase commitments represent?"
+  },
+  {
+    "input": "Where can the long-term debt be found?",
+    "output": "Where is the location to find the information regarding long-term debt?"
+  },
+  {
+    "input": "How much do the top 3 contractual obligation terms add up to in 2020?",
+    "output": "What is the total value of the top three contractual obligation terms in 2020?"
+  },
+  {
+    "input": "How much do the top 3 contractual obligation terms add up to in 2020?",
+    "output": "In 2020, what is the sum of the top three contractual obligation terms?"
+  },
+  {
+    "input": "What was the number of stock awards that were granted in 2019, not including the PSUs granted?",
+    "output": "Excluding the PSUs, how many stock awards were granted in the year 2019?"
+  },
+  {
+    "input": "What was the number of stock awards that were granted in 2019, not including the PSUs granted?",
+    "output": "How many stock awards were granted in 2019, excluding the granted PSUs?"
+  },
+  {
+    "input": "What was the average fair value of the stock awards that were vested between 2017 and 2019 over a three-year period?",
+    "output": "What was the average fair value of stock awards vested over the 3 year period from 2017 to 2019?"
+  },
+  {
+    "input": "What was the intelligent cloud as a percentage of total revenue in 2019?",
+    "output": "What percentage of the total revenue in 2019 was attributed to the intelligent cloud?"
+  },
+  {
+    "input": "What was the intelligent cloud as a percentage of total revenue in 2019?",
+    "output": "What percentage of total revenue in 2019 was attributed to the intelligent cloud?"
+  },
+  {
+    "input": "How many revenue categories are there?",
+    "output": "How many different categories of revenue exist in the given context?"
+  },
+  {
+    "input": "What is the company's net loss in 2019?",
+    "output": "What was the net financial loss incurred by the company during the year 2019?"
+  },
+  {
+    "input": "What was the cumulative unrealized gain on marketable securities for the company from 2017 to 2019?",
+    "output": "What is the company's total unrealized gain on marketable securities between 2017 to 2019?"
+  },
+  {
+    "input": "What is the percentage difference in the total stock base compensation, categorized by type of award, from 2018 to 2019?",
+    "output": "What is the percentage change in total stock base compensation  by type of award between 2018 and 2019?"
+  },
+  {
+    "input": "What is the percentage change in the total stock base compensation for each type of award from 2018 to 2019?",
+    "output": "What is the percentage change in total stock base compensation  by type of award between 2018 and 2019?"
+  },
+  {
+    "input": "What is the cumulative amount of stock based compensation from 2017 through 2019?",
+    "output": "What is the total stock based compensation amount between 2017 to 2019?"
+  },
+  {
+    "input": "What is the sum of stock-based compensation expenses for the years 2017 to 2019?",
+    "output": "What is the total stock based compensation amount between 2017 to 2019?"
+  },
+  {
+    "input": "As of December 31, 2018, how many shares were granted and are currently outstanding?",
+    "output": "What is the number of outstanding shares granted as of December 31, 2018? "
+  },
+  {
+    "input": "As of December 31, 2018, how many shares were granted and still remain outstanding?",
+    "output": "What is the number of outstanding shares granted as of December 31, 2018? "
+  },
+  {
+    "input": "What was the change in the number of outstanding shares between December 31, 2018, and December 31, 2019?",
+    "output": "What is the difference in outstanding shares as of December 31, 2018 and 2019?"
+  },
+  {
+    "input": "What data is shown in the table?",
+    "output": "\"What specific information does the table display?\""
+  },
+  {
+    "input": "What is the units for the data shown in the table?",
+    "output": "What units are being used to represent the information presented in the table?"
+  },
+  {
+    "input": "What is the total revenue earned by the company between 2017 to 2019?",
+    "output": "What is the cumulative revenue generated by the company from 2017 to 2019?"
+  },
+  {
+    "input": "What is the exact amount of revenue generated in the EMEA region from 2017 to 2019?",
+    "output": "What is the total revenue earned in EMEA between 2017 to 2019?"
+  },
+  {
+    "input": "What is the change in total revenue between 2018 to 2019?",
+    "output": "What is the difference in total revenue from 2018 compared to 2019, and how can this information assist in better understanding the financial performance of the company during this period?"
+  },
+  {
+    "input": "What is the sum of all non-current liabilities excluding total other non-current liabilities as of 31 December 2019?",
+    "output": "What is the total other non-current liabilities as at 31 December 2019?"
+  },
+  {
+    "input": "What is the aggregate amount of all non-current liabilities, excluding any current liabilities, as of December 31, 2019?",
+    "output": "What is the total other non-current liabilities as at 31 December 2019?"
+  },
+  {
+    "input": "What is the depreciation expense on property and equipment as at December 31, 2018?",
+    "output": "What is the specific amount of the depreciation expense recorded for property and equipment on the financial statement as at the closing date of December 31, 2018?"
+  },
+  {
+    "input": "What is the depreciation expense on property and equipment as at December 31, 2018?",
+    "output": "What was the amount of depreciation expense booked for property and equipment up until December 31st, 2018?"
+  },
+  {
+    "input": "What is the depreciation expense on property and equipment as at December 31, 2019?",
+    "output": "As of December 31, 2019, what is the recorded depreciation expense amount for property and equipment?"
+  },
+  {
+    "input": "What is the depreciation expense on property and equipment as at December 31, 2019?",
+    "output": "What is the specific amount of depreciation expense recorded for property and equipment as on December 31, 2019?"
+  },
+  {
+    "input": "What is the total depreciation expense on property and equipment from 2017 to 2019?",
+    "output": "From 2017 to 2019, what was the cumulative amount of depreciation expense incurred on property and equipment?"
+  },
+  {
+    "input": "What is the total depreciation expense on property and equipment from 2017 to 2019?",
+    "output": "What is the cumulative depreciation expense on property and equipment for the years 2017, 2018, and 2019?"
+  },
+  {
+    "input": "What is the allowance for doubtful accounts as at December 31, 2018?",
+    "output": "What is the amount set aside for doubtful accounts on the balance sheet as of December 31, 2018?"
+  },
+  {
+    "input": "What are some examples of financial instruments that can subject the company to credit risk?",
+    "output": "What financial instruments expose companies to credit risk? Can you provide some examples of such instruments?"
+  },
+  {
+    "input": "What are some examples of financial instruments that can subject the company to credit risk?",
+    "output": "What financial instruments can expose a company to credit risk? Can you provide some examples?"
+  },
+  {
+    "input": "How does the company effectively manage and reduce its credit risk associated with accounts receivable?",
+    "output": "How does the company mitigate its credit risk pertaining to accounts receivable?"
+  },
+  {
+    "input": "How does the company identify and determine its important or major customers in its business operations?",
+    "output": "How does the company determine its significant customers?"
+  },
+  {
+    "input": "How does the company identify and assess its significant customers for business purposes?",
+    "output": "How does the company determine its significant customers?"
+  },
+  {
+    "input": "What was the monetary value of the company's losses within its own country during the year 2019?",
+    "output": "What is the company's domestic loss in 2019?"
+  },
+  {
+    "input": "\"What was the total financial loss incurred by the company within its own country during the year 2019?\"",
+    "output": "What is the company's domestic loss in 2019?"
+  },
+  {
+    "input": "What is the amount of money that the company lost within its own country during the year 2017?",
+    "output": "What is the company's domestic loss in 2017?"
+  },
+  {
+    "input": "\"What was the fiscal year 2017 domestic financial loss incurred by the company?\"",
+    "output": "What is the company's domestic loss in 2017?"
+  },
+  {
+    "input": "What is the company's total loss before income taxes between 2017 to 2019?",
+    "output": "What is the cumulative pre-tax loss of the company from 2017 to 2019?"
+  },
+  {
+    "input": "What is the company's change in foreign income between 2018 and 2019?",
+    "output": "What was the percentage change in the company's foreign income from 2018 to 2019?"
+  },
+  {
+    "input": "What is the cumulative amount of loss incurred by the company domestically during the period from 2017 to 2019?",
+    "output": "What is the company's total domestic loss between 2017 to 2019?"
+  },
+  {
+    "input": "What is the aggregate amount of losses incurred by the company within its domestic markets during the period from 2017 to 2019?",
+    "output": "What is the company's total domestic loss between 2017 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in total deferred revenue 2018 and 2019?",
+    "output": "What is the percentage difference in the total deferred revenue between 2018 and 2019?"
+  },
+  {
+    "input": "What is the percentage change in total deferred revenue 2018 and 2019?",
+    "output": "What is the percentage difference in the total amount of deferred revenue between the years 2018 and 2019?"
+  },
+  {
+    "input": "What is the proportion of the cost of revenue and research and development expenses as a percentage of the total restructuring expense?",
+    "output": "What is the ratio of the combined cost of revenue and research and development expenses to the total restructuring expense, expressed as a percentage?"
+  },
+  {
+    "input": "What is the total legal fees and facilities expenses?",
+    "output": "What are the combined expenses for legal fees and facilities?"
+  },
+  {
+    "input": "What is the total legal fees and facilities expenses?",
+    "output": "What is the cumulative sum of the expenses incurred in legal fees and facilities?"
+  },
+  {
+    "input": "What is the total increases related to tax positions taken during current year in 2018 and 2019?",
+    "output": "What is the cumulative increase in tax positions taken during the current year across 2018 and 2019?"
+  },
+  {
+    "input": "What is the total increases related to tax positions taken during current year in 2018 and 2019?",
+    "output": "What is the combined amount of increments in relation to tax positions that were assumed during the present year in both 2018 and 2019?"
+  },
+  {
+    "input": "As of 31st December 2018, what is the sum of all liabilities that have accumulated or been incurred by the company up to that date?",
+    "output": "What is the total accrued liabilities as at 31 December 2018?"
+  },
+  {
+    "input": "What are the units used to measure the values in the table?",
+    "output": "What is the units that the values in the table are measured in?"
+  },
+  {
+    "input": "What is the percentage increase or decrease in the total amount of accrued liabilities from 2018 to 2019?",
+    "output": "What is the percentage change in total accrued liabilities between 2018 and 2019?"
+  },
+  {
+    "input": "What is the percentage increase or decrease in the total amount of liabilities that have accumulated between the years 2018 and 2019?",
+    "output": "What is the percentage change in total accrued liabilities between 2018 and 2019?"
+  },
+  {
+    "input": "What is the total amount of long-lived asset in Japan in both 2018 and 2019?",
+    "output": "What is the total value of long-lived assets in Japan for the years 2018 and 2019 combined?"
+  },
+  {
+    "input": "What were the capitalized costs of software development in 2019?",
+    "output": "What was the Capitalized software development costs in 2019?"
+  },
+  {
+    "input": "What were the average net costs of software development for both the years 2018 and 2019?",
+    "output": "What was the average Software development costs, net for 2018 and 2019?"
+  },
+  {
+    "input": "What were the net average costs for software development in both 2018 and 2019?",
+    "output": "What was the average Software development costs, net for 2018 and 2019?"
+  },
+  {
+    "input": "What was the Cash and cash equivalents in 2019 and 2018 respectively?",
+    "output": "What were the cash and cash equivalents figures for the years 2019 and 2018, respectively?"
+  },
+  {
+    "input": "What was the Cash and cash equivalents in 2019 and 2018 respectively?",
+    "output": "What were the amounts of Cash and cash equivalents for the years 2019 and 2018, respectively?"
+  },
+  {
+    "input": "What are the average amounts of Restricted cash for the years 2018 and 2019?",
+    "output": "What is the average Restricted cash for 2018 and 2019?"
+  },
+  {
+    "input": "What was the Compensation and related benefits in 2019 and 2018 respectively?",
+    "output": "What were the compensation and benefits offered in 2019 and 2018?"
+  },
+  {
+    "input": "\"In what specific year did the expenses for professional and legal services amount to a value below 500 thousand dollars?\"",
+    "output": "In which year was Professional and legal fees less than 500 thousands?"
+  },
+  {
+    "input": "What was the percentage change in Compensation and related benefits from 2018 to 2019?",
+    "output": "What is the change in the Compensation and related benefits from 2018 to 2019?"
+  },
+  {
+    "input": "What were the average professional and legal fees in 2018 and 2019?",
+    "output": "What was the average Professional and legal fees for 2018 and 2019?"
+  },
+  {
+    "input": "What were the factors that led to the decrease in net pool allocation? Can you provide an explanation and further details regarding the decline in net pool allocation?",
+    "output": "What accounted for the decrease in net pool allocation?"
+  },
+  {
+    "input": "What are the components of vessel operating and supervision costs?",
+    "output": "What are the key components that make up the costs associated with vessel operations and supervision?"
+  },
+  {
+    "input": "How much did the technical maintenance expenses differ between 2017 and 2018?",
+    "output": "What was the change in technical maintenance expenses from 2017 to 2018?"
+  },
+  {
+    "input": "What was the exact monetary difference in technical maintenance expenses between the years 2017 and 2018?",
+    "output": "What was the change in technical maintenance expenses from 2017 to 2018?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the combined costs of operating and supervising vessels from 2018 to 2019?",
+    "output": "What was the percentage change in total vessel operating and supervision costs from 2018 to 2019?"
+  },
+  {
+    "input": "Why does the Group choose to retain ownership of the trade receivables instead of selling them or assigning them to a third party?",
+    "output": "Why does the Group hold the trade receivables?"
+  },
+  {
+    "input": "What does accrued income represent?",
+    "output": "\"What is the meaning and significance of accrued income?\""
+  },
+  {
+    "input": "What does accrued income represent?",
+    "output": "What is the meaning and significance of accrued income?"
+  },
+  {
+    "input": "What was the change in trade receivables from 2018 to 2019?",
+    "output": "What was the difference in the amount of trade receivables between 2018 and 2019, indicating whether it increased or decreased?"
+  },
+  {
+    "input": "What was the exact percentage increase or decrease in the total amount of money owed to a company from 2018 to 2019?",
+    "output": "What was the percentage change in total receivables from 2018 to 2019?"
+  },
+  {
+    "input": "In which years was the fair value of the derivative assets recorded for?",
+    "output": "For which specific years were the fair values of the derivative assets recorded? Please provide all relevant information regarding the years for accurate response."
+  },
+  {
+    "input": "In which specific year did the fair value of interest rate swaps experience an increase compared to other years?",
+    "output": "In which year was the fair value of  Interest rate swaps  higher?"
+  },
+  {
+    "input": "When was the fair value of interest rate swaps higher: in which specific year did the fair value of interest rate swaps surpass that of other years?",
+    "output": "In which year was the fair value of  Interest rate swaps  higher?"
+  },
+  {
+    "input": "In which year did the company record the highest amount of expenses pertaining to share-based compensation?",
+    "output": "In which year was the total expense recognized in respect of share-based compensation the highest?"
+  },
+  {
+    "input": "What was the highest year in terms of total recognized expense for share-based compensation?",
+    "output": "In which year was the total expense recognized in respect of share-based compensation the highest?"
+  },
+  {
+    "input": "What was the difference in the amount of money earned by The Cool Pool Limited in 2017 compared to 2018?",
+    "output": "What was the change in revenues from The Cool Pool Limited  from 2017 to 2018?"
+  },
+  {
+    "input": "What was the exact difference in the amount of money earned by The Cool Pool Limited in the year 2017 compared to the year 2018?",
+    "output": "What was the change in revenues from The Cool Pool Limited  from 2017 to 2018?"
+  },
+  {
+    "input": "For which years were the movements in tangible fixed assets and vessels under construction documented?",
+    "output": "In which years was the movements in tangible fixed assets and vessels under construction recorded for?"
+  },
+  {
+    "input": "What was the percentage change in the cost of vessels from the end of 2018 to the end of 2019?",
+    "output": "What was the change in vessel cost from end 2018 to end 2019?"
+  },
+  {
+    "input": "In which year was the other non-current assets recorded for?",
+    "output": "What is the specific year in which the recording of non-current assets other than the current ones occurred?"
+  },
+  {
+    "input": "In which year did the value of long-term assets, other than the current year, surpass that of the current year?",
+    "output": "In which year was the other long-term assets higher?"
+  },
+  {
+    "input": "What was the percentage change in the total value of other non-current assets from the year 2018 to the year 2019?",
+    "output": "What was the percentage change in total other non-current assets from 2018 to 2019?"
+  },
+  {
+    "input": "For which years were the earnings per share (EPS) or losses per share (LPS) recorded?",
+    "output": "In which years was the earnings/(losses) per share recorded for?"
+  },
+  {
+    "input": "What was the amount of ship management client accounts in 2018?",
+    "output": "How many ship management client accounts were there in the year 2018?"
+  },
+  {
+    "input": "What was the change in current accounts  from 2018 to 2019?",
+    "output": "What is the difference between the current accounts in 2018 and 2019?"
+  },
+  {
+    "input": "\"What was the specific amount of compensation or payment given to individuals or employees in the year 2018?\"",
+    "output": "What was the remuneration in 2018?"
+  },
+  {
+    "input": "What was the amount of compensation received in 2018?",
+    "output": "What was the remuneration in 2018?"
+  },
+  {
+    "input": "Which year was the short-term benefits the highest?",
+    "output": "What was the peak year for short-term benefits?"
+  },
+  {
+    "input": "Which year was the short-term benefits the highest?",
+    "output": "In which year did the short-term benefits reach their peak level?"
+  },
+  {
+    "input": "For which specific years were the costs of constructing the vessels recorded?",
+    "output": "In which years was the vessels under construction costs recorded for?"
+  },
+  {
+    "input": "For which years were the recorded construction costs of the vessels?",
+    "output": "In which years was the vessels under construction costs recorded for?"
+  },
+  {
+    "input": "What is the significance or meaning behind vessels that are currently being constructed?",
+    "output": "What does vessels under construction represent?"
+  },
+  {
+    "input": "In which year were the installment payments for the progress shipyard lower compared to other years?",
+    "output": "Which year was the progress shipyard installments lower?"
+  },
+  {
+    "input": "In which specific year did the progress shipyard experience a decrease in the number of installments being made?",
+    "output": "Which year was the progress shipyard installments lower?"
+  },
+  {
+    "input": "What was the difference in the average daily hire rate between 2017 and 2018?",
+    "output": "What was the change in average daily hire rate from 2017 to 2018?"
+  },
+  {
+    "input": "What is the change in pool gross revenues from 2017 to 2018?",
+    "output": "What was the difference in the total revenue generated from pools in 2017 compared to 2018?"
+  },
+  {
+    "input": "What is the change in pool gross revenues from 2017 to 2018?",
+    "output": "What is the difference in total gross revenues generated by the pool in 2017 compared to 2018?"
+  },
+  {
+    "input": "In which years was the other payables and accruals recorded for?",
+    "output": "For which years was the recording of other payables and accruals made?"
+  },
+  {
+    "input": "How many vessels were hired for employment in the year 2018?",
+    "output": "How many vessels were hired in 2018?"
+  },
+  {
+    "input": "In 2018, what was the total number of vessels that were hired?",
+    "output": "How many vessels were hired in 2018?"
+  },
+  {
+    "input": "In which year was the accrued interest higher?",
+    "output": "\"In which year did the accumulated interest amount exceed the interest accrued in other years?\""
+  },
+  {
+    "input": "What is the percentage difference in the combined value of total payables and accruals between 2018 and 2019?",
+    "output": "What was the percentage change in total payables and  accruals from 2018 to 2019?"
+  },
+  {
+    "input": "What was the difference in the ratio of a company's long-term debt to its equity from 2018 to 2019?",
+    "output": "What was the change in gearing ratio from 2018 to 2019?"
+  },
+  {
+    "input": "In 2019, how many operating days were there and what was the total revenue generated during those days?",
+    "output": "What was the total revenue operating days in 2019?"
+  },
+  {
+    "input": "How many operating days were there in 2019 and what was the total revenue generated during those days?",
+    "output": "What was the total revenue operating days in 2019?"
+  },
+  {
+    "input": "What was the change in average daily hire rate from 2018 to 2019?",
+    "output": "What was the specific difference in the average rate of daily hiring between 2018 and 2019? How did the average daily hire rate change from one year to the other?"
+  },
+  {
+    "input": "\"What was the specific monetary value of the cash distributions made in the year 2019 for common units?\"",
+    "output": "What was the amount of cash distributions in 2019 for common units?"
+  },
+  {
+    "input": "How much did the size of the ATM Programme change?",
+    "output": "What was the change in size of the ATM Programme?"
+  },
+  {
+    "input": "\"When was the specific year when the general and administrative expenses were officially documented or accounted for?\"",
+    "output": "In which year was the general and administrative expenses recorded for?"
+  },
+  {
+    "input": "Where does the management intend to relocate its employees?",
+    "output": "\"What are the specific plans of the management regarding the relocation of its employees?\""
+  },
+  {
+    "input": "What was the selling price of Methane Julia Louise?",
+    "output": "How much was Methane Julia Louise sold for?"
+  },
+  {
+    "input": "Which company was the vessel sold to?",
+    "output": "To which company was the vessel sold? Please provide the name of the company that purchased the vessel."
+  },
+  {
+    "input": "Which company was the vessel sold to?",
+    "output": "\"To which company was the vessel sold?\""
+  },
+  {
+    "input": "What was the percentage change in total right-of use assets  from start to end 2019?",
+    "output": "What was the percentage change in the total value of right-of-use assets between the beginning and end of 2019?"
+  },
+  {
+    "input": "How much was the change in the repayment amount for the bank loan?",
+    "output": "By how much did the bank loan repayment change?"
+  },
+  {
+    "input": "What year experienced a decrease in audit fees compared to previous years?",
+    "output": "In which year was the audit fees lower?"
+  },
+  {
+    "input": "What was the percentage change in audit fees between 2018 and 2019?",
+    "output": "What was the change in audit fees from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in audit fees from the year 2018 to the year 2019?",
+    "output": "What was the change in audit fees from 2018 to 2019?"
+  },
+  {
+    "input": "What are the components subjected to credit risk?",
+    "output": "What are the various components that are exposed to the risk of credit?"
+  },
+  {
+    "input": "What is the significance of bunkers' consumption and other voyage expenses in relation to overall expenses for a voyage?",
+    "output": "What does bunkers’ consumption and other voyage expenses represent?"
+  },
+  {
+    "input": "What is the highest year for brokers' commissions on revenue?",
+    "output": "In which year was the brokers' commissions on revenue the highest?"
+  },
+  {
+    "input": "What was the change in bunkers’ consumption and other voyage expenses from 2018 to 2019?",
+    "output": "What was the difference in the consumption of bunkers and other voyage expenses between 2018 and 2019?"
+  },
+  {
+    "input": "What is the specific monetary value that The Cool Pool Limited is required to pay in 2018?",
+    "output": "What is the amount due from The Cool Pool Limited in 2018?"
+  },
+  {
+    "input": "In which years was the dividends receivable and other amounts due from related parties recorded for?",
+    "output": "For which years were the recorded dividends receivable and other amounts due from related parties?"
+  },
+  {
+    "input": "In which years was the dividends receivable and other amounts due from related parties recorded for?",
+    "output": "In which specific years were the dividends receivable and other outstanding amounts due from related parties recorded?"
+  },
+  {
+    "input": "In which years was the investments recorded for?",
+    "output": "For which specific years were the investments recorded?"
+  },
+  {
+    "input": "What was the numerical difference in the number of additions made between 2018 and 2019?",
+    "output": "What was the change in additions from 2018 to 2019?"
+  },
+  {
+    "input": "What was the difference in the number of additions between 2018 and 2019?",
+    "output": "What was the change in additions from 2018 to 2019?"
+  },
+  {
+    "input": "What were the specific Level 2 municipal and corporate bonds held on December 31, 2018?",
+    "output": "What are the respective Level 2 municipal and corporate bonds as at December 31, 2018?"
+  },
+  {
+    "input": "What is the value of municipal bonds as a percentage of the total marketable securities?",
+    "output": "What proportion of the overall marketable securities is represented by municipal bonds?"
+  },
+  {
+    "input": "What are the respective goodwill amounts in 2015 and 2016?",
+    "output": "What were the goodwill amounts in 2015 and 2016, respectively?"
+  },
+  {
+    "input": "What were the average amounts of total cash, cash equivalents, and marketable securities in the years 2015 and 2016?",
+    "output": "What is the average total cash, cash equivalents, and marketable securities in 2015 and 2016?"
+  },
+  {
+    "input": "What is the proportion of shares bought in November out of the total shares purchased in the last three months of 2019?",
+    "output": "What percentage of the total shares purchased in the last three months of 2019 are bought in November?"
+  },
+  {
+    "input": "What is the proportion of shares bought in November out of the total shares purchased during the last three months of 2019?",
+    "output": "What percentage of the total shares purchased in the last three months of 2019 are bought in November?"
+  },
+  {
+    "input": "What is the percentage change in revenue from term license between 2018 and 2019?",
+    "output": "What is the percent increase or decrease in revenue from term license for the year 2019 compared to 2018?"
+  },
+  {
+    "input": "What is the percentage increase or decrease in revenue generated from maintenance services from 2018 to 2019?",
+    "output": "What is the percentage change in revenue from maintenance between 2018 and 2019?"
+  },
+  {
+    "input": "What were the pre-tax losses in 2018 and 2019 for the company?",
+    "output": "What are the respective loss before income tax benefits in 2018 and 2019?"
+  },
+  {
+    "input": "What were the pre-tax domestic and foreign earnings in 2017?",
+    "output": "What are the respective domestic and foreign income before income taxes in 2017?"
+  },
+  {
+    "input": "\"What were the pre-tax incomes in 2017, separated by domestic and foreign sources?\"",
+    "output": "What are the respective domestic and foreign income before income taxes in 2017?"
+  },
+  {
+    "input": "\"What were the total losses incurred in the domestic market and international markets in 2019, excluding any income tax benefits received?\"",
+    "output": "What are the respective domestic and foreign losses before income tax benefits in 2019?"
+  },
+  {
+    "input": "What is the sum of all outstanding receivables that are yet to be invoiced, and are expected to be collected within a period of two years?",
+    "output": "What is the total unbilled receivables due within 2 years?"
+  },
+  {
+    "input": "What is the sum of all outstanding amounts receivable that have not been invoiced yet, and are expected to be collected within the next 1 to 5 years?",
+    "output": "What is the total unbilled receivables due between 1 to 5 years?"
+  },
+  {
+    "input": "What is the cumulative value of unrealized billings that are yet to be accounted for and are expected to be collected within a period ranging from 1 to 5 years?",
+    "output": "What is the total unbilled receivables due between 1 to 5 years?"
+  },
+  {
+    "input": "What is the difference between the company's federal and state net operating losses?",
+    "output": "What differentiates the federal and state net operating losses of the company?"
+  },
+  {
+    "input": "What were the amounts that the company is authorised to repurchase in 2019 and 2018 respectively?",
+    "output": "What were the authorized repurchase amounts for the company in both 2019 and 2018?"
+  },
+  {
+    "input": "What were the respective amounts of stock repurchases made by the company as at January 1, 2018 and 2019 respectively?",
+    "output": "What were the specific amounts of stock repurchases made by the company as of January 1, 2018 and January 1, 2019, respectively?"
+  },
+  {
+    "input": "What was the average amount of stock repurchases as of January 1st in both 2017 and 2018?",
+    "output": "What is the average stock repurchases as at January 1, 2017 and 2018?"
+  },
+  {
+    "input": "What is the average amount of stock repurchases as of January 1, 2017, and January 1, 2018?",
+    "output": "What is the average stock repurchases as at January 1, 2017 and 2018?"
+  },
+  {
+    "input": "What is the exact percentage difference in the amount of long-term deferred revenue from 2018 to 2019?",
+    "output": "What is the percentage change in long-term deferred revenue between 2018 and 2019?"
+  },
+  {
+    "input": "What are the respective values for Term in 2018 and 2019?",
+    "output": "What are the values for Term in the year 2018? What are the values for Term in the year 2019?"
+  },
+  {
+    "input": "What are the company's respective stock-based compensation for  cost of revenues in 2019 and 2018?",
+    "output": "What were the stock-based compensation expenses for the cost of revenues incurred by the company in both 2019 and 2018?"
+  },
+  {
+    "input": "What are the company's respective stock-based compensation for  cost of revenues in 2019 and 2018?",
+    "output": "\"What were the stock-based compensation expenses for cost of revenues in 2019 and 2018 for the company?\""
+  },
+  {
+    "input": "What is the average amount of stock-based compensation for the cost of revenue that the company has incurred from 2017 to 2019?",
+    "output": "What is the company's average stock-based compensation for the cost of revenue between 2017 to 2019?"
+  },
+  {
+    "input": "What is the average amount of stock-based compensation granted by the company for the selling and marketing department in both 2018 and 2019?",
+    "output": "What is the company's average stock-based compensation for selling and marketing in 2018 and 2019?"
+  },
+  {
+    "input": "\"What was the average amount of stock-based compensation granted to employees in the selling and marketing department of the company in both 2018 and 2019?\"",
+    "output": "What is the company's average stock-based compensation for selling and marketing in 2018 and 2019?"
+  },
+  {
+    "input": "What is the percentage change in revenue earned from term license from 2015 to 2016?",
+    "output": "What is the percentage change in the revenue earned from term license between 2015 and 2016?"
+  },
+  {
+    "input": "What is the total revenue earned from maintenance in 2017 and 2018?",
+    "output": "What is the combined revenue generated from maintenance services in the years 2017 and 2018?"
+  },
+  {
+    "input": "What is the total revenue earned from maintenance in 2017 and 2018?",
+    "output": "What was the combined revenue generated from maintenance services in the years 2017 and 2018?"
+  },
+  {
+    "input": "How did actuarial calculations impact the resulting changes in parameters?",
+    "output": "What did the changes in parameters on the basis of actuarial calculations lead to?"
+  },
+  {
+    "input": "What was the precise amount of the actual return on plan assets in the fiscal year 2019?",
+    "output": "How much did the actual return on plan assets amount to in FY2019?"
+  },
+  {
+    "input": "What items or expenses are typically excluded from the expected payments?",
+    "output": "What is not included in expected payments?"
+  },
+  {
+    "input": "Explain what expenses or items are typically not encompassed within the estimated payments.",
+    "output": "What is not included in expected payments?"
+  },
+  {
+    "input": "What was the change in interest income in FY2019 from FY2018?",
+    "output": "What was the difference in the amount of income generated from interest in the fiscal year 2019 compared to the fiscal year 2018?"
+  },
+  {
+    "input": "What was the change in interest income in FY2019 from FY2018?",
+    "output": "What was the specific difference in interest income between the fiscal year 2019 and the fiscal year 2018?"
+  },
+  {
+    "input": "In which year did Asia experience a higher year-on-year percentage change in GDP compared to 2018 during the period from 2018 to 2019?",
+    "output": "In which year was the Year-on-year percentage change of GDP in Asia larger from 2018 to 2019?"
+  },
+  {
+    "input": "What was the change in the year-on-year percentage change in GDP for Germany from 2018 to 2019?",
+    "output": "What was the difference in the percentage change of Germany's GDP from one year to the next, comparing 2018 and 2019?"
+  },
+  {
+    "input": "In which year did the total sum of taxes collected exceed the amount collected in all other years?",
+    "output": "In which year was the total amount of taxes larger?"
+  },
+  {
+    "input": "What was the exact difference in the total amount of taxes paid in the time period from 2018 to 2019 compared to the time period from 2017 to 2018?",
+    "output": "What was the change in total taxes in  2018/2019 from 2017/2018?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the total amount of taxes in 2018/2019 compared to 2017/2018?",
+    "output": "What was the percentage change in total taxes in  2018/2019 from 2017/2018?"
+  },
+  {
+    "input": "What factors or criteria are used as a foundation for the measurements being taken?",
+    "output": "What is the basis for the measurements?"
+  },
+  {
+    "input": "What is the fundamental basis or principle used to determine the measurements being referred to in the context?",
+    "output": "What is the basis for the measurements?"
+  },
+  {
+    "input": "In which specific year did the amount for Belgium surpass or exceed the previous value?",
+    "output": "In which year was the amount for Belgium larger?"
+  },
+  {
+    "input": "In which specific year did the total amount (not specified) exceed the amount for Belgium?",
+    "output": "In which year was the amount for Belgium larger?"
+  },
+  {
+    "input": "What was the amount of cash and cash equivalents in FY2019?",
+    "output": "How much cash and cash equivalents were reported in the financial statements for the fiscal year 2019?"
+  },
+  {
+    "input": "What was the exact percentage difference in Cash and cash equivalents between FY2018 and FY2019 after considering adjustments?",
+    "output": "What was the percentage change in Cash and cash equivalents in FY2019 from FY2018 adjusted?"
+  },
+  {
+    "input": "What was the calculated percentage difference in Cash and cash equivalents between FY2019 and FY2018, after adjusting for any relevant factors?",
+    "output": "What was the percentage change in Cash and cash equivalents in FY2019 from FY2018 adjusted?"
+  },
+  {
+    "input": "In which specific year did the absolute value of the Earnings per share exceed the value in another year?",
+    "output": "In which year was the absolute value of the Earnings per share larger?"
+  },
+  {
+    "input": "\"What specific factor was used to calculate the net cost of the current service?\"",
+    "output": "What was the current service cost netted against?"
+  },
+  {
+    "input": "\"In which specific year did the total sales exceed the sales of other years in terms of maximum revenue generated?\"",
+    "output": "In which year was the Total sales larger?"
+  },
+  {
+    "input": "In what year did the headcount in METRO Asia exceed the corresponding headcount in other years?",
+    "output": "In which year was the amount in METRO Asia headcount larger?"
+  },
+  {
+    "input": "What was the year-over-year change in the number of employees at METRO AG from 2018 to 2019?",
+    "output": "What was the change in METRO AG headcount in 2019 from 2018?"
+  },
+  {
+    "input": "With which companies did METRO maintain business relations with, and who were the parties involved in these relations?",
+    "output": "Who were the parties to which METRO maintained business relations with related companies?"
+  },
+  {
+    "input": "What were the business relations to related companies listed in the table?",
+    "output": "What were the business relations between the listed companies in the table and other related entities or organizations?"
+  },
+  {
+    "input": "What was the difference in the amount and type of services received in the fiscal year 2019 compared to the fiscal year 2018?",
+    "output": "What was the change in services received in FY2019 from FY2018?"
+  },
+  {
+    "input": "What is the exact percentage change in the number of services received during fiscal year 2019 compared to fiscal year 2018?",
+    "output": "What was the percentage change in services received in FY2019 from FY2018?"
+  },
+  {
+    "input": "What was the exact percentage difference in the quantity of services received during the fiscal year 2019 compared to the fiscal year 2018?",
+    "output": "What was the percentage change in services received in FY2019 from FY2018?"
+  },
+  {
+    "input": "In what format is the cash flow statement presented in the table?",
+    "output": "What version is the cash flow statement in the table in?"
+  },
+  {
+    "input": "What specific expenses related to income tax were officially acknowledged in the year 2019?",
+    "output": "What are the recognised income tax expenses in 2019?"
+  },
+  {
+    "input": "What was the percentage change in recognised income tax expenses in 2018/2019 from 2017/2018?",
+    "output": "What was the percentage increase or decrease in recognised income tax expenses for the financial year 2018/2019 compared to the financial year 2017/2018?"
+  },
+  {
+    "input": "What was the percentage change in recognised income tax expenses in 2018/2019 from 2017/2018?",
+    "output": "What was the percentage increase or decrease in recognised income tax expenses for the fiscal year 2018/2019 compared to the fiscal year 2017/2018?"
+  },
+  {
+    "input": "What was the change in EBT in 2018/2019 from 2017/2018?",
+    "output": "What was the difference in EBT (Earnings Before Taxes) between 2018 and 2019 when compared to 2017 and 2018?"
+  },
+  {
+    "input": "What does the other operating income include?",
+    "output": "What type of income is included under \"other operating income\"? Please provide a comprehensive explanation of the various sources that fall under this category."
+  },
+  {
+    "input": "In which specific year did the amount of Services surpass the quantity in other years?",
+    "output": "In which year was the amount of Services larger?"
+  },
+  {
+    "input": "During which year did the quantity of services surpass the quantity in previous years?",
+    "output": "In which year was the amount of Services larger?"
+  },
+  {
+    "input": "What was the exact amount of equity during the fiscal year 2019?",
+    "output": "What was the amount of equity in FY2019?"
+  },
+  {
+    "input": "What proportion of the total earnings in FY2019 can be attributed to the Noncontrolling interests?",
+    "output": "How much of earnings does the Noncontrolling interests account for in FY2019?"
+  },
+  {
+    "input": "What proportion of the FY2019 earnings is represented by the Noncontrolling interests account?",
+    "output": "How much of earnings does the Noncontrolling interests account for in FY2019?"
+  },
+  {
+    "input": "What was the exact numerical difference in sales between FY2019 and FY2018?",
+    "output": "What was the change in Sales in FY2019 from FY2018?"
+  },
+  {
+    "input": "What was the difference in the total amount of sales between fiscal year 2019 and fiscal year 2018?",
+    "output": "What was the change in Sales in FY2019 from FY2018?"
+  },
+  {
+    "input": "When were the calculations of employee numbers by segments performed?",
+    "output": "When were the employee numbers by segments calculated?"
+  },
+  {
+    "input": "What were the specific segments or categories mentioned under the \"METRO\" component in the table that accounted for the number of employees?",
+    "output": "What were the components under METRO in the table when accounting for the employee numbers by segments?"
+  },
+  {
+    "input": "What was the change in METRO AG in 2019 from 2018?",
+    "output": "What was the year-on-year change in METRO AG's performance in 2019 compared to 2018?"
+  },
+  {
+    "input": "What was the percentage difference in METRO AG's performance in 2019 compared to 2018?",
+    "output": "What was the percentage change in METRO AG in 2019 from 2018?"
+  },
+  {
+    "input": "What was the percentage difference in the performance of METRO AG in 2019 compared to 2018?",
+    "output": "What was the percentage change in METRO AG in 2019 from 2018?"
+  },
+  {
+    "input": "In which year was the amount of total assets larger?",
+    "output": "\"When comparing the amount of total assets, which year had a larger value?\""
+  },
+  {
+    "input": "What was the percentage change in total assets in FY2019 from FY2018?",
+    "output": "What was the percentage increase or decrease in the total assets for the fiscal year 2019 compared to the total assets for the fiscal year 2018?"
+  },
+  {
+    "input": "What is the source of revenue that compensates for the expenses incurred in providing logistics services?",
+    "output": "What is offset by income from logistics services?"
+  },
+  {
+    "input": "What are the components under Other operating expenses in the table?",
+    "output": "What specific components are included in the category of \"Other operating expenses\" as mentioned in the table?"
+  },
+  {
+    "input": "What are the components under Other operating expenses in the table?",
+    "output": "What specifically are the individual components included in the category of Other operating expenses in the provided table?"
+  },
+  {
+    "input": "What was the change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?",
+    "output": "\"What is the difference in losses resulting from the sale or disposal of fixed assets between the fiscal years 2017/2018 and 2018/2019?\""
+  },
+  {
+    "input": "What was the change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?",
+    "output": "What was the difference in the amount of losses incurred from selling fixed assets between the fiscal years 2018/2019 and 2017/2018?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in losses resulting from the disposal of fixed assets during the period of 2018/2019 compared to the period of 2017/2018?",
+    "output": "What was the percentage change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?"
+  },
+  {
+    "input": "What was the percentage change in losses incurred from the disposal of fixed assets in the fiscal year 2018/2019 compared to the fiscal year 2017/2018?",
+    "output": "What was the percentage change in Losses from the disposal of fixed assets in 2018/2019 from 2017/2018?"
+  },
+  {
+    "input": "What was the change in interest carry-forwards in FY2019 from FY2018?",
+    "output": "What was the difference in the amount of interest carry-forwards between the fiscal years of 2019 and 2018?"
+  },
+  {
+    "input": "What are the primary elements within the table that are utilized to determine the outcome of the interest calculation?",
+    "output": "What are the broad components in the table which are used to calculate the interest result?"
+  },
+  {
+    "input": "What are the various broad components included in the table that are utilized for calculating the outcome of interest?",
+    "output": "What are the broad components in the table which are used to calculate the interest result?"
+  },
+  {
+    "input": "What was the percentage change in the amount of income earned from interest during the period between 2018 and 2019 compared to the previous period between 2017 and 2018?",
+    "output": "What was the percentage change in interest income in 2018/2019 from 2017/2018?"
+  },
+  {
+    "input": "In which specific year did the total sum of taxes other than general administrative expenses exceed the amount incurred in those administrative expenses?",
+    "output": "In which year was the amount of other taxes thereof from general administrative expenses larger?"
+  },
+  {
+    "input": "In which years were the reserves retained from earnings calculated in?",
+    "output": "During which specific years were the calculations for retained reserves from earnings conducted?"
+  },
+  {
+    "input": "In which years were the reserves retained from earnings calculated in?",
+    "output": "During which specific years were the calculations for retaining reserves from earnings carried out?"
+  },
+  {
+    "input": "What was the change in the Income tax on components of other comprehensive income in FY2019 from FY2018?",
+    "output": "What was the difference in the income tax related to the components of other comprehensive income from fiscal year 2018 to fiscal year 2019?"
+  },
+  {
+    "input": "What was the change in operating income in the Communications Solutions segment in 2019?",
+    "output": "What was the specific amount of increase or decrease in operating income for the Communications Solutions segment during the year 2019?"
+  },
+  {
+    "input": "What was the change in operating income in the Communications Solutions segment in 2019?",
+    "output": "What was the percentage change in operating income specifically for the Communications Solutions segment during the year 2019 compared to the previous year?"
+  },
+  {
+    "input": "Why did operating income decrease in fiscal 2019?",
+    "output": "\"What were the reasons for the decrease in operating income during fiscal year 2019?\""
+  },
+  {
+    "input": "Why did operating income decrease in fiscal 2019?",
+    "output": "What were the reasons for the decrease in operating income in fiscal 2019?"
+  },
+  {
+    "input": "In which year did the volume or quantity of raw materials reach its highest point?",
+    "output": "In which year was Raw materials larger?"
+  },
+  {
+    "input": "In which specific year did the total volume or quantity of raw materials experience a greater increase or reach a higher value compared to previous years?",
+    "output": "In which year was Raw materials larger?"
+  },
+  {
+    "input": "In which specific years was the calculation of the Repurchase value conducted?",
+    "output": "Which years was the Repurchase value calculated in?"
+  },
+  {
+    "input": "What was the exact difference in the total number of common shares repurchased in the year 2019 compared to 2018?",
+    "output": "What was the change in the Number of common shares repurchased in 2019 from 2018?"
+  },
+  {
+    "input": "What was the calculation for determining the percentage change in the quantity of common shares repurchased during the year 2019 in comparison to 2018?",
+    "output": "What was the percentage change in the Number of common shares repurchased in 2019 from 2018?"
+  },
+  {
+    "input": "What are the specific categories or divisions for which the net sales figures are displayed in the provided table?",
+    "output": "What are the segments for which the net sales are presented in the table?"
+  },
+  {
+    "input": "What year did Industrial Solutions experience a larger size or growth compared to other years?",
+    "output": "In which year was Industrial Solutions larger?"
+  },
+  {
+    "input": "What specific changes occurred in the Industrial Solutions sector between 2018 and 2019?",
+    "output": "What was the change in Industrial Solutions in 2019 from 2018?"
+  },
+  {
+    "input": "In 2019, what specific changes occurred in the Industrial Solutions sector compared to the previous year of 2018?",
+    "output": "What was the change in Industrial Solutions in 2019 from 2018?"
+  },
+  {
+    "input": "In which years was the basic earnings per share calculated for?",
+    "output": "For which specific years was the metric of basic earnings per share calculated and used for evaluation?"
+  },
+  {
+    "input": "In which years was the basic earnings per share calculated for?",
+    "output": "For which years was the calculation of basic earnings per share carried out? Please provide specific details in your answer."
+  },
+  {
+    "input": "What was the change in Dilutive impact of share-based compensation arrangements in 2019 from 2018?",
+    "output": "What was the difference in the dilutive impact of share-based compensation arrangements between 2018 and 2019?"
+  },
+  {
+    "input": "What was the year-on-year percentage change in the dilutive impact caused by share-based compensation arrangements in 2019 compared to 2018?",
+    "output": "What was the percentage change in Dilutive impact of share-based compensation arrangements in 2019 from 2018?"
+  },
+  {
+    "input": "What was the exact percentage change in the dilutive impact of share-based compensation arrangements between 2018 and 2019?",
+    "output": "What was the percentage change in Dilutive impact of share-based compensation arrangements in 2019 from 2018?"
+  },
+  {
+    "input": "In which specific year did the percentage of industrial solutions reach its lowest point?",
+    "output": "In which year was the percentage of industrial solutions the lowest in?"
+  },
+  {
+    "input": "What was the percentage change in the Weighted-Average Grant-Date Fair Value for nonvested shares between 2018 and 2019?",
+    "output": "What was the change in the Weighted-Average Grant-Date Fair Value for nonvested shares in 2019 from 2018?"
+  },
+  {
+    "input": "In the table, what segments were considered when calculating the backlog?",
+    "output": "What were the segments for which backlog was calculated in the table?"
+  },
+  {
+    "input": "For which segments in the table was the backlog calculated?",
+    "output": "What were the segments for which backlog was calculated in the table?"
+  },
+  {
+    "input": "What was the change in total backlog in 2019 from 2018?",
+    "output": "What was the difference in the total backlog between 2019 and 2018?"
+  },
+  {
+    "input": "What was the percentage difference in the total amount of unfinished work in 2019 compared to 2018?",
+    "output": "What was the percentage change in total backlog in 2019 from 2018?"
+  },
+  {
+    "input": "In which specific year did the amount spent on acquisition and integration costs exceed any other year?",
+    "output": "In which year was Acquisition and integration costs larger?"
+  },
+  {
+    "input": "In which specific year did the expenses incurred for acquisition and integration costs exceed the expenses in other years?",
+    "output": "In which year was Acquisition and integration costs larger?"
+  },
+  {
+    "input": "What are the components under U.S. in the table?",
+    "output": "Which components are listed under the U.S. category in the table?"
+  },
+  {
+    "input": "What are the different categories of Income Tax Expense (Benefit) mentioned in the table?",
+    "output": "What are the types of Income Tax Expense (Benefit) in the table?"
+  },
+  {
+    "input": "In which specific year did Non-U.S. experience the highest income tax expense (benefit)?",
+    "output": "Which year was the current income tax expense (benefit) for Non-U.S. the largest?"
+  },
+  {
+    "input": "In which year did the Non-U.S. current income tax expense (benefit) reach its highest amount?",
+    "output": "Which year was the current income tax expense (benefit) for Non-U.S. the largest?"
+  },
+  {
+    "input": "What was the change in Current income tax expense (benefit) in 2019 from 2018?",
+    "output": "What was the difference in Current income tax expense (benefit) between 2019 and 2018?"
+  },
+  {
+    "input": "What is the company's policy regarding the allocation and management of funds?",
+    "output": "What is the company's funding policy?"
+  },
+  {
+    "input": "What is the minimum amount of contributions that individuals are required to make to the pension plan outside of the United States?",
+    "output": "How much minimum required contributions is expected to be made to the non-U.S. pension plan?"
+  },
+  {
+    "input": "Which fiscal year between 2020 and 2024 will witness the highest benefit payments under the United States plans?",
+    "output": "In which Fiscal year from 2020 to 2024 would the benefit payments under the U.S Plans be the largest?"
+  },
+  {
+    "input": "During which fiscal year from 2020 to 2024 will the benefit payments under the United States Plans reach their maximum extent?",
+    "output": "In which Fiscal year from 2020 to 2024 would the benefit payments under the U.S Plans be the largest?"
+  },
+  {
+    "input": "What is typically not included in the net earnings of companies operating outside the United States?",
+    "output": "What do the Non-U.S. net earnings exclude?"
+  },
+  {
+    "input": "What is excluded from the net earnings of countries other than the U.S. and why?",
+    "output": "What do the Non-U.S. net earnings exclude?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the expense for U.S. federal income tax at the statutory rate in 2019 compared to 2018?",
+    "output": "What was the percentage change in Notional U.S. federal income tax expense at the statutory rate in 2019 from 2018?"
+  },
+  {
+    "input": "What are the specific elements or factors taken into account when calculating Restructuring and Other Charges, Net?",
+    "output": "What are the components considered under Restructuring and Other Charges, Net?"
+  },
+  {
+    "input": "When did the lowest value of net restructuring charges occur?",
+    "output": "In which year was Restructuring charges, net the lowest?"
+  },
+  {
+    "input": "When was the year with the lowest net restructuring charges recorded?",
+    "output": "In which year was Restructuring charges, net the lowest?"
+  },
+  {
+    "input": "What was the change in Restructuring charges, net in 2019 from 2018?",
+    "output": "What was the difference in the amount of Restructuring charges, net between 2019 and 2018?"
+  },
+  {
+    "input": "What was the change in Restructuring charges, net in 2019 from 2018?",
+    "output": "What was the net change in restructuring charges in 2019 compared to 2018?"
+  },
+  {
+    "input": "In 2019, what was the percentage difference in Restructuring charges, net compared to 2018?",
+    "output": "What was the percentage change in Restructuring charges, net in 2019 from 2018?"
+  },
+  {
+    "input": "What specific regions do the net sales to external customers correspond to?",
+    "output": "What are the net sales by geographic region to external customers attributed to?"
+  },
+  {
+    "input": "In which specific year did the percentage in Americas reach its highest point compared to other years?",
+    "output": "In which year was the percentage in Americas the largest?"
+  },
+  {
+    "input": "What is the specific year when the largest percentage was recorded in the Americas?",
+    "output": "In which year was the percentage in Americas the largest?"
+  },
+  {
+    "input": "What was the average net sales in Asia-Pacific as a percentage of total net sales across 2017, 2018 and 2019?",
+    "output": "What was the average proportion of net sales generated in the Asia-Pacific region, compared to the total net sales, for the years 2017, 2018, and 2019?"
+  },
+  {
+    "input": "What was the average net sales in Asia-Pacific as a percentage of total net sales across 2017, 2018 and 2019?",
+    "output": "What was the average percentage of total net sales in the Asia-Pacific region for the years 2017, 2018, and 2019 combined?"
+  },
+  {
+    "input": "What was the percentage change in the amount of US dollar-denominated long-term debt from 2018 to 2019?",
+    "output": "What was the increase / (decrease) in US dollar-denominated long-term debt from 2018 to 2019?"
+  },
+  {
+    "input": "What is the increase/ (decrease) in Numerator (basic) – Net income from 2018 to 2019?",
+    "output": "What is the change in Numerator (basic) – Net income from 2018 to 2019?"
+  },
+  {
+    "input": "What is the increase/ (decrease) in Numerator (basic) – Net income from 2018 to 2019?",
+    "output": "What is the absolute change in Numerator (basic) – Net income from the year 2018 to the year 2019?"
+  },
+  {
+    "input": "What is the percentage change in Basic Earnings per share from 2018 to 2019?",
+    "output": "What is the increase/ (decrease) in Basic Earnings per share from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in Diluted Earnings per share from 2018 to 2019?",
+    "output": "What is the increase/ (decrease) in Diluted Earnings per share from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in Diluted Earnings per share (EPS) between 2018 and 2019?",
+    "output": "What is the increase/ (decrease) in Diluted Earnings per share from 2018 to 2019?"
+  },
+  {
+    "input": "What was the specific percentage of taxes paid in the year 2019, taking into account all applicable taxes and deductions?",
+    "output": "What was the effective tax rate in 2019?"
+  },
+  {
+    "input": "What was the average Computed income tax expense?",
+    "output": "What was the average expense for income tax computed?"
+  },
+  {
+    "input": "What was the percentage change in the amount of interest on borrowings from 2018 to 2019?",
+    "output": "What was the increase / (decrease) in Interest on borrowings from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in Interest on borrowings from 2018 to 2019?",
+    "output": "What was the increase / (decrease) in Interest on borrowings from 2018 to 2019?"
+  },
+  {
+    "input": "What was the increase / (decrease) in the Finance costs before interest on lease liabilities from 2018 to 2019?",
+    "output": "What was the change in the Finance costs before interest on lease liabilities from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in Adjusted EBITDA between 2018 and 2019?",
+    "output": "What was the increase / (decrease) in Adjusted EBITDA from 2018 to 2019?"
+  },
+  {
+    "input": "What was the average Depreciation and amortization?",
+    "output": "What was the average amount of depreciation and amortization expenses incurred over a specific period?"
+  },
+  {
+    "input": "What factors contributed to the overall rise in both depreciation and amortization expenses?",
+    "output": "What caused the increase in the total depreciation and amortization?"
+  },
+  {
+    "input": "What was the Depreciation of right-of-use assets in 2019?",
+    "output": "What was the amount of depreciation recorded for right-of-use assets during the year 2019?"
+  },
+  {
+    "input": "What was the Depreciation of right-of-use assets in 2019?",
+    "output": "\"What was the amount of depreciation for right-of-use assets recorded in the financial statements for the year 2019?\""
+  },
+  {
+    "input": "What was the increase / (decrease) in the Depreciation from 2018 to 2019?",
+    "output": "What was the net change in Depreciation, either an increase or decrease, between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the increase / (decrease) in the Depreciation from 2018 to 2019?",
+    "output": "What was the difference in the Depreciation amount from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in the total depreciation and amortization from 2018 to 2019?",
+    "output": "What was the increase / (decrease) in the Total depreciation and amortization from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in Total depreciation and amortization from 2018 to 2019?",
+    "output": "What was the increase / (decrease) in the Total depreciation and amortization from 2018 to 2019?"
+  },
+  {
+    "input": "What was the change in the coupon rate between November 12, 2019, and April 30, 2019?",
+    "output": "What was the increase / (decrease) in the coupon rate from November 12, 2019 to April 30, 2019?"
+  },
+  {
+    "input": "What was the change in the coupon rate from November 12, 2019 to April 30, 2019?",
+    "output": "What was the increase / (decrease) in the coupon rate from November 12, 2019 to April 30, 2019?"
+  },
+  {
+    "input": "What was the percentage change in the Equivalent (Cdn$) value from 2018 to November 2019?",
+    "output": "What was the increase / (decrease) in the Equivalent (Cdn$) between 2018 and November 2019?"
+  },
+  {
+    "input": "What is the percentage change in the amount of interest on borrowings from 2018 to 2019?",
+    "output": "What is the increase/ (decrease) in Interest on borrowings from 2018 to 2019?"
+  },
+  {
+    "input": "By what percentage did the Interest on borrowings change from 2018 to 2019? Please state whether there was an increase or decrease.",
+    "output": "What is the increase/ (decrease) in Interest on borrowings from 2018 to 2019?"
+  },
+  {
+    "input": "What is the purpose of utilizing valuation-related analysis and making capital structure-related decisions?",
+    "output": "What is used for to conduct valuation-related analysis and make capital structure-related decisions?"
+  },
+  {
+    "input": "What items are considered when calculating the adjusted net debt?",
+    "output": "What is included in the adjusted net debt?"
+  },
+  {
+    "input": "In which context or industry is the valuation of debt derivatives without incorporating credit risk used as an evaluation tool?",
+    "output": "Where is debt derivatives valued without adjustment for credit risk used for evaluation?"
+  },
+  {
+    "input": "What is the net change in long-term debt from December 31, 2018, to December 31, 2019?",
+    "output": "What is the increase/ (decrease) in Long-term debt from December 31, 2018 to December 31, 2019?"
+  },
+  {
+    "input": "What is the change in the amount of Long-term debt from December 31, 2018 to December 31, 2019?",
+    "output": "What is the increase/ (decrease) in Long-term debt from December 31, 2018 to December 31, 2019?"
+  },
+  {
+    "input": "What is the change in the value of net debt derivative assets without credit risk adjustment from December 31, 2018 to December 31, 2019?",
+    "output": "What is the increase/ (decrease) in Net debt derivative assets valued without any adjustment for credit risk from December 31, 2018 to December 31, 2019?"
+  },
+  {
+    "input": "What is the method of measuring accounts receivable and what factors are considered in the measurement process?",
+    "output": "How is accounts receivable measured?"
+  },
+  {
+    "input": "What is the percentage change in the amount of customer accounts receivable from 2018 to 2019?",
+    "output": "What is the increase/ (decrease) in Customer accounts receivable from 2018 to 2019?"
+  },
+  {
+    "input": "\"What is the method employed to settle the principal amount of Convertible Notes in cash upon conversion, and how does it factor into the computation of diluted earnings?\"",
+    "output": "Which method is used for to settle the principal of the Convertible Notes in cash on conversion and calculates diluted earnings?"
+  },
+  {
+    "input": "What are the consequences when a loss is incurred that can be attributed to shareholders?",
+    "output": "What happens if a loss attributable to shareholders has been incurred?"
+  },
+  {
+    "input": "What was the Convertible Notes that had an anti-dilutive effect on the calculation of diluted earnings per common share in the year end 2019, 2018 and 2017 respectively?",
+    "output": "What were the Convertible Notes that impacted diluted earnings per common share calculation in 2019, 2018, and 2017, specifically in terms of their anti-dilutive effects?"
+  },
+  {
+    "input": "What is the change in Loss per common share - basic and diluted from 2018 to 2019?",
+    "output": "What is the difference in the loss per common share - basic and diluted between 2018 and 2019?"
+  },
+  {
+    "input": "What is the change in Loss per common share - basic and diluted from 2018 to 2019?",
+    "output": "What is the difference in basic and diluted Loss per common share between 2018 and 2019?"
+  },
+  {
+    "input": "How are the recognition processes for realized and unrealized losses different?",
+    "output": "How are the realized and unrealized losses recognized?"
+  },
+  {
+    "input": "How are both realized and unrealized losses recognized? Please provide a detailed explanation of the recognition process for each type of loss.",
+    "output": "How are the realized and unrealized losses recognized?"
+  },
+  {
+    "input": "In which specific year did the Gain on sale or write-down of a cost-accounted investment amount to less than 1,000 thousands?",
+    "output": "In which year is the Gain on sale / (write-down) of cost-accounted investment less than 1,000 thousands?"
+  },
+  {
+    "input": "\"In which fiscal year did the Gain on sale or write-down of a cost-accounted investment amount to less than 1,000 thousands (1 million)?\"",
+    "output": "In which year is the Gain on sale / (write-down) of cost-accounted investment less than 1,000 thousands?"
+  },
+  {
+    "input": "What were the total fees paid by Teekay Tankers to KPMG LLP in 2019 and 2018, respectively?",
+    "output": "What fees was paid to KPMG LLP by Teekay Tankers during 2019 and 2018 respectively?"
+  },
+  {
+    "input": "What were the fees paid by Teekay Tankers to KPMG LLP in 2019 and 2018?",
+    "output": "What fees was paid to KPMG LLP by Teekay Tankers during 2019 and 2018 respectively?"
+  },
+  {
+    "input": "What are the components or factors included in the income tax expense calculation?",
+    "output": "What is included in the income tax expense?"
+  },
+  {
+    "input": "What items are incorporated into the income tax expense?",
+    "output": "What is included in the income tax expense?"
+  },
+  {
+    "input": "What is the difference in the number of positions taken in previous years between December 31, 2019, and December 31, 2018?",
+    "output": "What is the change in Changes for positions taken in prior years from December 31, 2019 to December 31, 2018?"
+  },
+  {
+    "input": "What is the net change in the number of positions taken in prior years from December 31, 2019 to December 31, 2018?",
+    "output": "What is the change in Changes for positions taken in prior years from December 31, 2019 to December 31, 2018?"
+  },
+  {
+    "input": "What led to increase in Consolidated net cash flow from operating activities for the year ended December 31, 2019?",
+    "output": "What factors contributed to the rise in Consolidated net cash flow from operating activities during the year ending on December 31, 2019?"
+  },
+  {
+    "input": "How much did Brookfield receive from the sale of its interests in Altera in 2019?",
+    "output": "How much was received from Brookfield for the sale of interests in Altera during 2019?"
+  },
+  {
+    "input": "What is the change in Net financing cash flows from Year Ended December 31, 2019 to December 31, 2018?",
+    "output": "What is the difference in the amount of cash generated or used for financing activities in the financial statements for the years ending December 31, 2019 and December 31, 2018?"
+  },
+  {
+    "input": "What is the change in Net financing cash flows from Year Ended December 31, 2019 to December 31, 2018?",
+    "output": "What is the difference in net cash flows from financing activities between the fiscal years ended on December 31, 2019 and December 31, 2018?"
+  },
+  {
+    "input": "What is the change in Net (loss) income from Year Ended December 31, 2019 to December 31, 2018?",
+    "output": "What was the difference in the Net (loss) income between the fiscal years ending on December 31, 2019 and December 31, 2018?"
+  },
+  {
+    "input": "What is the change in Net (loss) income from Year Ended December 31, 2019 to December 31, 2018?",
+    "output": "What is the difference in the amount of net (loss) income between the fiscal year ending on December 31, 2019, and the fiscal year ending on December 31, 2018?"
+  },
+  {
+    "input": "What is the change in Income tax expense (recovery) from Year Ended December 31, 2019 to December 31, 2018?",
+    "output": "What is the difference in the amount of income tax expense or recovery reported for the fiscal year ended on December 31, 2019, compared to the fiscal year ended on December 31, 2018?"
+  },
+  {
+    "input": "What is the change in Income tax expense (recovery) from Year Ended December 31, 2019 to December 31, 2018?",
+    "output": "How does the income tax expense (recovery) change between the years ended December 31, 2019 and December 31, 2018?"
+  },
+  {
+    "input": "What is the sum of all minimum lease payments expected to be received in the year 2019?",
+    "output": "What was the total Total minimum lease payments to be received in 2019？"
+  },
+  {
+    "input": "What is the total sum of minimum lease payments that were expected to be received in the year 2019?",
+    "output": "What was the total Total minimum lease payments to be received in 2019？"
+  },
+  {
+    "input": "What is the change in Initial direct costs and other from December 31, 2019 to December 31, 2018?",
+    "output": "How did the initial direct costs and other expenses change from December 31, 2018, to December 31, 2019?"
+  },
+  {
+    "input": "When did Teekay sell to Brookfield all of the Company’s remaining interests in Altera?",
+    "output": "\"When was the complete sale of Teekay's remaining holdings in Altera to Brookfield executed?\""
+  },
+  {
+    "input": "When did Teekay sell to Brookfield all of the Company’s remaining interests in Altera?",
+    "output": "\"When did Teekay sell its remaining interests in Altera to Brookfield?\""
+  },
+  {
+    "input": "What were the Fair values at the start of the year for 2019 and 2018, respectively? Please provide the Fair value figures for both years separately.",
+    "output": "What was the Fair value at the beginning of the year for 2019 and 2018 respectively?"
+  },
+  {
+    "input": "In which year was Fair value at the beginning of the year less than 15,000 thousands?",
+    "output": "In which year did the fair value at the beginning of the year fall below 15,000,000 (15 million) dollars?"
+  },
+  {
+    "input": "\"What items are typically included in the calculation of current income tax expense, and what factors can affect this expense?\"",
+    "output": "What is included in current income tax expense?"
+  },
+  {
+    "input": "What components are typically encompassed in the calculation of contemporary income tax expense?",
+    "output": "What is included in current income tax expense?"
+  },
+  {
+    "input": "What is the change in Current income tax expense from December 31, 2019 to December 31, 2018?",
+    "output": "How much has the Current income tax expense changed from December 31, 2019, to December 31, 2018?"
+  },
+  {
+    "input": "What is the change in Current income tax expense from December 31, 2019 to December 31, 2018?",
+    "output": "What was the difference in Current income tax expense between December 31, 2019 and December 31, 2018?"
+  },
+  {
+    "input": "\"Between two years, which one exhibits a higher expense for current income tax?\"",
+    "output": "Which year has higher current income tax expense?"
+  },
+  {
+    "input": "What were the Lease and Non-Lease commitments in the year 2021, and can you provide specific details about each commitment?",
+    "output": "What was the Lease and Non-Lease commitment in 2021 respectively?"
+  },
+  {
+    "input": "What was the amount of lease commitment for the fiscal year 2022?",
+    "output": "What was the Lease commitment in 2022?"
+  },
+  {
+    "input": "What was the total value of lease commitments in the year 2022?",
+    "output": "What was the Lease commitment in 2022?"
+  },
+  {
+    "input": "In which year was the Lease commitment less than 10,000 thousands?",
+    "output": "In which specific year did the Lease commitment amount to less than 10,000 thousand units?"
+  },
+  {
+    "input": "In which year was the Lease commitment less than 10,000 thousands?",
+    "output": "In which specific year did the Lease commitment amount fall below 10,000 thousands? (For better answering, please state the year and the unit of the Lease commitment.)"
+  },
+  {
+    "input": "What is the difference in the lease commitment amount between 2020 and 2021?",
+    "output": "What is the change in the Lease commitment from 2020 to 2021?"
+  },
+  {
+    "input": "What is the average duration of lease agreements from 2020 to 2022?",
+    "output": "What is the average Lease Commitment from 2020 to 2022?"
+  },
+  {
+    "input": "What is the average duration of lease commitments between 2020 and 2022?",
+    "output": "What is the average Lease Commitment from 2020 to 2022?"
+  },
+  {
+    "input": "How much was the equity loss for the year ended December 31, 2019?",
+    "output": "What was the amount of equity lost during the period from January 1 to December 31, 2019?"
+  },
+  {
+    "input": "How much was the equity loss for the year ended December 31, 2019?",
+    "output": "What was the total amount of equity loss recorded for the period of the year ending December 31, 2019?"
+  },
+  {
+    "input": "What is the difference in the combined amount of cash and restricted cash on December 31, 2019, compared to December 31, 2018?",
+    "output": "What is the change in Cash and restricted cash from December 31, 2019 to December 31, 2018?"
+  },
+  {
+    "input": "What was the increase or decrease in the category of Other assets – current on the company's balance sheet from December 31, 2019 to December 31, 2018?",
+    "output": "What is the change in Other assets – current from December 31, 2019 to December 31, 2018?"
+  },
+  {
+    "input": "What were the values of the other current assets in the years 2019 and 2018?",
+    "output": "What was the other assets - current in 2019 and 2018 respectively?"
+  },
+  {
+    "input": "What were the respective amounts of non-current liabilities in 2019 and 2018 aside from the liabilities mentioned earlier?",
+    "output": "What was the other liabilities - non current in 2019 and 2018 respectively?"
+  },
+  {
+    "input": "What were the amounts of non-current liabilities in 2019 and 2018, respectively, excluding any other liabilities that may be present on the financial statements?",
+    "output": "What was the other liabilities - non current in 2019 and 2018 respectively?"
+  },
+  {
+    "input": "What is the difference in the number of Consolidated LNG carriers between 2020 and 2021?",
+    "output": "What is the change in Consolidated LNG carriers from 2020 to 2021?"
+  },
+  {
+    "input": "What is the net difference in the number of Consolidated LNG carriers between 2020 and 2021?",
+    "output": "What is the change in Consolidated LNG carriers from 2020 to 2021?"
+  },
+  {
+    "input": "What is the annual growth or decline in the number of Consolidated LNG carriers from 2021 to 2022?",
+    "output": "What is the change in Consolidated LNG carriers from 2021 to 2022?"
+  },
+  {
+    "input": "What is the average number of Consolidated LNG carriers for the period from 2020 to 2021?",
+    "output": "What is the average Consolidated LNG carriers, for the year 2020 to 2021?"
+  },
+  {
+    "input": "What is the average number of Consolidated LNG carriers between the years 2020 and 2021?",
+    "output": "What is the average Consolidated LNG carriers, for the year 2020 to 2021?"
+  },
+  {
+    "input": "What is the change in Realized (losses) gains relating to Interest rate swap agreements from Year Ended December 31, 2019 to December 31, 2018?",
+    "output": "What was the difference in Realized (losses) gains associated with Interest rate swap agreements between December 31, 2019 and December 31, 2018?"
+  },
+  {
+    "input": "What is the difference in Realized losses and gains from Forward freight agreements between Year Ended December 31, 2019 and December 31, 2018?",
+    "output": "What is the change in Realized (losses) gains relating to Forward freight agreements from Year Ended December 31, 2019 to December 31, 2018?"
+  },
+  {
+    "input": "What was the difference in revenue between 2019 and 2018?",
+    "output": "What is the change in Revenues from, 2019 to 2018?"
+  },
+  {
+    "input": "What is the change in Voyage expenses from, 2019 to 2018?",
+    "output": "What is the difference in expenses for voyages comparing 2019 to 2018?"
+  },
+  {
+    "input": "How is the expense of share-based compensation quantified and evaluated?",
+    "output": "How is share-based compensation expense measured?"
+  },
+  {
+    "input": "How is the expense related to share-based compensation quantified and determined?",
+    "output": "How is share-based compensation expense measured?"
+  },
+  {
+    "input": "What are the various types of share-based compensation plans in existence?",
+    "output": "What are the types of share-based compensation plans?"
+  },
+  {
+    "input": "By what percentage did the stock-based compensation expense for general and administrative operations decrease from 2018 to 2019?",
+    "output": "How much did the stock-based compensation expense in the general and administrative operations decreased from 2018 to 2019?"
+  },
+  {
+    "input": "What is the ending outstanding number of vested RSUs in 2019?",
+    "output": "What is the final count of vested Restricted Stock Units (RSUs) at the conclusion of 2019?"
+  },
+  {
+    "input": "What is the ending outstanding number of vested RSUs in 2019?",
+    "output": "How many RSUs were left as outstanding in the year 2019 once they had vested?"
+  },
+  {
+    "input": "What was the exact amount of depreciation expense recorded in the financial statements for the year 2018?",
+    "output": "What was the depreciation expense in 2018?"
+  },
+  {
+    "input": "What are the accepted accounting methods for handling and recording disposed assets?",
+    "output": "What are the accounting treatments for disposed assets?"
+  },
+  {
+    "input": "What are the accounting procedures to be followed specifically for assets that have been disposed of?",
+    "output": "What are the accounting treatments for disposed assets?"
+  },
+  {
+    "input": "What is the net total for property, plant and equipment in 2019?",
+    "output": "What is the total value of property, plant, and equipment after deducting accumulated depreciation in 2019?"
+  },
+  {
+    "input": "What is the net total for property, plant and equipment in 2019?",
+    "output": "What is the total value of property, plant, and equipment after deducting accumulated depreciation and any impairment losses, as reported in the financial statements for the year 2019?"
+  },
+  {
+    "input": "What is the percentage decrease in the net value of Property, Plant, and Equipment between 2018 and 2019?",
+    "output": "What is the percentage decrease in Net Total Property, Plant and Equipment from 2018 to 2019?"
+  },
+  {
+    "input": "How much was the increase in Federal R&D tax credits from 2018 to 2019?",
+    "output": "How much did R&D tax credits from the Federal increased from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage increase in R&D tax credits provided by the Federal government from 2018 to 2019?",
+    "output": "How much did R&D tax credits from the Federal increased from 2018 to 2019?"
+  },
+  {
+    "input": "\"What was the amount recorded as a long-term liability in the financial statements for the year 2018?\"",
+    "output": "How much was reflected as a long-term liability in 2018?"
+  },
+  {
+    "input": "What is the percentage difference in Net deferred tax assets from 2018 to 2019?",
+    "output": "What was the percentage change in Net deferred tax assets between 2018 and 2019?"
+  },
+  {
+    "input": "What was the total cost of the service provided in the year 2018?",
+    "output": "What was the service cost in 2018?"
+  },
+  {
+    "input": "What was the cost of the service provided in the year 2018?",
+    "output": "What was the service cost in 2018?"
+  },
+  {
+    "input": "What was the difference in the cost of services from 2018 to 2019?",
+    "output": "What was the change in service cost between 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in interest cost between 2018 and 2019?",
+    "output": "What was the difference in the amount of money spent on interest between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in interest cost between 2018 and 2019?",
+    "output": "How did the interest cost change from 2018 to 2019, and what is the difference between the two years?"
+  },
+  {
+    "input": "What was the change in balances at the end of period between 2018 and 2019?",
+    "output": "What is the difference in balances at the end of the period between 2018 and 2019, and what caused this change?"
+  },
+  {
+    "input": "What was the specific monetary value or total sum allocated for Machinery and equipment in the year 2018?",
+    "output": "What was the amount for Machinery and equipment in 2018?"
+  },
+  {
+    "input": "\"What was the specific monetary value allocated to Machinery and equipment in the year 2018?\"",
+    "output": "What was the amount for Machinery and equipment in 2018?"
+  },
+  {
+    "input": "What was the change in Net cash provided by (used in) operating activities from continuing operations between 2018 and 2019?",
+    "output": "What was the difference in Net cash provided by (used in) operating activities from continuing operations between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the difference in the amount of Net cash provided by (used in) operating activities between the years 2018 and 2019?",
+    "output": "What was the change in Net cash provided by (used in) operating activities between 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in Income (loss) from discontinued operations, net of income taxes between 2018 and 2019?",
+    "output": "What was the percentage change in net income (loss) from discontinued operations, after accounting for income taxes, between the fiscal years 2018 and 2019?"
+  },
+  {
+    "input": "Which periods does the company's effective tax rates differ from the U.S. federal statutory rate?",
+    "output": "\"During which specific time periods does the effective tax rate of the company deviate from the U.S. federal statutory rate?\""
+  },
+  {
+    "input": "What was the specific framework or arrangement regarding Federal taxes in the year 2018, including any applicable laws, regulations, rates, and deductions?",
+    "output": "What was the current provision for Federal taxes in 2018?"
+  },
+  {
+    "input": "What were the existing federal tax regulations and provisions for the year 2018?",
+    "output": "What was the current provision for Federal taxes in 2018?"
+  },
+  {
+    "input": "What was the specific provision made for foreign taxes in the year 2019?",
+    "output": "What was the current provision for Foreign taxes in 2019?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the fair value of intangible assets from 2018 to 2019?",
+    "output": "What was the percentage change in the fair value of intangible assets between 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in Pro Forma total sales between 2018 and 2019?",
+    "output": "What was the difference in the total sales of Pro Forma from 2018 to 2019?"
+  },
+  {
+    "input": "What was the change in Pro Forma total sales between 2018 and 2019?",
+    "output": "What was the difference in Pro Forma total sales from 2018 to 2019?"
+  },
+  {
+    "input": "What was the exact fair value of the property and equipment owned by LumaSense?",
+    "output": "What was the fair value of  Property and equipment from LumaSense?"
+  },
+  {
+    "input": "What was the fair value of Property and equipment acquired by LumaSense?",
+    "output": "What was the fair value of  Property and equipment from LumaSense?"
+  },
+  {
+    "input": "What was the difference in the assumed exercise of dilutive stock options and restricted stock units between the years 2018 and 2019?",
+    "output": "What was the change in Assumed exercise of dilutive stock options and restricted stock units between 2018 and 2019?"
+  },
+  {
+    "input": "What was the disparity in the fair value of cash paid for acquisition between Trek and the Electrostatic Product Line?",
+    "output": "What was the difference in the fair value of cash paid for acquisition between Trek and Electrostatic Product Line?"
+  },
+  {
+    "input": "What was the diluted earnings per share (EPS) specifically for Continuing Operations for the quarter ending in September?",
+    "output": "What was the diluted earnings per share of Continuing Operations in Quarter Ended  September?"
+  },
+  {
+    "input": "What was the diluted earnings per share of Continuing Operations in the quarter ending in September?",
+    "output": "What was the diluted earnings per share of Continuing Operations in Quarter Ended  September?"
+  },
+  {
+    "input": "What was the total Income (loss) from discontinued operations, net of income taxes in 2019?",
+    "output": "In 2019, what was the net income (loss) from discontinued operations, adjusted for income taxes?"
+  },
+  {
+    "input": "What was the difference in total operating expense between the fiscal years 2018 and 2019?",
+    "output": "What was the change in total operating expense between 2018 and 2019?"
+  },
+  {
+    "input": "What was the difference in the amount of money spent on operational expenses from 2018 to 2019?",
+    "output": "What was the change in total operating expense between 2018 and 2019?"
+  },
+  {
+    "input": "How much money was owed to the company for goods or services provided in 2019 but not yet invoiced?",
+    "output": "What was the amount of unbilled receivables in 2019?"
+  },
+  {
+    "input": "What were the precise amounts that were billed during the year 2018 after considering all deductions, fees, and payments?",
+    "output": "What were the net amounts billed in 2018?"
+  },
+  {
+    "input": "What was the change in unbilled receivables between 2018 and 2019?",
+    "output": "What was the difference in the amount of unbilled receivables from 2018 to 2019?"
+  },
+  {
+    "input": "What was the difference in the total amount billed between 2018 and 2019?",
+    "output": "What was the change in net amounts billed between 2018 and 2019?"
+  },
+  {
+    "input": "What was the accumulated amortization of customer relationships in 2018?",
+    "output": "\"What was the total amount of accumulated amortization recorded for customer relationships during the year 2018?\""
+  },
+  {
+    "input": "What were the top two segments with the highest net carrying amounts in the year 2018?",
+    "output": "What was the two highest net carrying amounts segments in 2018?"
+  },
+  {
+    "input": "What were the two segments with the highest net carrying amounts in 2018?",
+    "output": "What was the two highest net carrying amounts segments in 2018?"
+  },
+  {
+    "input": "How much did the service cost in 2019 and can you provide more details about the specific service being referred to?",
+    "output": "What was service cost in 2019?"
+  },
+  {
+    "input": "What was the total amount spent on interest expenses in the year 2018?",
+    "output": "What was interest cost in 2018?"
+  },
+  {
+    "input": "What was the expected rate of return on plan assets for the year 2017?",
+    "output": "What was the Expected return on plan assets in 2017?"
+  },
+  {
+    "input": "What was the anticipated return on assets for the plan in 2017?",
+    "output": "What was the Expected return on plan assets in 2017?"
+  },
+  {
+    "input": "What was the change in Amortization of actuarial gains and losses between 2017 and 2018?",
+    "output": "What was the difference in the amount of amortization of actuarial gains and losses recorded between the years 2017 and 2018?"
+  },
+  {
+    "input": "What was the change in Amortization of actuarial gains and losses between 2017 and 2018?",
+    "output": "What was the difference in the amount of Amortization of actuarial gains and losses between the fiscal years 2017 and 2018?"
+  },
+  {
+    "input": "What were the specific expenses incurred for severance payments and any associated costs during the year 2019?",
+    "output": "What were the severance and related charges in 2019?"
+  },
+  {
+    "input": "What was the amount of severance and associated expenses recorded as charges in the year 2019?",
+    "output": "What were the severance and related charges in 2019?"
+  },
+  {
+    "input": "What was the change in Severance and related charges between 2018 and 2019?",
+    "output": "What was the year-on-year difference in the amount spent on Severance and related charges between 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in Severance and related charges between 2018 and 2019?",
+    "output": "What was the difference in the amount of Severance and related charges incurred in 2018 compared to 2019?"
+  },
+  {
+    "input": "What is the proportion of facility relocation and closure charges in the total amount of restructuring charges incurred in 2019?",
+    "output": "What percentage of total restructuring charges in 2019 consisted of Facility relocation and closure charges?"
+  },
+  {
+    "input": "What was the contrast between the combined assets in the Level 1 and Level 2 categories for the year 2019?",
+    "output": "What was the difference in the total assets in the Level 1 Level 2 categories for 2019?"
+  },
+  {
+    "input": "What was the difference between the total assets in the Level 1 and Level 2 categories for the year 2019?",
+    "output": "What was the difference in the total assets in the Level 1 Level 2 categories for 2019?"
+  },
+  {
+    "input": "What was the combined value of the top three assets in the Level 2 Category for the year 2019?",
+    "output": "What was the total of the three highest assets in the Level 2 Category for 2019?"
+  },
+  {
+    "input": "What percentage of total assets across all categories consisted of assets from the Level One category for 2019?",
+    "output": "What was the proportion or share, in terms of percentage, of Level One assets out of the total assets across all categories for the year 2019?"
+  },
+  {
+    "input": "What was the specific value or monetary figure of long-lived assets in the United States during the year 2019?",
+    "output": "What was the amount of long-lived assets in United States in 2019?"
+  },
+  {
+    "input": "What was the net difference in the value of long-term assets in the United States from 2018 to 2019?",
+    "output": "What was the change in the amount of long-lived assets between 2018 and 2019 in United States?"
+  },
+  {
+    "input": "\"What was the combined value of the two longest-lasting assets in the year 2018?\"",
+    "output": "What is the sum of the highest two long-lived assets in 2018?"
+  },
+  {
+    "input": "\"What is the total value, in monetary terms, of the two long-lived assets with the highest worth that were recorded in the year 2018?\"",
+    "output": "What is the sum of the highest two long-lived assets in 2018?"
+  },
+  {
+    "input": "What was the initial estimated value of property and equipment as of December 31, 2019 in relation to their market worth?",
+    "output": "What was the Preliminary fair value of property and equipment in December 31, 2019?"
+  },
+  {
+    "input": "What was the initial estimated value of property and equipment as of December 31, 2019, before any adjustments or final calculations were made?",
+    "output": "What was the Preliminary fair value of property and equipment in December 31, 2019?"
+  },
+  {
+    "input": "What was the initial estimated value assigned to inventories as of September 10th, 2019?",
+    "output": "What was the Preliminary fair value of inventories in September 10, 2019?"
+  },
+  {
+    "input": "\"What was the initial estimated market value of the inventories as of September 10, 2019?\"",
+    "output": "What was the Preliminary fair value of inventories in September 10, 2019?"
+  },
+  {
+    "input": "In 2019, what was the specific amount of revenue that was recognized for product sales and the provision of services at a particular point in time?",
+    "output": "What was the Product and service revenue recognized at point in time in 2019?"
+  },
+  {
+    "input": "What specific extended warranty and service contracts were acknowledged and recorded throughout the year of 2018?",
+    "output": "What was the Extended warranty and service contracts recognized over time in 2018?"
+  },
+  {
+    "input": "What were the specific extended warranty and service contracts that gained recognition throughout the year 2018?",
+    "output": "What was the Extended warranty and service contracts recognized over time in 2018?"
+  },
+  {
+    "input": "What was the change in Product and service revenue recognized at point in time between 2018 and 2019?",
+    "output": "What was the specific difference in revenue from the sales of products and services recognized at a specific moment between the years 2018 and 2019?"
+  },
+  {
+    "input": "What were the sales figures for the Industrial & Medical sectors in the year 2017?",
+    "output": "What was the sales for Industrial & Medical in 2017?"
+  },
+  {
+    "input": "What were the sales figures specifically for Industrial & Medical sectors in the year 2017?",
+    "output": "What was the sales for Industrial & Medical in 2017?"
+  },
+  {
+    "input": "What was the percentage change in sales of Semiconductor Equipment from 2017 to 2018?",
+    "output": "What was the change in sales of Semiconductor Equipment between 2017 and 2018?"
+  },
+  {
+    "input": "What were the income taxes for the 2018 tax year, based on the federal statutory tax rates?",
+    "output": "What was the Income taxes per federal statutory rate in 2018?"
+  },
+  {
+    "input": "What was the State income taxes, net of federal deduction in 2017?",
+    "output": "In 2017, what was the amount of state income taxes paid after deducting the federal deduction?"
+  },
+  {
+    "input": "What were the specific modifications that occurred in the GILTI Tax legislation from 2018 to 2019?",
+    "output": "What was the change in GILTI Tax between 2018 and 2019?"
+  },
+  {
+    "input": "What was the difference in the GILTI Tax rates from 2018 to 2019, and how did the change impact tax calculations?",
+    "output": "What was the change in GILTI Tax between 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in Unremitted earnings between 2018 and 2019?",
+    "output": "What was the difference in Unremitted earnings from 2018 to 2019? Please provide the specific amount of change in Unremitted earnings between these two years."
+  },
+  {
+    "input": "What was the percentage difference in Withholding taxes from 2018 to 2019?",
+    "output": "What was the percentage change in Withholding taxes between 2018 and 2019?"
+  },
+  {
+    "input": "What is the estimated rate of forfeiture?",
+    "output": "What was the forfeiture rate estimated to be?"
+  },
+  {
+    "input": "What is total non-cash compensation expense related to stock-based awards in 2019?",
+    "output": "\"What was the amount of expenses incurred in 2019 for stock-based awards as a form of total non-cash compensation?\""
+  },
+  {
+    "input": "What is the range of years covered in the table?",
+    "output": "What are the years included in the table?"
+  },
+  {
+    "input": "\"What is the final balance as of December 31, 2019?\"",
+    "output": "What is the balance at end of year for 2019?"
+  },
+  {
+    "input": "In which specific markets does the company focus its marketing efforts and aim to capture a significant share of customers?",
+    "output": "Which markets does the company target?"
+  },
+  {
+    "input": "What specific types of research and development (R&D) activities are sponsored by companies? Please provide a comprehensive list of these activities as outlined in the table.",
+    "output": "What are the types of company-sponsored R&D activities in the table?"
+  },
+  {
+    "input": "What types of research and development (R&D) activities supported by companies are listed in the table?",
+    "output": "What are the types of company-sponsored R&D activities in the table?"
+  },
+  {
+    "input": "What was the difference in the total expenditure on research and development sponsored by the company from 2018 to 2019?",
+    "output": "What is the change in the total company-sponsored research and development expense in 2019 from 2018?"
+  },
+  {
+    "input": "What is the change in total backlog in 2019?",
+    "output": "What was the overall increase or decrease in the total backlog during the year 2019?"
+  },
+  {
+    "input": "What are the segments under Total Backlog in the table?",
+    "output": "What specific categories or divisions are included within the Total Backlog section of the table?"
+  },
+  {
+    "input": "How many distinct segments are there included in the Total Backlog?",
+    "output": "How many segments are there under Total Backlog?"
+  },
+  {
+    "input": "How many distinct segments are included within the category of Total Backlog?",
+    "output": "How many segments are there under Total Backlog?"
+  },
+  {
+    "input": "What is the specific maturity date or time period for the long-term debt that is expected to be due in 2021?",
+    "output": "What is the maturity of long-term debt for 2021?"
+  },
+  {
+    "input": "What are the components recorded under income (loss) from continuing operations before income taxes?",
+    "output": "What specific items are included in the income (loss) from continuing operations category before taxes are deducted?"
+  },
+  {
+    "input": "In which year was the amount under Foreign the smallest?",
+    "output": "\"When was the lowest recorded value for the amount under the category 'Foreign' observed?\""
+  },
+  {
+    "input": "What is the change in the amount under Foreign in 2019 from 2018?",
+    "output": "What is the difference in the amount under the category \"Foreign\" between 2019 and 2018?"
+  },
+  {
+    "input": "What is the change in the amount under Foreign in 2019 from 2018?",
+    "output": "How much did the Foreign amount change from 2018 to 2019?"
+  },
+  {
+    "input": "In which year was the amount of sales in Other the largest?",
+    "output": "In which year did the category labeled as \"Other\" witness the highest recorded sales volume?"
+  },
+  {
+    "input": "What is the percentage change in the amount of Other in 2019 from 2018?",
+    "output": "What is the percentage difference in the amount of Other expenses or income in the year 2019 compared to 2018, and how can it be calculated?"
+  },
+  {
+    "input": "What is the tax expense for 2019 calculated at the U.S. statutory rate?",
+    "output": "What is the Tax expense at U.S. statutory rate for 2019?"
+  },
+  {
+    "input": "What was the tax expense for the year 2019 in the United States at the statutory rate?",
+    "output": "What is the Tax expense at U.S. statutory rate for 2019?"
+  },
+  {
+    "input": "What is the percentage change in nondeductible expenses in 2019 from 2018?",
+    "output": "What is the percentage difference in the amount of expenses that cannot be deducted in 2019 compared to 2018?"
+  },
+  {
+    "input": "What is the percentage change in nondeductible expenses in 2019 from 2018?",
+    "output": "What is the percentage increase or decrease in expenses that cannot be deducted from taxes in the year 2019 compared to the year 2018?"
+  },
+  {
+    "input": "\"When were the costs associated with purchasing and developing the ERP system recorded?\"",
+    "output": "In which years were the incurred costs related to the purchase and development of the ERP system recorded?"
+  },
+  {
+    "input": "In which year did the total value of property, plant, and equipment reach its highest level?",
+    "output": "In which year was the total amount of property, plant and equipment larger?"
+  },
+  {
+    "input": "In which specific year did the combined value of property, plant, and equipment reach its highest point?",
+    "output": "In which year was the total amount of property, plant and equipment larger?"
+  },
+  {
+    "input": "What is the change in leasehold improvements from 2018 to 2019?",
+    "output": "What was the year-on-year change in the value of leasehold improvements between 2018 and 2019?"
+  },
+  {
+    "input": "What is the percentage change in leasehold improvements from 2018 to 2019?",
+    "output": "What is the percentage difference in the value of leasehold improvements from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in CGD sales in 2018?",
+    "output": "What was the percentage increase or decrease in CGD sales in 2018 compared to the previous year?"
+  },
+  {
+    "input": "What is the percentage change in CGD sales in 2018?",
+    "output": "What is the exact percentage difference in sales of CGD products between the years 2018 and the previous year?"
+  },
+  {
+    "input": "For which years is the amortization of purchased intangibles included in the CGD results recorded?",
+    "output": "In the recorded CGD results, during which specific years is the amortization of purchased intangibles included?"
+  },
+  {
+    "input": "In which year does the inclusion of the amortization of purchased intangibles result in a higher amount in the Comprehensive Gain or Loss From Discontinued Operations (CGD)?",
+    "output": "In which year is the amortization of purchased intangibles included in the CGD results larger?"
+  },
+  {
+    "input": "\"What criteria or requirements must be met in order to be eligible for receiving an extra cash payment amounting to $3.0 million?\"",
+    "output": "What is the eligibility to receive an additional cash payment of $3.0 million based on?"
+  },
+  {
+    "input": "During which years was the method of calculating net income (loss) per common share using the weighted-average number of shares outstanding implemented and documented?",
+    "output": "For which years was the weighted-average number of shares outstanding used to compute net income (loss) per common share recorded?"
+  },
+  {
+    "input": "What is the percentage change in adjusted EBITDA in 2019 from 2018?",
+    "output": "What is the exact percentage change in adjusted EBITDA between the years 2018 and 2019?"
+  },
+  {
+    "input": "What is the percentage change in adjusted EBITDA in 2019 from 2018?",
+    "output": "What is the percentage difference in adjusted EBITDA between 2019 and 2018? How can this information be used to analyze the financial performance in these two years?"
+  },
+  {
+    "input": "What resulted in a higher operating income?",
+    "output": "What factor led to an increase in operating income?"
+  },
+  {
+    "input": "How many finished products were produced in the year 2019?",
+    "output": "What is the amount of finished products in 2019?"
+  },
+  {
+    "input": "What was the total quantity of completed products manufactured in the year 2019?",
+    "output": "What is the amount of finished products in 2019?"
+  },
+  {
+    "input": "In which year does the value of finished products surpass the value of finished products in other years, resulting in a higher overall value?",
+    "output": "In which year is the value of finished products higher?"
+  },
+  {
+    "input": "What are the differences in the types of materials and purchased parts between 2018 and 2019?",
+    "output": "What is the change in materials and purchased parts from 2018 to 2019?"
+  },
+  {
+    "input": "\"What factors contributed to the increase in operating profits?\"",
+    "output": "What resulted in the improvement in operating profits?"
+  },
+  {
+    "input": "For which fiscal year(s) is the amount of amortization of purchased intangibles included in the CGD results recorded?",
+    "output": "In which fiscal year(s) is the CGD results recorded and does it include the amortization amount of purchased intangibles?"
+  },
+  {
+    "input": "For which fiscal year(s) is the amount of amortization of purchased intangibles included in the CGD results recorded?",
+    "output": "In which fiscal year(s) is the amortization amount of purchased intangibles included in the recorded results of the Comprehensive Growth and Development (CGD)?"
+  },
+  {
+    "input": "Which year, from the given weighted-average assumptions for determining benefit obligation as of September 30, exhibits the highest rate of compensation increase among all years?",
+    "output": "For the weighted-average assumptions used to determine benefit obligation at September 30, which year has the largest rate of compensation increase?"
+  },
+  {
+    "input": "What was the change in the discount rate between 2018 and 2019 for determining the benefit obligation at September 30?",
+    "output": "For the weighted-average assumptions used to determine benefit obligation at September 30, what is the change in the discount rate in 2019 from 2018?"
+  },
+  {
+    "input": "What is the weighted-average discount rate used to calculate the benefit obligation as of September 30 for the years 2017, 2018, and 2019?",
+    "output": "For the weighted-average assumptions used to determine benefit obligation at September 30, what is the average discount rate across 2017, 2018 and 2019?"
+  },
+  {
+    "input": "In what specific year did the rate differential for foreign exchange exceed 10.0?",
+    "output": "In which year was Foreign rate differential greater than 10.0?"
+  },
+  {
+    "input": "What was the average amount of state taxes, excluding federal benefits, for the years 2017 to 2019?",
+    "output": "What was the average State taxes net of federal benefit for 2017-2019?"
+  },
+  {
+    "input": "How much did the Research and Development credits shift between 2018 and 2019?",
+    "output": "What is the change in the Research and development credits from 2018 to 2019?"
+  },
+  {
+    "input": "What is the difference in the amount of Research and Development credits between 2018 and 2019?",
+    "output": "What is the change in the Research and development credits from 2018 to 2019?"
+  },
+  {
+    "input": "What were the respective gains in 2019 and 2018?",
+    "output": "What were the gains achieved in 2019 and 2018, respectively?"
+  },
+  {
+    "input": "What were the respective gains in 2019 and 2018?",
+    "output": "What were the gains in 2019 and 2018, respectively?"
+  },
+  {
+    "input": "What is the specific year when the Loss from discontinued operations was exactly zero?",
+    "output": "In which year was Loss from discontinued operations 0?"
+  },
+  {
+    "input": "What was the average amount of money lost from 2017 to 2019?",
+    "output": "What was the average Net loss for 2017 to 2019?"
+  },
+  {
+    "input": "What is the change in the Weighted average common shares outstanding-basic from 2018 to 2019?",
+    "output": "What is the difference in the number of weighted average common shares outstanding-basic between 2018 and 2019?"
+  },
+  {
+    "input": "What was the depreciation and amortization expense related to property and equipment for 2019?",
+    "output": "What was the specific amount of depreciation and amortization expense attributed to property and equipment for the fiscal year 2019?"
+  },
+  {
+    "input": "\"What was the precise monetary worth or estimated value, expressed in the currency of the specific country or region, of the category of Machinery and Equipment during the calendar year of 2019?\"",
+    "output": "What was the value of Machinery and equipment in 2019?"
+  },
+  {
+    "input": "What was the exact monetary value of Machinery and equipment in the year 2019?",
+    "output": "What was the value of Machinery and equipment in 2019?"
+  },
+  {
+    "input": "In what year did the construction in process have a value of less than 30,000 thousands?",
+    "output": "In which year was value of Construction in process less than 30,000 thousands?"
+  },
+  {
+    "input": "In which specific year did the value of Construction in process fall below 30,000 thousands?",
+    "output": "In which year was value of Construction in process less than 30,000 thousands?"
+  },
+  {
+    "input": "What were the average values of Machinery and equipment in both 2018 and 2019?",
+    "output": "What was the average value of Machinery and equipment for 2018 and 2019?"
+  },
+  {
+    "input": "What is the average telecommunication expenditure during the fiscal years 2018 and 2019?",
+    "output": "What is the average Telecom for fiscal year 2018 and 2019?"
+  },
+  {
+    "input": "During which year did the number of Telecom users fall below 200,000 thousand?",
+    "output": "In which year was Telecom less than 200,000 thousand?"
+  },
+  {
+    "input": "What was the exact percentage decrease of the revenue in 2019 compared to the previous year?",
+    "output": "What was the decrease in the revenue in 2019?"
+  },
+  {
+    "input": "What was the specific percentage decrease in revenue for the fiscal year of 2019?",
+    "output": "What was the decrease in the revenue in 2019?"
+  },
+  {
+    "input": "What is the average value of the Telecom industry for the fiscal years 2017 and 2018 combined?",
+    "output": "What is the average Telecom value for fiscal year 2017 and 2018?"
+  },
+  {
+    "input": "What was the average value of the telecommunications industry for the fiscal years 2017 and 2018?",
+    "output": "What is the average Telecom value for fiscal year 2017 and 2018?"
+  },
+  {
+    "input": "In which year was Telecom value less than 300,000 thousand?",
+    "output": "In what specific year did the value of Telecom drop below 300,000 thousand?"
+  },
+  {
+    "input": "What were the values of divestments and acquisitions in 2019, 2018, and 2017?",
+    "output": "What was the (Divested)/acquired values in 2019, 2018 and 2017 respectively?"
+  },
+  {
+    "input": "In what specific year did the beginning balance fall below 5,000,000?",
+    "output": "In which year was Balance — beginning of year less than 5,000 thousands?"
+  },
+  {
+    "input": "In what specific year did the beginning balance equate to less than 5,000 thousands (5 million)?",
+    "output": "In which year was Balance — beginning of year less than 5,000 thousands?"
+  },
+  {
+    "input": "What was the difference in the amount of expenses allocated to provisions between the years 2017 and 2018?",
+    "output": "What was the change in the Provisions/(expense) from 2017 to 2018?"
+  },
+  {
+    "input": "What was the difference in the Provisions/(expense) between 2017 and 2018?",
+    "output": "What was the change in the Provisions/(expense) from 2017 to 2018?"
+  },
+  {
+    "input": "What was the amount of charge-offs recorded in the year 2019?",
+    "output": "What was the charge-offs in 2019?"
+  },
+  {
+    "input": "\"What was the total amount of charge-offs incurred in 2019?\"",
+    "output": "What was the charge-offs in 2019?"
+  },
+  {
+    "input": "\"What is the average net Provision for the years 2017, 2018, and 2019?\"",
+    "output": "What was the average Provision, net for 2017-2019?"
+  },
+  {
+    "input": "What was the average net provision for the years 2017, 2018, and 2019 combined?",
+    "output": "What was the average Provision, net for 2017-2019?"
+  },
+  {
+    "input": "What factors determine the value that is obtained upon vesting?",
+    "output": "What is the value realized on vesting based on?"
+  },
+  {
+    "input": "What were the specific equity awards held by each named executive officer throughout the year 2019?",
+    "output": "What were the equity awards held by named executives during 2019?"
+  },
+  {
+    "input": "What were the equity awards granted to the named executives in 2019?",
+    "output": "What were the equity awards held by named executives during 2019?"
+  },
+  {
+    "input": "What is the significance of the number of shares obtained upon vesting?",
+    "output": "What does the number of shares acquired on vesting represent?"
+  },
+  {
+    "input": "Where was the reported combined net loss of $82 million disclosed or published?",
+    "output": "Where was the combined net loss of $82 million reported in?"
+  },
+  {
+    "input": "What was the percentage difference in the Adjusted EBITDA margin from 2017 to 2019?",
+    "output": "What is the total change in the Adjusted EBITDA margin between 2019 and 2017?"
+  },
+  {
+    "input": "What specific expenses are included in the integration and transformation costs that have an impact on the Adjusted EBITDA?",
+    "output": "What does the integration and transformation costs impacting Adjusted EBITDA include?"
+  },
+  {
+    "input": "What is the change in total special items impacting adjusted EBITDA between 2018 and 2019?",
+    "output": "What was the difference in the total number of special items affecting adjusted EBITDA from 2018 to 2019?"
+  },
+  {
+    "input": "What is the change in total special items impacting adjusted EBITDA between 2018 and 2019?",
+    "output": "How did the total special items affecting adjusted EBITDA change from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in total special items impacting adjusted EBITDA between 2018 and 2019?",
+    "output": "What is the percentage difference in the amount of special items affecting adjusted EBITDA from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in total special items impacting adjusted EBITDA between 2018 and 2019?",
+    "output": "What is the percentage difference in the total number of special items affecting adjusted EBITDA from 2018 to 2019?"
+  },
+  {
+    "input": "How is the amortization process for deferred acquisition and fulfillment costs conducted?",
+    "output": "How are the deferred acquisition and fulfillment costs amortized?"
+  },
+  {
+    "input": "What is the percentage change for the end of period balance for Fulfillment Costs when comparing the beginning of period balance?",
+    "output": "What is the percentage change in the end of period balance for Fulfillment Costs when comparing it with the beginning of period balance?"
+  },
+  {
+    "input": "What was the total value of options that were available to be exercised and outstanding as of December 31, 2019?",
+    "output": "What was the aggregate intrinsic value of options outstanding and exercisable at December 31, 2019? "
+  },
+  {
+    "input": "Which year has a higher weighted-average exercise price for options outstanding and exercisable?",
+    "output": "Which year exhibits a greater weighted-average exercise price for options that are currently outstanding and available for exercise?"
+  },
+  {
+    "input": "What does the deferred income tax benefit (expense) under post-retirement benefit plans currently recognize? ",
+    "output": "What recognition does the deferred income tax benefit (expense) currently provide for under post-retirement benefit plans?"
+  },
+  {
+    "input": "What is the percentage increase or decrease in the deferred income tax benefit or expense for post-retirement benefit plans between 2017 and 2018?",
+    "output": "What is the percentage change in the deferred income tax benefit (expense) for post-retirement benefit plans in 2018 from 2017?"
+  },
+  {
+    "input": "What was the percentage difference in the deferred income tax benefit (expense) for post-retirement benefit plans between 2018 and 2017?",
+    "output": "What is the percentage change in the deferred income tax benefit (expense) for post-retirement benefit plans in 2018 from 2017?"
+  },
+  {
+    "input": "How is the Adjusted EBITDA Run Rate determined?",
+    "output": "How is the Adjusted EBITDA Run Rate calculated and what factors are considered in determining it?"
+  },
+  {
+    "input": "What are the different performance levels?",
+    "output": "What are the various levels of performance and can you provide more details about each level?"
+  },
+  {
+    "input": "How many levels of performance would result in a payout exceeding 50% of the target award?",
+    "output": "How many performance levels would the payout as % of target award be above 50%?"
+  },
+  {
+    "input": "How many performance levels have a payout as a percentage of the target award that exceeds 50%?",
+    "output": "How many performance levels would the payout as % of target award be above 50%?"
+  },
+  {
+    "input": "What is the main factor or aspect that goodwill is specifically connected or associated with?",
+    "output": "What is goodwill attributable to?"
+  },
+  {
+    "input": "What is property, plant and equipment expressed as a ratio of the total estimated aggregate consideration under the balance as of December 31, 2017?",
+    "output": "What is the ratio of property, plant, and equipment to the total estimated aggregate consideration shown in the balance as of December 31, 2017?"
+  },
+  {
+    "input": "What is property, plant and equipment expressed as a ratio of the total estimated aggregate consideration under the balance as of December 31, 2017?",
+    "output": "What is the ratio of property, plant and equipment to the total estimated aggregate consideration on the balance sheet as of December 31, 2017?"
+  },
+  {
+    "input": "What is the percentage change in other non currrent assets in 2018?",
+    "output": "What was the percentage change in other non-current assets during the year 2018?"
+  },
+  {
+    "input": "Under what conditions would officers be entitled to the years of welfare benefits?",
+    "output": "When would officers have the right to receive welfare benefits for a prolonged period of time?"
+  },
+  {
+    "input": "Which types of officers are listed in the table?",
+    "output": "\"What types of officers are included in the table displayed?\""
+  },
+  {
+    "input": "Which type of officer has the longest protected period?",
+    "output": "Which rank of officer within a specific profession or organization is entitled to the longest period of protection?"
+  },
+  {
+    "input": "Which type of officer has the longest protected period?",
+    "output": "Which type of officer is granted the longest period of protection against any form of termination or dismissal?"
+  },
+  {
+    "input": "What was the gross debt amount in the year 2019?",
+    "output": "What is the gross debt in 2019?"
+  },
+  {
+    "input": "What was the difference in the total amount of debt in 2019 compared to 2018?",
+    "output": "What is the change in gross debt in 2019 from 2018?"
+  },
+  {
+    "input": "What is the percentage change in gross debt in 2019 from 2018?",
+    "output": "What was the percentage increase/decrease in the gross debt from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in gross debt in 2019 from 2018?",
+    "output": "What is the percentage difference in the amount of gross debt from 2018 to 2019?"
+  },
+  {
+    "input": "On what basis are the estimated benefit payments made, considering the underlying assumptions?",
+    "output": "What assumptions are the estimated benefit payments based on?"
+  },
+  {
+    "input": "How many separate periods are highlighted in the table?",
+    "output": "How many different period segments are highlighted in the table?"
+  },
+  {
+    "input": "How many distinct period segments are specifically emphasized in the provided table?",
+    "output": "How many different period segments are highlighted in the table?"
+  },
+  {
+    "input": "What is the percentage change in combined pension plan in 2021 from 2020?",
+    "output": "What is the percentage difference in the combined pension plan between 2021 and 2020?"
+  },
+  {
+    "input": "What was the extent or percentage of the business that falls under the category of Operations and Other that was sold?",
+    "output": "What portion of business under Operations and Other was sold?"
+  },
+  {
+    "input": "What is the total segment revenue in 2019?",
+    "output": "What is the exact amount of revenue generated by each segment in 2019 and what is the sum of these revenues overall?"
+  },
+  {
+    "input": "What components are under operating revenue?",
+    "output": "What specific components contribute to the total operating revenue of a company or organization?"
+  },
+  {
+    "input": "What additional information does Note 17 provide?",
+    "output": "What is the additional information provided in Note 17?"
+  },
+  {
+    "input": "What additional information does Note 17 provide?",
+    "output": "What is the content of Note 17 and how does it provide additional information?"
+  },
+  {
+    "input": "\"What was the exact sum of adjusted EBITDA for the entire year of 2019?\"",
+    "output": "What is the total adjusted EBITDA in 2019?"
+  },
+  {
+    "input": "What is the change in the adjusted EBITDA under Wholesale in 2019 from 2018?",
+    "output": "What was the difference in adjusted EBITDA for the Wholesale category between 2019 and 2018?"
+  },
+  {
+    "input": "What is the average total adjusted EBITDA over the three years?",
+    "output": "What is the mean value of the total adjusted EBITDA for the three-year period?"
+  },
+  {
+    "input": "How many executives among the present ones in the organization receive a short-term incentive bonus that exceeds $500,000 in amount?",
+    "output": "How many current executives have a STI bonus amount greater than $500,000?"
+  },
+  {
+    "input": "How many executives presently receive a Short-Term Incentive (STI) bonus exceeding $500,000?",
+    "output": "How many current executives have a STI bonus amount greater than $500,000?"
+  },
+  {
+    "input": "What is the salary earned by Indraneel Dev in 2019, and how does it compare to his/her STI bonus amount, expressed as a ratio?",
+    "output": "What is Indraneel Dev's salary earned during 2019 expressed as a ratio of his/her STI bonus amount?"
+  },
+  {
+    "input": "What is included in the category of 'Support assets'? Please provide a detailed explanation of the various components that constitute these assets.",
+    "output": "What do 'Support assets' consist of?"
+  },
+  {
+    "input": "What is the specific figure for Reported Balances' operating revenue?",
+    "output": "What is the Reported Balances operating revenue?"
+  },
+  {
+    "input": "What was the variation in the deferred income tax benefit (expense) related to pension plans between 2019 and 2018?",
+    "output": "What is the change in the deferred income tax benefit (expense) for pension plans in 2019 from 2018?"
+  },
+  {
+    "input": "What is the percentage change in the deferred income tax benefit (expense) for pension plans in 2019 from 2018?",
+    "output": "What is the exact percentage difference in the deferred income tax benefit or expense for pension plans between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the outcome of the continuous efforts made to downsize the company's workforce?",
+    "output": "What was recognized as a result of ongoing efforts to reduce the company's workforce?"
+  },
+  {
+    "input": "What is the average amount of service costs across 2017, 2018 and 2019?",
+    "output": "What is the mean service cost from 2017 to 2019?"
+  },
+  {
+    "input": "What was the total amount of money owed by customers that had not been received as of December 31, 2019?",
+    "output": "What was the gross customer receivables at December 31, 2019?"
+  },
+  {
+    "input": "What was the total amount of customer receivables as recorded on the balance sheet at the end of December 31, 2019, including all outstanding invoices and amounts owed to the company by its customers?",
+    "output": "What was the gross customer receivables at December 31, 2019?"
+  },
+  {
+    "input": "What is included in the total value of long-term debt?",
+    "output": "What does total long-term debt include?"
+  },
+  {
+    "input": "What is included in the total amount of long-term debt?",
+    "output": "What does total long-term debt include?"
+  },
+  {
+    "input": "What was the approach or method adopted in 2019 to determine the total assets of an entity?",
+    "output": "Regarding the total assets, what was adopted in 2019?"
+  },
+  {
+    "input": "How many different dates declared had a dividend per share of $0.250 in 2018 and 2019?",
+    "output": "In 2018 and 2019, how many dates were declared with a dividend per share of $0.250?"
+  },
+  {
+    "input": "\"What specific categories of expenses were documented in the records for the year 2018?\"",
+    "output": "What types of expenses were recorded during 2018?"
+  },
+  {
+    "input": "What were the recorded expenses in 2018 and what categories do they fall under?",
+    "output": "What types of expenses were recorded during 2018?"
+  },
+  {
+    "input": "\"What is the total amount of non-cash goodwill impairment charges that are not tax-deductible, for the years 2019 and 2018?\"",
+    "output": "What is the sum of non-cash, non-tax-deductible goodwill impairment charges for 2019 and 2018?"
+  },
+  {
+    "input": "How much were the cumulative acquisition-related expenses incurred specifically by Level 3 in the year 2019?",
+    "output": "What was the amount of cumulative acquisition-related expenses incurred for Level 3 in 2019?"
+  },
+  {
+    "input": "What was the total amount of expenses related to acquisitions that Level 3 incurred in the year 2019? Please provide the specific cumulative value.",
+    "output": "What was the amount of cumulative acquisition-related expenses incurred for Level 3 in 2019?"
+  },
+  {
+    "input": "In which year did the company record the smallest total expenditure associated with acquisitions?",
+    "output": "Which year incurred the lowest amount of total acquisition-related expenses?"
+  },
+  {
+    "input": "In which specific year did the company experience the least amount of expenses related to acquisition?",
+    "output": "Which year incurred the lowest amount of total acquisition-related expenses?"
+  },
+  {
+    "input": "Against what were the estimates offset for the years 2019, 2018, and 2017?",
+    "output": "What are the 2019, 2018 and 2017 change in estimates offset against?"
+  },
+  {
+    "input": "What factors were the changes in estimates for 2019, 2018, and 2017 set off against?",
+    "output": "What are the 2019, 2018 and 2017 change in estimates offset against?"
+  },
+  {
+    "input": "In what year was the highest accretion expense recorded?",
+    "output": "Which year has the largest accretion expense?"
+  },
+  {
+    "input": "What is the year with the highest accretion expense?",
+    "output": "Which year has the largest accretion expense?"
+  },
+  {
+    "input": "What is the average accretion expense across 2017, 2018 and 2019?",
+    "output": "What is the average expense for accretion incurred during the years 2017, 2018, and 2019?"
+  },
+  {
+    "input": "Where is the recording location for the current portion of the post-retirement benefit obligations?",
+    "output": "Where is the current portion of the post-retirement benefit obligations recorded?"
+  },
+  {
+    "input": "In which specific year is the fair value of plan assets under post-retirement benefit plans higher than in other years?",
+    "output": "In which year is the fair value of plan assets under post-retirement benefit plans higher?"
+  },
+  {
+    "input": "\"What were the depreciation and amortization figures for the fiscal year 2019?\"",
+    "output": "What is the Depreciation and amortization for 2019?"
+  },
+  {
+    "input": "What is the total adjusted EBITDA for each segment in 2019?",
+    "output": "What is the total segment adjusted EBITDA in 2019?"
+  },
+  {
+    "input": "What is the total amount of income tax expense (benefit) recorded in the financial statements for the years 2018 and 2019?",
+    "output": "What is the sum of income tax expense (benefit) in 2018 and 2019?"
+  },
+  {
+    "input": "What is the percentage change in income tax expense (benefit) in 2019 from 2018?",
+    "output": "What is the percentage difference in income tax expense (benefit) between 2018 and 2019?"
+  },
+  {
+    "input": "What was the total amount of interest expenses incurred in the year 2019?",
+    "output": "What is the total interest expense in 2019?"
+  },
+  {
+    "input": "What was the total amount of interest paid in expenses during the year 2019?",
+    "output": "What is the total interest expense in 2019?"
+  },
+  {
+    "input": "What segments of interest expense are presented in the table?",
+    "output": "Which specific segments of interest expense are displayed in the table provided?"
+  },
+  {
+    "input": "What is the specific monetary difference in the total interest expense for the year 2019 compared to 2018?",
+    "output": "What is the change in the gross interest expense in 2019 from 2018?"
+  },
+  {
+    "input": "What was the difference in the amount of money spent on interest expenses in 2019 compared to 2018?",
+    "output": "What is the change in the gross interest expense in 2019 from 2018?"
+  },
+  {
+    "input": "What is the average amount of interest paid over the years 2017, 2018, and 2019?",
+    "output": "What is the average total interest expense for 2017 to 2019?"
+  },
+  {
+    "input": "\"What is the average annual value of unvested restricted stock awards that were excluded from antidilutive considerations in the years 2017, 2018, and 2019?\"",
+    "output": "What is the average annual amount of unvested restricted stock awards that are antidilutive excluded in 2017, 2018 and 2019?"
+  },
+  {
+    "input": "What were the respective average annual amounts of unvested restricted stock awards that were excluded due to being antidilutive in 2017, 2018, and 2019?",
+    "output": "What is the average annual amount of unvested restricted stock awards that are antidilutive excluded in 2017, 2018 and 2019?"
+  },
+  {
+    "input": "Which year has a larger amount of contract acquisition costs?",
+    "output": "In which specific year do the contract acquisition costs exhibit a greater magnitude or quantity compared to other years?"
+  },
+  {
+    "input": "What is the change in contract assets in 2019?",
+    "output": "What is the net increase or decrease in contract assets during the year 2019?"
+  },
+  {
+    "input": "What percentage of Level 3 goodwill is specifically allocated to the consumer segment?",
+    "output": "How much of Level 3 goodwill is allocated to consumer?"
+  },
+  {
+    "input": "What is the ratio of goodwill to the total amount as of December 31, 2018, in the context of Business?",
+    "output": "What is the amount of goodwill under Business as a ratio of the Total amount as of December 31, 2018?"
+  },
+  {
+    "input": "As of December 31, 2018, what is the ratio of goodwill to the total amount in Business?",
+    "output": "What is the amount of goodwill under Business as a ratio of the Total amount as of December 31, 2018?"
+  },
+  {
+    "input": "What is the combined goodwill figure for the years 2017 and 2018?",
+    "output": "What is the sum of the total amount of goodwill for 2017 and 2018?"
+  },
+  {
+    "input": "What is the combined value of goodwill for both 2017 and 2018?",
+    "output": "What is the sum of the total amount of goodwill for 2017 and 2018?"
+  },
+  {
+    "input": "\"What specific items are encompassed within the current liabilities section of the consolidated balance sheets?\"",
+    "output": "What do the current liabilities reflected in the consolidated balance sheets include?"
+  },
+  {
+    "input": "What specific items are included under current liabilities in the consolidated balance sheets?",
+    "output": "What do the current liabilities reflected in the consolidated balance sheets include?"
+  },
+  {
+    "input": "What items are typically categorized as Other current liabilities in financial statements?",
+    "output": "What are the items included under Other current liabilities?"
+  },
+  {
+    "input": "In which specific year do capital expenditures account for a greater value of accounts payable?",
+    "output": "Which year has a larger amount of accounts payable associated with capital expenditures?"
+  },
+  {
+    "input": "Which specific year indicates a higher level of accounts payable specifically linked to capital expenditures?",
+    "output": "Which year has a larger amount of accounts payable associated with capital expenditures?"
+  },
+  {
+    "input": "What distinguishes the total grant values of Indraneel Dev and Stacey W. Goff?",
+    "output": "What is the difference between Indraneel Dev and Stacey W. Goff's total grant values?"
+  },
+  {
+    "input": "What is the disparity in total grant amounts between Indraneel Dev and Stacey W. Goff?",
+    "output": "What is the difference between Indraneel Dev and Stacey W. Goff's total grant values?"
+  },
+  {
+    "input": "What is the monthly average for the number of shares that are typically withheld for tax purposes?",
+    "output": "What is the average number of shares withheld for taxes per month?"
+  },
+  {
+    "input": "\"What is the monthly average amount of shares that are typically withheld for tax purposes?\"",
+    "output": "What is the average number of shares withheld for taxes per month?"
+  },
+  {
+    "input": "What is the total amount of minimum lease payments expected for non-cancellable operating leases in 2019?",
+    "output": "How much is the 2019 total future minimum lease payments under non-cancellable operating leases?"
+  },
+  {
+    "input": "What was the percentage change in future minimum lease payments due within one year between 2018 and 2019?",
+    "output": "How much did future minimum lease payments due within one year change by between 2018 and 2019?"
+  },
+  {
+    "input": "Which year, 2018 or 2019, had a higher value of capital?",
+    "output": "Between 2018 and 2019, which year had a greater amount of capital?"
+  },
+  {
+    "input": "In terms of owned assets, which year, 2018 or 2019, had a higher overall value or quantity of assets?",
+    "output": "Between 2018 and 2019, which year had a greater amount of owned assets?"
+  },
+  {
+    "input": "Which year, 2018 or 2019, had a higher total value of assets owned?",
+    "output": "Between 2018 and 2019, which year had a greater amount of owned assets?"
+  },
+  {
+    "input": "What were the average values of leased assets for the years 2018 and 2019?",
+    "output": "What is the average leased assets for 2018 and 2019?"
+  },
+  {
+    "input": "What was the average value of leased assets for the years 2018 and 2019?",
+    "output": "What is the average leased assets for 2018 and 2019?"
+  },
+  {
+    "input": "What is the average trade receivables included within non-current assets?",
+    "output": "What is the average amount of trade receivables included within the category of non-current assets?"
+  },
+  {
+    "input": "What is the average prepayments included in non-current assets?",
+    "output": "What is the average amount of prepayments that are included in non-current assets?"
+  },
+  {
+    "input": "What financial items does guidance basis comprise of?",
+    "output": "What specific components does the guidance basis in financial terms consist of?"
+  },
+  {
+    "input": "Which year, either 2018 or 2019, had a higher adjusted EBITDA on a guidance basis?",
+    "output": "Between 2018 and 2019, which year had higher adjusted EBITDA, guidance basis?"
+  },
+  {
+    "input": "Which year, 2018 or 2019, had a higher adjusted EBITDA on a guidance basis? Please provide the comparison between the two specified years.",
+    "output": "Between 2018 and 2019, which year had higher adjusted EBITDA, guidance basis?"
+  },
+  {
+    "input": "What is the 2019 average adjusted EBITDA, guidance basis?",
+    "output": "\"What was the average adjusted EBITDA on a guidance basis for the year 2019?\""
+  },
+  {
+    "input": "What is the average amount of cash and cash equivalents that are typically reported in the statement of cash flows?",
+    "output": "What is the average cash and cash equivalents presented in the statement of cash flows?"
+  },
+  {
+    "input": "What is the average amount of cash and cash equivalents reported in the statement of cash flows?",
+    "output": "What is the average cash and cash equivalents presented in the statement of cash flows?"
+  },
+  {
+    "input": "What are the different types of financial items that are included in the data of a consolidated income statement?",
+    "output": "What financial items does the consolidated income statement data comprise of?"
+  },
+  {
+    "input": "\"What is the mean total income for the fiscal years 2018 and 2019?\"",
+    "output": "What is the average revenue for 2018 and 2019?"
+  },
+  {
+    "input": "What were the average revenues for the years 2018 and 2019?",
+    "output": "What is the average revenue for 2018 and 2019?"
+  },
+  {
+    "input": "What was the average revenue in 2017 and 2018?",
+    "output": "What is the average revenue for 2017 and 2018?"
+  },
+  {
+    "input": "\"What was the average revenue in both 2017 and 2018? Please provide the specific revenue figures for each year.\"",
+    "output": "What is the average revenue for 2017 and 2018?"
+  },
+  {
+    "input": "\"What is the revenue change reported specifically for Europe?\"",
+    "output": "What is the reported change in revenue - europe?"
+  },
+  {
+    "input": "What is the reported change in revenue for Europe? How has the revenue in Europe been affected according to the reports?",
+    "output": "What is the reported change in revenue - europe?"
+  },
+  {
+    "input": "\"Which country, Germany or Italy, has experienced a greater overall growth in the organic sector over a specified period of time?\"",
+    "output": "Between Germany and Italy, which one has a higher organic change?"
+  },
+  {
+    "input": "What does cash consideration paid comprise of?",
+    "output": "What is included in the cash consideration paid?"
+  },
+  {
+    "input": "\"In the table, what is the timeframe for the financial years whose information is displayed?\"",
+    "output": "Which financial years' information is shown in the table?"
+  },
+  {
+    "input": "What is the total value of acquisitions made in 2019 throughout the year?",
+    "output": "How much is the 2019 acquisitions during the year ?"
+  },
+  {
+    "input": "What was the total value of acquisitions made in 2019 throughout the year?",
+    "output": "How much is the 2019 acquisitions during the year ?"
+  },
+  {
+    "input": "Which year, 2018 or 2019, witnessed a higher number of acquisitions throughout the entire year?",
+    "output": "Between 2018 and 2019, which year had a greater amount of acquisitions during the year?"
+  },
+  {
+    "input": "What are the total operating lease commitments?",
+    "output": "What is the sum of all the commitments related to operating leases?"
+  },
+  {
+    "input": "What are the total operating lease commitments?",
+    "output": "What is the sum of all commitments made for operating leases?"
+  },
+  {
+    "input": "What is the total outstanding amount of financial liabilities that need to be paid within the shortest timeframe?",
+    "output": "How much financial liabilities are due for payment soonest?"
+  },
+  {
+    "input": "What is the proportion of financial liabilities in relation to the overall sum of contractual obligations and commitments?",
+    "output": "What percentage of total contractual obligations and commitments is the financial liabilities?"
+  },
+  {
+    "input": "\"What is the average amount of revenue generated from services according to IAS 18 for the years 2018 and 2019?\"",
+    "output": "What is the average service revenue between 2018 and 2019 IAS 18?"
+  },
+  {
+    "input": "What is the difference between average service revenue and average other revenue?",
+    "output": "What distinguishes average service revenue from average other revenue?"
+  },
+  {
+    "input": "Which specific financial years are being represented in the provided table?",
+    "output": "Which financial years' information is shown in the table?"
+  },
+  {
+    "input": "What type of short-term investments are shown in the table?",
+    "output": "\"What specific types of short-term investments are displayed in the provided table?\""
+  },
+  {
+    "input": "What is the percentage change in the value of managed investment funds from 2018 to 2019?",
+    "output": "What is the change in managed investment funds between 2018 and 2019?"
+  },
+  {
+    "input": "What is the difference in the value of managed investment funds from 2018 to 2019?",
+    "output": "What is the change in managed investment funds between 2018 and 2019?"
+  },
+  {
+    "input": "What type of long-term borrowings are shown in the table?",
+    "output": "What are the specific types of long-term borrowings that are presented in the table?"
+  },
+  {
+    "input": "What type of long-term borrowings are shown in the table?",
+    "output": "What is the specific category of long-term borrowings that is displayed in the table?"
+  },
+  {
+    "input": "What is the percentage change in the number of shares acquired from February to March 2019?",
+    "output": "What is the percentage change between shares purchased in February and March 2019?"
+  },
+  {
+    "input": "Which financial years' information is shown in the table?",
+    "output": "Which fiscal years' data is displayed in the given table?"
+  },
+  {
+    "input": "Which financial years' information is shown in the table?",
+    "output": "Which specific financial year's information is depicted in the table?"
+  },
+  {
+    "input": "How much is the 2019 audit fees ?",
+    "output": "What is the exact cost of the audit fees for the year 2019?"
+  },
+  {
+    "input": "What were the average fees for audits in 2018 and 2019?",
+    "output": "What is the average audit fees for 2018 and 2019?"
+  },
+  {
+    "input": "What were the average audit fees in both 2018 and 2019?",
+    "output": "What is the average audit fees for 2018 and 2019?"
+  },
+  {
+    "input": "\"What is the average total fees for the years 2018 and 2019 combined?\"",
+    "output": "What is the average total fees for 2018 and 2019?"
+  },
+  {
+    "input": "What is the average sum of fees for the years 2018 and 2019 combined?",
+    "output": "What is the average total fees for 2018 and 2019?"
+  },
+  {
+    "input": "What is the discrepancy between the average fees charged for audits and the average total fees incurred in 2018 and 2019?",
+    "output": "What is the difference between the average audit fees and the average total fees for 2018 and 2019?"
+  },
+  {
+    "input": "What is the disparity between the mean charges for audits and the mean overall charges, comparing the years 2018 and 2019?",
+    "output": "What is the difference between the average audit fees and the average total fees for 2018 and 2019?"
+  },
+  {
+    "input": "What is the amount of the non-current liabilities' 2018 other payables?",
+    "output": "How much is the 2018 other payables included within non-current liabilities?"
+  },
+  {
+    "input": "What is the value of the 2019 other payables included in current liabilities, excluding the amount payable for the share buyback?",
+    "output": "How much is the 2019 other payables included within current liabilities excluding the amount payable in relation to the share buyback?"
+  },
+  {
+    "input": "What is the value of the 2019 other payables included in the current liabilities, except for the amount payable related to the share buyback?",
+    "output": "How much is the 2019 other payables included within current liabilities excluding the amount payable in relation to the share buyback?"
+  },
+  {
+    "input": "What is the total amount of money that is owed to subsidiaries within one year as of 2019?",
+    "output": "What is the 2019 amounts owed to subsidiaries within one year?"
+  },
+  {
+    "input": "Which year experiences a greater number of bank loans and other loans that are due within one year?",
+    "output": "Which year has a higher amount of bank loans and other loans under amounts falling due within one year?"
+  },
+  {
+    "input": "How much is 2019 cash at bank and in hand ?",
+    "output": "What is the total amount of cash that was available in the bank and on hand in 2019?"
+  },
+  {
+    "input": "Between 2018 and 2019, which year had a greater amount of money market funds?",
+    "output": "Which year, 2018 or 2019, had a higher value of money market funds?"
+  },
+  {
+    "input": "Between 2018 and 2019, which year had a greater amount of money market funds?",
+    "output": "Which year had a higher volume of funds invested in money market funds: 2018 or 2019?"
+  },
+  {
+    "input": "Which year, 2018 or 2019, witnessed a higher quantity of government securities being issued?",
+    "output": "Between 2018 and 2019, which year had a greater amount of government securities?"
+  },
+  {
+    "input": "Between 2018 and 2019, which year had more ordinary shares allotted?",
+    "output": "In the period spanning from 2018 to 2019, which year witnessed a higher allocation of ordinary shares?"
+  },
+  {
+    "input": "What is the current cost of servicing for the year 2019?",
+    "output": "How much is the 2019 current service cost?"
+  },
+  {
+    "input": "What specific types of investments that are no longer held are displayed in the table?",
+    "output": "What type of non-current investments are shown in the table?"
+  },
+  {
+    "input": "In the table, which types of investments that are not currently being utilized or held are depicted?",
+    "output": "What type of non-current investments are shown in the table?"
+  },
+  {
+    "input": "How much is the 2018 debt securities?",
+    "output": "What is the value or price of the debt securities issued in 2018?"
+  },
+  {
+    "input": "How much is the 2018 debt securities?",
+    "output": "What is the cost of the 2018 debt securities?"
+  },
+  {
+    "input": "Which financial years' information is shown in the table?",
+    "output": "Which financial years are displayed in the provided table?"
+  },
+  {
+    "input": "What is the total projected income and expenditure for the year 2018?",
+    "output": "How much is the 2018 aggregated expected income expense ?"
+  },
+  {
+    "input": "What is the projected total income expense for 2018 after aggregating all relevant information?",
+    "output": "How much is the 2018 aggregated expected income expense ?"
+  },
+  {
+    "input": "What is the 2018 deferred tax on overseas earnings, excluding the 15€m charge relating to the combination of Vodafone India with Idea Cellular?",
+    "output": "What is the amount of deferred tax on overseas earnings in 2018, excluding the charge of 15€m related to the merger of Vodafone India and Idea Cellular?"
+  },
+  {
+    "input": "What is the difference in average income tax expense between the years 2017-2018 and 2018-2019?",
+    "output": "What is the change between 2017-2018 and 2018-2019 average income tax expense?"
+  },
+  {
+    "input": "How much is the excluded depreciation and amortisation for 2019 adjusted EBITDA?",
+    "output": "What is the amount of depreciation and amortization that is excluded from the adjusted EBITDA calculation for the year 2019?"
+  },
+  {
+    "input": "How much is the excluded depreciation and amortisation for 2018 adjusted EBITDA?",
+    "output": "What is the total amount of depreciation and amortization that were not included in calculating the adjusted EBITDA for the year 2018?"
+  },
+  {
+    "input": "\"What is the exact amount of the adjusted profit before tax for the year 2018?\"",
+    "output": "How much is the adjusted profit before tax in 2018?"
+  },
+  {
+    "input": "Which specific financial year's information is displayed in the provided table?",
+    "output": "Which financial years' information is shown in the table?"
+  },
+  {
+    "input": "What is the distinction between the average net book value and the average total costs of shares in Group undertakings as of March 31, 2019? How can analyzing this difference assist in better understanding financial performance?",
+    "output": "What is the difference between 2019 average net book value and 2019 average total costs of shares in Group undertakings as at 31 March?"
+  },
+  {
+    "input": "What is the significance of the recorded amount for developed technology?",
+    "output": "What does the amount recorded for developed technology represent?"
+  },
+  {
+    "input": "What does the recorded amount for developed technology signify, and what is its purpose or significance?",
+    "output": "What does the amount recorded for developed technology represent?"
+  },
+  {
+    "input": "What is the useful life for Developed technology?",
+    "output": "What is the average duration of usefulness or lifespan for technology that has been fully developed and is in practical use?"
+  },
+  {
+    "input": "What is the difference in useful life between developed technology and customer relationships?",
+    "output": "What distinguishes the lifespan of developed technology from that of customer relationships?"
+  },
+  {
+    "input": "What is the difference in fair value between developed technology and customer relationships?",
+    "output": "What distinguishes the fair value of developed technology from that of customer relationships?"
+  },
+  {
+    "input": "What identifiable intangible assets have a useful life exceeding five years?",
+    "output": "What are the identifiable intangible assets with a useful life above 5 years?"
+  },
+  {
+    "input": "What intangible assets can be identified and have a useful life of more than 5 years?",
+    "output": "What are the identifiable intangible assets with a useful life above 5 years?"
+  },
+  {
+    "input": "What does the amount recorded for developed technology represent?",
+    "output": "\"What is the meaning or significance of the recorded amount for developed technology?\""
+  },
+  {
+    "input": "What is the useful life (in years) of developed technology?",
+    "output": "What is the average duration, measured in years, of the lifespan of technology that has been developed?"
+  },
+  {
+    "input": "What is the difference in fair value between developed technology and customer relationships?",
+    "output": "What distinguishes the fair value of developed technology from that of customer relationships, and how do they differ from each other?"
+  },
+  {
+    "input": "What are the intangible assets that have a fair value of above $20,000 thousands?",
+    "output": "What are the intangible assets with a fair value exceeding $20,000 thousands? Please provide a comprehensive list of these assets."
+  },
+  {
+    "input": "What are the intangible assets that have a fair value of above $20,000 thousands?",
+    "output": "What are the intangible assets with a fair value exceeding $20 million dollars?"
+  },
+  {
+    "input": "What is the distinction between the total amount invested in debt and equity, when considering amortized cost as the basis for evaluation?",
+    "output": "What is the difference in the total debt investments and the total equity investments, based on amortized cost?"
+  },
+  {
+    "input": "What is the percentage composition of U.S. treasury securities in relation to the total debt investments when considering fair value?",
+    "output": "Based on fair value, what is the percentage constitution of U.S. treasury securities among the total debt investments?"
+  },
+  {
+    "input": "What is the number of potentially anti-dilutive shares that were excluded for fiscal years ended June 30, 2017, 2018 and 2019 respectively? ",
+    "output": "What was the excluded number of potentially anti-dilutive shares for the fiscal years ending June 30, 2017, 2018, and 2019?"
+  },
+  {
+    "input": "What is the number of potentially anti-dilutive shares that were excluded for fiscal years ended June 30, 2017, 2018 and 2019 respectively? ",
+    "output": "What is the total count of potentially anti-dilutive shares excluded for the fiscal years ending on June 30, 2017, 2018, and 2019, respectively?"
+  },
+  {
+    "input": "What is the variance in the weighted-average ordinary shares outstanding for basic and diluted calculations in the fiscal year that ended on June 30, 2019?",
+    "output": "In fiscal year ended June 30, 2019, what is the difference in the weighted-average ordinary shares outstanding between the basic and the diluted?"
+  },
+  {
+    "input": "What is the average total non-current operating assets for fiscal years ended June 30, 2018 and 2019?",
+    "output": "\"What is the average total value of non-current operating assets for the fiscal years that ended on June 30, 2018 and June 30, 2019?\""
+  },
+  {
+    "input": "How many geographic regions had non-current operating assets exceeding $10,000 thousand in the fiscal year ending on June 30, 2019?",
+    "output": "In fiscal year ended June 30, 2019, how many geographic regions have non-current operating assets of more than $10,000 thousand?"
+  },
+  {
+    "input": "What percentage of the total non-current operating assets owned by the Group in the United States constituted the non-current operating assets in the fiscal year ending June 30, 2018?",
+    "output": "In fiscal year ended June 30, 2018, what is the percentage constitution of the non-current operating assets in the United States among the total non-current operating assets owned by the Group?"
+  },
+  {
+    "input": "\"What specific adjustments were made to figures in the year 2018?\"",
+    "output": "What was the adjustment done to figures in 2018?"
+  },
+  {
+    "input": "How much did the value of security deposits change from fiscal year 2018 to 2019?",
+    "output": "What is the change in value between security deposits between fiscal years 2018 and 2019?"
+  },
+  {
+    "input": "What is the  Balance at the beginning of 2019?",
+    "output": "What is the balance amount in the accounts at the start of the year 2019?"
+  },
+  {
+    "input": "What is the average deferred tax expense for fiscal years 2018 and 2019?",
+    "output": "What is the average amount of deferred tax expense incurred during fiscal years 2018 and 2019?"
+  },
+  {
+    "input": "What is the percentage change of deferred tax expenses between fiscal year 2018 to 2019?",
+    "output": "What is the percentage difference in deferred tax expenses between fiscal year 2018 and fiscal year 2019?"
+  },
+  {
+    "input": "What is the amount recorded for short-term investments as of June 30, 2018, on the consolidated statements of financial position for the Group?",
+    "output": "As of June 30, 2018, what is the value of short-term investments on the Group's consolidated statements of financial position?"
+  },
+  {
+    "input": "Based on fair value, what is the difference in value between U.S. treasury securities and agency securities?",
+    "output": "What is the quantifiable difference between U.S. treasury securities and agency securities in terms of their fair market value?"
+  },
+  {
+    "input": "What are the specific revenue figures for the United Kingdom for the fiscal years that concluded in 2017, 2018, and 2019?",
+    "output": "What are the revenue amounts from the United Kingdom for fiscal years ended 2017, 2018 and 2019 respectively?"
+  },
+  {
+    "input": "What is the difference in the revenues from Asia Pacific between fiscal years ended 2018 and 2019?",
+    "output": "How does the revenue for the Asia Pacific region differ between the fiscal years ending in 2018 and 2019?"
+  },
+  {
+    "input": "What is the balance of capped call as of June 30, 2019?",
+    "output": "What is the specific capped call balance as of June 30, 2019?"
+  },
+  {
+    "input": "What is the difference in the capped call balance between the fiscal year ending on June 30, 2018, and the fiscal year ending on June 30, 2019?",
+    "output": "What is the change in the balance of capped call between fiscal year ended June 30, 2018 and 2019?"
+  },
+  {
+    "input": "What is the difference in balance of capped call and non-marketable investments as of June 30, 2019?",
+    "output": "What is the difference in the balance of capped call and non-marketable investments as of June 30, 2019, and how do these two types of investments vary in terms of their marketability?"
+  },
+  {
+    "input": "What is the difference in balance of capped call and non-marketable investments as of June 30, 2019?",
+    "output": "What is the balance difference between capped call and non-marketable investments as of June 30, 2019?"
+  },
+  {
+    "input": "\"What is the chronological order of the share capital amounts from June 30, 2015 to 2019?\"",
+    "output": "What are the share capital for the years as of June 30, 2015 to 2019 in chronological order?"
+  },
+  {
+    "input": "List the short-term investments from June 30, 2015 to 2019 in chronological order.",
+    "output": "What are the short-term investments for the years as of June 30, 2015 to 2019 in chronological order?"
+  },
+  {
+    "input": "What is the difference in the value of short-term investments between fiscal years 2018 and 2019?",
+    "output": "What is the variance in the monetary worth of short-term investments from fiscal year 2018 to fiscal year 2019?"
+  },
+  {
+    "input": "What is the average  Cash and cash equivalents for 2015-2019?",
+    "output": "What is the average amount of Cash and cash equivalents from 2015 to 2019?"
+  },
+  {
+    "input": "What is the definition of other fees?",
+    "output": "What does the term \"other fees\" mean? Please provide a clear definition of this term, including any relevant details, to facilitate a more accurate response."
+  },
+  {
+    "input": "What is the average amount of taxes paid in fees for the fiscal years 2018 and 2019?",
+    "output": "What is the average tax fees for fiscal years 2018 and 2019?"
+  },
+  {
+    "input": "What is the disparity in the aggregate of obligations between operating leases and other types of contractual commitments?",
+    "output": "What is the difference in the total commitments between that of operating leases and other contractual commitments?"
+  },
+  {
+    "input": "What percentage of the total commitments for operating leases for the year ending in 2020 is constituted by the commitments specifically for operating leases?",
+    "output": "What is the percentage constitution of the commitments for operating leases for year ending 2020 among the total commitments for operating leases?"
+  },
+  {
+    "input": "What portion of the total commitments for operating leases for the year ending 2020 represents the percentage constitution?",
+    "output": "What is the percentage constitution of the commitments for operating leases for year ending 2020 among the total commitments for operating leases?"
+  },
+  {
+    "input": "What was the percentage increase in maintenance revenues from the fiscal year ending in 2018 to the fiscal year ending in 2019?",
+    "output": "What was the increase of maintenance revenues from fiscal year ended 2018 to 2019?"
+  },
+  {
+    "input": "What was the exact percentage increase in perpetual license revenues from the fiscal year ended 2018 to the fiscal year ended 2019?",
+    "output": "What was the increase of perpetual license revenues from fiscal year ended 2018 to 2019?"
+  },
+  {
+    "input": "What percentage of total revenue for the fiscal year ending in 2018 is made up of perpetual license sales?",
+    "output": "For fiscal year ended 2018, what is the percentage constitution of perpetual license among the total revenue?"
+  },
+  {
+    "input": "What percentage of the total revenue for the fiscal year ended 2018 was generated from perpetual licenses?",
+    "output": "For fiscal year ended 2018, what is the percentage constitution of perpetual license among the total revenue?"
+  },
+  {
+    "input": "\"What are the average maintenance revenues for the fiscal years ending in 2018 and 2019, respectively?\"",
+    "output": "What is the average maintenance revenues for fiscal year ended 2018 and 2019?"
+  },
+  {
+    "input": "What is the specific provision for dilapidation expenses allocated for the fiscal year 2019?",
+    "output": "What is the dilapidation provision for fiscal year 2019?"
+  },
+  {
+    "input": "What is the formula to calculate free cash flow, and can you further explain the steps involved in deriving this measure?",
+    "output": "How is free cash flow calculated?"
+  },
+  {
+    "input": "What was the percentage increase in free cash flow from fiscal year 2018 to 2019?",
+    "output": "What was the increase in free cash flow between fiscal year 2018 and 2019?"
+  },
+  {
+    "input": "What is the average net cash flow generated by operating activities for the years 2017, 2018, and 2019?",
+    "output": "What is the average net cash provided by operating activities from 2017-2019?"
+  },
+  {
+    "input": "\"What is the average amount of net cash generated from operating activities for the years 2017, 2018, and 2019?\"",
+    "output": "What is the average net cash provided by operating activities from 2017-2019?"
+  },
+  {
+    "input": "What is the percentage difference in the amount of available cash generated by the company between the years 2017 and 2018?",
+    "output": "What is the percentage change in free cash flow between 2017 and 2018?"
+  },
+  {
+    "input": "What is the change in capital expenditures between 2018 and 2019?",
+    "output": "What was the difference in capital expenditures from 2018 to 2019?"
+  },
+  {
+    "input": "What is the change in capital expenditures between 2018 and 2019?",
+    "output": "What was the difference in the amount of money spent on capital expenditures from 2018 to 2019?"
+  },
+  {
+    "input": "As of June 30, 2019, what is the total value or monetary amount of outstanding payments owed by customers or business partners for goods or services provided?",
+    "output": "As of June 30, 2019, what is the amount of trade receivables?"
+  },
+  {
+    "input": "How does the net cash provided by operating activities compare between the fiscal years ending in 2018 and 2019?",
+    "output": "What is the difference in net cash provided by operating activities between fiscal year ended 2018 and 2019?"
+  },
+  {
+    "input": "What is the change in net cash provided by operating activities between the fiscal years ending in 2018 and 2019?",
+    "output": "What is the difference in net cash provided by operating activities between fiscal year ended 2018 and 2019?"
+  },
+  {
+    "input": "What are the specific components included in the \"other current assets\" category in the table?",
+    "output": "What are the components under other current assets in the table?"
+  },
+  {
+    "input": "What was the difference in the total value of other current assets between 2019 and 2018?",
+    "output": "What was the change in total other current assets in 2019 from 2018?"
+  },
+  {
+    "input": "Which items in the table include the cumulative effect of the 2017 Tax Reform Act in their 2017 amounts?",
+    "output": "For which items in the table does the amount for 2017 includes the sum related to the impact of the 2017 Tax Reform Act?"
+  },
+  {
+    "input": "Which items in the table include the sum related to the impact of the 2017 Tax Reform Act in their amount for 2017?",
+    "output": "For which items in the table does the amount for 2017 includes the sum related to the impact of the 2017 Tax Reform Act?"
+  },
+  {
+    "input": "What was the year when the Diluted Net Income per Common Share reached its highest value?",
+    "output": "In which year was the Diluted Net Income per Common Share largest?"
+  },
+  {
+    "input": "In which year did the exclusion of anti-dilutive shares from the calculation of diluted earnings per share reach its highest magnitude?",
+    "output": "In which year was the Anti-dilutive shares excluded from the diluted earnings per share calculation largest?"
+  },
+  {
+    "input": "What was the year when the exclusion of anti-dilutive shares from the calculation of diluted earnings per share reached its highest value?",
+    "output": "In which year was the Anti-dilutive shares excluded from the diluted earnings per share calculation largest?"
+  },
+  {
+    "input": "Where is Income tax benefit included?",
+    "output": "Where is the income tax benefit typically included in financial statements or accounting records?"
+  },
+  {
+    "input": "Where is Income tax benefit included?",
+    "output": "Where can income tax benefit be found in financial statements or reports?"
+  },
+  {
+    "input": "When is the calculation of income tax benefit typically performed annually?",
+    "output": "In which years is income tax benefit calculated?"
+  },
+  {
+    "input": "What does restricted cash include?",
+    "output": "What is included in restricted cash and can you provide further details about its components?"
+  },
+  {
+    "input": "In which year was Restricted cash included in \"Other current assets\" the lowest?",
+    "output": "In which year did the inclusion of Restricted cash in the category of \"Other current assets\" reach its lowest level?"
+  },
+  {
+    "input": "What was the difference in the amount of Restricted cash included in the category of \"Deferred charges and other assets\" in the year 2017 compared to 2016?",
+    "output": "What was the change in Restricted cash included in \"Deferred charges and other assets\" in 2017 from 2016?"
+  },
+  {
+    "input": "What is the largest recorded year for Net periodic benefit cost?",
+    "output": "In which year was Net periodic benefit cost the largest?"
+  },
+  {
+    "input": "What was the difference in the amount of money spent on interest in 2019 compared to 2018?",
+    "output": "What was the change in interest cost in 2019 from 2018?"
+  },
+  {
+    "input": "What was the difference in the cost of interest between 2019 and 2018?",
+    "output": "What was the change in interest cost in 2019 from 2018?"
+  },
+  {
+    "input": "\"In which year did the amount of deferred revenue reach its highest value and why?\"",
+    "output": "In which year was deferred revenue larger?"
+  },
+  {
+    "input": "During which year was the accumulated income received in advance, known as deferred revenue, higher?",
+    "output": "In which year was deferred revenue larger?"
+  },
+  {
+    "input": "What was the specific dollar amount of the change in deferred revenue between 2019 and 2018?",
+    "output": "What was the change in deferred revenue in 2019 from 2018?"
+  },
+  {
+    "input": "What was the percentage change in deferred revenue in 2019 from 2018?",
+    "output": "What is the percentage increase or decrease in deferred revenue from 2018 to 2019?"
+  },
+  {
+    "input": "What was the change in Fair value of share awards vested in 2019 from 2018?",
+    "output": "What was the variation in the fair value of share awards that were vested in the year 2019 compared to 2018?"
+  },
+  {
+    "input": "What was the change in Fair value of share awards vested in 2019 from 2018?",
+    "output": "What was the difference in fair value of share awards that were granted in 2018 but vested in 2019?"
+  },
+  {
+    "input": "During which years was information regarding the credit agreements provided?",
+    "output": "In which years is information related to the credit agreements provided?"
+  },
+  {
+    "input": "What is the least year in which the Interest expense reached its minimum amount?",
+    "output": "In which year was the amount of Interest expense the smallest?"
+  },
+  {
+    "input": "When was the least amount of interest expense recorded?",
+    "output": "In which year was the amount of Interest expense the smallest?"
+  },
+  {
+    "input": "What was the change in Interest expense in 2019 from 2018?",
+    "output": "What was the difference in the amount of money spent on interest payments in 2019 compared to 2018?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the amount spent on interest in 2019 compared to 2018?",
+    "output": "What was the percentage change in Interest expense in 2019 from 2018?"
+  },
+  {
+    "input": "What was the value of purchases that were accumulated or outstanding as of the end of 2019?",
+    "output": "What was the amount of Accrued purchases in 2019?"
+  },
+  {
+    "input": "What was the specific amount of Accrued legal and professional fees incurred during the year 2018?",
+    "output": "What was the amount of Accrued legal and professional fees  in 2018?"
+  },
+  {
+    "input": "\"How much money was recorded for Accrued legal and professional fees during the year 2018?\"",
+    "output": "What was the amount of Accrued legal and professional fees  in 2018?"
+  },
+  {
+    "input": "What was the percentage difference in Accrued roadside assistance claim costs between 2019 and 2018?",
+    "output": "What was the change in Accrued roadside assistance claim costs in 2019 from 2018?"
+  },
+  {
+    "input": "\"What was the specific amount charged for the service in the year 2019?\"",
+    "output": "What was the service cost in 2019?"
+  },
+  {
+    "input": "\"What was the cost of the service provided in the year 2019?\"",
+    "output": "What was the service cost in 2019?"
+  },
+  {
+    "input": "What was the total cost of interest expenses in the year 2018?",
+    "output": "What was the  Interest cost  in 2018?"
+  },
+  {
+    "input": "What was the total amount spent on interest expenses in 2018?",
+    "output": "What was the  Interest cost  in 2018?"
+  },
+  {
+    "input": "\"In which specific year did the cost of services exceed the cost of services in the other years?\"",
+    "output": "In which year was service cost larger?"
+  },
+  {
+    "input": "Which year had a greater service cost compared to other years?",
+    "output": "In which year was service cost larger?"
+  },
+  {
+    "input": "What was the difference in the amount spent on interest in 2019 compared to 2018?",
+    "output": "What was the change in interest cost in 2019 from 2018?"
+  },
+  {
+    "input": "\"What was the gross revenue generated by overseas operations, excluding any income taxes, in the fiscal year 2018?\"",
+    "output": "What was the  Foreign  income before income taxes in 2018?"
+  },
+  {
+    "input": "What was the total income from foreign sources in 2018, before any deductions for income taxes were made?",
+    "output": "What was the  Foreign  income before income taxes in 2018?"
+  },
+  {
+    "input": "What are the components under income before income taxes?",
+    "output": "What are the individual components that make up the category of income before income taxes?"
+  },
+  {
+    "input": "In which specific year, prior to the implementation of income taxes, did individuals and/or organizations experience the highest level of income?",
+    "output": "In which year was income before income taxes the largest?"
+  },
+  {
+    "input": "What is the largest recorded income before income taxes, and in which specific year was it achieved?",
+    "output": "In which year was income before income taxes the largest?"
+  },
+  {
+    "input": "In which specific year did the quantity of an item categorized as \"Other\" exceed the quantity of another item, considering all provided information?",
+    "output": "In which year was the amount of Other larger?"
+  },
+  {
+    "input": "In which year did the quantity of a different item exceed the quantity of the rest?",
+    "output": "In which year was the amount of Other larger?"
+  },
+  {
+    "input": "What was the year-over-year difference in accrued severance and associated expenses between 2018 and 2019?",
+    "output": "What was the change in Accrued severance and related costs in 2019 from 2018?"
+  },
+  {
+    "input": "What was the total payment made by the Company in the year 2019?",
+    "output": "How much has the Company paid in total in 2019?"
+  },
+  {
+    "input": "What was the total amount paid by the Company in 2019?",
+    "output": "How much has the Company paid in total in 2019?"
+  },
+  {
+    "input": "When did the amount of non-cash impairment charges exceed previous years?",
+    "output": "In which year was Non-cash impairment charges larger?"
+  },
+  {
+    "input": "Which year had a greater amount of non-cash impairment charges compared to other years?",
+    "output": "In which year was Non-cash impairment charges larger?"
+  },
+  {
+    "input": "During which years is the fair value of restricted shares or restricted stock units (RSUs) vested determined?",
+    "output": "In which years is the Fair value of restricted shares/RSUs vested calculated?"
+  },
+  {
+    "input": "What was the change in Number of restricted shares/ RSUs granted in 2019 from 2018?",
+    "output": "What was the difference in the number of restricted shares/RSUs granted in 2019 compared to 2018?"
+  },
+  {
+    "input": "What was the change in Number of restricted shares/ RSUs granted in 2019 from 2018?",
+    "output": "What was the difference in the number of restricted shares/ RSUs granted in 2019 compared to 2018?"
+  },
+  {
+    "input": "What was the percentage change in Number of restricted shares/ RSUs granted in 2019 from 2018?",
+    "output": "What was the percentage change in the number of restricted shares/ RSUs granted from 2018 to 2019?"
+  },
+  {
+    "input": "As of July 1, 2019, what was the total value of stapled securities of NSR listed on the Australian Securities Exchange?",
+    "output": "How much did NSR had stapled securities quoted on the Australian Securities Exchange as at 1 July 2019?"
+  },
+  {
+    "input": "As of July 1, 2019, what was the total value of NSR stapled securities quoted on the Australian Securities Exchange?",
+    "output": "How much did NSR had stapled securities quoted on the Australian Securities Exchange as at 1 July 2019?"
+  },
+  {
+    "input": "What would happen if the price of a security dropped by 2.5% and how would this affect the overall market?",
+    "output": "What would be the impact if security price decreased by 2.5%?"
+  },
+  {
+    "input": "What is the change in Other intangible assets Opening net book value from 2018 to 2019?",
+    "output": "What is the difference in the net book value of Other intangible assets at the beginning of 2018 compared to the beginning of 2019?"
+  },
+  {
+    "input": "What is the change in Other intangible assets Opening net book value from 2018 to 2019?",
+    "output": "What is the difference in the opening net book value of Other intangible assets between 2018 and 2019?"
+  },
+  {
+    "input": "How many distinct business segments were there in the years 2019 and 2018?",
+    "output": "How many business segments were present in 2019 and 2018?"
+  },
+  {
+    "input": "\"What is the average revenue generated from New Zealand in both 2018 and 2019?\"",
+    "output": "What is the average revenue from New Zealand for 2018 and 2019?"
+  },
+  {
+    "input": "What is the average revenue generated from New Zealand in both 2018 and 2019?",
+    "output": "What is the average revenue from New Zealand for 2018 and 2019?"
+  },
+  {
+    "input": "In which specific year did the revenue from New Zealand amount to less than 10,000 thousands?",
+    "output": "In which year was revenue from New Zealand under 10,000 thousands?"
+  },
+  {
+    "input": "In what specific year did the revenue generated from New Zealand fall below 10,000 thousands (in the same currency)?",
+    "output": "In which year was revenue from New Zealand under 10,000 thousands?"
+  },
+  {
+    "input": "What was the total NLA in Brisbane?",
+    "output": "What was the total number of Newly Listed Apartments (NLA) in Brisbane?"
+  },
+  {
+    "input": "What is the difference in the NLA between Sunshine Cost and Brisbane?",
+    "output": "What are the NLA differences between the Sunshine Coast and Brisbane, and how do they vary from each other in terms of specific aspects?"
+  },
+  {
+    "input": "In which year is the Net investment hedge negative?",
+    "output": "Which specific year does the Net investment hedge demonstrate a negative value?"
+  },
+  {
+    "input": "What is the change in the Taxation impact on revaluation from 2018 to 2019?",
+    "output": "What is the difference in the taxation impact on revaluation between 2018 and 2019?"
+  },
+  {
+    "input": "What is the change in the Taxation impact on revaluation from 2018 to 2019?",
+    "output": "What is the difference in the effect of taxation on revaluation between 2018 and 2019?"
+  },
+  {
+    "input": "What was the share of profit from associates representing NSRs in 2019 and 2018?",
+    "output": "What were the percentages of profit from associates, specifically those representing NSRs, for the years 2019 and 2018?"
+  },
+  {
+    "input": "What is the difference in the percentage of profit attributed to associates between 2018 and 2019?",
+    "output": "What is the change in the Share of profit from associates from 2018 to 2019?"
+  },
+  {
+    "input": "What is the difference in the percentage of profit allocated to associates between 2018 and 2019?",
+    "output": "What is the change in the Share of profit from associates from 2018 to 2019?"
+  },
+  {
+    "input": "When was the year when the closing balance on June 30th was below 11,000 thousand units?",
+    "output": "In which year was the Closing balance at 30 June less than 11,000 thousands?"
+  },
+  {
+    "input": "What is the year-over-year change in face value for New Zealand Dollar (NZD) interest rate swaps from 2018 to 2019?",
+    "output": "What is the change in Interest rate swaps (NZD) at face value for Current interest rate swaps from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in face value for current New Zealand Dollar (NZD) interest rate swaps from 2018 to 2019?",
+    "output": "What is the change in Interest rate swaps (NZD) at face value for Current interest rate swaps from 2018 to 2019?"
+  },
+  {
+    "input": "What was the assets held for sale under Level 1, Level 2 and Level 3 in 2018?",
+    "output": "In 2018, what were the assets classified as held for sale and valued at Level 1, Level 2, and Level 3 according to their respective classifications?"
+  },
+  {
+    "input": "What was the amount of senior secured term loan for fiscal years 2019 and 2018?",
+    "output": "What is the value of senior secured term loan for fiscal years 2019 and 2018 respectively?"
+  },
+  {
+    "input": "What is the percentage shift in the amount of finance leases and other financing obligations from fiscal year 2018 to fiscal year 2019?",
+    "output": "What is the percentage change in the finance leases and other financing obligations between fiscal years 2018 and 2019?"
+  },
+  {
+    "input": "What is the average finance leases and other financing obligations for fiscal years 2018 and 2019?",
+    "output": "What is the mean value of finance leases and other financing obligations for fiscal years 2018 and 2019?"
+  },
+  {
+    "input": "What are the income tax expenses for the years 2019, 2018, and 2017?",
+    "output": "What is the total current income tax expense for 2019, 2018 and 2017 respectively?"
+  },
+  {
+    "input": "What is the amount of deferred income tax expense (benefit) recorded for the years 2019, 2018, and 2017, respectively?",
+    "output": "What is the Total deferred income tax expense (benefit) for 2019, 2018 and 2017 respectively?"
+  },
+  {
+    "input": "What is the average total income tax expense for the years 2017, 2018, and 2019?",
+    "output": "What is the average Total income tax expense for 2019, 2018 and 2017?"
+  },
+  {
+    "input": "What is the average aggregate size of the facilities in Arizona and Washington?",
+    "output": "What is the average total size of the facilities located in both Arizona and Washington?"
+  },
+  {
+    "input": "\"How many states in the United States have a total of more than 5 facilities?\"",
+    "output": "How many states have more than 5 facilities?"
+  },
+  {
+    "input": "What percentage of the total contractual obligations does the total inventory purchase commitments represent?",
+    "output": "What is the percentage constitution of total inventory purchase commitments among the total contractual obligations?"
+  },
+  {
+    "input": "What percentage of the total contractual obligations is constituted by the inventory purchase commitments in the overall inventory?",
+    "output": "What is the percentage constitution of total inventory purchase commitments among the total contractual obligations?"
+  },
+  {
+    "input": "\"What specific component within a contractual obligation holds the highest overall value?\"",
+    "output": "Which component of contractual obligation has the highest total value?"
+  },
+  {
+    "input": "At December 29, 2017, how many shares were still unvested?",
+    "output": "What is the number of shares that are Unvested at December 29, 2017?"
+  },
+  {
+    "input": "What is the length of time that buildings are typically considered useful for?",
+    "output": "What is the useful lives of buildings?"
+  },
+  {
+    "input": "What are the estimated useful lives or lifespans of machinery and equipment commonly used in various industries or sectors?",
+    "output": "What is the useful lives of Machinery and equipment?"
+  },
+  {
+    "input": "What are the estimated useful lifespans of machinery and equipment? How long can machinery and equipment be expected to remain functional and productive?",
+    "output": "What is the useful lives of Machinery and equipment?"
+  },
+  {
+    "input": "What is the average monetary worth of buildings in the years 2018 and 2019?",
+    "output": "What is the average value of buildings for 2018 and 2019?"
+  },
+  {
+    "input": "What is the average monetary worth of buildings in the years 2018 and 2019 combined?",
+    "output": "What is the average value of buildings for 2018 and 2019?"
+  },
+  {
+    "input": "What is the average adjusted net income from 2017-2019?",
+    "output": "\"What is the average adjusted net income for the years 2017, 2018, and 2019?\""
+  },
+  {
+    "input": "What were the values of receivables and inventory in the years 2019 and 2018, respectively?",
+    "output": "What is the value of Receivables and inventory for 2019 and 2018 respectively?"
+  },
+  {
+    "input": "What is the value of Accrued expenses for 2019 and 2018 respectively?",
+    "output": "What is the amount of Accrued expenses in 2019 and 2018?"
+  },
+  {
+    "input": "What is the value of Self-insurance reserves for 2019 and 2018 respectively?",
+    "output": "What are the self-insurance reserve values for the years 2019 and 2018?"
+  },
+  {
+    "input": "What are the average values of receivables and inventory for the years 2019 and 2018?",
+    "output": "What is the average value of Receivables and inventory for 2019 and 2018?"
+  },
+  {
+    "input": "What are the average values of Receivables and inventory for the years 2019 and 2018?",
+    "output": "What is the average value of Receivables and inventory for 2019 and 2018?"
+  },
+  {
+    "input": "In which year did the Accrued expenses reach the highest level?",
+    "output": "Which year has the highest Accrued expenses?"
+  },
+  {
+    "input": "In which specific year did the Accrued expenses reach their highest value?",
+    "output": "Which year has the highest Accrued expenses?"
+  },
+  {
+    "input": "Which year recorded the highest Senior Secured Term Loan amount?",
+    "output": "Which year has the highest Senior secured term loan?"
+  },
+  {
+    "input": "What information does the table present?",
+    "output": "What data is being shown in the table and what details can be derived from it?"
+  },
+  {
+    "input": "What is the change in long-term portion of contingent earn-out liabilities for 2018 and 2019?",
+    "output": "What is the difference in the long-term portion of contingent earn-out liabilities between 2018 and 2019?"
+  },
+  {
+    "input": "What were the statutory U.S. Federal tax rates for the years 2019, 2018, and 2017?",
+    "output": "What is the Statutory U.S. Federal tax for 2019, 2018 and 2017 respectively?"
+  },
+  {
+    "input": "What are the respective statutory federal tax rates for the years 2019, 2018, and 2017 in the United States?",
+    "output": "What is the Statutory U.S. Federal tax for 2019, 2018 and 2017 respectively?"
+  },
+  {
+    "input": "How much did the valuation allowance change in 2017?",
+    "output": "What is the change in valuation allowance for 2017?"
+  },
+  {
+    "input": "What was the difference in valuation allowance between 2016 and 2017?",
+    "output": "What is the change in valuation allowance for 2017?"
+  },
+  {
+    "input": "What has been the average amount of Statutory U.S. Federal tax paid annually for the years 2017, 2018, and 2019?",
+    "output": "What is the average Statutory U.S. Federal tax from 2017-2019?"
+  },
+  {
+    "input": "What was the average statutory U.S. federal tax rate for the years 2017-2019?",
+    "output": "What is the average Statutory U.S. Federal tax from 2017-2019?"
+  },
+  {
+    "input": "What is the Working capital, excluding cash and cash equivalents for fiscal years 2019, 2018 and 2017 respectively?",
+    "output": "What is the year-end working capital, excluding cash and cash equivalents, for the fiscal years 2019, 2018, and 2017, respectively?"
+  },
+  {
+    "input": "What is the Working capital, excluding cash and cash equivalents for fiscal years 2019, 2018 and 2017 respectively?",
+    "output": "What is the Working Capital, excluding cash and cash equivalents, for the fiscal years 2019, 2018, and 2017?"
+  },
+  {
+    "input": "What are the average values of Cash and cash equivalents for fiscal years 2019, 2018, and 2017?",
+    "output": "What is the average value for the Cash and cash equivalents for fiscal years 2019, 2018 and 2017?"
+  },
+  {
+    "input": "What are the average values of the Availability under asset-based loan facility for the fiscal years 2019, 2018, and 2017?",
+    "output": "What is the average value for the Availability under asset-based loan facility for fiscal years 2019, 2018 and 2017?"
+  },
+  {
+    "input": "What were the net income (or net losses) for the years 2019, 2018, and 2017 respectively?",
+    "output": "What was the Net (loss) income in 2019, 2018 and 2017 respectively?"
+  },
+  {
+    "input": "What were the respective net losses or incomes for the years 2019, 2018, and 2017?",
+    "output": "What was the Net (loss) income in 2019, 2018 and 2017 respectively?"
+  },
+  {
+    "input": "In which year was Adjusted EBITDA less than 20,000 thousands?",
+    "output": "\"In which specific year, out of the given years, was the Adjusted EBITDA figure lower than 20,000 thousands?\""
+  },
+  {
+    "input": "In which year was Adjusted EBITDA less than 20,000 thousands?",
+    "output": "In which specific year was the Adjusted EBITDA, expressed in thousands, less than 20,000?"
+  },
+  {
+    "input": "What was the percentage increase in the expense incurred for Research and Development in the year 2018 compared to the previous period?",
+    "output": "What was the increase in Research and development expense in 2018?"
+  },
+  {
+    "input": "\"What was the percentage increase in Research and Development (R&D) expense during the year 2018 compared to the previous year?\"",
+    "output": "What was the increase in Research and development expense in 2018?"
+  },
+  {
+    "input": "What were the average research and development expenses in both 2017 and 2018?",
+    "output": "What was the average Research and development expenses in 2017 and 2018?"
+  },
+  {
+    "input": "What were the respective average amounts spent on research and development in 2017 and 2018?",
+    "output": "What was the average Research and development expenses in 2017 and 2018?"
+  },
+  {
+    "input": "What was the change in the Net operating loss carryforwards from 2018 to 2019?",
+    "output": "What was the numerical difference between the Net operating loss carryforwards in 2018 and 2019?"
+  },
+  {
+    "input": "\"In which specific year did the value of Deferred revenue fall below 2,000?\"",
+    "output": "In which year was Deferred revenue less than 2,000?"
+  },
+  {
+    "input": "What is the difference in the amount of accumulated depreciation between March 31, 2018, and March 31, 2019?",
+    "output": "What is the change in Accumulated depreciation from March 31, 2018 to March 31, 2019?"
+  },
+  {
+    "input": "How much did the Accumulated depreciation increase between March 31, 2018 and March 31, 2019?",
+    "output": "What is the change in Accumulated depreciation from March 31, 2018 to March 31, 2019?"
+  },
+  {
+    "input": "What was the percentage change in the General and administrative expenses in 2019 compared to the previous year?",
+    "output": "What was the increase in the General and administrative expenses in 2019?"
+  },
+  {
+    "input": "What was the percentage increase in the General and administrative expenses from 2018 to 2019?",
+    "output": "What was the increase in the General and administrative expenses in 2019?"
+  },
+  {
+    "input": "\"Who has the authority over the management and administration of the company's operations in a specific country?\"",
+    "output": "Which country controls the company's affairs?"
+  },
+  {
+    "input": "\"Who has jurisdiction over the overall management and operations of the company?\"",
+    "output": "Which country controls the company's affairs?"
+  },
+  {
+    "input": "What was the change in the U.S. state taxes, net of federal from 2018 to 2019?",
+    "output": "What was the difference in the amount of state taxes paid in the United States, after deducting federal taxes, from 2018 to 2019?"
+  },
+  {
+    "input": "What was the change in the U.S. state taxes, net of federal from 2018 to 2019?",
+    "output": "What was the difference in the amount of state taxes, after deducting federal taxes, between the years 2018 and 2019 in the United States?"
+  },
+  {
+    "input": "In which year was Tax credits less than 10.0?",
+    "output": "When was the year when tax credits were less than 10.0 units?"
+  },
+  {
+    "input": "In which year was Tax credits less than 10.0?",
+    "output": "In which specific year did the amount of Tax credits fall below 10.0?"
+  },
+  {
+    "input": "What was the exact amount of share-based compensation expense incurred during the year 2019?",
+    "output": "What was the Total share-based compensation expense in 2019?"
+  },
+  {
+    "input": "\"What changes, if any, took place once the IPO concluded?\"",
+    "output": "What became effective upon the closing of the IPO?"
+  },
+  {
+    "input": "What were the initial balances in the years 2019 and 2018, respectively?",
+    "output": "What was the Beginning balance in 2019 and 2018 respectively?"
+  },
+  {
+    "input": "What were the starting balances for the year 2019 and 2018?",
+    "output": "What was the Beginning balance in 2019 and 2018 respectively?"
+  },
+  {
+    "input": "How much did the Company recorded amortization expense for the year ended March 31, 2018?",
+    "output": "What was the amount of amortization expense recorded by the Company for the fiscal year ending on March 31, 2018?"
+  },
+  {
+    "input": "How much did the Company recorded amortization expense for the year ended March 31, 2018?",
+    "output": "What was the amount of amortization expense recorded by the Company for the fiscal year that ended on March 31, 2018?"
+  },
+  {
+    "input": "How much did the Company recorded amortization expense for the year ended March 31, 2017?",
+    "output": "What was the amount of amortization expense recorded by the Company for the fiscal year that ended on March 31, 2017?"
+  },
+  {
+    "input": "What are the main sources of liquidity in financial markets and institutions?",
+    "output": "What are the principal sources of liquidity?"
+  },
+  {
+    "input": "What was the total value of outstanding letters of credit associated with specific operating leases as of March 31, 2018?",
+    "output": "How much was the outstanding letters of credit related to certain operating leases as of March 31, 2018?"
+  },
+  {
+    "input": "What were the revenue retention rates for the years 2019, 2018, and 2017 in chronological order?",
+    "output": "What was the Revenue retention rate in 2019, 2018 and 2017 respectively?"
+  },
+  {
+    "input": "What is the average number of customers in total from 2017 to 2019?",
+    "output": "What is the average Total customers between 2017-2019?"
+  },
+  {
+    "input": "\"In which specific year did the Adjusted EBITDA amount to less than 20,000 thousands?\"",
+    "output": "In which year was Adjusted EBITDA less than 20,000 thousands?"
+  },
+  {
+    "input": "In which specific year did the Adjusted EBITDA amount to less than 20,000 thousands?",
+    "output": "In which year was Adjusted EBITDA less than 20,000 thousands?"
+  },
+  {
+    "input": "What was the amount of Depreciation and amortization expense recorded for the fiscal year that ended on March 31, 2017?",
+    "output": "How much was Depreciation and amortization expense for the years ended March 31, 2017?"
+  },
+  {
+    "input": "What was the total amount of Depreciation and amortization expense recorded for the fiscal years ending on March 31, 2017?",
+    "output": "How much was Depreciation and amortization expense for the years ended March 31, 2017?"
+  },
+  {
+    "input": "What is the net increase or decrease in the Accumulated Amortization balance from March 31, 2018, to March 31, 2019?",
+    "output": "What is the change in Accumulated amortization from March 31, 2018 to March 31, 2019?"
+  },
+  {
+    "input": "What is the net increase or decrease in the amount of Accumulated Amortization between March 31, 2018, and March 31, 2019?",
+    "output": "What is the change in Accumulated amortization from March 31, 2018 to March 31, 2019?"
+  },
+  {
+    "input": "What was the percentage change in the Risk-free interest rate from 2018 to 2019?",
+    "output": "What was the change in the Risk-free interest rate from 2018 to 2019?"
+  },
+  {
+    "input": "What was the average level of expected volatility recorded between 2017 and 2019?",
+    "output": "What was the average Expected volatility between 2017-2019?"
+  },
+  {
+    "input": "What was the average expected volatility during the period of 2017-2019?",
+    "output": "What was the average Expected volatility between 2017-2019?"
+  },
+  {
+    "input": "In which specific year did the grant date fair value per ordinary share fall below 30.0?",
+    "output": "In which year was the Grant date fair value per ordinary share less than 30.0?"
+  },
+  {
+    "input": "When was the year when the grant date fair value per ordinary share fell below 30.0?",
+    "output": "In which year was the Grant date fair value per ordinary share less than 30.0?"
+  },
+  {
+    "input": "\"What was the specific quantity or value of work that was at the intermediate stage of completion in the year 2018?\"",
+    "output": "What was the amount of work in process in 2018?"
+  },
+  {
+    "input": "What was the change in finished goods between 2018 and 2019?",
+    "output": "What was the numerical difference in the amount of finished goods produced between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the specific amount of interest incurred in the year 2018?",
+    "output": "What was the interest cost in 2018?"
+  },
+  {
+    "input": "What was the change in interest cost between 2018 and 2019?",
+    "output": "By how much did the interest cost change from 2018 to 2019?"
+  },
+  {
+    "input": "For how many years did the actuarial loss surpass $1,000 thousand and receive recognition?",
+    "output": "How many years did the recognized actuarial loss exceed $1,000 thousand?"
+  },
+  {
+    "input": "What was the total amount of insurance proceeds received during the fiscal year that ended on August 31, 2019?",
+    "output": "What were the insurance proceeds for the fiscal year ended August 31, 2019?"
+  },
+  {
+    "input": "\"What was the total amount of insurance claims paid out during the fiscal year that concluded on August 31, 2019?\"",
+    "output": "What were the insurance proceeds for the fiscal year ended August 31, 2019?"
+  },
+  {
+    "input": "What was the specific difference in the amount of Restructuring and related charges incurred by the company in the years 2018 and 2019?",
+    "output": "What was the change in Restructuring and related charges between 2018 and 2019?"
+  },
+  {
+    "input": "What were the reductions for tax positions  primarily related to?",
+    "output": "\"What was the primary reason for the reductions in tax positions?\""
+  },
+  {
+    "input": "What is the company's definition of working capital and how is it calculated?",
+    "output": "How does the company define working capital?"
+  },
+  {
+    "input": "What was the amount of assets in 2019?",
+    "output": "What were the total assets in 2019?"
+  },
+  {
+    "input": "What was the value of the total assets in the year 2019?",
+    "output": "What were the total assets in 2019?"
+  },
+  {
+    "input": "What is the range of years covered in the table?",
+    "output": "What are the years included in the table?"
+  },
+  {
+    "input": "What was the change in Current installments of notes payable and long-term debt between 2018 and 2019?",
+    "output": "What was the difference in the amount of current installments of notes payable and long-term debt between the year 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in Current installments of notes payable and long-term debt between 2018 and 2019?",
+    "output": "What is the difference in the amount of current installments of notes payable and long-term debt between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the stockholders' equity of Total Jabil Inc. from 2018 to 2019?",
+    "output": "What was the percentage change in Total Jabil Inc. stockholders’ equity between 2018 and 2019?"
+  },
+  {
+    "input": "What was the specific net revenue figure reported?",
+    "output": "What was the net revenue as reported?"
+  },
+  {
+    "input": "What was the operating income as reported?",
+    "output": "What was the reported operating income amount?"
+  },
+  {
+    "input": "What was the specific distinction between the reported net revenue and cost of revenue? Please elaborate on the differences observed between these two financial measures.",
+    "output": "What was the difference between net revenue and cost of revenue as reported?"
+  },
+  {
+    "input": "What was the distinction between the net revenue and cost of revenue as stated in the report, and can you provide further details to assist in answering?",
+    "output": "What was the difference between net revenue and cost of revenue as reported?"
+  },
+  {
+    "input": "What was the operating income balance expressed as a ratio of the amount reported, before considering the implementation of ASU 2014-09?",
+    "output": "What was the operating income balance without the adoption of ASU 2014-09 as a ratio of the amount as reported?"
+  },
+  {
+    "input": "What was the ratio of the operating income balance without the adoption of ASU 2014-09 to the amount as reported?",
+    "output": "What was the operating income balance without the adoption of ASU 2014-09 as a ratio of the amount as reported?"
+  },
+  {
+    "input": "By what amount did the gross profit increase or decrease from May 2018 to August 2018?",
+    "output": "What was the change in gross profit between May 2018 and August 2018?"
+  },
+  {
+    "input": "What are the distinctions between the September 2018 balances of contract assets and contract liabilities? Please provide a comprehensive explanation while keeping all the details intact.",
+    "output": "What is the difference between the balance in September 2018 for contract assets and contract liabilities?"
+  },
+  {
+    "input": "What was the percentage change for Other accrued expenses due to adjustments by the new standard?",
+    "output": "What was the percentage change specifically for Other accrued expenses as a result of adjustments made under the new standard?"
+  },
+  {
+    "input": "What was the percentage change for Other accrued expenses due to adjustments by the new standard?",
+    "output": "What was the percentage change in Other accrued expenses after incorporating adjustments made by the new standard? Please provide the specific percentage change."
+  },
+  {
+    "input": "What is the exact amount of net revenue generated in the fiscal year 2019?",
+    "output": "What was the net revenue in 2019?"
+  },
+  {
+    "input": "What was the total revenue generated during the year 2019 after deducting all expenses and costs?",
+    "output": "What was the net revenue in 2019?"
+  },
+  {
+    "input": "What was the gross profit in 2018?",
+    "output": "What was the gross profit generated in the year 2018?"
+  },
+  {
+    "input": "What was the operating income in 2017?",
+    "output": "What was the operating income for the year 2017?"
+  },
+  {
+    "input": "What was the change in gross profit between 2017 and 2018?",
+    "output": "What was the difference in gross profit from 2017 to 2018?"
+  },
+  {
+    "input": "What was the change in Prepaid expenses and other current assets due to the adoption of ASU 2014-09?",
+    "output": "\"What was the impact on Prepaid expenses and other current assets resulting from the adoption of ASU 2014-09? Please provide details and explain the specific changes that occurred as a result of this accounting standard.\""
+  },
+  {
+    "input": "How many reported segments have a value exceeding $2,000,000?",
+    "output": "How many segments as reported exceed $2,000,000 thousand?"
+  },
+  {
+    "input": "What were the items incorporated within the amounts for the fiscal year that concluded on August 31, 2017?",
+    "output": "What did the amounts in fiscal year ended August 31, 2017 include?"
+  },
+  {
+    "input": "What expenses or revenues were included in the amounts reported for the fiscal year that ended on August 31, 2017?",
+    "output": "What did the amounts in fiscal year ended August 31, 2017 include?"
+  },
+  {
+    "input": "What was the difference in the costs of severance and employee benefits for the company from 2017 to 2018?",
+    "output": "What was the change in Employee severance and benefit costs between 2017 and 2018?"
+  },
+  {
+    "input": "Which periods does the table include?",
+    "output": "What time periods are encompassed in the table's content?"
+  },
+  {
+    "input": "\"What specific items or transactions were encompassed within the category of acquisitions and adjustments?\"",
+    "output": "What did Acquisitions and adjustments include?"
+  },
+  {
+    "input": "What were the variations in acquisitions and adjustments made in fiscal year 2018 between the Electronic Manufacturing Services (EMS) and Document Management Systems (DMS)?",
+    "output": "What is the difference in Acquisitions and adjustments in fiscal 2018 between EMS and DMS?"
+  },
+  {
+    "input": "What are the distinctions in acquisitions and adjustments, specifically pertaining to fiscal year 2018, when comparing EMS (Emergency Medical Services) and DMS (Document Management Systems)?",
+    "output": "What is the difference in Acquisitions and adjustments in fiscal 2018 between EMS and DMS?"
+  },
+  {
+    "input": "What was the balance of EMS as in 2018 as a percentage of the total balance at the end of fiscal 2018?",
+    "output": "What percentage of the total balance at the end of fiscal 2018 represented the balance of EMS in 2018?"
+  },
+  {
+    "input": "What categories of income or loss are included in the table's income (loss) figures before accounting for income tax expense?",
+    "output": "What types of income (loss) before income tax expense is provided by the table?"
+  },
+  {
+    "input": "What was the foreign income (loss) in 2018?",
+    "output": "What was the amount of foreign income or loss for the year 2018?"
+  },
+  {
+    "input": "What was the change in Foreign income (loss) between 2018 and 2019?",
+    "output": "What was the difference in the amount of income or loss generated from foreign sources, comparing the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in Foreign income (loss) between 2018 and 2019?",
+    "output": "What was the difference in the amount of foreign income or loss reported between 2018 and 2019?"
+  },
+  {
+    "input": "\"How many annual periods experienced a Foreign income loss that was greater than $800,000 thousand dollars?\"",
+    "output": "How many years did Foreign income (loss) exceed $800,000 thousand?"
+  },
+  {
+    "input": "For how many years was the amount of Foreign income (loss) greater than $800,000 thousand?",
+    "output": "How many years did Foreign income (loss) exceed $800,000 thousand?"
+  },
+  {
+    "input": "What was the change in Accumulated benefit obligation between 2018 and 2019?",
+    "output": "What was the specific numerical difference in the Accumulated Benefit Obligation (ABO) amount between the fiscal years of 2018 and 2019?"
+  },
+  {
+    "input": "\"What were the specific expenses incurred by the company for employee severance and benefits during the year 2018?\"",
+    "output": "What were the Employee severance and benefit costs in 2018?"
+  },
+  {
+    "input": "What were the severance and benefit costs for employees in the year 2018?",
+    "output": "What were the Employee severance and benefit costs in 2018?"
+  },
+  {
+    "input": "What was the precise difference in terms of employee severance and benefit costs from 2018 to 2019?",
+    "output": "What was the change in Employee severance and benefit costs between 2018 and 2019?"
+  },
+  {
+    "input": "How many years did cash proceeds received exceed $5,000 million?",
+    "output": "How many years have the cash proceeds received been above $5,000 million?"
+  },
+  {
+    "input": "How many years did cash proceeds received exceed $5,000 million?",
+    "output": "For how many years did the cash proceeds received exceed $5,000 million?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in pre-tax losses on the sale of receivables from 2017 to 2018?",
+    "output": "What was the percentage change in Pre-tax losses on sale of receivables between 2017 and 2018?"
+  },
+  {
+    "input": "What was the difference in the amount of money spent on repurchasing shares in 2016 compared to 2017?",
+    "output": "What was the change in share repurchases between 2016 and 2017?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in dividend payments from 2018 to 2019?",
+    "output": "What was the percentage change in dividends paid between 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in dividends paid from 2018 to 2019?",
+    "output": "What was the percentage change in dividends paid between 2018 and 2019?"
+  },
+  {
+    "input": "What was the specific topic of discussion in the DMS segment?",
+    "output": "What was the DMS segment focused on?"
+  },
+  {
+    "input": "What was the change in the net revenue from EMS between 2018 and 2019?",
+    "output": "What was the difference in net revenue generated by EMS between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the change in the net revenue from EMS between 2018 and 2019?",
+    "output": "What was the exact difference in net revenue for EMS between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the average year-on-year percentage change in total net revenue from 2017-2019?",
+    "output": "What was the average annual percentage increase or decrease in the total net revenue from 2017 to 2019?"
+  },
+  {
+    "input": "What was the average year-on-year percentage change in total net revenue from 2017-2019?",
+    "output": "What was the average percentage change in net revenue from year to year for the total period between 2017 and 2019?"
+  },
+  {
+    "input": "What is the difference in the balance of Employee Severance and Benefit Costs as of August between 2017 and 2018?",
+    "output": "What is the change in the balance as of August in Employee Severance and Benefit Costs between 2017 and 2018?"
+  },
+  {
+    "input": "What were the lease costs in 2017 as a percentage of the total balance in 2017?",
+    "output": "\"What was the percentage of lease costs in 2017 compared to the total balance in 2017?\""
+  },
+  {
+    "input": "What were the lease costs in 2017 as a percentage of the total balance in 2017?",
+    "output": "\"What proportion of the total balance in 2017 was allocated towards lease costs?\""
+  },
+  {
+    "input": "\"What was the amount of the domestic-federal income tax expense (benefit) recorded for the year 2017?\"",
+    "output": "What was the current domestic-federal income tax expense (benefit) in 2017?"
+  },
+  {
+    "input": "What was the amount of domestic-federal income tax expense (benefit) recorded in the financial statements for the year 2017?",
+    "output": "What was the current domestic-federal income tax expense (benefit) in 2017?"
+  },
+  {
+    "input": "What was the amount of income tax expense or benefit incurred for domestic operations in 2019?",
+    "output": "What was the current domestic-state income tax expense (benefit) in 2019?"
+  },
+  {
+    "input": "In 2019, what was the amount of income tax expense or benefit incurred at the domestic state level?",
+    "output": "What was the current domestic-state income tax expense (benefit) in 2019?"
+  },
+  {
+    "input": "What is the change in the company's domestic-state income tax expense (benefit) between 2018 and 2019?",
+    "output": "What is the difference in the domestic-state income tax expense (benefit) for the company from 2018 to 2019?"
+  },
+  {
+    "input": "What is the change in the company's domestic-state income tax expense (benefit) between 2018 and 2019?",
+    "output": "What was the difference in the domestic-state income tax expense (benefit) of the company from 2018 to 2019?"
+  },
+  {
+    "input": "What were the types of land and the associated improvements that existed in the year 2019?",
+    "output": "What were the land and improvements in 2019?"
+  },
+  {
+    "input": "What was the total monetary worth of all buildings in the year 2018?",
+    "output": "What was the value of buildings in 2018?"
+  },
+  {
+    "input": "What was the percentage change in total property, plant and equipment between 2018 and 2019?",
+    "output": "What was the percentage increase or decrease in the total value of property, plant, and equipment from 2018 to 2019?"
+  },
+  {
+    "input": "Based on what factors or criteria were the fair value estimates for Level 3 determined?",
+    "output": "What were the Level 3 fair value estimates based on?"
+  },
+  {
+    "input": "What factors or criteria were taken into consideration when determining the Level 3 fair value estimates?",
+    "output": "What were the Level 3 fair value estimates based on?"
+  },
+  {
+    "input": "What was the change in the fair value for the 3.950% Senior Notes between 2018 and 2019?",
+    "output": "What was the difference in fair value for the 3.950% Senior Notes from 2018 to 2019?"
+  },
+  {
+    "input": "What is the significance of the amount awarded as shares, which is contingent upon meeting specific performance criteria?",
+    "output": "What does the amount for shares granted that are based on the achievement of certain performance criteria represent?"
+  },
+  {
+    "input": "What was the percentage difference in the number of shares from 2018 to 2019 and how do I calculate it accurately?",
+    "output": "What was the percentage change in the number of shares between 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in Expected dividend yield between 2017 and 2018?",
+    "output": "What was the percentage adjustment in the expected dividend yield from 2017 to 2018?"
+  },
+  {
+    "input": "What were the restricted stock units in 2019?",
+    "output": "What is the significance of restricted stock units in 2019, and can you provide more details about them?"
+  },
+  {
+    "input": "What were the details and features of the Employee stock purchase plan (ESPP) implemented in 2018?",
+    "output": "What was the Employee stock purchase plan in 2018?"
+  },
+  {
+    "input": "What was the change in Other stock-based compensation expenses between 2017 and 2018?",
+    "output": "What was the difference in the expenses related to stock-based compensation for Other category between the years 2017 and 2018?"
+  },
+  {
+    "input": "\"What was the difference in the number of restricted stock units awarded by a company in 2018 compared to 2019?\"",
+    "output": "What was the change in the restricted stock units between 2018 and 2019?"
+  },
+  {
+    "input": "What was the net change in the number of restricted stock units from 2018 to 2019?",
+    "output": "What was the change in the restricted stock units between 2018 and 2019?"
+  },
+  {
+    "input": "What does the key management comprise of?",
+    "output": "What is included in key management?"
+  },
+  {
+    "input": "What is the combined value of all shares held in joint ventures as of December 31, 2019?",
+    "output": "What is the total value of shares at 31 December 2019 for each joint venture?"
+  },
+  {
+    "input": "What is the precise percentage change in revenue losses experienced by the United Kingdom from the year 2018 to 2019?",
+    "output": "What is the percentage change in the revenue losses in UK from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage increase or decrease in the revenue losses experienced in the United Kingdom from 2018 to 2019?",
+    "output": "What is the percentage change in the revenue losses in UK from 2018 to 2019?"
+  },
+  {
+    "input": "What was the net amount of debt in 2019?",
+    "output": "What is the Net debt in 2019?"
+  },
+  {
+    "input": "What was the net debt amount in the year 2019?",
+    "output": "What is the Net debt in 2019?"
+  },
+  {
+    "input": "What is the percentage change in the net debt from 2018 to 2019?",
+    "output": "What is the percentage increase or decrease in the net debt from 2018 to 2019?"
+  },
+  {
+    "input": "What were the fees payable by the Group's joint ventures in 2019?",
+    "output": "What were the specific fees that the Group's joint ventures had to pay in the year 2019?"
+  },
+  {
+    "input": "What were the fees payable by the Group's joint ventures in 2019?",
+    "output": "What were the fees that the Group's joint ventures had to pay in 2019? Could you provide details on the specific amounts payable by each joint venture?"
+  },
+  {
+    "input": "What proportion of the total borrowings in 2019, including both the principal and interest, will be due within a period of 5 years?",
+    "output": "What is the percentage of borrowings (including interest) that matures over 5 years in the total borrowings in 2019?"
+  },
+  {
+    "input": "What was the net asset value (NAV) of EPRA in 2018?",
+    "output": "What is the EPRA NAV in 2018?"
+  },
+  {
+    "input": "What is the precise percentage difference in the overall investment made in joint ventures for the years 2018 and 2019?",
+    "output": "What is the percentage change in the total investment in joint ventures from 2018 to 2019?"
+  },
+  {
+    "input": "What was the interest rate protection provided in 2019?",
+    "output": "What is the interest rate protection in 2019?"
+  },
+  {
+    "input": "What is the current interest rate protection in 2019 and how does it work?",
+    "output": "What is the interest rate protection in 2019?"
+  },
+  {
+    "input": "What is the percentage difference in the fair value of unallocated swaps between 2018 and 2019?",
+    "output": "What is the percentage change in the fair value of unallocated swaps from 2018 to 2019?"
+  },
+  {
+    "input": "What was the difference in the level of interest rate protection from 2018 to 2019?",
+    "output": "What is the change in interest rate protection between 2018 and 2019?"
+  },
+  {
+    "input": "What is the year-on-year percentage change in the calculation of contingent rents based on tenants' turnover from 2018 to 2019?",
+    "output": "What is the percentage change in the amount of contingent rents calculated by reference to tenants' turnover from 2018 to 2019?"
+  },
+  {
+    "input": "In what year will the future minimum lease amounts receivable be higher than usual?",
+    "output": "In which year is there  higher future minimum lease amounts receivable?"
+  },
+  {
+    "input": "What were the dividends in respect of these shares have been waived by agreement in 2018?",
+    "output": "What was the specific agreement regarding the waiver of dividends on these shares in 2018?"
+  },
+  {
+    "input": "What is the percentage change in the disposals value from 2018 to 2019?",
+    "output": "What is the percentage difference in the value of disposals from 2018 to 2019, and how can it be calculated?"
+  },
+  {
+    "input": "What is the current total quantity of units that the Company possesses?",
+    "output": "What is the total number of units the Company has?"
+  },
+  {
+    "input": "What is the weighted average exercise price for the outstanding options in the Save As You Earn Scheme as of 31 December 2019?",
+    "output": "What is the weighted average exercise prices of the outstanding options exercisable at 31 December 2019 for the Save As You Earn Scheme?"
+  },
+  {
+    "input": "What is the calculated average exercise prices of the options that are currently outstanding and can be exercised as of December 31, 2019, under the Save As You Earn Scheme?",
+    "output": "What is the weighted average exercise prices of the outstanding options exercisable at 31 December 2019 for the Save As You Earn Scheme?"
+  },
+  {
+    "input": "What is the percentage change in the total provided deferred tax provision from 1 January 2018 to 31 December 2019?",
+    "output": "What is the precise percentage change in the total amount of deferred tax provision that was provided from 1 January 2018 to 31 December 2019?"
+  },
+  {
+    "input": "What was the total amount of administration expenses incurred in the year 2019?",
+    "output": "What is the administration expense in 2019?"
+  },
+  {
+    "input": "What is encompassed in diluted shares?",
+    "output": "What do the diluted shares include?"
+  },
+  {
+    "input": "What is the percentage of counterparty #5 exposure in the total exposure in 2019?",
+    "output": "What is the proportion of the exposure to counterparty #5 compared to the total exposure in the year 2019?"
+  },
+  {
+    "input": "What is the percentage of counterparty #5 exposure in the total exposure in 2019?",
+    "output": "What is the proportion of counterparty #5's exposure compared to the total exposure in the year 2019?"
+  },
+  {
+    "input": "What is the percentage difference in the combined value of cash deposits and derivative financial instrument assets between 2018 and 2019?",
+    "output": "What is the percentage change in the sum of cash deposits and derivative financial instrument assets from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage difference in the loss before tax, including joint ventures and associates, from 2018 to 2019?",
+    "output": "What is the percentage change in the loss before tax, joint ventures and associates from 2018 to 2019?"
+  },
+  {
+    "input": "What is the net exposure to foreign exchange risk (euro) in 2018?",
+    "output": "\"What is the total amount of foreign exchange risk (in euros) faced by a company in the year 2018?\""
+  },
+  {
+    "input": "What is the impact on equity attributable to owners of the Group when there is a 10% depreciation in foreign exchange rates from 2018 to 2019? Specifically, how does this affect the negative movement of money to equity?",
+    "output": "What is the change in the negative movement of money to equity attributable to owners of the Group when there is a 10 per cent depreciation in foreign exchange rates from 2018 to 2019?"
+  },
+  {
+    "input": "What is the impact on the Group's owners' equity when foreign exchange rates depreciate by 10% between 2018 and 2019?",
+    "output": "What is the change in the negative movement of money to equity attributable to owners of the Group when there is a 10 per cent depreciation in foreign exchange rates from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage difference in the euro amount drawn from 2018 to 2019?",
+    "output": "What is the percentage change in the amount drawn in euros from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage increase or decrease in the euro amount withdrawn between 2018 and 2019?",
+    "output": "What is the percentage change in the amount drawn in euros from 2018 to 2019?"
+  },
+  {
+    "input": "\"In which specific year did the total debt reach its highest level, and is it greater than the total debt in any other year?\"",
+    "output": "In which year is there a greater total debt?"
+  },
+  {
+    "input": "In which year does the total debt amount exceed or surpass the debt levels of other years?",
+    "output": "In which year is there a greater total debt?"
+  },
+  {
+    "input": "What is the percentage change in the total debt from 2018 to 2019?",
+    "output": "What is the percentage increase or decrease in the total debt from 2018 to 2019?"
+  },
+  {
+    "input": "\"What were the average salaries for administrative positions in fiscal years 2019 and 2018?",
+    "output": "What is the average Administrative salaries for fiscal 2019 and 2018?"
+  },
+  {
+    "input": "\"What was the average annual salary for administrative employees in fiscal years 2019 and 2018?",
+    "output": "What is the average Administrative salaries for fiscal 2019 and 2018?"
+  },
+  {
+    "input": "What is the weekly processing capacity comparison between the plants located in Laurel, Mississippi and Collins, Mississippi?",
+    "output": "What is the difference in capacity per week between the processing plants at Laurel, Mississippi and Collins, Mississippi? "
+  },
+  {
+    "input": "What are the average amounts for prepaid insurance in both fiscal years 2019 and 2018?",
+    "output": "What is the average Prepaid insurance for fiscal years 2019 and 2018?"
+  },
+  {
+    "input": "What is the difference in the amount of Total prepaid expenses recorded in the financial statements for fiscal years 2019 and 2018? I need to determine the change in prepaid expenses for these two specific years.",
+    "output": "What is the change in Total prepaid expenses between fiscal years 2019 and 2018?"
+  },
+  {
+    "input": "What is the difference in the total amount of prepaid expenses between fiscal years 2019 and 2018?",
+    "output": "What is the change in Total prepaid expenses between fiscal years 2019 and 2018?"
+  },
+  {
+    "input": "What is the difference in the statutory income tax rates between fiscal years 2019 and 2018, and how does this impact income taxes?",
+    "output": "What is the change in Income taxes at statutory rate between fiscal years 2019 and 2018?"
+  },
+  {
+    "input": "What is the net sales from Fresh, vacuum-sealed chicken for fiscal years 2019 to 2017 respectively?",
+    "output": "What were the net sales of Fresh, vacuum-sealed chicken for fiscal years 2019, 2018, and 2017?"
+  },
+  {
+    "input": "What is the net sales from Fresh, chill-packed chicken for fiscal years 2019 to 2017 respectively?",
+    "output": "What are the net sales figures for Fresh, chill-packed chicken for fiscal years 2019, 2018, and 2017, respectively?"
+  },
+  {
+    "input": "What is the net sales from Fresh, chill-packed chicken for fiscal years 2019 to 2017 respectively?",
+    "output": "How much revenue was generated from the sales of Fresh, chill-packed chicken for the fiscal years of 2019, 2018, and 2017, respectively?"
+  },
+  {
+    "input": "What is the net sales from Fresh, ice-packed chicken for fiscal years 2019 to 2017 respectively?",
+    "output": "What were the net sales of Fresh, ice-packed chicken for fiscal years 2019, 2018, and 2017?"
+  },
+  {
+    "input": "What was the purpose of the company's operating leases, and how did they contribute to the company's operations or financial performance?",
+    "output": "What was the company's operating leases for?"
+  },
+  {
+    "input": "What is the discrepancy in the overall amount when comparing revenues gained from revolving loan facility fees to expenses incurred from operating leases?",
+    "output": "What was the difference in the total between Revolving loan facility fees and operating leases? "
+  },
+  {
+    "input": "What was the change in the Net cash used in investing activities between 2017 and 2019?",
+    "output": "What was the difference in the amount of net cash used in investing activities from 2017 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in the Net cash used in financing activities between 2018 and 2019?",
+    "output": "What was the percentage difference in the amount of cash used in financing activities from 2018 to 2019?"
+  },
+  {
+    "input": "Which years does the table provide information for the company's Selected Consolidated Statements of Operations Data?",
+    "output": "For which specific time periods does the table present the company's Selected Consolidated Statements of Operations Data?"
+  },
+  {
+    "input": "What was the exact amount of revenue generated in the year 2015?",
+    "output": "What was the total revenue in 2015?"
+  },
+  {
+    "input": "What was the specific amount of operating profit generated in the year 2017?",
+    "output": "What was the operating profit in 2017?"
+  },
+  {
+    "input": "What was the exact amount of operating profit earned during the year 2017?",
+    "output": "What was the operating profit in 2017?"
+  },
+  {
+    "input": "What is the precise percentage difference in operating profit from 2015 to 2016?",
+    "output": "What was the percentage change in operating profit between 2015 and 2016?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in operating profit from 2015 to 2016?",
+    "output": "What was the percentage change in operating profit between 2015 and 2016?"
+  },
+  {
+    "input": "What was the cash used in Software in 2019?",
+    "output": "How much money was spent on Software in the year 2019?"
+  },
+  {
+    "input": "What was the net income in 2019?",
+    "output": "What was the exact net income amount for the fiscal year of 2019?"
+  },
+  {
+    "input": "What was the specific amount of interest that was recorded as an expense in the year 2018?",
+    "output": "What was the interest expense in 2018?"
+  },
+  {
+    "input": "What is the amount of money paid as interest in 2018?",
+    "output": "What was the interest expense in 2018?"
+  },
+  {
+    "input": "For how many consecutive years has the net income exceeded $100,000 thousand?",
+    "output": "How many years did net income exceed $100,000 thousand?"
+  },
+  {
+    "input": "What was the change in the interest expense between 2017 and 2018?",
+    "output": "What were the differences in the amount of interest expense incurred in 2017 and 2018?"
+  },
+  {
+    "input": "How much was the change in fair value of the company's servicing asset that was included in the fees charged for servicing?",
+    "output": "How much was the included change in fair value of the company's servicing asset included in its servicing fees?"
+  },
+  {
+    "input": "For how many consecutive years have Interchange fees surpassed $50,000?",
+    "output": "How many years did Interchange fees exceed $50,000 thousand?"
+  },
+  {
+    "input": "How many years have the Interchange fees been over $50,000 thousand?",
+    "output": "How many years did Interchange fees exceed $50,000 thousand?"
+  },
+  {
+    "input": "What was the difference in the fees charged for transactions in 2017 compared to 2019? How did the transaction fees change between 2017 and 2019?",
+    "output": "What was the change in the transaction fees between 2017 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in the total revenue between 2018 and 2019?",
+    "output": "What is the percentage increase or decrease in the total revenue from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in the total revenue between 2018 and 2019?",
+    "output": "What was the percentage difference in the overall revenue from 2018 to 2019?"
+  },
+  {
+    "input": "How much did the company invest in partnerships during the year 2018?",
+    "output": "What was the company's investment in partnership in 2018?"
+  },
+  {
+    "input": "How much money did the company invest in partnership during the year 2018?",
+    "output": "What was the company's investment in partnership in 2018?"
+  },
+  {
+    "input": "What is the significance and meaning of the number of Settlements?",
+    "output": "What does the amount of Settlements represent?"
+  },
+  {
+    "input": "What is the significance or meaning of the number of Settlements?",
+    "output": "What does the amount of Settlements represent?"
+  },
+  {
+    "input": "How many years did the face value of term loan exceed $300,000 thousand?",
+    "output": "For how many years has the term loan been valued above $300,000 thousand?"
+  },
+  {
+    "input": "How many years did the face value of term loan exceed $300,000 thousand?",
+    "output": "How many years has the face value of the term loan been consistently higher than $300,000 thousand?"
+  },
+  {
+    "input": "What was the change in the Loan Servicing Portfolio between 2017 and 2018?",
+    "output": "What was the difference in the size of the Loan Servicing Portfolio from 2017 to 2018?"
+  },
+  {
+    "input": "What was the total costs and expenses?",
+    "output": "What was the overall amount of costs and expenses associated with the given context or situation?"
+  },
+  {
+    "input": "What was the exact value of the restricted cash in the year 2017?",
+    "output": "What was the amount of restricted cash in 2017?"
+  },
+  {
+    "input": "\"What is the range of years covered in the table?\"",
+    "output": "Which years does the table provide?"
+  },
+  {
+    "input": "What was the change in Cash and cash equivalents between 2018 and 2019?",
+    "output": "What was the difference in the amount of Cash and cash equivalents between 2018 and 2019?"
+  },
+  {
+    "input": "For how long did the amount of Restricted Cash surpass $200,000 thousand?",
+    "output": "How many years did Restricted Cash exceed $200,000 thousand?"
+  },
+  {
+    "input": "What was the total quantity of Furniture sold or produced in the year 2018?",
+    "output": "What was the amount of Furniture in 2018?"
+  },
+  {
+    "input": "What was the total quantity or number of furniture items in the year 2018?",
+    "output": "What was the amount of Furniture in 2018?"
+  },
+  {
+    "input": "What was the total quantity of computer hardware devices manufactured or sold globally in the year 2019?",
+    "output": "What was the amount of Computer hardware in 2019?"
+  },
+  {
+    "input": "How much computer hardware was present in the year 2019?",
+    "output": "What was the amount of Computer hardware in 2019?"
+  },
+  {
+    "input": "What was the change in the amount of Software between 2018 and 2019?",
+    "output": "What was the difference in the quantity of Software from 2018 to 2019, specifically looking at the change in amount?"
+  },
+  {
+    "input": "What was the change in the amount of Software between 2018 and 2019?",
+    "output": "How much did the amount of Software change from 2018 to 2019?"
+  },
+  {
+    "input": "What was the amount of Receipts in 2017?",
+    "output": "How many receipts were recorded in the year 2017?"
+  },
+  {
+    "input": "What was the amount of Receipts in 2017?",
+    "output": "\"How many total receipts were recorded in the year 2017?\""
+  },
+  {
+    "input": "What was the change in the amount of Settlements between 2017 and 2019?",
+    "output": "What was the net difference in the number of Settlements from 2017 to 2019?"
+  },
+  {
+    "input": "What was the change in the amount of Settlements between 2017 and 2019?",
+    "output": "What was the difference in the number of Settlements from 2017 to 2019 and how did it change over the two-year period?"
+  },
+  {
+    "input": "What was the exact amount of revenue generated in the year 2018?",
+    "output": "What was the total revenue in 2018?"
+  },
+  {
+    "input": "What is the total amount of money earned in 2018?",
+    "output": "What was the total revenue in 2018?"
+  },
+  {
+    "input": "\"What was the specific financial figure measuring earnings from regular business operations, known as operating profit, during the year 2019?\"",
+    "output": "What was the operating profit in 2019?"
+  },
+  {
+    "input": "What was the operating profit in the first quarter?",
+    "output": "What is the exact amount of operating profit recorded in the first quarter of the fiscal year?"
+  },
+  {
+    "input": "What was the specific amount of net income generated in the third quarter?",
+    "output": "What was the net income in the third quarter?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the basic earnings per share of Class A common stock from the first quarter to the second quarter?",
+    "output": "What was the percentage change in the basic earnings per share of Class A common stock between the first and second quarter?"
+  },
+  {
+    "input": "\"What is the range of years covered in the table?\"",
+    "output": "Which years does the table provide?"
+  },
+  {
+    "input": "What was the difference in the amount of net income recorded between the years 2017 and 2018?",
+    "output": "What was the change in net income between 2017 and 2018?"
+  },
+  {
+    "input": "\"What is the meaning and significance of net fair value changes recognized in other gains (losses)?\"",
+    "output": "What does the net Fair value changes recognized in other gains (losses) represent?"
+  },
+  {
+    "input": "What is the representation of net fair value changes recognized in other gains (losses)?",
+    "output": "What does the net Fair value changes recognized in other gains (losses) represent?"
+  },
+  {
+    "input": "What was the beginning balance in 2018?",
+    "output": "\"The starting balance for the year 2018, specifically referring to financial records or accounts, is requested. Please provide the initial amount present at the beginning of that year.\""
+  },
+  {
+    "input": "Which years did the ending balance exceed $3,000 thousand?",
+    "output": "In which years was the ending balance higher than $3,000 thousand?"
+  },
+  {
+    "input": "What is the percentage of fair value time deposits compared to the overall fair value of marketable securities?",
+    "output": "What percentage of the total fair value marketable securities is made up of fair value time deposits?"
+  },
+  {
+    "input": "\"What is the current interest rate specified in the credit facility agreement that governs our credit arrangement?\"",
+    "output": "What is the interest rate on the credit facility agreement?"
+  },
+  {
+    "input": "\"What is the specific interest rate specified in the credit facility agreement that governs our borrowing terms?\"",
+    "output": "What is the interest rate on the credit facility agreement?"
+  },
+  {
+    "input": "What factors contributed to the increase in research and development expenses in 2019?",
+    "output": "What are the reasons for higher research and development expense in 2019?"
+  },
+  {
+    "input": "What is the net difference in research and development expense between 2019 and 2017?",
+    "output": "What is the exact amount of change in research and development expense between the fiscal years of 2019 and 2017?"
+  },
+  {
+    "input": "How much were the marketable securities worth in 2019?",
+    "output": "What was the amount of marketable securities in 2019?"
+  },
+  {
+    "input": "What was the precise value of marketable securities held in the year 2019?",
+    "output": "What was the amount of marketable securities in 2019?"
+  },
+  {
+    "input": "What is the difference in the total amount of shareholders' equity between the years 2018 and 2019?",
+    "output": "What is the change in total shareholders' equity from 2018 to 2019?"
+  },
+  {
+    "input": "How much did the total shareholders' equity change between 2018 and 2019?",
+    "output": "What is the change in total shareholders' equity from 2018 to 2019?"
+  },
+  {
+    "input": "How does the net (loss) income of 2019 compare to the net (loss) income of 2018?",
+    "output": "What is the difference between Net (loss) income in 2018 and 2019?"
+  },
+  {
+    "input": "What was the exact percentage increase in gross profit from the year 2018 to the year 2019?",
+    "output": "What was the percentage increase in gross profit from 2018 to 2019?"
+  },
+  {
+    "input": "What are the different components of expenses that need to be considered when starting production?",
+    "output": "What are components of production start-up expense?"
+  },
+  {
+    "input": "What is the total change in expenditure for initiating production between the years 2019 and 2017?",
+    "output": "What is the net difference in production start-up expense between 2019 and 2017?"
+  },
+  {
+    "input": "What is the difference in net sales amount in 2019 and 2018?",
+    "output": "What is the difference in the net sales amount between the years 2019 and 2018? Please provide details and figures to compare the net sales amounts of these two specific years."
+  },
+  {
+    "input": "What is the value of foreign government obligations as of December 31, 2019?",
+    "output": "What is the total amount of foreign government obligations recorded as of December 31, 2019?"
+  },
+  {
+    "input": "What is the comparative value of money market funds and foreign debt as of December 31, 2019, and how do they differ?",
+    "output": "What is the difference in the value of money market funds and foreign debt as of December 31, 2019?"
+  },
+  {
+    "input": "What is the increase in cash provided by operating activities from 2018 to 2019?",
+    "output": "How much did the cash provided by operating activities increase from 2018 to 2019?"
+  },
+  {
+    "input": "What is the difference in net cash and cash equivalents between 2019 and 2018?",
+    "output": "What is the change in net cash and cash equivalents between the fiscal years 2019 and 2018?"
+  },
+  {
+    "input": "\"What specific items or responsibilities are typically considered as part of other obligations?\"",
+    "output": "What are included in other obligations?"
+  },
+  {
+    "input": "What does the category of \"other obligations\" encompass and include?",
+    "output": "What are included in other obligations?"
+  },
+  {
+    "input": "Why was $72.2 million of unrecognized tax benefits excluded?",
+    "output": "Why were the $72.2 million in unrecognized tax benefits excluded from something?"
+  },
+  {
+    "input": "What percentage of the total contractual obligations is made up of total long-term debt obligations?",
+    "output": "What is the proportion of the overall contractual obligations that consists specifically of the total amount owed in long-term debt obligations?"
+  },
+  {
+    "input": "What is the distinction between the total amount paid in interest and the total sum of operating lease obligations?",
+    "output": "What difference between total interest payments and total operating lease obligations?"
+  },
+  {
+    "input": "What is the interest rate for the term loan that I am inquiring about?",
+    "output": "What is the interest rate of the term loan?"
+  },
+  {
+    "input": "What are the differences in the amounts of notes receivable recorded in 2018 and 2019?",
+    "output": "What is the difference between notes receivables from 2018 to 2019?"
+  },
+  {
+    "input": "What is the precise percentage by which the total prepaid expenses and other current assets increased from 2018 to 2019?",
+    "output": "What is the percentage increase in total prepaid expenses and other current assets from 2018 to 2019?"
+  },
+  {
+    "input": "How are the accounting procedures used to record changes in fair value of hedging instruments?",
+    "output": "How are changes in fair value of hedging instruments accounted for?"
+  },
+  {
+    "input": "What are the types of derivative instruments included in the total for other liabilities on a financial statement?",
+    "output": "What is the total derivate instruments for other liabilities?"
+  },
+  {
+    "input": "What are the types of derivative instruments used to measure the total value of other liabilities?",
+    "output": "What is the total derivate instruments for other liabilities?"
+  },
+  {
+    "input": "What is the ratio of the total derivatives classified as hedging instruments to the total derivatives not classified as hedging instruments under prepaid expenses and other current assets?",
+    "output": "Under prepaid expenses and other current assets, what is the ratio of the total derivates designated as hedging instruments to those not designated as hedging instruments?"
+  },
+  {
+    "input": "What was the increase in machinery and equipment from 2018 to 2019?",
+    "output": "What was the percentage change in the value of machinery and equipment between 2018 and 2019?"
+  },
+  {
+    "input": "What was the increase in machinery and equipment from 2018 to 2019?",
+    "output": "What percentage growth was observed in the value of machinery and equipment between the years 2018 and 2019?"
+  },
+  {
+    "input": "What are the differences in land statistics comparing the year 2018 and 2019?",
+    "output": "What is the difference between land from 2018 to 2019?"
+  },
+  {
+    "input": "What are the discrepancies in land conditions between the years 2018 and 2019?",
+    "output": "What is the difference between land from 2018 to 2019?"
+  },
+  {
+    "input": "What is the net change in selling, general, and administrative expense between the years 2019 and 2017?",
+    "output": "What is the net difference in selling, general and administrative expense between 2019 and 2017?"
+  },
+  {
+    "input": "What is the current percentage shareholding of Tata Sons Private Limited and Life Insurance Corporation of India, and how does it differ between the two entities?",
+    "output": "What is the difference in percentage shareholding between Tata Sons Private Limited and Life Insurance Corporation of India?"
+  },
+  {
+    "input": "What is the disparity in the quantity of equity shares possessed by Promoters and Insurance Companies?",
+    "output": "What is the difference between number of equity shares held between Promoters and Insurance Companies?"
+  },
+  {
+    "input": "How many shareholding accounts were there as of March 31, 2019?",
+    "output": "What is the number of existing shareholding accounts as on March 31, 2019?  "
+  },
+  {
+    "input": "As of March 31, 2019, what is the overall sum of all assets owned by someone or something?",
+    "output": "What is the grand total of holdings as on March 31, 2019?"
+  },
+  {
+    "input": "What is the overall value of assets and investments held as of March 31, 2019?",
+    "output": "What is the grand total of holdings as on March 31, 2019?"
+  },
+  {
+    "input": "What is the difference in the number of holdings within the share categories of '1-100' and '101-500'? How do the share categories of '1-100' and '101-500' differ in terms of the number of holdings they encompass?",
+    "output": "What is the difference in holdings between the share categories of '1-100' and '101-500'?"
+  },
+  {
+    "input": "What is the basis for consolidating shareholdings and how does it affect the overall company structure?",
+    "output": "On what basis is shareholding consolidated?"
+  },
+  {
+    "input": "What is the basis for the consolidation of shareholding?",
+    "output": "On what basis is shareholding consolidated?"
+  },
+  {
+    "input": "\"What is the proportion of unsecured loans out of the total debt at the start of the financial year?\"",
+    "output": "At the beginning of the financial year, what percentage of total indebtedness is made up of unsecured loans?"
+  },
+  {
+    "input": "What was the change in the quantity of shares transferred from 2011 to 2012 into the Investor Education and Protection Fund (IEPF)?",
+    "output": "What is the difference in number of shares transferred to IEPF from 2011 to 2012?"
+  },
+  {
+    "input": "What is the total remuneration for Ramakrishnan V?",
+    "output": "What is the complete amount of compensation that Ramakrishnan V receives?"
+  },
+  {
+    "input": "What is the total remuneration for Ramakrishnan V?",
+    "output": "What is the total amount of money received as payment or compensation for services rendered by Ramakrishnan V?"
+  },
+  {
+    "input": "What is the monetary worth of the stock option that the Company Secretary receives?",
+    "output": "What is the value of Stock Option given to the Company Secretary?"
+  },
+  {
+    "input": "What is the monetary worth or amount of Stock Option that has been granted or allocated to the Company Secretary as part of their compensation or benefits package?",
+    "output": "What is the value of Stock Option given to the Company Secretary?"
+  },
+  {
+    "input": "What is the specific value or percentage of commission that is usually given to the Chief Financial Officer (CFO) of a company?",
+    "output": "What is the value of Commission given to the Chief Financial Officer?"
+  },
+  {
+    "input": "\"What is the specific amount or percentage of commission that is typically awarded to the Chief Financial Officer (CFO) for their role?\"",
+    "output": "What is the value of Commission given to the Chief Financial Officer?"
+  },
+  {
+    "input": "What distinguishes the CFO from the Company Secretary in terms of Others and Allowances? Clarify and elaborate on the differences between these two roles regarding their responsibilities, remunerations, and any additional benefits they may receive.",
+    "output": "What is the difference in Others, Allowances between the CFO and Company Secretary?"
+  },
+  {
+    "input": "Which key managerial personnel had the highest total remuneration?",
+    "output": "\"Who among the key managerial personnel received the highest total remuneration?\""
+  },
+  {
+    "input": "How does the ratio of median remuneration between the two Executive Directors differ?",
+    "output": "What is the difference in ratio to median remuneration between the two Executive Directors?"
+  },
+  {
+    "input": "\"What is the difference in the amount of money received from sales or services between the years 2018 and 2019?\"",
+    "output": "What is the change in revenue between 2018 and 2019?"
+  },
+  {
+    "input": "What is the difference in the amount of money earned by the company between the years 2018 and 2019?",
+    "output": "What is the change in revenue between 2018 and 2019?"
+  },
+  {
+    "input": "What portion of total unconsolidated revenue in FY 2019 constitutes of export revenue?",
+    "output": "What percentage of the total unconsolidated revenue for the fiscal year 2019 is comprised of revenue from exports?"
+  },
+  {
+    "input": "What portion of total unconsolidated revenue in FY 2018 constitutes of export revenue?",
+    "output": "What percentage of the overall unconsolidated revenue for the fiscal year 2018 is derived from export sales?"
+  },
+  {
+    "input": "What is the difference in the amount of money earned from foreign exchanges between the fiscal years of 2018 and 2019?",
+    "output": "What is the change in foreign exchange earnings between FY 2018 and FY 2019?"
+  },
+  {
+    "input": "What is the change in Cost, Insurance and Freight (CIF) value of imports from FY 2018 to FY 2019?",
+    "output": "How has the Cost, Insurance and Freight (CIF) value of imports changed from FY 2018 to FY 2019?"
+  },
+  {
+    "input": "What is the specific amount of money charged as a sitting fee for N Chandrasekaran?",
+    "output": "What is the sitting fee of N Chandrasekaran?"
+  },
+  {
+    "input": "What is the difference in sitting fees between O P Bhatt and Aarthi Subramanian?",
+    "output": "What is the difference in sitting fees between O P Bhatt and Aarthi Subramanian, and in what context are these fees being compared or analyzed?"
+  },
+  {
+    "input": "What is the difference in sitting fees between O P Bhatt and Aarthi Subramanian?",
+    "output": "What is the difference in the sitting fees charged by O P Bhatt and Aarthi Subramanian?"
+  },
+  {
+    "input": "What is the percentage change in pre-tax margin between 2017 and 2018?",
+    "output": "What is the increase/ (decrease) in Pre-tax margin from 2017 to 2018"
+  },
+  {
+    "input": "What is the percentage change in Pre-tax margin between 2017 and 2018?",
+    "output": "What is the increase/ (decrease) in Pre-tax margin from 2017 to 2018"
+  },
+  {
+    "input": "What does Net Income include?",
+    "output": "\"What elements are included in the calculation of Net Income?\""
+  },
+  {
+    "input": "What is the increase / (decrease) in revenue from 2018 to 2019?",
+    "output": "How much did the revenue change from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage increase / (decrease) in Current liabilities from 2018 to 2019?",
+    "output": "What is the percentage change in Current Liabilities between 2018 and 2019?"
+  },
+  {
+    "input": "What caused the decrease in the actual shares?",
+    "output": "What factors led to the decrease in the current number of shares held by individuals or entities?"
+  },
+  {
+    "input": "What was the percentage change in Earnings per Share of common stock from continuing operations (basic) compared to the previous period?",
+    "output": "What was the increase / (decrease) from the Earnings per share of common stock from continuing operations basic?"
+  },
+  {
+    "input": "What was the percentage change in the basic earnings per share of common stock from continuing operations?",
+    "output": "What was the increase / (decrease) from the Earnings per share of common stock from continuing operations basic?"
+  },
+  {
+    "input": "How did the performance of hardware platforms compare from year to year?",
+    "output": "How was the performance of hardware platforms year to year"
+  },
+  {
+    "input": "What does the Pre-tax income performance implied? ",
+    "output": "What is the meaning of Pre-tax income performance?"
+  },
+  {
+    "input": "What was the pre-tax margin of 2019?",
+    "output": "\"What was the pre-tax margin percentage for the year 2019?\""
+  },
+  {
+    "input": "What was the pre-tax margin of 2019?",
+    "output": "\"What was the specific pre-tax margin for the fiscal year of 2019?\""
+  },
+  {
+    "input": "What was the External gross profit margin in 2019?",
+    "output": "What was the gross profit margin from sources external to the company for the year 2019?"
+  },
+  {
+    "input": "What is the average of Balance at January 1?",
+    "output": "What is the average balance as of January 1st for all accounts (or customers, if applicable)?"
+  },
+  {
+    "input": "What is the average balance amount as of December 31st?",
+    "output": "What is the average of Balance at December 31?"
+  },
+  {
+    "input": "What is the average balance amount recorded on December 31st for a specific period of time?",
+    "output": "What is the average of Balance at December 31?"
+  },
+  {
+    "input": "What is the increase / (decrease) in the net capitalized software from 2018 to 2019?",
+    "output": "What is the change in the net capitalized software amount from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage change in pre-tax income from 2018 to 2019?",
+    "output": "What was the increase / (decrease) in the Pre-tax income from 2018 to 2019?"
+  },
+  {
+    "input": "What is the percentage change in gross profit for External Operating Systems Software from 2018 to 2019?",
+    "output": "What is the increase / (decrease) in the External Operating Systems Software gross profit from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage increase / (decrease) in total short term debt?",
+    "output": "What was the percentage change in the total amount of short term debt?"
+  },
+  {
+    "input": "What was the percentage change in the pre-tax margin from 2018 to 2019?",
+    "output": "What was the increase / (decrease) in the Pre-tax margin from 2018 to 2019?"
+  },
+  {
+    "input": "What percentage change in the pre-tax margin occurred between 2018 and 2019?",
+    "output": "What was the increase / (decrease) in the Pre-tax margin from 2018 to 2019?"
+  },
+  {
+    "input": "What is the meaning or significance of the launch expense?",
+    "output": "What does launch expense represent?"
+  },
+  {
+    "input": "How many types of expenses are listed in the table?",
+    "output": "How many categories of expenses are included in the table?"
+  },
+  {
+    "input": "What was the average adjusted EBITDA for 2018 and 2019?",
+    "output": "What was the average amount of adjusted EBITDA for both the years 2018 and 2019?"
+  },
+  {
+    "input": "\"What is the chronological order of the gross profit for each financial year end as depicted in the table?\"",
+    "output": "What is the gross profit for each financial year end shown in the table (in chronological order)?"
+  },
+  {
+    "input": "What was the year with the highest value for other financial expenses?",
+    "output": "In which year was the amount of other financial expenses the largest?"
+  },
+  {
+    "input": "What was the net difference in the overall financial expenses incurred during the year 2019 as compared to 2018?",
+    "output": "What was the change in the total financial expenses in 2019 from 2018?"
+  },
+  {
+    "input": "What was the sum of all financial costs incurred in the year 2019?",
+    "output": "What is the total amount of financial expenses in 2019?"
+  },
+  {
+    "input": "What was the specific year when the amount of Current lease liabilities was greater than in any other year?",
+    "output": "In which year was the amount of Current lease liabilities larger?"
+  },
+  {
+    "input": "When did the amount of Current lease liabilities exceed the previous year's amount?",
+    "output": "In which year was the amount of Current lease liabilities larger?"
+  },
+  {
+    "input": "What is the total financial income for 2019?",
+    "output": "What was the overall financial income for the year 2019?"
+  },
+  {
+    "input": "What was the total number of financial items in the year 2019?",
+    "output": "What is the amount of total financial items in 2019?"
+  },
+  {
+    "input": "What is the total number of financial items recorded in the year 2019?",
+    "output": "What is the amount of total financial items in 2019?"
+  },
+  {
+    "input": "What are the primary categories that are examined within the Financial Items section of the table?",
+    "output": "What are the main categories analyzed under Financial Items in the table?"
+  },
+  {
+    "input": "In which year was the amount of financial income the smallest?",
+    "output": "What is the year with the minimum financial income amount?"
+  },
+  {
+    "input": "What was the change in the Average invested capital less average impairment in 2019 from 2018?",
+    "output": "What was the difference in the average amount of capital invested excluding average impairment between 2019 and 2018?"
+  },
+  {
+    "input": "What was the change in the Average invested capital less average impairment in 2019 from 2018?",
+    "output": "What was the difference in the average amount of money invested minus the average amount of impairments in 2019 compared to 2018?"
+  },
+  {
+    "input": "What was the percentage change in retained earnings in 2019 from 2018?",
+    "output": "What was the precise percentage difference in retained earnings between 2018 and 2019, specifically referring to the change observed in 2019?"
+  },
+  {
+    "input": "What was the percentage change in retained earnings in 2019 from 2018?",
+    "output": "What was the percentage increase or decrease in the amount of retained earnings from the year 2018 to 2019?"
+  },
+  {
+    "input": "How is Total Cost of Ownership (TCE) earnings utilized as a widely accepted performance measure for the shipping industry?",
+    "output": "How is TCE earnings used as a standard shipping industry performance measure?"
+  },
+  {
+    "input": "What is the specific year when the revenue reached its maximum value?",
+    "output": "In which year was Revenue the largest?"
+  },
+  {
+    "input": "What items were included in the freight receivables as of 2019?",
+    "output": "As of 2019, what did freight receivables include?"
+  },
+  {
+    "input": "What was included in the category of freight receivables as of 2019?",
+    "output": "As of 2019, what did freight receivables include?"
+  },
+  {
+    "input": "What is the determining basis for the calculation and establishment of an allowance for expected credit loss?",
+    "output": "What is the making of allowance for expected credit loss based on?"
+  },
+  {
+    "input": "What is the basis for determining the provision for expected credit loss?",
+    "output": "What is the making of allowance for expected credit loss based on?"
+  },
+  {
+    "input": "What is the specific year when the total gross freight receivables reached their highest amount?",
+    "output": "In which year was the amount of total gross freight receivables the largest?"
+  },
+  {
+    "input": "What was the net increase or decrease in the total gross freight receivables between 2018 and 2019?",
+    "output": "What was the change in the total net gross freight receivables in 2019 from 2018?"
+  },
+  {
+    "input": "What information can be derived in note 16?",
+    "output": "What data or details can be obtained from note 16?"
+  },
+  {
+    "input": "What information can be derived in note 16?",
+    "output": "What kind of information can be obtained from note 16?"
+  },
+  {
+    "input": "What was the change in the carrying amount as of 31 December from 2018 to 2019?",
+    "output": "What was the difference in the carrying amount as of December 31 between the years 2018 and 2019? Please provide the specific change in the carrying amount."
+  },
+  {
+    "input": "What are the specific components present in the table that are utilized for liquidity calculations?",
+    "output": "What are the components in the table used to calculate liquidity?"
+  },
+  {
+    "input": "What was the difference in liquidity levels between 2019 and 2018, and how did it change over the course of the year?",
+    "output": "What was the change in liquidity in 2019 from 2018?"
+  },
+  {
+    "input": "What does TORM have leases for?",
+    "output": "What types of leases does TORM currently have?"
+  },
+  {
+    "input": "How are leases represented on the balance sheet and what specific details are included for each lease?",
+    "output": "How is each lease reflected on the balance sheet?"
+  },
+  {
+    "input": "\"What is the asset type that experiences the highest rate of depreciation in a year?\"",
+    "output": "Which asset type has the largest depreciation of the year?"
+  },
+  {
+    "input": "What was the difference in the amount of gross profit in 2019 compared to 2018?",
+    "output": "What was the change in gross profit in 2019 from 2018?"
+  },
+  {
+    "input": "What was the exact percentage increase or decrease in the gross profit from 2018 to 2019?",
+    "output": "What was the percentage change in gross profit in 2019 from 2018?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in gross profit from 2018 to 2019?",
+    "output": "What was the percentage change in gross profit in 2019 from 2018?"
+  },
+  {
+    "input": "\"What specific details or data can be found in Note 21 of the provided information?\"",
+    "output": "What information is provided in Note 21?"
+  },
+  {
+    "input": "What was the percentage change in interest rate swaps between the year 2018 and 2019?",
+    "output": "What was the change in interest rate swaps from 2018 to 2019?"
+  },
+  {
+    "input": "How did interest rate swaps change from 2018 to 2019?",
+    "output": "What was the change in interest rate swaps from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage difference in the interest rate swaps between 2018 and 2019? I would like to know the rate at which the interest rate swaps changed between these two years.",
+    "output": "What was the percentage change in interest rate swaps from 2018 to 2019?"
+  },
+  {
+    "input": "What was the precise percentage difference in the interest rate swap market between 2018 and 2019?",
+    "output": "What was the percentage change in interest rate swaps from 2018 to 2019?"
+  },
+  {
+    "input": "What is TORM's belief or estimation of the amount of net interest-bearing debt they possess?",
+    "output": "What does TORM believe net interest-bearing debt to be?"
+  },
+  {
+    "input": "What is the methodology used to calculate the net asset value (NAV) per share in investment funds?",
+    "output": "How is NAV/share calculated?"
+  },
+  {
+    "input": "What are the types of audit fees in the table?",
+    "output": "What are the different types of audit fees listed in the table?"
+  },
+  {
+    "input": "In what year did the auditor receive the highest overall compensation?",
+    "output": "In which year was the total remuneration to the auditor the largest?"
+  },
+  {
+    "input": "For which specific years were the calculations of Other Liabilities amounts conducted?",
+    "output": "In which years were the amounts of Other Liabilities calculated for?"
+  },
+  {
+    "input": "In which specific year did the amount allocated to Partners and commercial managements surpass the amount allocated in other years?",
+    "output": "In which year was the amount under Partners and commercial managements larger?"
+  },
+  {
+    "input": "\"What specific expenses are included in the category of operating expenses that are related to staff costs?\"",
+    "output": "What are the staff costs included in operating expenses related to?"
+  },
+  {
+    "input": "How is the average number of employees determined and what is the methodology behind its calculation?",
+    "output": "How is the average number of employees calculated?"
+  },
+  {
+    "input": "What was the change in the total number of permanent employees from 2018 to 2019?",
+    "output": "How many permanent employees were added or lost between 2018 and 2019, resulting in the change in the total number of permanent employees?"
+  },
+  {
+    "input": "What does note 21 include in terms of information?",
+    "output": "What information does note 21 contain?"
+  },
+  {
+    "input": "What does accounts receivables in the table include?",
+    "output": "What specific components are included in the accounts receivables section of the table?"
+  },
+  {
+    "input": "What specific items or costs are included in the income statement as Port expenses, including bunkers and commissions?",
+    "output": "What is recognized in the income statement under Port expenses, bunkers and commissions?"
+  },
+  {
+    "input": "How is allowance for expected credit loss of freight receivables calculated?",
+    "output": "How is the calculation for allowance for expected credit loss determined specifically for freight receivables?"
+  },
+  {
+    "input": "\"What specific components are accounted for in the carrying amount designated for 'Vessels and capitalized dry-docking'?\"",
+    "output": "What is included in the carrying amount for \"Vessels and capitalized dry-docking\"?"
+  },
+  {
+    "input": "What specific sub-elements, denoted in capital letters, can be found under the category of \"Vessels\" in the table related to dry-docking?",
+    "output": "What are the sub-elements under Vessels and capitalized dry-docking in the table?"
+  },
+  {
+    "input": "What specific components or sub-elements fall under the category of Vessels and capitalized dry-docking in the table?",
+    "output": "What are the sub-elements under Vessels and capitalized dry-docking in the table?"
+  },
+  {
+    "input": "\"What is the meaning of the phrase 'company's retirement obligations' and what specific aspects does it encompass?\"",
+    "output": "What does the company's retirement obligations refer to?"
+  },
+  {
+    "input": "\"What are the retirement obligations of the company and what do they encompass?\"",
+    "output": "What does the company's retirement obligations refer to?"
+  },
+  {
+    "input": "What is the company's total contractual cash obligations due in more than 5 years?",
+    "output": "What is the total amount of cash that the company is obligated to pay according to its contracts, and when are these payments due, specifically for obligations that extend beyond a period of 5 years?"
+  },
+  {
+    "input": "What is the company's total contractual cash obligations due in more than 5 years?",
+    "output": "\"What is the total amount of cash that the company owes based on contractual agreements with a maturity period of more than 5 years?\""
+  },
+  {
+    "input": "How much does operating leases account for total contractual cash obligations for period of less than 1 year?",
+    "output": "What is the percentage of total contractual cash obligations attributable to operating leases with a duration of less than one year?"
+  },
+  {
+    "input": "What is the specific monetary difference in the company's operating income between the years 2018 and 2019?",
+    "output": "What is the company's increase in income from operating activities between 2018 and 2019? "
+  },
+  {
+    "input": "What is the cash balance and the amount of cash equivalents held by the company as of August 31, 2019?",
+    "output": "What is the company's cash and cash equivalents as at 31 August 2019?"
+  },
+  {
+    "input": "What is the change in cash flow from investing activities from 2018 to 2019, specifically referring to any increase or decrease in the amount of cash generated or used in investing activities during this period?",
+    "output": "What is the increase in cash flow from investing activities between 2018 and 2019?"
+  },
+  {
+    "input": "What is the total cashflow from investing activities in both 2018 and 2019?",
+    "output": "What is the combined amount of cash inflows and outflows from investing activities in 2018 and 2019?"
+  },
+  {
+    "input": "What is the total cashflow from investing activities in both 2018 and 2019?",
+    "output": "What was the net cash flow from investing activities in 2018 and 2019 combined?"
+  },
+  {
+    "input": "What is the total effect of exchange rate changes on cash and cash equivalents in both 2018 and 2019?",
+    "output": "What is the cumulative impact of fluctuations in exchange rates on the combined balances of cash and cash equivalents for the years 2018 and 2019?"
+  },
+  {
+    "input": "When was the approval by Accenture's Board of Directors granted for the implementation of an open-market share purchase program specifically designed for the acquisition of Accenture's Class A ordinary shares?",
+    "output": "When did Accenture's Board of Directors authorize and confirm an open-market share purchase program for acquiring Accenture pls Class A ordinary shares?"
+  },
+  {
+    "input": "How much of the shares purchased in July were part of publicly announced plan or programs?",
+    "output": "What proportion of the shares acquired in the month of July were specifically disclosed as part of publicly announced plan or programs?"
+  },
+  {
+    "input": "How much of the shares purchased in July were part of publicly announced plan or programs?",
+    "output": "What was the proportion of shares acquired in July that were part of publicly disclosed plans or programs?"
+  },
+  {
+    "input": "What was the primary source of the company's operational revenue in the year 2019?",
+    "output": "What is the company's main source of operating income in 2019?"
+  },
+  {
+    "input": "What was the specific amount spent on expenses directly related to the production or provision of goods and services in the year 2019?",
+    "output": "What was the cost of revenue in 2019?"
+  },
+  {
+    "input": "What was the precise amount spent on expenses directly related to generating revenue in the year 2019?",
+    "output": "What was the cost of revenue in 2019?"
+  },
+  {
+    "input": "What was the total count of shares that were approved and allocated?",
+    "output": "What was the number of granted shares?"
+  },
+  {
+    "input": "How many shares were approved and given to individuals or entities as stocks?",
+    "output": "What was the number of granted shares?"
+  },
+  {
+    "input": "What is the disparity in the weighted-average grant date fair value between shares that have been allotted and subsequently forfeited?",
+    "output": "What is the difference in weighted-average grant date fair value between granted and forfeited shares?"
+  },
+  {
+    "input": "What is the difference in the weighted-average grant date fair value between unvested shares and vested shares in 2018?",
+    "output": "What is the difference in weighted-average grant date fair value for unvested shares in 2018 and vested shares?"
+  },
+  {
+    "input": "What is the discrepancy in the weighted-average grant date fair value between unvested shares and vested shares in the year 2018?",
+    "output": "What is the difference in weighted-average grant date fair value for unvested shares in 2018 and vested shares?"
+  },
+  {
+    "input": "What was the previously reported amount of total current assets?",
+    "output": "What was the total current assets as previously reported?"
+  },
+  {
+    "input": "What is the total value of the deferred commissions, including the current portion, and the total current assets as reported in the previous financial statement?",
+    "output": "What was the sum of deferred commissions, current portion and total current assets as previously reported?"
+  },
+  {
+    "input": "What was the total amount reported for deferred commissions, the current portion of deferred commissions, and the overall total for current assets in the previous report?",
+    "output": "What was the sum of deferred commissions, current portion and total current assets as previously reported?"
+  },
+  {
+    "input": "What is the revised percentage of accrued expenses in relation to the total sum of liabilities and stockholders' equity?",
+    "output": "What was accrued expenses as revised as a percentage of total liabilities and stockholders' equity?"
+  },
+  {
+    "input": "What was the company's bad debt expense for the year ending December 31, 2018?",
+    "output": "What was the exact amount recorded as bad debt expense by the company for the entire year ending on December 31, 2018?"
+  },
+  {
+    "input": "What was the company's bad debt expense for the year ending December 31, 2018?",
+    "output": "What was the amount of bad debts that the company recorded as an expense for the fiscal year that ended on December 31, 2018?"
+  },
+  {
+    "input": "What was the total amount of interest earned as income in the year 2017?",
+    "output": "What was interest income in 2017?"
+  },
+  {
+    "input": "What was the net income of others in 2019?",
+    "output": "What was others, net in 2019?"
+  },
+  {
+    "input": "What was the percentage change in Interest expense between 2017 and 2018?",
+    "output": "What was the percentage increase or decrease in Interest expense from 2017 to 2018?"
+  },
+  {
+    "input": "What is the average interest income from 2017-2019?",
+    "output": "What is the average income generated from interest for the years 2017 to 2019 inclusive?"
+  },
+  {
+    "input": "What percentage increase or decrease in interest income occurred from 2018 to 2019?",
+    "output": "What was the percentage change in interest income between 2018 and 2019?"
+  },
+  {
+    "input": "What is the average amount of net cash provided by operating activities for the years 2017, 2018, and 2019?",
+    "output": "What was the average net cash provided by operating activities from 2017-2019?"
+  },
+  {
+    "input": "What was the percentage change in the free flow cash margin from 2017 to 2018?",
+    "output": "What was the change in free flow cash margin between 2017 and 2018?"
+  },
+  {
+    "input": "What was the total amount of federal net operating losses incurred by the company in the year 2019?",
+    "output": "What was the company's federal net operating losses in 2019?"
+  },
+  {
+    "input": "What was the amount of federal net operating losses incurred by the company during the year 2019 for tax purposes?",
+    "output": "What was the company's federal net operating losses in 2019?"
+  },
+  {
+    "input": "What was the company's state net operating losses in 2019?",
+    "output": "What was the total amount of net operating losses incurred by the company in the state during the year 2019?"
+  },
+  {
+    "input": "What is the percentage change in Deferred revenue between 2018 and 2019?",
+    "output": "What is the exact percentage difference in Deferred revenue from 2018 to 2019?"
+  },
+  {
+    "input": "What was the precise amount spent on sales and marketing in the year 2018?",
+    "output": "What was the sales and marketing expense in 2018?"
+  },
+  {
+    "input": "What was the specific fair value amount of cash that is being referred to in the context of a certain situation or event?",
+    "output": "What was the fair value amount of cash?"
+  },
+  {
+    "input": "What was the specific fair value figure assigned to the amount of cash? Please provide the determined fair value amount for the cash in question.",
+    "output": "What was the fair value amount of cash?"
+  },
+  {
+    "input": "What percentage of fair value total cash equivalents consist of agency bonds?",
+    "output": "What is the proportion of agency bonds, in terms of percentage, in the total cash equivalents that make up the fair value?"
+  },
+  {
+    "input": "What updates and advancements were made in computer equipment and software from 2018 to 2019?",
+    "output": "What is the change in computer equipment and software between 2018 and 2019?"
+  },
+  {
+    "input": "What are the differences in computer equipment and software between the years 2018 and 2019?",
+    "output": "What is the change in computer equipment and software between 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in revenue from research and development from 2017 to 2018?",
+    "output": "What was the change in percentage of revenue of research and development between 2017 and 2018?"
+  },
+  {
+    "input": "What are the main promises or obligations that the company is committed to?",
+    "output": "What are the company's principal commitments?"
+  },
+  {
+    "input": "What are the main commitments of the company?",
+    "output": "What are the company's principal commitments?"
+  },
+  {
+    "input": "What is the sum of total operating lease obligations and Long-term debt obligations including interest?",
+    "output": "What is the combined amount of operating lease obligations and long-term debt obligations, including accrued interest?"
+  },
+  {
+    "input": "What portion of the total obligations is attributable to payments that are due within a time frame of 1-3 years?",
+    "output": "What is the percentage of the total obligations that consists of payments due in 1-3 years?"
+  },
+  {
+    "input": "What is the specific percentage of the overall obligations that are categorized as payments to be made within a period of 1-3 years?",
+    "output": "What is the percentage of the total obligations that consists of payments due in 1-3 years?"
+  },
+  {
+    "input": "\"What are the regulations or guidelines regarding the payment of Foreign income tax applicable for the year 2019?\"",
+    "output": "What is the provision for Foreign income tax in 2019?"
+  },
+  {
+    "input": "\"What are the regulations or rules implemented in 2019 regarding the taxation of foreign income?\"",
+    "output": "What is the provision for Foreign income tax in 2019?"
+  },
+  {
+    "input": "What is the disparity in the weighted-average grant date fair value when comparing shares that have been granted to those that have been forfeited?",
+    "output": "What is the difference in weighted-average grant date fair value between granted and forfeited shares?"
+  },
+  {
+    "input": "What is the disparity in weighted-average grant date fair value when comparing shares that have been granted with those that have been forfeited?",
+    "output": "What is the difference in weighted-average grant date fair value between granted and forfeited shares?"
+  },
+  {
+    "input": "What is the  Net sales for 2019?",
+    "output": "What was the total global revenue generated from sales in the year 2019?"
+  },
+  {
+    "input": "What was the percentage change in diluted net income per share from continuing operations from 2017 to 2018?",
+    "output": "What was the change in Diluted Net income per share from continuing operations in 2018 from 2017?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in the Diluted Net income per share from continuing operations between 2018 and 2017?",
+    "output": "What was the percentage change in Diluted Net income per share from continuing operations in 2018 from 2017?"
+  },
+  {
+    "input": "What does the Euro Term Loan due 2024 consist of?",
+    "output": "What is included in the Euro Term Loan due in 2024?"
+  },
+  {
+    "input": "What does the Euro Term Loan due 2024 consist of?",
+    "output": "\"What are the components and details of the Euro Term Loan due in 2024?\""
+  },
+  {
+    "input": "In which year did the value of the 1.0% State of Connecticut term loan, which has a maturity date in 2023, surpass its previous size?",
+    "output": "In which year was the 1.0% State of Connecticut term loan due 2023 larger?"
+  },
+  {
+    "input": "In which year did the size of the 1.0% State of Connecticut term loan due in 2023 increase?",
+    "output": "In which year was the 1.0% State of Connecticut term loan due 2023 larger?"
+  },
+  {
+    "input": "What was the change in Capital lease obligations from 2018 to 2019?",
+    "output": "What was the exact difference in the amount of Capital lease obligations between the years 2018 and 2019?"
+  },
+  {
+    "input": "In which year was ILS a higher percentage of total net sales?",
+    "output": "In which specific year did the Incremental Lifetime Sales (ILS) constitute a larger proportion of the overall net sales?"
+  },
+  {
+    "input": "What were the net sales figures for ILS in both 2018 and 2019, and what was the average amount for each year?",
+    "output": "What was the average amount of net sales for ILS in 2018 and 2019?"
+  },
+  {
+    "input": "What were the average net sales for ILS for the years 2018 and 2019?",
+    "output": "What was the average amount of net sales for ILS in 2018 and 2019?"
+  },
+  {
+    "input": "What is the estimated duration of a building and any associated enhancements before they are deemed no longer useful?",
+    "output": "What is the useful life of Buildings and improvements?"
+  },
+  {
+    "input": "How is property and equipment stated in the table?",
+    "output": "In what manner is property and equipment presented and described within the table?"
+  },
+  {
+    "input": "In which year was the amount of Land larger?",
+    "output": "\"When comparing the years, which year had a larger amount of land?\""
+  },
+  {
+    "input": "What was the change in Leasehold improvements from 2018 to 2019?",
+    "output": "What was the difference in Leasehold improvements value between 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in Short-term investments in 2019 from 2018?",
+    "output": "What was the percentage difference in the value of Short-term investments between 2019 and 2018?"
+  },
+  {
+    "input": "What was the percentage change in Short-term investments in 2019 from 2018?",
+    "output": "What was the exact percentage difference in the value of Short-term investments between 2018 and 2019?"
+  },
+  {
+    "input": "What was the net income for the year 2019?",
+    "output": "What was Net income in 2019?"
+  },
+  {
+    "input": "\"In which specific year did the company report the highest amount of comprehensive income?\"",
+    "output": "In which year was Comprehensive income largest?"
+  },
+  {
+    "input": "What is the largest year for Comprehensive income?",
+    "output": "In which year was Comprehensive income largest?"
+  },
+  {
+    "input": "What was the difference in the amount of money earned in 2018 compared to 2017 after all expenses and taxes were accounted for?",
+    "output": "What was the change in Net income in 2018 from 2017?"
+  },
+  {
+    "input": "What was the Projected Benefit Obligation (PBO) amount for the year 2019? Please provide the numerical value for the PBO as of December 31st, 2019.",
+    "output": "What was the Projected benefit obligation in 2019?"
+  },
+  {
+    "input": "In which years was information on defined benefit plans provided?",
+    "output": "During which specific years was information regarding defined benefit plans provided?"
+  },
+  {
+    "input": "In which year was the amount of Research and development largest?",
+    "output": "In which specific year did Research and development reach its highest amount or expenditure?"
+  },
+  {
+    "input": "What was the specific amount allocated for income taxes in the year 2019?",
+    "output": "What was the Provision for income taxes in 2019?"
+  },
+  {
+    "input": "What specific amount was set aside for income taxes in the fiscal year of 2019?",
+    "output": "What was the Provision for income taxes in 2019?"
+  },
+  {
+    "input": "What has been the modification in the amount of state income taxes that individuals are required to pay in 2019 compared to 2018?",
+    "output": "What was the change in state income taxes that is currently payable in 2019 from 2018?"
+  },
+  {
+    "input": "How much Microelectronics was present in the year 2019?",
+    "output": "What is the amount of Microelectronics in 2019?"
+  },
+  {
+    "input": "What is the total volume or quantity of Microelectronics produced or used worldwide in the year 2019?",
+    "output": "What is the amount of Microelectronics in 2019?"
+  },
+  {
+    "input": "In which years is net sales calculated?",
+    "output": "In which specific years does the calculation of net sales occur?"
+  },
+  {
+    "input": "In which years is net sales calculated?",
+    "output": "During which specific years is the calculation of net sales undertaken?"
+  },
+  {
+    "input": "What was the percentage difference in the quantity of OEM components and instrumentation between 2018 and 2019?",
+    "output": "What was the percentage change in the amount of OEM components and instrumentation in 2019 from 2018?"
+  },
+  {
+    "input": "What was the Net cash provided by operating activities in 2019?",
+    "output": "What was the amount of net cash generated from operating activities during the year 2019?"
+  },
+  {
+    "input": "What was the  Purchases of property and equipment  in 2018?",
+    "output": "What was the total value of purchases made for property and equipment in the year 2018?"
+  },
+  {
+    "input": "In which specific years are the instances or applications of physical currency emphasized or showcased in the table?",
+    "output": "In which years are the uses of cash highlighted in the table?"
+  },
+  {
+    "input": "What years are specified in the table to illustrate the purpose of utilizing cash?",
+    "output": "In which years are the uses of cash highlighted in the table?"
+  },
+  {
+    "input": "In which year was the Issuance of shares under employee stock plans larger?",
+    "output": "When comparing the issuance of shares under employee stock plans, in which specific year was the volume of shares issued found to be greater?"
+  },
+  {
+    "input": "In which year were the Additions charged to expenses the largest?",
+    "output": "What is the specific year when the largest Additions were charged to expenses?"
+  },
+  {
+    "input": "In which year were the Additions charged to expenses the largest?",
+    "output": "What was the year when the largest amount of Additions were charged to expenses?"
+  },
+  {
+    "input": "In which year did tax fees have a higher value or amount than in other years?",
+    "output": "In which year were tax fees larger?"
+  },
+  {
+    "input": "What was the difference in the amount of all other fees charged in 2019 compared to 2018?",
+    "output": "What was the change in All other fees in 2019 from 2018?"
+  },
+  {
+    "input": "What was Long-term taxes payable in 2019?",
+    "output": "What was the amount of Long-term taxes payable in the year 2019?"
+  },
+  {
+    "input": "In which year was Other long-term liabilities larger?",
+    "output": "\"In which specific year did Other long-term liabilities exhibit a greater amount compared to the other years?\""
+  },
+  {
+    "input": "In 2019, what was the exact amount of deferred compensation liability?",
+    "output": "What was the Total deferred compensation liability in 2019?"
+  },
+  {
+    "input": "For which years was the calculation of Total deferred compensation liability performed?",
+    "output": "In which years was Total deferred compensation liability calculated?"
+  },
+  {
+    "input": "How was the calculation for diluted earnings per share derived?",
+    "output": "How was diluted earnings per share computed?"
+  },
+  {
+    "input": "In which specific years can we find the earnings per share data that was included in the table?",
+    "output": "In which years was earnings per share provided in the table?"
+  },
+  {
+    "input": "What year experienced the highest dilutive impact from employee stock awards?",
+    "output": "In which year was the Dilutive effect of employee stock awards largest?"
+  },
+  {
+    "input": "In what specific year did employee stock awards have the most significant dilutive effect?",
+    "output": "In which year was the Dilutive effect of employee stock awards largest?"
+  },
+  {
+    "input": "What was the change in Dilutive effect of employee stock awards in 2019 from 2018?",
+    "output": "What was the difference in the dilutive impact caused by employee stock awards between 2019 and 2018?"
+  },
+  {
+    "input": "What was the change in Dilutive effect of employee stock awards in 2019 from 2018?",
+    "output": "What was the difference in how much employee stock awards had a dilutive effect in 2019 compared to 2018?"
+  },
+  {
+    "input": "What was the percentage change in Dilutive effect of employee stock awards in 2019 from 2018?",
+    "output": "What was the percentage change in the dilutive effect of employee stock awards from 2018 to 2019?"
+  },
+  {
+    "input": "In which specific year did the Amortization of intangible assets account for a larger proportion of the overall net sales?",
+    "output": "In which year was Amortization of intangible assets a higher percentage of total net sales?"
+  },
+  {
+    "input": "What was the change in the amount of Amortization of intangible assets in 2019 from 2018?",
+    "output": "What was the difference in the amount of Amortization of intangible assets between the fiscal years 2019 and 2018?"
+  },
+  {
+    "input": "What was the value of Net deferred tax assets in the financial year of 2019?",
+    "output": "What was the Net deferred tax assets in 2019?"
+  },
+  {
+    "input": "What was the difference in the initial balance for 2019 compared to 2018?",
+    "output": "What was the change in Beginning balance in 2019 from 2018?"
+  },
+  {
+    "input": "What was the difference in the starting balance in 2019 compared to 2018?",
+    "output": "What was the change in Beginning balance in 2019 from 2018?"
+  },
+  {
+    "input": "What was the balance in the accounts as of the start of the year in 2018?",
+    "output": "What was the  Balance as of the beginning of the year  in 2018?"
+  },
+  {
+    "input": "\"What was the exact monetary balance in the accounts as of January 1st, 2018, at the start of the year?\"",
+    "output": "What was the  Balance as of the beginning of the year  in 2018?"
+  },
+  {
+    "input": "What was the percentage change in Balance as of the beginning of the year in 2019 from 2018?",
+    "output": "What is the percentage change in the Balance as of the start of 2019 compared to the Balance at the start of 2018?"
+  },
+  {
+    "input": "What was the percentage change in Prepaid and refundable income taxes from 2018 to 2019?",
+    "output": "What was the percentage difference in the amount of income taxes that were prepaid and refundable between the years 2018 and 2019?"
+  },
+  {
+    "input": "What was the percentage change in Prepaid and refundable income taxes from 2018 to 2019?",
+    "output": "What was the percentage difference in the amount of prepaid and refundable income taxes between 2018 and 2019?"
+  },
+  {
+    "input": "What does the Current portion of Euro Term Loan consist of?",
+    "output": "What is included in the current portion of the Euro Term Loan?"
+  },
+  {
+    "input": "What was the net change in the amount of outstanding Capital lease obligations between the years 2018 and 2019?",
+    "output": "What was the change in Capital lease obligations from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in Capital lease obligations from 2018 to 2019?",
+    "output": "What was the percentage change in Capital lease obligations from 2018 to 2019?"
+  },
+  {
+    "input": "What was the percentage increase or decrease in Capital lease obligations between 2018 and 2019?",
+    "output": "What was the percentage change in Capital lease obligations from 2018 to 2019?"
+  },
+  {
+    "input": "What are the respective values of the company's work in process inventories on March 31, 2018 and 2019?",
+    "output": "What were the specific values of the company's work in process inventories as of March 31 in both 2018 and 2019?"
+  },
+  {
+    "input": "What are the respective values of the company's work in process inventories on March 31, 2018 and 2019?",
+    "output": "What is the dollar amount of the work in process inventories for the company on March 31 of both 2018 and 2019?"
+  },
+  {
+    "input": "What was the value of the company's finished goods on March 31, 2018 and March 31, 2019?",
+    "output": "What are the respective values of the company's finished goods on March 31, 2018 and 2019?"
+  },
+  {
+    "input": "What is the change in the company's raw materials on March 31, 2018 and 2019?",
+    "output": "What is the difference in the amount of raw materials held by the company as of March 31, 2018 and March 31, 2019?"
+  },
+  {
+    "input": "What was the mean value of the company's raw materials on March 31 in both 2018 and 2019?",
+    "output": "What is the average value of the company's raw materials on March 31, 2018 and 2019?"
+  },
+  {
+    "input": "What is the average worth of the company's raw materials specifically on March 31st, both in the years 2018 and 2019?",
+    "output": "What is the average value of the company's raw materials on March 31, 2018 and 2019?"
+  },
+  {
+    "input": "What is the mean amount of the company's completed products on March 31, in both the years 2018 and 2019?",
+    "output": "What is the average value of the company's finished goods on March 31, 2018 and 2019?"
+  },
+  {
+    "input": "What is the average value of the finished goods held by the company on March 31 in both 2018 and 2019?",
+    "output": "What is the average value of the company's finished goods on March 31, 2018 and 2019?"
+  },
+  {
+    "input": "What are the company's respective foreign income before taxes in 2018 and 2019?",
+    "output": "What were the pre-tax foreign incomes of the company in 2018 and 2019?"
+  },
+  {
+    "input": "What is the company's average loss before income taxes from the United States in 2018 and 2019?",
+    "output": "\"What was the average pre-tax loss incurred by the company in the United States for the years 2018 and 2019?\""
+  },
+  {
+    "input": "What is the company's average loss before income taxes from the United States in 2018 and 2019?",
+    "output": "What was the average pre-tax income loss for the company in the United States for both 2018 and 2019?"
+  },
+  {
+    "input": "What is the company's average foreign income before taxes in 2018 and 2019?",
+    "output": "What was the average amount of foreign income before taxes for the company in both 2018 and 2019?"
+  },
+  {
+    "input": "What are the average pre-tax earnings of the company for the fiscal years 2018 and 2019?",
+    "output": "What is the company's average total income before income taxes in 2018 and 2019?"
+  },
+  {
+    "input": "What are the specific cost of goods sold for the company in both 2018 and 2019?",
+    "output": "What are the company's respective cost of goods sold in 2018 and 2019?"
+  },
+  {
+    "input": "What were the cost of goods sold for the company in 2018 and 2019?",
+    "output": "What are the company's respective cost of goods sold in 2018 and 2019?"
+  }
+]
diff --git a/libs/ktem/ktem/reasoning/prompt_optimization/rewrite_question.py b/libs/ktem/ktem/reasoning/prompt_optimization/rewrite_question.py
new file mode 100644
index 000000000..3891f54d0
--- /dev/null
+++ b/libs/ktem/ktem/reasoning/prompt_optimization/rewrite_question.py
@@ -0,0 +1,37 @@
+from ktem.llms.manager import llms
+
+from kotaemon.base import BaseComponent, Document, HumanMessage, Node, SystemMessage
+from kotaemon.llms import ChatLLM, PromptTemplate
+
+DEFAULT_REWRITE_PROMPT = (
+    "Given the following question, rephrase and expand it "
+    "to help you do better answering. Maintain all information "
+    "in the original question. Keep the question as concise as possible. "
+    "Give answer in {lang}\n"
+    "Original question: {question}\n"
+    "Rephrased question: "
+)
+
+
+class RewriteQuestionPipeline(BaseComponent):
+    """Rephrase a user question with an LLM before it is answered
+
+    Args:
+        llm: the chat model that rewrites the question (default: llms.get_default())
+        rewrite_template: prompt template, populated with `question` and `lang`
+        lang: the language of the answer. Currently supports English and Japanese
+    """
+
+    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
+    rewrite_template: str = DEFAULT_REWRITE_PROMPT
+
+    lang: str = "English"
+
+    def run(self, question: str) -> Document:  # type: ignore
+        prompt_template = PromptTemplate(self.rewrite_template)
+        prompt = prompt_template.populate(question=question, lang=self.lang)
+        messages = [
+            SystemMessage(content="You are a helpful assistant"),
+            HumanMessage(content=prompt),
+        ]
+        return self.llm(messages)
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 2e9e00cb7..ba06a4ec5 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -10,6 +10,10 @@
 import numpy as np
 import tiktoken
 from ktem.llms.manager import llms
+from ktem.reasoning.prompt_optimization import (
+    DecomposeQuestionPipeline,
+    RewriteQuestionPipeline,
+)
 from ktem.utils.render import Render
 from theflow.settings import settings as flowsettings
 
@@ -98,7 +102,7 @@ class PrepareEvidencePipeline(BaseComponent):
     def run(self, docs: list[RetrievedDocument]) -> Document:
         evidence = ""
         table_found = 0
-        evidence_mode = EVIDENCE_MODE_TEXT
+        evidence_modes = []
 
         for _id, retrieved_item in enumerate(docs):
             retrieved_content = ""
@@ -107,7 +111,7 @@ def run(self, docs: list[RetrievedDocument]) -> Document:
             if page:
                 source += f" (Page {page})"
             if retrieved_item.metadata.get("type", "") == "table":
-                evidence_mode = EVIDENCE_MODE_TABLE
+                evidence_modes.append(EVIDENCE_MODE_TABLE)
                 if table_found < 5:
                     retrieved_content = retrieved_item.metadata.get("table_origin", "")
                     if retrieved_content not in evidence:
@@ -118,7 +122,7 @@ def run(self, docs: list[RetrievedDocument]) -> Document:
                             + "\n<br>"
                         )
             elif retrieved_item.metadata.get("type", "") == "chatbot":
-                evidence_mode = EVIDENCE_MODE_CHATBOT
+                evidence_modes.append(EVIDENCE_MODE_CHATBOT)
                 retrieved_content = retrieved_item.metadata["window"]
                 evidence += (
                     f"<br><b>Chatbot scenario from {filename} (Row {page})</b>\n"
@@ -126,7 +130,7 @@ def run(self, docs: list[RetrievedDocument]) -> Document:
                     + "\n<br>"
                 )
             elif retrieved_item.metadata.get("type", "") == "image":
-                evidence_mode = EVIDENCE_MODE_FIGURE
+                evidence_modes.append(EVIDENCE_MODE_FIGURE)
                 retrieved_content = retrieved_item.metadata.get("image_origin", "")
                 retrieved_caption = html.escape(retrieved_item.get_content())
                 evidence += (
@@ -148,6 +152,13 @@ def run(self, docs: list[RetrievedDocument]) -> Document:
                         + " \n<br>"
                     )
 
+        # resolve evidence mode
+        evidence_mode = EVIDENCE_MODE_TEXT
+        if EVIDENCE_MODE_FIGURE in evidence_modes:
+            evidence_mode = EVIDENCE_MODE_FIGURE
+        elif EVIDENCE_MODE_TABLE in evidence_modes:
+            evidence_mode = EVIDENCE_MODE_TABLE
+
         if evidence_mode != EVIDENCE_MODE_FIGURE:
             # trim context by trim_len
             print("len (original)", len(evidence))
@@ -170,8 +181,8 @@ def run(self, docs: list[RetrievedDocument]) -> Document:
 )
 
 DEFAULT_QA_TABLE_PROMPT = (
-    "List all rows (row number) from the table context that related to the question, "
-    "then provide detail answer with clear explanation. "
+    "Use the given context: texts, tables, and figures below to answer the question, "
+    "then provide answer with clear explanation. "
     "If you don't know the answer, just say that you don't know, "
     "don't try to make up an answer. Give answer in {lang}.\n\n"
     "Context:\n"
@@ -438,33 +449,10 @@ def extract_evidence_images(self, evidence: str):
         image_pattern = r"src='(data:image\/[^;]+;base64[^']+)'"
         matches = re.findall(image_pattern, evidence)
         context = re.sub(image_pattern, "", evidence)
+        print(f"Got {len(matches)} images")
         return context, matches
 
 
-class RewriteQuestionPipeline(BaseComponent):
-    """Rewrite user question
-
-    Args:
-        llm: the language model to rewrite question
-        rewrite_template: the prompt template for llm to paraphrase a text input
-        lang: the language of the answer. Currently support English and Japanese
-    """
-
-    llm: ChatLLM = Node(default_callback=lambda _: llms.get_default())
-    rewrite_template: str = DEFAULT_REWRITE_PROMPT
-
-    lang: str = "English"
-
-    def run(self, question: str) -> Document:  # type: ignore
-        prompt_template = PromptTemplate(self.rewrite_template)
-        prompt = prompt_template.populate(question=question, lang=self.lang)
-        messages = [
-            SystemMessage(content="You are a helpful assistant"),
-            HumanMessage(content=prompt),
-        ]
-        return self.llm(messages)
-
-
 class AddQueryContextPipeline(BaseComponent):
 
     n_last_interactions: int = 5
@@ -519,7 +507,7 @@ class Config:
 
     evidence_pipeline: PrepareEvidencePipeline = PrepareEvidencePipeline.withx()
     answering_pipeline: AnswerWithContextPipeline = AnswerWithContextPipeline.withx()
-    rewrite_pipeline: RewriteQuestionPipeline = RewriteQuestionPipeline.withx()
+    rewrite_pipeline: RewriteQuestionPipeline | None = None
     add_query_context: AddQueryContextPipeline = AddQueryContextPipeline.withx()
     trigger_context: int = 150
     use_rewrite: bool = False
@@ -528,15 +516,16 @@ def retrieve(
         self, message: str, history: list
     ) -> tuple[list[RetrievedDocument], list[Document]]:
         """Retrieve the documents based on the message"""
-        if len(message) < self.trigger_context:
-            # prefer adding context for short user questions, avoid adding context for
-            # long questions, as they are likely to contain enough information
-            # plus, avoid the situation where the original message is already too long
-            # for the model to handle
-            query = self.add_query_context(message, history).content
-        else:
-            query = message
-        print(f"Rewritten query: {query}")
+        # if len(message) < self.trigger_context:
+        #     # prefer adding context for short user questions, avoid adding context for
+        #     # long questions, as they are likely to contain enough information
+        #     # plus, avoid the situation where the original message is already too long
+        #     # for the model to handle
+        #     query = self.add_query_context(message, history).content
+        # else:
+        #     query = message
+        # print(f"Rewritten query: {query}")
+        query = None
         if not query:
             # TODO: previously return [], [] because we think this message as something
             # like "Hello", "I need help"...
@@ -703,7 +692,8 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
     async def ainvoke(  # type: ignore
         self, message: str, conv_id: str, history: list, **kwargs  # type: ignore
     ) -> Document:  # type: ignore
-        if self.use_rewrite:
+        if self.use_rewrite and self.rewrite_pipeline:
+            print("Chosen rewrite pipeline", self.rewrite_pipeline)
             rewrite = await self.rewrite_pipeline(question=message)
             message = rewrite.text
 
@@ -740,15 +730,17 @@ async def ainvoke(  # type: ignore
     def stream(  # type: ignore
         self, message: str, conv_id: str, history: list, **kwargs  # type: ignore
     ) -> Generator[Document, None, Document]:
-        if self.use_rewrite:
+        if self.use_rewrite and self.rewrite_pipeline:
+            print("Chosen rewrite pipeline", self.rewrite_pipeline)
             message = self.rewrite_pipeline(question=message).text
+            print("Rewrite result", message)
 
         print(f"Rewritten message (use_rewrite={self.use_rewrite}): {message}")
         print(f"Retrievers {self.retrievers}")
         # should populate the context
         docs, infos = self.retrieve(message, history)
-        for _ in infos:
-            yield _
+        print(f"Got {len(docs)} retrieved documents")
+        yield from infos
 
         evidence_mode, evidence = self.evidence_pipeline(docs).content
         answer = yield from self.answering_pipeline.stream(
@@ -797,7 +789,10 @@ def get_pipeline(cls, settings, states, retrievers):
             retrievers: the retrievers to use
         """
         prefix = f"reasoning.options.{cls.get_info()['id']}"
-        pipeline = cls(retrievers=retrievers)
+        pipeline = cls(
+            retrievers=retrievers,
+            rewrite_pipeline=RewriteQuestionPipeline(),
+        )
 
         llm_name = settings.get(f"{prefix}.llm", None)
         llm = llms.get(llm_name, llms.get_default())
@@ -821,10 +816,11 @@ def get_pipeline(cls, settings, states, retrievers):
 
         pipeline.trigger_context = settings[f"{prefix}.trigger_context"]
         pipeline.use_rewrite = states.get("app", {}).get("regen", False)
-        pipeline.rewrite_pipeline.llm = llm
-        pipeline.rewrite_pipeline.lang = {"en": "English", "ja": "Japanese"}.get(
-            settings["reasoning.lang"], "English"
-        )
+        if pipeline.rewrite_pipeline:
+            pipeline.rewrite_pipeline.llm = llm
+            pipeline.rewrite_pipeline.lang = {"en": "English", "ja": "Japanese"}.get(
+                settings["reasoning.lang"], "English"
+            )
         return pipeline
 
     @classmethod
@@ -891,3 +887,155 @@ def get_info(cls) -> dict:
                 "context. After that it includes that context to generate the answer."
             ),
         }
+
+
+class FullDecomposeQAPipeline(FullQAPipeline):
+    def answer_sub_questions(
+        self, messages: list, conv_id: str, history: list, **kwargs
+    ):
+        output_str = ""
+        for idx, message in enumerate(messages):
+            yield Document(
+                channel="chat",
+                content=f"<br><b>Sub-question {idx + 1}</b><br>{message}<br>",
+            )
+            # should populate the context
+            docs, infos = self.retrieve(message, history)
+            print(f"Got {len(docs)} retrieved documents")
+
+            yield from infos
+
+            evidence_mode, evidence = self.evidence_pipeline(docs).content
+            answer = yield from self.answering_pipeline.stream(
+                question=message,
+                history=history,
+                evidence=evidence,
+                evidence_mode=evidence_mode,
+                conv_id=conv_id,
+                **kwargs,
+            )
+
+            output_str += (
+                f"Sub-question {idx + 1}-th: '{message}'\nAnswer: '{answer.text}'\n\n"
+            )
+
+        return output_str
+
+    def stream(  # type: ignore
+        self, message: str, conv_id: str, history: list, **kwargs  # type: ignore
+    ) -> Generator[Document, None, Document]:
+        sub_question_answer_output = ""
+        if self.rewrite_pipeline:
+            print("Chosen rewrite pipeline", self.rewrite_pipeline)
+            result = self.rewrite_pipeline(question=message)
+            print("Rewrite result", result)
+            if isinstance(result, Document):
+                message = result.text
+            elif (
+                isinstance(result, list)
+                and len(result) > 0
+                and isinstance(result[0], Document)
+            ):
+                yield Document(
+                    channel="chat",
+                    content="<h4>Sub questions and their answers</h4>",
+                )
+                sub_question_answer_output = yield from self.answer_sub_questions(
+                    [r.text for r in result], conv_id, history, **kwargs
+                )
+
+        yield Document(channel="chat", content=f"<h4>Main question</h4>{message}<br>")
+
+        # should populate the context
+        docs, infos = self.retrieve(message, history)
+        print(f"Got {len(docs)} retrieved documents")
+        yield from infos
+
+        evidence_mode, evidence = self.evidence_pipeline(docs).content
+        answer = yield from self.answering_pipeline.stream(
+            question=message,
+            history=history,
+            evidence=evidence + "\n" + sub_question_answer_output,
+            evidence_mode=evidence_mode,
+            conv_id=conv_id,
+            **kwargs,
+        )
+
+        # show the evidence
+        with_citation, without_citation = self.prepare_citations(answer, docs)
+        if not with_citation and not without_citation:
+            yield Document(channel="info", content="<h5><b>No evidence found.</b></h5>")
+        else:
+            yield Document(channel="info", content=None)
+            for _ in with_citation:
+                yield _
+            if without_citation:
+                for _ in without_citation:
+                    yield _
+
+        return answer
+
+    @classmethod
+    def get_user_settings(cls) -> dict:
+        user_settings = super().get_user_settings()
+        user_settings["decompose_prompt"] = {
+            "name": "Decompose Prompt",
+            "value": DecomposeQuestionPipeline.DECOMPOSE_SYSTEM_PROMPT_TEMPLATE,
+        }
+        return user_settings
+
+    @classmethod
+    def get_pipeline(cls, settings, states, retrievers):
+        """Get the reasoning pipeline
+
+        Args:
+            settings: the settings for the pipeline
+            retrievers: the retrievers to use
+        """
+        print(settings)
+        prefix = f"reasoning.options.{cls.get_info()['id']}"
+        pipeline = cls(
+            retrievers=retrievers,
+            rewrite_pipeline=DecomposeQuestionPipeline(
+                prompt_template=settings.get(f"{prefix}.decompose_prompt")
+            ),
+        )
+
+        llm_name = settings.get(f"{prefix}.llm", None)
+        llm = llms.get(llm_name, llms.get_default())
+
+        # answering pipeline configuration
+        answer_pipeline = pipeline.answering_pipeline
+        answer_pipeline.llm = llm
+        answer_pipeline.citation_pipeline.llm = llm
+        answer_pipeline.n_last_interactions = settings[f"{prefix}.n_last_interactions"]
+        answer_pipeline.enable_citation = settings[f"{prefix}.highlight_citation"]
+        answer_pipeline.system_prompt = settings[f"{prefix}.system_prompt"]
+        answer_pipeline.qa_template = settings[f"{prefix}.qa_prompt"]
+        answer_pipeline.lang = {"en": "English", "ja": "Japanese"}.get(
+            settings["reasoning.lang"], "English"
+        )
+
+        pipeline.add_query_context.llm = llm
+        pipeline.add_query_context.n_last_interactions = settings[
+            f"{prefix}.n_last_interactions"
+        ]
+
+        pipeline.trigger_context = settings[f"{prefix}.trigger_context"]
+        pipeline.use_rewrite = states.get("app", {}).get("regen", False)
+        if pipeline.rewrite_pipeline:
+            pipeline.rewrite_pipeline.llm = llm
+        return pipeline
+
+    @classmethod
+    def get_info(cls) -> dict:
+        return {
+            "id": "complex",
+            "name": "Complex QA",
+            "description": (
+                "Use multi-step reasoning to decompose a complex question into "
+                "multiple sub-questions. This pipeline can "
+                "perform both keyword search and similarity search to retrieve the "
+                "context. After that it includes that context to generate the answer."
+            ),
+        }

From fccb8f6a74af926855815e6259e5b84dafd79aed Mon Sep 17 00:00:00 2001
From: kan_cin <kan@cinnamon.is>
Date: Mon, 8 Jul 2024 14:00:07 +0700
Subject: [PATCH 43/56] fix: add encoding utf-8 when save temporal markdown in
 vectorIndex (#101)

---
 libs/kotaemon/kotaemon/indices/vectorindex.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/kotaemon/kotaemon/indices/vectorindex.py b/libs/kotaemon/kotaemon/indices/vectorindex.py
index 571320866..f8cd870cb 100644
--- a/libs/kotaemon/kotaemon/indices/vectorindex.py
+++ b/libs/kotaemon/kotaemon/indices/vectorindex.py
@@ -97,7 +97,7 @@ def run(self, text: str | list[str] | Document | list[Document]):
                     markdown_content += f"\ntext:\n{input_[i].text}"
 
                 with open(
-                    Path(self.cache_dir) / f"{file_name.stem}_{self.count_+i}.md", "w"
+                    Path(self.cache_dir) / f"{file_name.stem}_{self.count_+i}.md", "w", encoding="utf-8"
                 ) as f:
                     f.write(markdown_content)
             self.count_ += len(input_)

From b4cee6fcb8aa6721b839c58bb266d7fc4b9308f6 Mon Sep 17 00:00:00 2001
From: "Tuan Anh Nguyen Dang (Tadashi_Cin)" <tadashi@cinnamon.is>
Date: Mon, 8 Jul 2024 15:53:38 +0700
Subject: [PATCH 44/56] fix: improve retrieval pipeline and relevant score
 display (#102)

* fix: improve retrieval pipeline by extending first round top_k with multiplier

* fix: minor fix
---
 .../kotaemon/indices/rankings/cohere.py       |  1 +
 .../kotaemon/indices/rankings/llm_trulens.py  | 33 ++++++++--
 libs/kotaemon/kotaemon/indices/vectorindex.py | 22 +++++--
 libs/ktem/ktem/index/file/pipelines.py        | 10 +--
 libs/ktem/ktem/pages/settings.py              |  4 +-
 .../fewshot_rewrite_question.py               |  2 +-
 libs/ktem/ktem/reasoning/simple.py            | 62 ++++++++++---------
 7 files changed, 85 insertions(+), 49 deletions(-)

diff --git a/libs/kotaemon/kotaemon/indices/rankings/cohere.py b/libs/kotaemon/kotaemon/indices/rankings/cohere.py
index 4f5866ac5..4244b58e2 100644
--- a/libs/kotaemon/kotaemon/indices/rankings/cohere.py
+++ b/libs/kotaemon/kotaemon/indices/rankings/cohere.py
@@ -31,6 +31,7 @@ def run(self, documents: list[Document], query: str) -> list[Document]:
         response = cohere_client.rerank(
             model=self.model_name, query=query, documents=_docs
         )
+        print("Cohere score", [r.relevance_score for r in response.results])
         for r in response.results:
             doc = documents[r.index]
             doc.metadata["cohere_reranking_score"] = r.relevance_score
diff --git a/libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py b/libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py
index 1fa4dc45f..b7811f042 100644
--- a/libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py
+++ b/libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py
@@ -2,8 +2,12 @@
 
 import re
 from concurrent.futures import ThreadPoolExecutor
+from functools import partial
+
+import tiktoken
 
 from kotaemon.base import Document, HumanMessage, SystemMessage
+from kotaemon.indices.splitters import TokenSplitter
 from kotaemon.llms import BaseLLM, PromptTemplate
 
 from .llm import LLMReranking
@@ -42,6 +46,8 @@
 PATTERN_INTEGER: re.Pattern = re.compile(r"([+-]?[1-9][0-9]*|0)")
 """Regex that matches integers."""
 
+MAX_CONTEXT_LEN = 8000
+
 
 def validate_rating(rating) -> int:
     """Validate a rating is between 0 and 10."""
@@ -91,9 +97,18 @@ class LLMTrulensScoring(LLMReranking):
     llm: BaseLLM
     system_prompt_template: PromptTemplate = SYSTEM_PROMPT_TEMPLATE
     user_prompt_template: PromptTemplate = USER_PROMPT_TEMPLATE
-    top_k: int = 3
     concurrent: bool = True
     normalize: float = 10
+    trim_func: TokenSplitter = TokenSplitter.withx(
+        chunk_size=MAX_CONTEXT_LEN,
+        chunk_overlap=0,
+        separator=" ",
+        tokenizer=partial(
+            tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
+            allowed_special=set(),
+            disallowed_special="all",
+        ),
+    )
 
     def run(
         self,
@@ -108,6 +123,13 @@ def run(
             with ThreadPoolExecutor() as executor:
                 futures = []
                 for doc in documents:
+                    chunked_doc_content = self.trim_func(
+                        [
+                            Document(content=doc.get_content())
+                            # skip metadata which cause troubles
+                        ]
+                    )[0].text
+
                     messages = []
                     messages.append(
                         SystemMessage(self.system_prompt_template.populate())
@@ -115,7 +137,7 @@ def run(
                     messages.append(
                         HumanMessage(
                             self.user_prompt_template.populate(
-                                question=query, context=doc.get_content()
+                                question=query, context=chunked_doc_content
                             )
                         )
                     )
@@ -148,8 +170,9 @@ def run(
             doc.metadata["llm_trulens_score"] = score
             filtered_docs.append(doc)
 
-        # prevent returning empty result
-        if len(filtered_docs) == 0:
-            filtered_docs = documents[: self.top_k]
+        print(
+            "LLM rerank scores",
+            [doc.metadata["llm_trulens_score"] for doc in filtered_docs],
+        )
 
         return filtered_docs
diff --git a/libs/kotaemon/kotaemon/indices/vectorindex.py b/libs/kotaemon/kotaemon/indices/vectorindex.py
index f8cd870cb..ac02014f3 100644
--- a/libs/kotaemon/kotaemon/indices/vectorindex.py
+++ b/libs/kotaemon/kotaemon/indices/vectorindex.py
@@ -111,8 +111,16 @@ class VectorRetrieval(BaseRetrieval):
     embedding: BaseEmbeddings
     rerankers: Sequence[BaseReranking] = []
     top_k: int = 5
+    first_round_top_k_mult: int = 10
     retrieval_mode: str = "hybrid"  # vector, text, hybrid
 
+    def _filter_docs(
+        self, documents: list[RetrievedDocument], top_k: int | None = None
+    ):
+        if top_k:
+            documents = documents[:top_k]
+        return documents
+
     def run(
         self, text: str | Document, top_k: Optional[int] = None, **kwargs
     ) -> list[RetrievedDocument]:
@@ -128,6 +136,8 @@ def run(
         if top_k is None:
             top_k = self.top_k
 
+        top_k_first_round = top_k * self.first_round_top_k_mult
+
         if self.doc_store is None:
             raise ValueError(
                 "doc_store is not provided. Please provide a doc_store to "
@@ -142,7 +152,7 @@ def run(
         if self.retrieval_mode == "vector":
             emb = self.embedding(text)[0].embedding
             _, scores, ids = self.vector_store.query(
-                embedding=emb, top_k=top_k, **kwargs
+                embedding=emb, top_k=top_k_first_round, **kwargs
             )
             docs = self.doc_store.get(ids)
             result = [
@@ -151,19 +161,19 @@ def run(
             ]
         elif self.retrieval_mode == "text":
             query = text.text if isinstance(text, Document) else text
-            docs = self.doc_store.query(query, top_k=top_k, doc_ids=scope)
+            docs = self.doc_store.query(query, top_k=top_k_first_round, doc_ids=scope)
             result = [RetrievedDocument(**doc.to_dict(), score=-1.0) for doc in docs]
         elif self.retrieval_mode == "hybrid":
             # similartiy search section
             emb = self.embedding(text)[0].embedding
             _, vs_scores, vs_ids = self.vector_store.query(
-                embedding=emb, top_k=top_k, **kwargs
+                embedding=emb, top_k=top_k_first_round, **kwargs
             )
             vs_docs = self.doc_store.get(vs_ids)
 
             # full-text search section
             query = text.text if isinstance(text, Document) else text
-            docs = self.doc_store.query(query, top_k=top_k, doc_ids=scope)
+            docs = self.doc_store.query(query, top_k=top_k_first_round, doc_ids=scope)
             result = [
                 RetrievedDocument(**doc.to_dict(), score=-1.0)
                 for doc in docs
@@ -177,8 +187,12 @@ def run(
         # use additional reranker to re-order the document list
         if self.rerankers and text:
             for reranker in self.rerankers:
+                # if reranker is LLMReranking, limit the document with top_k items only
+                result = self._filter_docs(result, top_k=top_k)
                 result = reranker(documents=result, query=text)
 
+        result = self._filter_docs(result, top_k=top_k)
+
         return result
 
 
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 50839e6b4..454eded56 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -200,12 +200,6 @@ def get_user_settings(cls) -> dict:
                 "choices": reranking_llm_choices,
                 "special_type": "llm",
             },
-            "separate_embedding": {
-                "name": "Use separate embedding",
-                "value": False,
-                "choices": [("Yes", True), ("No", False)],
-                "component": "dropdown",
-            },
             "num_retrieval": {
                 "name": "Number of document chunks to retrieve",
                 "value": 3,
@@ -225,12 +219,12 @@ def get_user_settings(cls) -> dict:
             },
             "mmr": {
                 "name": "Use MMR",
-                "value": True,
+                "value": False,
                 "choices": [True, False],
                 "component": "checkbox",
             },
             "use_reranking": {
-                "name": "Use reranking",
+                "name": "Use reranking (Cohere)",
                 "value": False,
                 "choices": [True, False],
                 "component": "checkbox",
diff --git a/libs/ktem/ktem/pages/settings.py b/libs/ktem/ktem/pages/settings.py
index 72803dd63..9c47f92f0 100644
--- a/libs/ktem/ktem/pages/settings.py
+++ b/libs/ktem/ktem/pages/settings.py
@@ -107,7 +107,7 @@ def __init__(self, app):
 
     def on_building_ui(self):
         if self._app.f_user_management:
-            with gr.Tab("Users"):
+            with gr.Tab("User settings"):
                 self.user_tab()
 
         self.app_tab()
@@ -270,7 +270,7 @@ def index_tab(self):
         #         self._components[f"index.{n}"] = obj
 
         id2name = {k: v.name for k, v in self._app.index_manager.info().items()}
-        with gr.Tab("Document Indices", visible=self._render_index_tab):
+        with gr.Tab("Retrieval settings", visible=self._render_index_tab):
             for pn, sig in self._default_settings.index.options.items():
                 name = id2name.get(pn, f"<id {pn}>")
                 with gr.Tab(name):
diff --git a/libs/ktem/ktem/reasoning/prompt_optimization/fewshot_rewrite_question.py b/libs/ktem/ktem/reasoning/prompt_optimization/fewshot_rewrite_question.py
index a90cc94cb..3c3b7191e 100644
--- a/libs/ktem/ktem/reasoning/prompt_optimization/fewshot_rewrite_question.py
+++ b/libs/ktem/ktem/reasoning/prompt_optimization/fewshot_rewrite_question.py
@@ -68,7 +68,7 @@ def get_pipeline(
         pipeline = cls(
             embedding=embedding, vector_store=vector_store, doc_store=doc_store
         )
-        if vector_store.count() or doc_store.count():
+        if doc_store.count():
             return pipeline
 
         examples = json.load(open(example_path, "r"))
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index ba06a4ec5..3f733eb41 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -546,7 +546,7 @@ def retrieve(
                     Document(
                         channel="info",
                         content=Render.collapsible(
-                            header=get_header(doc),
+                            header=f"<i>{get_header(doc)}</i>",
                             content=Render.image(
                                 url=doc.metadata["image_origin"], text=doc.text
                             ),
@@ -559,7 +559,7 @@ def retrieve(
                     Document(
                         channel="info",
                         content=Render.collapsible(
-                            header=get_header(doc),
+                            header=f"<i>{get_header(doc)}</i>",
                             content=Render.table(doc.text),
                             open=True,
                         ),
@@ -585,27 +585,35 @@ def _format_retrieval_score_and_doc(
         llm_reranking_score = (
             round(doc.metadata["llm_trulens_score"], 2)
             if doc.metadata.get("llm_trulens_score") is not None
-            else None
+            else 0.0
         )
         cohere_reranking_score = (
             round(doc.metadata["cohere_reranking_score"], 2)
-            if doc.metadata.get("cohere_reranking_score")
-            else None
+            if doc.metadata.get("cohere_reranking_score") is not None
+            else 0.0
         )
         item_type_prefix = doc.metadata.get("type", "")
         item_type_prefix = item_type_prefix.capitalize()
         if item_type_prefix:
             item_type_prefix += " from "
 
-        return Render.collapsible(
-            header=(f"{item_type_prefix}{get_header(doc)} [{llm_reranking_score}]"),
-            content="<b>Vectorstore score:</b>"
+        rendered_score = Render.collapsible(
+            header=f"<b>&emsp;Relevance score</b>: {llm_reranking_score}",
+            content="<b>&emsp;&emsp;Vectorstore score:</b>"
             f" {vectorstore_score}"
             f"{text_search_str}"
-            "<b>LLM reranking score:</b>"
+            "<b>&emsp;&emsp;LLM reranking score:</b>"
             f" {llm_reranking_score}<br>"
-            "<b>Cohere reranking score:</b>"
-            f" {cohere_reranking_score}<br>" + rendered_doc_content,
+            "<b>&emsp;&emsp;Cohere reranking score:</b>"
+            f" {cohere_reranking_score}<br>",
+        )
+
+        return Render.collapsible(
+            header=(
+                f"<i>{item_type_prefix}{get_header(doc)}</i>"
+                f" [score: {llm_reranking_score}]"
+            ),
+            content=rendered_score + rendered_doc_content,
             open=open_collapsible,
         )
 
@@ -757,26 +765,22 @@ def stream(  # type: ignore
         if not with_citation and not without_citation:
             yield Document(channel="info", content="<h5><b>No evidence found.</b></h5>")
         else:
+            # clear the Info panel
             yield Document(channel="info", content=None)
-            for _ in with_citation:
-                yield _
-            if without_citation:
-                for _ in without_citation:
-                    yield _
+            # show QA score
+            qa_score = (
+                round(answer.metadata["qa_score"], 2)
+                if answer.metadata.get("qa_score")
+                else None
+            )
+            yield Document(
+                channel="info",
+                content=(f"<h5>Answer confidence: {qa_score}</h5>"),
+            )
 
-        qa_score = (
-            round(answer.metadata["qa_score"], 2)
-            if answer.metadata.get("qa_score")
-            else None
-        )
-        yield Document(
-            channel="info",
-            content=(
-                "<h5><b>Question answering</b></h5><br>"
-                "<b>Question answering confidence:</b> "
-                f"{qa_score}"
-            ),
-        )
+            yield from with_citation
+            if without_citation:
+                yield from without_citation
 
         return answer
 

From 16a7e4fe0a50be74bbe8224593b85ba5d4883ec7 Mon Sep 17 00:00:00 2001
From: taprosoft <tadashi@cinnamon.is>
Date: Mon, 8 Jul 2024 10:44:52 +0000
Subject: [PATCH 45/56] feat: improve UI default settings and add quick switch
 option for pipeline

---
 libs/ktem/ktem/pages/chat/__init__.py | 80 ++++++++++++++++++++++++---
 1 file changed, 73 insertions(+), 7 deletions(-)

diff --git a/libs/ktem/ktem/pages/chat/__init__.py b/libs/ktem/ktem/pages/chat/__init__.py
index b7f814323..c47aa0a96 100644
--- a/libs/ktem/ktem/pages/chat/__init__.py
+++ b/libs/ktem/ktem/pages/chat/__init__.py
@@ -21,12 +21,15 @@
 from .control import ConversationControl
 from .report import ReportIssue
 
+DEFAULT_SETTING = "(default)"
+
 
 class ChatPage(BasePage):
     def __init__(self, app):
         self._app = app
         self._indices_input = []
         self.on_building_ui()
+        self._reasoning_type = gr.State(value=None)
 
     def on_building_ui(self):
         with gr.Row():
@@ -68,6 +71,22 @@ def on_building_ui(self):
                                 self._indices_input.append(gr_index)
                         setattr(self, f"_index_{index.id}", index_ui)
 
+                with gr.Accordion(label="Hint") as _:
+                    self.upload_help = gr.HTML(
+                        "<i>To upload new file(s), go to "
+                        "<b>File Index</b> section in top navigation bar.</i>"
+                    )
+
+                # a hacky quick switch for reasoning type option
+                with gr.Accordion(label="Reasoning options", open=False) as _:
+                    reasoning_type_values = [
+                        (DEFAULT_SETTING, DEFAULT_SETTING)
+                    ] + self._app.default_settings.reasoning.settings["use"].choices
+                    self.reasoning_types = gr.Dropdown(
+                        choices=reasoning_type_values,
+                        value=DEFAULT_SETTING,
+                        show_label=False,
+                    )
                 self.report_issue = ReportIssue(self._app)
 
             with gr.Column(scale=6, elem_id="chat-area"):
@@ -106,6 +125,7 @@ def on_register_events(self):
                 self.chat_control.conversation_id,
                 self.chat_panel.chatbot,
                 self._app.settings_state,
+                self._reasoning_type,
                 self.chat_state,
                 self._app.user_id,
             ]
@@ -148,6 +168,7 @@ def on_register_events(self):
                 self.chat_control.conversation_id,
                 self.chat_panel.chatbot,
                 self._app.settings_state,
+                self._reasoning_type,
                 self.chat_state,
                 self._app.user_id,
             ]
@@ -286,6 +307,11 @@ def on_register_events(self):
             + self._indices_input,
             outputs=None,
         )
+        self.reasoning_types.change(
+            self.reasoning_changed,
+            inputs=[self.reasoning_types],
+            outputs=[self._reasoning_type],
+        )
         if getattr(flowsettings, "KH_FEATURE_CHAT_SUGGESTION", False):
             self.chat_suggestion.example.select(
                 self.chat_suggestion.select_example,
@@ -382,6 +408,12 @@ def update_data_source(self, convo_id, messages, state, *selecteds):
             session.add(result)
             session.commit()
 
+    def reasoning_changed(self, reasoning_type):
+        if reasoning_type != DEFAULT_SETTING:
+            # override app settings state (temporary)
+            gr.Info("Reasoning type changed to `{}`".format(reasoning_type))
+        return reasoning_type
+
     def is_liked(self, convo_id, liked: gr.LikeData):
         with Session(engine) as session:
             statement = select(Conversation).where(Conversation.id == convo_id)
@@ -396,7 +428,14 @@ def is_liked(self, convo_id, liked: gr.LikeData):
             session.add(result)
             session.commit()
 
-    def create_pipeline(self, settings: dict, state: dict, user_id: int, *selecteds):
+    def create_pipeline(
+        self,
+        settings: dict,
+        session_reasoning_type: str,
+        state: dict,
+        user_id: int,
+        *selecteds,
+    ):
         """Create the pipeline from settings
 
         Args:
@@ -408,8 +447,15 @@ def create_pipeline(self, settings: dict, state: dict, user_id: int, *selecteds)
         Returns:
             - the pipeline objects
         """
-        reasoning_mode = settings["reasoning.use"]
+        # override reasoning_mode by temporary chat page state
+        print("Session reasoning type", session_reasoning_type)
+        reasoning_mode = (
+            settings["reasoning.use"]
+            if session_reasoning_type == DEFAULT_SETTING
+            else session_reasoning_type
+        )
         reasoning_cls = reasonings[reasoning_mode]
+        print("Reasoning class", reasoning_cls)
         reasoning_id = reasoning_cls.get_info()["id"]
 
         # get retrievers
@@ -437,7 +483,14 @@ def create_pipeline(self, settings: dict, state: dict, user_id: int, *selecteds)
         return pipeline, reasoning_state
 
     def chat_fn(
-        self, conversation_id, chat_history, settings, state, user_id, *selecteds
+        self,
+        conversation_id,
+        chat_history,
+        settings,
+        reasoning_type,
+        state,
+        user_id,
+        *selecteds,
     ):
         """Chat function"""
         chat_input = chat_history[-1][0]
@@ -447,8 +500,9 @@ def chat_fn(
 
         # construct the pipeline
         pipeline, reasoning_state = self.create_pipeline(
-            settings, state, user_id, *selecteds
+            settings, reasoning_type, state, user_id, *selecteds
         )
+        print("Reasoning state", reasoning_state)
         pipeline.set_output_queue(queue)
 
         text, refs = "", ""
@@ -481,7 +535,6 @@ def chat_fn(
                     refs += response.content
 
             if len(refs) > len_ref:
-                print(f"Len refs: {len(refs)}")
                 len_ref = len(refs)
 
             state[pipeline.get_info()["id"]] = reasoning_state["pipeline"]
@@ -495,7 +548,14 @@ def chat_fn(
             yield chat_history + [(chat_input, text or empty_msg)], refs, state
 
     def regen_fn(
-        self, conversation_id, chat_history, settings, state, user_id, *selecteds
+        self,
+        conversation_id,
+        chat_history,
+        settings,
+        reasoning_type,
+        state,
+        user_id,
+        *selecteds,
     ):
         """Regen function"""
         if not chat_history:
@@ -505,7 +565,13 @@ def regen_fn(
 
         state["app"]["regen"] = True
         for chat, refs, state in self.chat_fn(
-            conversation_id, chat_history, settings, state, user_id, *selecteds
+            conversation_id,
+            chat_history,
+            settings,
+            reasoning_type,
+            state,
+            user_id,
+            *selecteds,
         ):
             new_state = deepcopy(state)
             new_state["app"]["regen"] = False

From f0a75763755430659e8ff907bf110ec970ac4f98 Mon Sep 17 00:00:00 2001
From: "Tuan Anh Nguyen Dang (Tadashi_Cin)" <tadashi@cinnamon.is>
Date: Mon, 8 Jul 2024 18:23:50 +0700
Subject: [PATCH 46/56] fix: improve agent logics (#103)

* fix: improve agent progress display

* fix: update retrieval logic

* fix: UI display
---
 libs/kotaemon/kotaemon/agents/rewoo/agent.py  | 10 +++++
 libs/kotaemon/kotaemon/agents/tools/llm.py    |  8 +++-
 libs/kotaemon/kotaemon/indices/vectorindex.py | 10 ++++-
 libs/ktem/ktem/index/file/pipelines.py        |  6 ++-
 libs/ktem/ktem/pages/chat/__init__.py         |  2 +-
 libs/ktem/ktem/reasoning/react.py             |  4 +-
 libs/ktem/ktem/reasoning/rewoo.py             | 40 +++++++++++++++----
 libs/ktem/ktem/utils/render.py                | 14 +++++++
 8 files changed, 78 insertions(+), 16 deletions(-)

diff --git a/libs/kotaemon/kotaemon/agents/rewoo/agent.py b/libs/kotaemon/kotaemon/agents/rewoo/agent.py
index 3ec19503f..e77801138 100644
--- a/libs/kotaemon/kotaemon/agents/rewoo/agent.py
+++ b/libs/kotaemon/kotaemon/agents/rewoo/agent.py
@@ -317,6 +317,14 @@ def stream(self, instruction: str, use_citation: bool = False):
         )
 
         print("Planner output:", planner_text_output)
+        # output planner to info panel
+        yield AgentOutput(
+            text="",
+            agent_type=self.agent_type,
+            status="thinking",
+            intermediate_steps=[{"planner_log": planner_text_output}],
+        )
+
         # Work
         worker_evidences, plugin_cost, plugin_token = self._get_worker_evidence(
             planner_evidences, evidence_level
@@ -326,7 +334,9 @@ def stream(self, instruction: str, use_citation: bool = False):
             worker_log += f"{plan}: {plans[plan]}\n"
             current_progress = f"{plan}: {plans[plan]}\n"
             for e in plan_to_es[plan]:
+                worker_log += f"#Action: {planner_evidences.get(e, None)}\n"
                 worker_log += f"{e}: {worker_evidences[e]}\n"
+                current_progress += f"#Action: {planner_evidences.get(e, None)}\n"
                 current_progress += f"{e}: {worker_evidences[e]}\n"
 
             yield AgentOutput(
diff --git a/libs/kotaemon/kotaemon/agents/tools/llm.py b/libs/kotaemon/kotaemon/agents/tools/llm.py
index ae2dd2e7f..d7cf7485c 100644
--- a/libs/kotaemon/kotaemon/agents/tools/llm.py
+++ b/libs/kotaemon/kotaemon/agents/tools/llm.py
@@ -22,12 +22,16 @@ class LLMTool(BaseTool):
     )
     llm: BaseLLM
     args_schema: Optional[Type[BaseModel]] = LLMArgs
+    dummy_mode: bool = True
 
     def _run_tool(self, query: AnyStr) -> str:
         output = None
         try:
-            response = self.llm(query)
+            if not self.dummy_mode:
+                response = self.llm(query)
+            else:
+                response = None
         except ValueError:
             raise ToolException("LLM Tool call failed")
-        output = response.text
+        output = response.text if response else "<->"
         return output
diff --git a/libs/kotaemon/kotaemon/indices/vectorindex.py b/libs/kotaemon/kotaemon/indices/vectorindex.py
index ac02014f3..d1295a608 100644
--- a/libs/kotaemon/kotaemon/indices/vectorindex.py
+++ b/libs/kotaemon/kotaemon/indices/vectorindex.py
@@ -97,7 +97,9 @@ def run(self, text: str | list[str] | Document | list[Document]):
                     markdown_content += f"\ntext:\n{input_[i].text}"
 
                 with open(
-                    Path(self.cache_dir) / f"{file_name.stem}_{self.count_+i}.md", "w", encoding="utf-8"
+                    Path(self.cache_dir) / f"{file_name.stem}_{self.count_+i}.md",
+                    "w",
+                    encoding="utf-8",
                 ) as f:
                     f.write(markdown_content)
             self.count_ += len(input_)
@@ -135,8 +137,12 @@ def run(
         """
         if top_k is None:
             top_k = self.top_k
+        do_extend = kwargs.pop("do_extend", False)
 
-        top_k_first_round = top_k * self.first_round_top_k_mult
+        if do_extend:
+            top_k_first_round = top_k * self.first_round_top_k_mult
+        else:
+            top_k_first_round = top_k
 
         if self.doc_store is None:
             raise ValueError(
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 454eded56..f47000c99 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -127,6 +127,8 @@ def run(
             results = session.execute(stmt)
             vs_ids = [r[0].target_id for r in results.all()]
 
+        # do first round top_k extension
+        retrieval_kwargs["do_extend"] = True
         retrieval_kwargs["scope"] = vs_ids
         retrieval_kwargs["filters"] = MetadataFilters(
             filters=[
@@ -202,7 +204,7 @@ def get_user_settings(cls) -> dict:
             },
             "num_retrieval": {
                 "name": "Number of document chunks to retrieve",
-                "value": 3,
+                "value": 10,
                 "component": "number",
             },
             "retrieval_mode": {
@@ -225,7 +227,7 @@ def get_user_settings(cls) -> dict:
             },
             "use_reranking": {
                 "name": "Use reranking (Cohere)",
-                "value": False,
+                "value": True,
                 "choices": [True, False],
                 "component": "checkbox",
             },
diff --git a/libs/ktem/ktem/pages/chat/__init__.py b/libs/ktem/ktem/pages/chat/__init__.py
index c47aa0a96..acbe23a6d 100644
--- a/libs/ktem/ktem/pages/chat/__init__.py
+++ b/libs/ktem/ktem/pages/chat/__init__.py
@@ -451,7 +451,7 @@ def create_pipeline(
         print("Session reasoning type", session_reasoning_type)
         reasoning_mode = (
             settings["reasoning.use"]
-            if session_reasoning_type == DEFAULT_SETTING
+            if session_reasoning_type in (DEFAULT_SETTING, None)
             else session_reasoning_type
         )
         reasoning_cls = reasonings[reasoning_mode]
diff --git a/libs/ktem/ktem/reasoning/react.py b/libs/ktem/ktem/reasoning/react.py
index c93827451..02ff8481d 100644
--- a/libs/ktem/ktem/reasoning/react.py
+++ b/libs/ktem/ktem/reasoning/react.py
@@ -190,7 +190,9 @@ def prepare_citation(self, step_id, step, output, status) -> Document:
             "<b>Action</b>: <em>{tool}[{input}]</em>\n\n<b>Output</b>: {output}"
         ).format(
             tool=step.tool if status == "thinking" else "",
-            input=step.tool_input.replace("\n", "") if status == "thinking" else "",
+            input=step.tool_input.replace("\n", "").replace('"', "")
+            if status == "thinking"
+            else "",
             output=output if status == "thinking" else "Finished",
         )
         return Document(
diff --git a/libs/ktem/ktem/reasoning/rewoo.py b/libs/ktem/ktem/reasoning/rewoo.py
index 7b66c8b1d..705711d12 100644
--- a/libs/ktem/ktem/reasoning/rewoo.py
+++ b/libs/ktem/ktem/reasoning/rewoo.py
@@ -215,7 +215,7 @@ class Config:
     use_rewrite: bool = False
     enable_citation: bool = False
 
-    def format_info_panel(self, worker_log):
+    def format_info_panel_evidence(self, worker_log):
         header = ""
         content = []
 
@@ -223,6 +223,10 @@ def format_info_panel(self, worker_log):
             if line.startswith("#Plan"):
                 # line starts with #Plan should be marked as a new segment
                 header = line
+            elif line.startswith("#Action"):
+                # small fix for markdown output
+                line = "\\" + line + "<br>"
+                content.append(line)
             elif line.startswith("#"):
                 # stop markdown from rendering big headers
                 line = "\\" + line
@@ -238,6 +242,17 @@ def format_info_panel(self, worker_log):
             content=Render.collapsible(
                 header=header,
                 content=Render.table("\n".join(content)),
+                open=False,
+            ),
+        )
+
+    def format_info_panel_planner(self, planner_output):
+        planner_output = planner_output.replace("\n", "<br>")
+        return Document(
+            channel="info",
+            content=Render.collapsible(
+                header="Planner Output",
+                content=planner_output,
                 open=True,
             ),
         )
@@ -285,6 +300,10 @@ def prepare_citation(self, answer) -> list[Document]:
                 # line starts with #Plan should be marked as a new segment
                 new_segment = [line]
                 segments.append(new_segment)
+            elif line.startswith("#Action"):
+                # small fix for markdown output
+                line = "\\" + line + "<br>"
+                segments[-1].append(line)
             elif line.startswith("#"):
                 # stop markdown from rendering big headers
                 line = "\\" + line
@@ -337,18 +356,23 @@ def stream(  # type: ignore
         for item in output_stream:
             if item.intermediate_steps:
                 for step in item.intermediate_steps:
-                    yield Document(
-                        channel="info",
-                        content=self.format_info_panel(step["worker_log"]),
-                    )
+                    if "planner_log" in step:
+                        yield Document(
+                            channel="info",
+                            content=self.format_info_panel_planner(step["planner_log"]),
+                        )
+                    else:
+                        yield Document(
+                            channel="info",
+                            content=self.format_info_panel_evidence(step["worker_log"]),
+                        )
             if item.text:
+                # final answer
                 yield Document(channel="chat", content=item.text)
 
         answer = output_stream.value
         yield Document(channel="info", content=None)
-        refined_citations = self.prepare_citation(answer)
-        for _ in refined_citations:
-            yield _
+        yield from self.prepare_citation(answer)
 
         return answer
 
diff --git a/libs/ktem/ktem/utils/render.py b/libs/ktem/ktem/utils/render.py
index ac8d6aa3a..f5addf733 100644
--- a/libs/ktem/ktem/utils/render.py
+++ b/libs/ktem/ktem/utils/render.py
@@ -1,6 +1,19 @@
 import markdown
 
 
+def replace_mardown_header(text: str) -> str:
+    """Turn `#` header lines into bold text and blank out `==` underline lines."""
+    newlines = []
+    for line in text.splitlines():
+        if line.startswith("#"):
+            # strip only the leading marker so a `#` inside the title survives
+            line = "<strong>" + line.lstrip("#") + "</strong>"
+        elif line.startswith("=="):
+            line = ""
+        newlines.append(line)
+    return "\n".join(newlines)
+
+
 class Render:
     """Default text rendering into HTML for the UI"""
 
@@ -13,6 +26,7 @@ def collapsible(header, content, open: bool = False) -> str:
     @staticmethod
     def table(text: str) -> str:
         """Render table from markdown format into HTML"""
+        text = replace_mardown_header(text)
         return markdown.markdown(text, extensions=["markdown.extensions.tables"])
 
     @staticmethod

From 813f871df4e0ca240345a02a3e16f069e1c89754 Mon Sep 17 00:00:00 2001
From: taprosoft <tadashi@cinnamon.is>
Date: Mon, 8 Jul 2024 11:26:31 +0000
Subject: [PATCH 47/56] fix: less verbose debug log

---
 libs/ktem/ktem/reasoning/simple.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 3f733eb41..2b6eece93 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -66,12 +66,7 @@ def find_text(search_span, context):
             ).find_longest_match()
             if match.size > len(search_span) * 0.6:
                 matches.append((match.b, match.b + match.size))
-                print(
-                    "search",
-                    search_span,
-                    "matched",
-                    context[match.b : match.b + match.size],
-                )
+
     return matches
 
 
@@ -996,7 +991,6 @@ def get_pipeline(cls, settings, states, retrievers):
             settings: the settings for the pipeline
             retrievers: the retrievers to use
         """
-        print(settings)
         prefix = f"reasoning.options.{cls.get_info()['id']}"
         pipeline = cls(
             retrievers=retrievers,

From 87f14a392492a68ed08d0ccacb32502d077e8b6e Mon Sep 17 00:00:00 2001
From: taprosoft <tadashi@cinnamon.is>
Date: Mon, 8 Jul 2024 12:02:11 +0000
Subject: [PATCH 48/56] feat: add warning message for low confidence

---
 libs/ktem/ktem/reasoning/simple.py | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 2b6eece93..9b23019a3 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -216,6 +216,8 @@ def run(self, docs: list[RetrievedDocument]) -> Document:
     "Rephrased question: "
 )  # noqa
 
+CONTEXT_RELEVANT_WARNING_SCORE = 0.7
+
 
 class AnswerWithContextPipeline(BaseComponent):
     """Answer the question based on the evidence
@@ -276,7 +278,7 @@ def get_prompt(self, question, evidence, evidence_mode: int):
                 lang=self.lang,
             )
 
-        return prompt, images
+        return prompt, evidence, images
 
     def run(
         self, question: str, evidence: str, evidence_mode: int = 0, **kwargs
@@ -287,7 +289,7 @@ def invoke(
         self, question: str, evidence: str, evidence_mode: int = 0, **kwargs
     ) -> Document:
         history = kwargs.get("history", [])
-        prompt, images = self.get_prompt(question, evidence, evidence_mode)
+        prompt, evidence, images = self.get_prompt(question, evidence, evidence_mode)
 
         output = ""
         if evidence_mode == EVIDENCE_MODE_FIGURE:
@@ -339,7 +341,7 @@ async def ainvoke(  # type: ignore
             evidence_mode: the mode of evidence, 0 for text, 1 for table, 2 for chatbot
         """
         history = kwargs.get("history", [])
-        prompt, images = self.get_prompt(question, evidence, evidence_mode)
+        prompt, evidence, images = self.get_prompt(question, evidence, evidence_mode)
 
         citation_task = None
         if evidence and self.enable_citation:
@@ -390,7 +392,7 @@ def stream(  # type: ignore
         self, question: str, evidence: str, evidence_mode: int = 0, **kwargs
     ) -> Generator[Document, None, Document]:
         history = kwargs.get("history", [])
-        prompt, images = self.get_prompt(question, evidence, evidence_mode)
+        prompt, evidence, images = self.get_prompt(question, evidence, evidence_mode)
 
         output = ""
         logprobs = []
@@ -761,7 +763,22 @@ def stream(  # type: ignore
             yield Document(channel="info", content="<h5><b>No evidence found.</b></h5>")
         else:
             # clear the Info panel
+            max_llm_rerank_score = max(
+                doc.metadata.get("llm_trulens_score", 0.0) for doc in docs
+            )
+            # clear previous info
             yield Document(channel="info", content=None)
+
+            # yield warning message
+            if max_llm_rerank_score < CONTEXT_RELEVANT_WARNING_SCORE:
+                yield Document(
+                    channel="info",
+                    content=(
+                        "<h5>WARNING! Context relevance score is low. "
+                        "Double check the model answer for correctness.</h5>"
+                    ),
+                )
+
             # show QA score
             qa_score = (
                 round(answer.metadata["qa_score"], 2)

From e98b797f3b2f0ff20b01e475270c3cd7b55b16fd Mon Sep 17 00:00:00 2001
From: taprosoft <tadashi@cinnamon.is>
Date: Mon, 8 Jul 2024 14:47:28 +0000
Subject: [PATCH 49/56] fix: LLM scoring enabled by default

---
 libs/kotaemon/kotaemon/indices/vectorindex.py |  5 +++--
 libs/ktem/ktem/index/file/pipelines.py        | 14 +++++++-------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/libs/kotaemon/kotaemon/indices/vectorindex.py b/libs/kotaemon/kotaemon/indices/vectorindex.py
index d1295a608..b1d0338b1 100644
--- a/libs/kotaemon/kotaemon/indices/vectorindex.py
+++ b/libs/kotaemon/kotaemon/indices/vectorindex.py
@@ -11,7 +11,7 @@
 from kotaemon.storages import BaseDocumentStore, BaseVectorStore
 
 from .base import BaseIndexing, BaseRetrieval
-from .rankings import BaseReranking
+from .rankings import BaseReranking, LLMReranking
 
 VECTOR_STORE_FNAME = "vectorstore"
 DOC_STORE_FNAME = "docstore"
@@ -194,7 +194,8 @@ def run(
         if self.rerankers and text:
             for reranker in self.rerankers:
                 # if reranker is LLMReranking, limit the document with top_k items only
-                result = self._filter_docs(result, top_k=top_k)
+                if isinstance(reranker, LLMReranking):
+                    result = self._filter_docs(result, top_k=top_k)
                 result = reranker(documents=result, query=text)
 
         result = self._filter_docs(result, top_k=top_k)
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index f47000c99..2a9bc8503 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -254,13 +254,13 @@ def get_pipeline(cls, user_settings, index_settings, selected):
             rerankers=[CohereReranking(), LLMTrulensScoring()],
         )
         if not user_settings["use_reranking"]:
-            retriever.rerankers = []  # type: ignore
-        else:
-            for reranker in retriever.rerankers:
-                if isinstance(reranker, LLMReranking):
-                    reranker.llm = llms.get(
-                        user_settings["reranking_llm"], llms.get_default()
-                    )
+            retriever.rerankers = [LLMTrulensScoring()]  # type: ignore
+
+        for reranker in retriever.rerankers:
+            if isinstance(reranker, LLMReranking):
+                reranker.llm = llms.get(
+                    user_settings["reranking_llm"], llms.get_default()
+                )
 
         kwargs = {".doc_ids": selected}
         retriever.set_run(kwargs, temp=False)

From af2a542a20c7dc9ee31883c7412094deada8a5c0 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Tue, 9 Jul 2024 06:53:20 +0000
Subject: [PATCH 50/56] fix: minor update logics

---
 libs/kotaemon/kotaemon/indices/qa/citation.py          | 4 +++-
 libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py | 2 +-
 libs/kotaemon/kotaemon/llms/chats/openai.py            | 8 ++++----
 libs/ktem/ktem/pages/chat/__init__.py                  | 2 +-
 libs/ktem/ktem/reasoning/simple.py                     | 7 ++++---
 5 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/libs/kotaemon/kotaemon/indices/qa/citation.py b/libs/kotaemon/kotaemon/indices/qa/citation.py
index bf8def015..30eceaa00 100644
--- a/libs/kotaemon/kotaemon/indices/qa/citation.py
+++ b/libs/kotaemon/kotaemon/indices/qa/citation.py
@@ -103,7 +103,9 @@ def invoke(self, context: str, question: str):
             print("CitationPipeline: invoking LLM")
             llm_output = self.get_from_path("llm").invoke(messages, **llm_kwargs)
             print("CitationPipeline: finish invoking LLM")
-            if not llm_output.messages:
+            if not llm_output.messages or not llm_output.additional_kwargs.get(
+                "tool_calls"
+            ):
                 return None
             function_output = llm_output.additional_kwargs["tool_calls"][0]["function"][
                 "arguments"
diff --git a/libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py b/libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py
index b7811f042..ff509dd11 100644
--- a/libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py
+++ b/libs/kotaemon/kotaemon/indices/rankings/llm_trulens.py
@@ -46,7 +46,7 @@
 PATTERN_INTEGER: re.Pattern = re.compile(r"([+-]?[1-9][0-9]*|0)")
 """Regex that matches integers."""
 
-MAX_CONTEXT_LEN = 8000
+MAX_CONTEXT_LEN = 7500
 
 
 def validate_rating(rating) -> int:
diff --git a/libs/kotaemon/kotaemon/llms/chats/openai.py b/libs/kotaemon/kotaemon/llms/chats/openai.py
index e8d4783f8..6a605f651 100644
--- a/libs/kotaemon/kotaemon/llms/chats/openai.py
+++ b/libs/kotaemon/kotaemon/llms/chats/openai.py
@@ -163,10 +163,10 @@ def prepare_output(self, resp: dict) -> LLMInterface:
         if resp["choices"][0].get("logprobs") is None:
             logprobs = []
         else:
-            logprobs = [
-                logprob["logprob"]
-                for logprob in resp["choices"][0]["logprobs"].get("content", [])
-            ]
+            all_logprobs = resp["choices"][0]["logprobs"].get("content")
+            logprobs = (
+                [logprob["logprob"] for logprob in all_logprobs] if all_logprobs else []
+            )
 
         output = LLMInterface(
             candidates=[(_["message"]["content"] or "") for _ in resp["choices"]],
diff --git a/libs/ktem/ktem/pages/chat/__init__.py b/libs/ktem/ktem/pages/chat/__init__.py
index acbe23a6d..7f0608511 100644
--- a/libs/ktem/ktem/pages/chat/__init__.py
+++ b/libs/ktem/ktem/pages/chat/__init__.py
@@ -74,7 +74,7 @@ def on_building_ui(self):
                 with gr.Accordion(label="Hint") as _:
                     self.upload_help = gr.HTML(
                         "<i>To upload new file(s), go to "
-                        "<b>File Index</b> section in top navigation bar.</i>"
+                        "<b>Indices</b> section in top navigation bar.</i>"
                     )
 
                 # a hacky quick switch for reasoning type option
diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index 9b23019a3..c19eaed94 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -574,11 +574,12 @@ def _format_retrieval_score_and_doc(
         """Format the retrieval score and the document"""
         # score from doc_store (Elasticsearch)
         if is_close(doc.score, -1.0):
-            text_search_str = " default from full-text search<br>"
+            vectorstore_score = ""
+            text_search_str = " (full-text search)<br>"
         else:
+            vectorstore_score = round(doc.score, 2)
             text_search_str = "<br>"
 
-        vectorstore_score = round(doc.score, 2)
         llm_reranking_score = (
             round(doc.metadata["llm_trulens_score"], 2)
             if doc.metadata.get("llm_trulens_score") is not None
@@ -619,7 +620,7 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
         with_citation, without_citation = [], []
         spans = defaultdict(list)
 
-        if answer.metadata["citation"] is not None:
+        if answer.metadata["citation"] and answer.metadata["citation"].answer:
             for fact_with_evidence in answer.metadata["citation"].answer:
                 for quote in fact_with_evidence.substring_quote:
                     for doc in docs:

From 775476c6ee9dad08fef7331a34c0c9c9046accad Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Tue, 9 Jul 2024 08:27:08 +0000
Subject: [PATCH 51/56] fix: hotfix image citation

---
 libs/ktem/ktem/reasoning/simple.py | 32 +++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/libs/ktem/ktem/reasoning/simple.py b/libs/ktem/ktem/reasoning/simple.py
index c19eaed94..5984df9e6 100644
--- a/libs/ktem/ktem/reasoning/simple.py
+++ b/libs/ktem/ktem/reasoning/simple.py
@@ -652,16 +652,30 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
                     text += cur_doc.text[span["end"] : ss[idx + 1]["start"]]
             text += cur_doc.text[ss[-1]["end"] :]
             # add to display list
-            with_citation.append(
-                Document(
-                    channel="info",
-                    content=self._format_retrieval_score_and_doc(
-                        cur_doc,
-                        Render.table(text),
-                        open_collapsible=True,
-                    ),
+            if cur_doc.metadata.get("type", "") == "image":
+                with_citation.append(
+                    Document(
+                        channel="info",
+                        content=self._format_retrieval_score_and_doc(
+                            cur_doc,
+                            Render.image(
+                                url=cur_doc.metadata["image_origin"], text=text
+                            ),
+                            open_collapsible=True,
+                        ),
+                    )
+                )
+            else:
+                with_citation.append(
+                    Document(
+                        channel="info",
+                        content=self._format_retrieval_score_and_doc(
+                            cur_doc,
+                            Render.table(text),
+                            open_collapsible=True,
+                        ),
+                    )
                 )
-            )
         print("Got {} cited docs".format(len(with_citation)))
 
         sorted_not_detected_items_with_scores = [

From 7e2572865c9bba61a6aa97e6395179fbfe657b4a Mon Sep 17 00:00:00 2001
From: kan_cin <kan@cinnamon.is>
Date: Wed, 10 Jul 2024 11:23:44 +0700
Subject: [PATCH 52/56] feat: update docx loader to handle merged table cells
 + handle zip file upload (#104)

* feat: update docx loader to handle merged table cells

* feat: handle zip file

* refactor: pre-commit
---
 .../azureai_document_intelligence_loader.py   |  4 ++-
 .../kotaemon/loaders/composite_loader.py      |  2 +-
 libs/kotaemon/kotaemon/loaders/docx_loader.py | 27 +++++++++++----
 libs/ktem/ktem/index/file/ui.py               | 34 +++++++++++++++++++
 4 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py b/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
index 85ecf1460..f4123d3f2 100644
--- a/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/azureai_document_intelligence_loader.py
@@ -218,7 +218,9 @@ def load_data(
             removed_spans += table_desc["spans"]
         # save the text content into markdown format
         if self.cache_dir is not None:
-            with open(Path(self.cache_dir) / f"{file_name.stem}.md", "w") as f:
+            with open(
+                Path(self.cache_dir) / f"{file_name.stem}.md", "w", encoding="utf-8"
+            ) as f:
                 f.write(text_content)
 
         removed_spans = sorted(removed_spans, key=lambda x: x["offset"], reverse=True)
diff --git a/libs/kotaemon/kotaemon/loaders/composite_loader.py b/libs/kotaemon/kotaemon/loaders/composite_loader.py
index 4090aef95..ddf71fadd 100644
--- a/libs/kotaemon/kotaemon/loaders/composite_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/composite_loader.py
@@ -48,6 +48,6 @@ class DirectoryReader(LIReaderMixin, BaseReader):
     file_metadata: Optional[Callable[[str], dict]] = None
 
     def _get_wrapped_class(self) -> Type["LIBaseReader"]:
-        from llama_index import SimpleDirectoryReader
+        from llama_index.core import SimpleDirectoryReader
 
         return SimpleDirectoryReader
diff --git a/libs/kotaemon/kotaemon/loaders/docx_loader.py b/libs/kotaemon/kotaemon/loaders/docx_loader.py
index ef0ae6527..c45fad282 100644
--- a/libs/kotaemon/kotaemon/loaders/docx_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/docx_loader.py
@@ -27,6 +27,23 @@ def __init__(self, *args, **kwargs):
                 "Please install it using `pip install python-docx`"
             )
 
+    def _load_single_table(self, table) -> List[List[str]]:
+        """Extract a docx table as a list of columns (each a list of cell texts).
+
+        A merged cell's text is duplicated into every (row, column) slot it spans.
+        """
+        n_row, n_col = len(table.rows), len(table.columns)
+        arrays = [["" for _ in range(n_row)] for _ in range(n_col)]
+
+        for row in table.rows:
+            for c in row.cells:
+                # normalize once per cell (NFKC folds compatibility characters)
+                text = unicodedata.normalize("NFKC", c.text)
+                for row_index in range(c._tc.top, c._tc.bottom):
+                    for col_index in range(c._tc.left, c._tc.right):
+                        arrays[col_index][row_index] = text
+        return arrays
+
     def load_data(
         self, file_path: Path, extra_info: Optional[dict] = None, **kwargs
     ) -> List[Document]:
@@ -50,13 +67,9 @@ def load_data(
 
         tables = []
         for t in doc.tables:
-            arrays = [
-                [
-                    unicodedata.normalize("NFKC", t.cell(i, j).text)
-                    for i in range(len(t.rows))
-                ]
-                for j in range(len(t.columns))
-            ]
+            # return list of columns: list of string
+            arrays = self._load_single_table(t)
+
             tables.append(pd.DataFrame({a[0]: a[1:] for a in arrays}))
 
         extra_info = extra_info or {}
diff --git a/libs/ktem/ktem/index/file/ui.py b/libs/ktem/ktem/index/file/ui.py
index d97e316a9..3aee7d782 100644
--- a/libs/ktem/ktem/index/file/ui.py
+++ b/libs/ktem/ktem/index/file/ui.py
@@ -1,4 +1,5 @@
 import os
+import shutil
 import tempfile
 import zipfile
 from pathlib import Path
@@ -477,6 +478,37 @@ def _on_app_created(self):
             outputs=[self.file_list_state, self.file_list],
         )
 
+    def _may_extract_zip(self, files, zip_dir: str):
+        """Unpack zips to zip_dir; return non-zip inputs + supported extracted files"""
+        zip_files = [file for file in files if file.endswith(".zip")]
+        remaining_files = [file for file in files if not file.endswith(".zip")]
+
+        # Clean-up <zip_dir> before unzip to remove old files
+        shutil.rmtree(zip_dir, ignore_errors=True)
+
+        for zip_file in zip_files:
+            # Prepare a separate output dir for each zip file
+            basename = os.path.splitext(os.path.basename(zip_file))[0]
+            zip_out_dir = os.path.join(zip_dir, basename)
+            os.makedirs(zip_out_dir, exist_ok=True)
+            with zipfile.ZipFile(zip_file, "r") as zip_ref:
+                zip_ref.extractall(zip_out_dir)
+
+        n_zip_file = 0
+        for root, _dirs, filenames in os.walk(zip_dir):  # don't shadow `files`
+            for filename in filenames:
+                ext = os.path.splitext(filename)[1]
+
+                # only allow supported file-types ( not zip )
+                if ext not in [".zip"] and ext in self._supported_file_types:
+                    remaining_files += [os.path.join(root, filename)]
+                    n_zip_file += 1
+
+        if n_zip_file > 0:
+            print(f"Update zip files: {n_zip_file}")
+
+        return remaining_files
+
     def index_fn(
         self, files, reindex: bool, settings, user_id
     ) -> Generator[tuple[str, str], None, None]:
@@ -493,6 +525,8 @@ def index_fn(
             yield "", ""
             return
 
+        files = self._may_extract_zip(files, flowsettings.KH_ZIP_INPUT_DIR)
+
         errors = self.validate(files)
         if errors:
             gr.Warning(", ".join(errors))

From 7a1bdeb52377b0ff0923e441f712d591de100830 Mon Sep 17 00:00:00 2001
From: taprosoft <tadashi@cinnamon.is>
Date: Wed, 10 Jul 2024 04:30:54 +0000
Subject: [PATCH 53/56] fix: escape text in download UI

---
 libs/ktem/ktem/index/file/ui.py   | 13 +++++++++----
 libs/ktem/ktem/reasoning/rewoo.py |  5 ++++-
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/libs/ktem/ktem/index/file/ui.py b/libs/ktem/ktem/index/file/ui.py
index 3aee7d782..6d83e8c7f 100644
--- a/libs/ktem/ktem/index/file/ui.py
+++ b/libs/ktem/ktem/index/file/ui.py
@@ -1,3 +1,4 @@
+import html
 import os
 import shutil
 import tempfile
@@ -16,6 +17,8 @@
 from sqlalchemy.orm import Session
 from theflow.settings import settings as flowsettings
 
+DOWNLOAD_MESSAGE = "Press again to download"
+
 
 class File(gr.File):
     """Subclass from gr.File to maintain the original filename
@@ -238,11 +241,13 @@ def file_selected(self, file_id):
                 )
 
                 for idx, doc in enumerate(docs):
-                    title = f"{doc.text[:50]}..." if len(doc.text) > 50 else doc.text
+                    title = html.escape(
+                        f"{doc.text[:50]}..." if len(doc.text) > 50 else doc.text
+                    )
                     doc_type = doc.metadata.get("type", "text")
                     content = ""
                     if doc_type == "text":
-                        content = doc.text
+                        content = html.escape(doc.text)
                     elif doc_type == "table":
                         content = Render.table(doc.text)
                     elif doc_type == "image":
@@ -330,7 +335,7 @@ def download_single_file(self, file_id):
         with zipfile.ZipFile(f"{zip_file_path}.zip", "w") as zipMe:
             for file in zip_files:
                 zipMe.write(file, arcname=os.path.basename(file))
-        return gr.DownloadButton(label="Download pressed", value=f"{zip_file_path}.zip")
+        return gr.DownloadButton(label=DOWNLOAD_MESSAGE, value=f"{zip_file_path}.zip")
 
     def download_all_files(self):
         zip_files = []
@@ -345,7 +350,7 @@ def download_all_files(self):
             for file in zip_files:
                 arcname = Path(file)
                 zipMe.write(file, arcname=arcname.name)
-        return gr.DownloadButton(label="Download pressed", value=f"{zip_file_path}.zip")
+        return gr.DownloadButton(label=DOWNLOAD_MESSAGE, value=f"{zip_file_path}.zip")
 
     def on_register_events(self):
         """Register all events to the app"""
diff --git a/libs/ktem/ktem/reasoning/rewoo.py b/libs/ktem/ktem/reasoning/rewoo.py
index 705711d12..785efb246 100644
--- a/libs/ktem/ktem/reasoning/rewoo.py
+++ b/libs/ktem/ktem/reasoning/rewoo.py
@@ -309,7 +309,10 @@ def prepare_citation(self, answer) -> list[Document]:
                 line = "\\" + line
                 segments[-1].append(line)
             else:
-                segments[-1].append(line)
+                if segments:
+                    segments[-1].append(line)
+                else:
+                    segments.append([line])
 
         outputs = []
         for segment in segments:

From 88d02c37702fef8549c5db1596b745ef3efedc75 Mon Sep 17 00:00:00 2001
From: kan_cin <kan@cinnamon.is>
Date: Fri, 12 Jul 2024 11:11:41 +0700
Subject: [PATCH 54/56] feat: optimize vector store query db (#105)

* feat: optimize vector store query db

* feat: add file_id to chroma metadatas

* feat: remove unnecessary logs and update migrate script

* feat: iterate through file index

* fix: remove unused code

---------

Co-authored-by: taprosoft <tadashi@cinnamon.is>
---
 libs/kotaemon/kotaemon/indices/vectorindex.py |   5 +-
 libs/kotaemon/kotaemon/loaders/docx_loader.py |   8 +-
 libs/ktem/ktem/index/file/pipelines.py        |  15 +-
 scripts/migrate/__init__.py                   |   0
 scripts/migrate/migrate_chroma_db.py          | 191 ++++++++++++++++++
 5 files changed, 209 insertions(+), 10 deletions(-)
 create mode 100644 scripts/migrate/__init__.py
 create mode 100644 scripts/migrate/migrate_chroma_db.py

diff --git a/libs/kotaemon/kotaemon/indices/vectorindex.py b/libs/kotaemon/kotaemon/indices/vectorindex.py
index b1d0338b1..21823a418 100644
--- a/libs/kotaemon/kotaemon/indices/vectorindex.py
+++ b/libs/kotaemon/kotaemon/indices/vectorindex.py
@@ -170,16 +170,19 @@ def run(
             docs = self.doc_store.query(query, top_k=top_k_first_round, doc_ids=scope)
             result = [RetrievedDocument(**doc.to_dict(), score=-1.0) for doc in docs]
         elif self.retrieval_mode == "hybrid":
-            # similartiy search section
+            # similarity search section
             emb = self.embedding(text)[0].embedding
+
             _, vs_scores, vs_ids = self.vector_store.query(
                 embedding=emb, top_k=top_k_first_round, **kwargs
             )
+
             vs_docs = self.doc_store.get(vs_ids)
 
             # full-text search section
             query = text.text if isinstance(text, Document) else text
             docs = self.doc_store.query(query, top_k=top_k_first_round, doc_ids=scope)
+
             result = [
                 RetrievedDocument(**doc.to_dict(), score=-1.0)
                 for doc in docs
diff --git a/libs/kotaemon/kotaemon/loaders/docx_loader.py b/libs/kotaemon/kotaemon/loaders/docx_loader.py
index c45fad282..463a8fa42 100644
--- a/libs/kotaemon/kotaemon/loaders/docx_loader.py
+++ b/libs/kotaemon/kotaemon/loaders/docx_loader.py
@@ -36,11 +36,9 @@ def _load_single_table(self, table) -> List[List[str]]:
 
         arrays = [["" for _ in range(n_row)] for _ in range(n_col)]
 
-        for row in table.rows:
-            for c in row.cells:
-                for row_index in range(c._tc.top, c._tc.bottom):
-                    for col_index in range(c._tc.left, c._tc.right):
-                        arrays[col_index][row_index] = c.text
+        for i, row in enumerate(table.rows):
+            for j, cell in enumerate(row.cells):
+                arrays[j][i] = cell.text
 
         return arrays
 
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 2a9bc8503..755126456 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -2,6 +2,7 @@
 
 import logging
 import shutil
+import time
 import warnings
 from collections import defaultdict
 from functools import lru_cache
@@ -133,11 +134,10 @@ def run(
         retrieval_kwargs["filters"] = MetadataFilters(
             filters=[
                 MetadataFilter(
-                    key="doc_id",
-                    value=vs_id,
-                    operator=FilterOperator.EQ,
+                    key="file_id",
+                    value=doc_ids,
+                    operator=FilterOperator.IN,
                 )
-                for vs_id in vs_ids
             ],
             condition=FilterCondition.OR,
         )
@@ -148,7 +148,10 @@ def run(
             retrieval_kwargs["mmr_threshold"] = 0.5
 
         # rerank
+        s_time = time.time()
+        print(f"retrieval_kwargs: {retrieval_kwargs.keys()}")
         docs = self.vector_retrieval(text=text, top_k=self.top_k, **retrieval_kwargs)
+        print("retrieval step took", time.time() - s_time)
 
         if not self.get_extra_table:
             return docs
@@ -477,6 +480,8 @@ def run(self, file_path: str | Path, reindex: bool, **kwargs) -> str:
 
         # extract the file
         extra_info = default_file_metadata_func(str(file_path))
+        extra_info["file_id"] = file_id
+
         docs = self.loader.load_data(file_path, extra_info=extra_info)
         for _ in self.handle_docs(docs, file_id, file_path.name):
             continue
@@ -507,6 +512,8 @@ def stream(
 
         # extract the file
         extra_info = default_file_metadata_func(str(file_path))
+        extra_info["file_id"] = file_id
+
         yield Document(f" => Converting {file_path.name} to text", channel="debug")
         docs = self.loader.load_data(file_path, extra_info=extra_info)
         yield Document(f" => Converted {file_path.name} to text", channel="debug")
diff --git a/scripts/migrate/__init__.py b/scripts/migrate/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/scripts/migrate/migrate_chroma_db.py b/scripts/migrate/migrate_chroma_db.py
new file mode 100644
index 000000000..96fa9201b
--- /dev/null
+++ b/scripts/migrate/migrate_chroma_db.py
@@ -0,0 +1,191 @@
+import uuid
+
+import chromadb
+from ktem.index.models import Index
+from sqlalchemy import (
+    JSON,
+    Column,
+    DateTime,
+    Integer,
+    String,
+    UniqueConstraint,
+    create_engine,
+    select,
+)
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.ext.mutable import MutableDict
+from sqlalchemy.orm import Session
+from sqlalchemy.sql import func
+
+
+def _init_resource(private: bool = True, id: int = 1):
+    """Init schemas. Hard-code"""
+    Base = declarative_base()
+
+    if private:
+        Source = type(
+            "Source",
+            (Base,),
+            {
+                "__tablename__": f"index__{id}__source",
+                "__table_args__": (
+                    UniqueConstraint("name", "user", name="_name_user_uc"),
+                ),
+                "id": Column(
+                    String,
+                    primary_key=True,
+                    default=lambda: str(uuid.uuid4()),
+                    unique=True,
+                ),
+                "name": Column(String),
+                "path": Column(String),
+                "size": Column(Integer, default=0),
+                "date_created": Column(
+                    DateTime(timezone=True), server_default=func.now()
+                ),
+                "user": Column(Integer, default=1),
+                "note": Column(
+                    MutableDict.as_mutable(JSON),  # type: ignore
+                    default={},
+                ),
+            },
+        )
+    else:
+        Source = type(
+            "Source",
+            (Base,),
+            {
+                "__tablename__": f"index__{id}__source",
+                "id": Column(
+                    String,
+                    primary_key=True,
+                    default=lambda: str(uuid.uuid4()),
+                    unique=True,
+                ),
+                "name": Column(String, unique=True),
+                "path": Column(String),
+                "size": Column(Integer, default=0),
+                "date_created": Column(
+                    DateTime(timezone=True), server_default=func.now()
+                ),
+                "user": Column(Integer, default=1),
+                "note": Column(
+                    MutableDict.as_mutable(JSON),  # type: ignore
+                    default={},
+                ),
+            },
+        )
+    Index = type(
+        "IndexTable",
+        (Base,),
+        {
+            "__tablename__": f"index__{id}__index",
+            "id": Column(Integer, primary_key=True, autoincrement=True),
+            "source_id": Column(String),
+            "target_id": Column(String),
+            "relation_type": Column(Integer),
+            "user": Column(Integer, default=1),
+        },
+    )
+
+    return {"Source": Source, "Index": Index}
+
+
+def get_chromadb_collection(
+    db_dir: str = "../ktem_app_data/user_data/vectorstore",
+    collection_name: str = "index_1",
+):
+    """Extract collection from chromadb"""
+    client = chromadb.PersistentClient(path=db_dir)
+    collection = client.get_or_create_collection(collection_name)
+
+    return collection
+
+
+def update_metadata(metadata, file_id):
+    """Update file_id"""
+    metadata["file_id"] = file_id
+    return metadata
+
+
+def migrate_chroma_db(
+    chroma_db_dir: str, sqlite_path: str, is_private: bool = True, int_index: int = 1
+):
+    """Set metadata.file_id on the chroma vectors of every source in one index"""
+    chroma_collection_name = f"index_{int_index}"
+
+    engine = create_engine(sqlite_path)
+    resource = _init_resource(private=is_private, id=int_index)
+    print("Load sqlalchemy engine successfully!")
+
+    chroma_db_collection = get_chromadb_collection(
+        db_dir=chroma_db_dir, collection_name=chroma_collection_name
+    )
+    print(
+        f"Load chromadb collection: {chroma_collection_name}, "
+        f"path: {chroma_db_dir} successfully!"
+    )
+
+    # Load docs id of user
+    with Session(engine) as session:
+        stmt = select(resource["Source"])
+        results = session.execute(stmt)
+        doc_ids = [r[0].id for r in results.all()]
+    print(f"Retrieve n-docs: {len(doc_ids)}")
+    print(doc_ids)
+
+    for doc_id in doc_ids:
+        print("-")
+        # Find corresponding vector ids
+        with Session(engine) as session:
+            stmt = select(resource["Index"]).where(
+                resource["Index"].relation_type == "vector",
+                resource["Index"].source_id.in_([doc_id]),
+            )
+            results = session.execute(stmt)
+            vs_ids = [r[0].target_id for r in results.all()]
+
+        print(f"Got {len(vs_ids)} vs_ids for doc {doc_id}")
+
+        # Persist file_id via the collection (the fetched batch is a plain dict)
+        if len(vs_ids) > 0:
+            batch = chroma_db_collection.get(ids=vs_ids, include=["metadatas"])
+            chroma_db_collection.update(
+                ids=batch["ids"],
+                metadatas=[
+                    update_metadata(metadata, doc_id) for metadata in batch["metadatas"]
+                ],
+            )
+
+        # Assert file_id. Skip
+        print(f"doc-{doc_id} got updated")
+
+
+def main(chroma_db_dir: str, sqlite_path: str):
+    engine = create_engine(sqlite_path)
+
+    with Session(engine) as session:
+        stmt = select(Index)
+
+        results = session.execute(stmt)
+        file_indices = [r[0] for r in results.all()]
+
+        for file_index in file_indices:
+            _id = file_index.id
+            _is_private = file_index.config["private"]
+
+            print(f"Migrating for Index id: {_id}, is_private: {_is_private}")
+
+            migrate_chroma_db(
+                chroma_db_dir=chroma_db_dir,
+                sqlite_path=sqlite_path,
+                is_private=_is_private,
+                int_index=_id,
+            )
+
+
+if __name__ == "__main__":
+    chrome_db_dir: str = "./vectorstore/kan_db"
+    sqlite_path: str = "sqlite:///../ktem_app_data/user_data/sql.db"
+
+    main(chrome_db_dir, sqlite_path)

From 9e215c406be8b05d4917291111ceaec2de159045 Mon Sep 17 00:00:00 2001
From: trducng <trungduc1992@gmail.com>
Date: Tue, 16 Jul 2024 08:36:17 +0000
Subject: [PATCH 55/56] fix: add openai embedding exponential back-off

---
 libs/kotaemon/kotaemon/embeddings/openai.py | 3 +++
 libs/ktem/ktem/index/file/pipelines.py      | 4 ++--
 libs/ktem/ktem/index/file/ui.py             | 3 ++-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/libs/kotaemon/kotaemon/embeddings/openai.py b/libs/kotaemon/kotaemon/embeddings/openai.py
index fb8ca43bd..58061d458 100644
--- a/libs/kotaemon/kotaemon/embeddings/openai.py
+++ b/libs/kotaemon/kotaemon/embeddings/openai.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 import tiktoken
+from tenacity import retry, stop_after_attempt, wait_random_exponential
 from theflow.utils.modules import import_dotted_string
 
 from kotaemon.base import Param
@@ -168,6 +169,7 @@ def prepare_client(self, async_version: bool = False):
 
         return OpenAI(**params)
 
+    @retry(wait=wait_random_exponential(min=1, max=40), stop=stop_after_attempt(6))
     def openai_response(self, client, **kwargs):
         """Get the openai response"""
         params: dict = {
@@ -224,6 +226,7 @@ def prepare_client(self, async_version: bool = False):
 
         return AzureOpenAI(**params)
 
+    @retry(wait=wait_random_exponential(min=1, max=40), stop=stop_after_attempt(6))
     def openai_response(self, client, **kwargs):
         """Get the openai response"""
         params: dict = {
diff --git a/libs/ktem/ktem/index/file/pipelines.py b/libs/ktem/ktem/index/file/pipelines.py
index 755126456..567f4ff96 100644
--- a/libs/ktem/ktem/index/file/pipelines.py
+++ b/libs/ktem/ktem/index/file/pipelines.py
@@ -275,7 +275,7 @@ class IndexPipeline(BaseComponent):
 
     loader: BaseReader
     splitter: BaseSplitter
-    chunk_batch_size: int = 50
+    chunk_batch_size: int = 100
 
     Source = Param(help="The SQLAlchemy Source table")
     Index = Param(help="The SQLAlchemy Index table")
@@ -306,7 +306,7 @@ def handle_docs(self, docs, file_id, file_name) -> Generator[Document, None, int
 
         for cidx, chunk in enumerate(self.splitter(text_docs)):
             chunks.append(chunk)
-            if cidx % self.chunk_batch_size == 0:
+            if (cidx + 1) % self.chunk_batch_size == 0:
                 self.handle_chunks(chunks, file_id)
                 n_chunks += len(chunks)
                 chunks = []
diff --git a/libs/ktem/ktem/index/file/ui.py b/libs/ktem/ktem/index/file/ui.py
index 6d83e8c7f..003413d3d 100644
--- a/libs/ktem/ktem/index/file/ui.py
+++ b/libs/ktem/ktem/index/file/ui.py
@@ -296,7 +296,8 @@ def delete_event(self, file_id):
                 session.delete(each[0])
             session.commit()
 
-        self._index._vs.delete(vs_ids)
+        if vs_ids:
+            self._index._vs.delete(vs_ids)
         self._index._docstore.delete(ds_ids)
 
         gr.Info(f"File {file_id} has been deleted")

From 774e359dfc0b3a9a2f75321458f8aae37e3a4d93 Mon Sep 17 00:00:00 2001
From: phv2312 <kat87yb@gmail.com>
Date: Wed, 17 Jul 2024 17:11:17 +0700
Subject: [PATCH 56/56] fix: update import download_loader

---
 libs/kotaemon/kotaemon/loaders/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/kotaemon/kotaemon/loaders/base.py b/libs/kotaemon/kotaemon/loaders/base.py
index eff70c8be..aebf91961 100644
--- a/libs/kotaemon/kotaemon/loaders/base.py
+++ b/libs/kotaemon/kotaemon/loaders/base.py
@@ -20,7 +20,7 @@ def __init__(self, reader_type: Union[str, Type["LIBaseReader"]]) -> None:
         """Init reader using string identifier or class name from llama-hub"""
 
         if isinstance(reader_type, str):
-            from llama_index import download_loader
+            from llama_index.core import download_loader
 
             self._reader = download_loader(reader_type)()
         else: