Skip to content

Commit

Permalink
Merge branch 'Cinnamon:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
tknrych authored Dec 4, 2024
2 parents 1a9b249 + b016a84 commit 5e57cf3
Show file tree
Hide file tree
Showing 15 changed files with 198 additions and 28 deletions.
23 changes: 16 additions & 7 deletions flowsettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

KH_ENABLE_FIRST_SETUP = True
KH_DEMO_MODE = config("KH_DEMO_MODE", default=False, cast=bool)
KH_OLLAMA_URL = config("KH_OLLAMA_URL", default="http://localhost:11434/v1/")

# App can be run from anywhere and it's not trivial to decide where to store app data.
# So let's use the same directory as the flowsettings.py file.
Expand Down Expand Up @@ -162,7 +163,7 @@
KH_LLMS["ollama"] = {
"spec": {
"__type__": "kotaemon.llms.ChatOpenAI",
"base_url": "http://localhost:11434/v1/",
"base_url": KH_OLLAMA_URL,
"model": config("LOCAL_MODEL", default="llama3.1:8b"),
"api_key": "ollama",
},
Expand All @@ -171,7 +172,7 @@
KH_EMBEDDINGS["ollama"] = {
"spec": {
"__type__": "kotaemon.embeddings.OpenAIEmbeddings",
"base_url": "http://localhost:11434/v1/",
"base_url": KH_OLLAMA_URL,
"model": config("LOCAL_MODEL_EMBEDDINGS", default="nomic-embed-text"),
"api_key": "ollama",
},
Expand All @@ -195,11 +196,11 @@
},
"default": False,
}
KH_LLMS["gemini"] = {
KH_LLMS["google"] = {
"spec": {
"__type__": "kotaemon.llms.chats.LCGeminiChat",
"model_name": "gemini-1.5-pro",
"api_key": "your-key",
"model_name": "gemini-1.5-flash",
"api_key": config("GOOGLE_API_KEY", default="your-key"),
},
"default": False,
}
Expand Down Expand Up @@ -231,6 +232,13 @@
},
"default": False,
}
# Google Generative AI embedding entry. The API key is looked up through
# config("GOOGLE_API_KEY"), falling back to the placeholder "your-key".
KH_EMBEDDINGS["google"] = {
    "spec": {
        "__type__": "kotaemon.embeddings.LCGoogleEmbeddings",
        "model": "models/text-embedding-004",
        "google_api_key": config("GOOGLE_API_KEY", default="your-key"),
    },
    # Explicitly not the default, matching the neighbouring registry entries.
    "default": False,
}
# KH_EMBEDDINGS["huggingface"] = {
# "spec": {
# "__type__": "kotaemon.embeddings.LCHuggingFaceEmbeddings",
Expand Down Expand Up @@ -303,7 +311,8 @@

GRAPHRAG_INDICES = [
{
"name": graph_type.split(".")[-1].replace("Index", ""), # get last name
"name": graph_type.split(".")[-1].replace("Index", "")
+ " Collection", # get last name
"config": {
"supported_file_types": (
".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
Expand All @@ -318,7 +327,7 @@

KH_INDICES = [
{
"name": "File",
"name": "File Collection",
"config": {
"supported_file_types": (
".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
Expand Down
2 changes: 2 additions & 0 deletions libs/kotaemon/kotaemon/embeddings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from .langchain_based import (
LCAzureOpenAIEmbeddings,
LCCohereEmbeddings,
LCGoogleEmbeddings,
LCHuggingFaceEmbeddings,
LCOpenAIEmbeddings,
)
Expand All @@ -18,6 +19,7 @@
"LCAzureOpenAIEmbeddings",
"LCCohereEmbeddings",
"LCHuggingFaceEmbeddings",
"LCGoogleEmbeddings",
"OpenAIEmbeddings",
"AzureOpenAIEmbeddings",
"FastEmbedEmbeddings",
Expand Down
35 changes: 35 additions & 0 deletions libs/kotaemon/kotaemon/embeddings/langchain_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,38 @@ def _get_lc_class(self):
from langchain.embeddings import HuggingFaceBgeEmbeddings

return HuggingFaceBgeEmbeddings


class LCGoogleEmbeddings(LCEmbeddingMixin, BaseEmbeddings):
    """Wrapper around Langchain's Google GenAI embedding, focusing on key parameters

    Args:
        model: name of the embedding model to use.
        google_api_key: API key, passed on to the parent initializer.
        **params: extra keyword arguments passed through unchanged.
    """

    google_api_key: str = Param(
        help="API key (https://aistudio.google.com/app/apikey)",
        default=None,
        required=True,
    )
    model: str = Param(
        help="Model name to use (https://ai.google.dev/gemini-api/docs/models/gemini#text-embedding-and-embedding)",  # noqa
        default="models/text-embedding-004",
        required=True,
    )

    def __init__(
        self,
        model: str = "models/text-embedding-004",
        google_api_key: Optional[str] = None,
        **params,
    ):
        super().__init__(
            model=model,
            google_api_key=google_api_key,
            **params,
        )

    def _get_lc_class(self):
        """Return the Langchain Google GenAI embedding class, imported lazily.

        Raises:
            ImportError: if the `langchain_google_genai` package cannot be
                imported.
        """
        try:
            from langchain_google_genai import GoogleGenerativeAIEmbeddings
        except ImportError as exc:
            # Chain the original error so the real import failure stays visible.
            raise ImportError("Please install langchain-google-genai") from exc

        return GoogleGenerativeAIEmbeddings
11 changes: 9 additions & 2 deletions libs/ktem/ktem/assets/css/main.css
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ button.selected {
#chat-info-panel {
max-height: var(--main-area-height) !important;
overflow: auto !important;
transition: all 0.5s;
transition: all 0.4s;
}

body.dark #chat-info-panel figure>img{
Expand All @@ -109,12 +109,12 @@ body.dark #chat-info-panel figure>img{
flex-wrap: unset;
overflow-y: scroll !important;
position: sticky;
min-width: min(305px, 100%) !important;
column-gap: 2px !important;
scrollbar-width: none;
/* Firefox */
-ms-overflow-style: none;
/* Internet Explorer 10+ */
transition: all 0.3s;
}

#conv-settings-panel::-webkit-scrollbar {
Expand Down Expand Up @@ -204,6 +204,13 @@ mark {
right: 15px;
}

/* Element with id "chat-expand-button": taken out of normal flow and placed
   6px from the top and 10px past the right edge of its containing block,
   with z-index 10 so it stacks above lower-indexed siblings. */
#chat-expand-button {
position: absolute;
top: 6px;
right: -10px;
z-index: 10;
}

#use-mindmap-checkbox {
position: absolute;
width: 110px;
Expand Down
1 change: 1 addition & 0 deletions libs/ktem/ktem/assets/icons/expand.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
23 changes: 23 additions & 0 deletions libs/ktem/ktem/assets/js/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,29 @@ function run() {
let chat_info_panel = document.getElementById("info-expand");
chat_info_panel.insertBefore(info_expand_button, chat_info_panel.childNodes[2]);

// move toggle-side-bar button
let chat_expand_button = document.getElementById("chat-expand-button");
let chat_column = document.getElementById("main-chat-bot");
let conv_column = document.getElementById("conv-settings-panel");

let default_conv_column_min_width = "min(300px, 100%)";
conv_column.style.minWidth = default_conv_column_min_width

// Toggle the conversation/settings column (conv_column) between its collapsed
// and expanded states. Attached to globalThis so it can be called from
// outside this script.
globalThis.toggleChatColumn = () => {
  // Inline flex-grow of conv_column; the string "0" marks the collapsed state.
  const flex_grow = conv_column.style.flexGrow;
  if (flex_grow === "0") {
    // Currently collapsed: let the column grow again and restore its
    // default minimum width.
    conv_column.style.flexGrow = "1";
    conv_column.style.minWidth = default_conv_column_min_width;
  } else {
    // Currently expanded (or never toggled yet): collapse the column.
    conv_column.style.flexGrow = "0";
    conv_column.style.minWidth = "0px";
  }
};

chat_column.insertBefore(chat_expand_button, chat_column.firstChild);

// move use mind-map checkbox
let mindmap_checkbox = document.getElementById("use-mindmap-checkbox");
let chat_setting_panel = document.getElementById("chat-settings-expand");
Expand Down
2 changes: 2 additions & 0 deletions libs/ktem/ktem/embeddings/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def load_vendors(self):
AzureOpenAIEmbeddings,
FastEmbedEmbeddings,
LCCohereEmbeddings,
LCGoogleEmbeddings,
LCHuggingFaceEmbeddings,
OpenAIEmbeddings,
TeiEndpointEmbeddings,
Expand All @@ -68,6 +69,7 @@ def load_vendors(self):
FastEmbedEmbeddings,
LCCohereEmbeddings,
LCHuggingFaceEmbeddings,
LCGoogleEmbeddings,
TeiEndpointEmbeddings,
]

Expand Down
2 changes: 2 additions & 0 deletions libs/ktem/ktem/index/file/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ class BaseFileIndexIndexing(BaseComponent):
FSPath = Param(help="The file storage path")
user_id = Param(help="The user id")
private = Param(False, help="Whether this is private index")
chunk_size = Param(help="Chunk size for this index")
chunk_overlap = Param(help="Chunk overlap for this index")

def run(
self, file_paths: str | Path | list[str | Path], *args, **kwargs
Expand Down
21 changes: 21 additions & 0 deletions libs/ktem/ktem/index/file/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,25 @@ def get_admin_settings(cls):
"choices": [("Yes", True), ("No", False)],
"info": "If private, files will not be accessible across users.",
},
"chunk_size": {
"name": "Size of chunk (number of tokens)",
"value": 0,
"component": "number",
"info": (
"Number of tokens of each text segment. "
"Set 0 to use developer setting."
),
},
"chunk_overlap": {
"name": "Number of overlapping tokens between chunks",
"value": 0,
"component": "number",
"info": (
"Number of tokens that consecutive text segments "
"should overlap with each other. "
"Set 0 to use developer setting."
),
},
}

def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing:
Expand All @@ -423,6 +442,8 @@ def get_indexing_pipeline(self, settings, user_id) -> BaseFileIndexIndexing:
obj.FSPath = self._fs_path
obj.user_id = user_id
obj.private = self.config.get("private", False)
obj.chunk_size = self.config.get("chunk_size", 0)
obj.chunk_overlap = self.config.get("chunk_overlap", 0)

return obj

Expand Down
10 changes: 8 additions & 2 deletions libs/ktem/ktem/index/file/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,11 @@ def route(self, file_path: str | Path) -> IndexPipeline:
Can subclass this method for a more elaborate pipeline routing strategy.
"""
_, chunk_size, chunk_overlap = dev_settings()

_, dev_chunk_size, dev_chunk_overlap = dev_settings()

chunk_size = self.chunk_size or dev_chunk_size
chunk_overlap = self.chunk_overlap or dev_chunk_overlap

# check if file_path is a URL
if self.is_url(file_path):
Expand All @@ -744,12 +748,14 @@ def route(self, file_path: str | Path) -> IndexPipeline:
"the suitable pipeline for this file type in the settings."
)

print(f"Chunk size: {chunk_size}, chunk overlap: {chunk_overlap}")

print("Using reader", reader)
pipeline: IndexPipeline = IndexPipeline(
loader=reader,
splitter=TokenSplitter(
chunk_size=chunk_size or 1024,
chunk_overlap=chunk_overlap if chunk_overlap is not None else 256,
chunk_overlap=chunk_overlap or 256,
separator="\n\n",
backup_separators=["\n", ".", "\u200B"],
),
Expand Down
2 changes: 1 addition & 1 deletion libs/ktem/ktem/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def ui(self):
) as self._tabs["indices-tab"]:
for index in self.index_manager.indices:
with gr.Tab(
f"{index.name} Collection",
index.name,
elem_id=f"{index.id}-tab",
) as self._tabs[f"{index.id}-tab"]:
page = index.get_index_page_ui()
Expand Down
15 changes: 11 additions & 4 deletions libs/ktem/ktem/pages/chat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from ktem.app import BasePage
from ktem.components import reasonings
from ktem.db.models import Conversation, engine
from ktem.index.file.ui import File, chat_input_focus_js
from ktem.index.file.ui import File
from ktem.reasoning.prompt_optimization.suggest_conversation_name import (
SuggestConvNamePipeline,
)
Expand All @@ -31,6 +31,12 @@
DEFAULT_SETTING = "(default)"
INFO_PANEL_SCALES = {True: 8, False: 4}

# JavaScript snippet (kept as a string) that looks up the textarea inside the
# element with id "chat-input" and gives it keyboard focus.
chat_input_focus_js = """
function() {
let chatInput = document.querySelector("#chat-input textarea");
chatInput.focus();
}
"""

pdfview_js = """
function() {
Expand Down Expand Up @@ -126,9 +132,7 @@ def on_building_ui(self):
continue

index_ui.unrender() # need to rerender later within Accordion
with gr.Accordion(
label=f"{index.name} Collection", open=index_id < 1
):
with gr.Accordion(label=index.name, open=index_id < 1):
index_ui.render()
gr_index = index_ui.as_gradio_component()

Expand Down Expand Up @@ -403,6 +407,9 @@ def on_register_events(self):
inputs=self._info_panel_expanded,
outputs=[self.info_column, self._info_panel_expanded],
)
self.chat_control.btn_chat_expand.click(
fn=None, inputs=None, js="function() {toggleChatColumn();}"
)

self.chat_panel.chatbot.like(
fn=self.is_liked,
Expand Down
10 changes: 9 additions & 1 deletion libs/ktem/ktem/pages/chat/control.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,17 @@ def on_building_ui(self):
elem_classes=["no-background", "body-text-color"],
elem_id="toggle-dark-button",
)
self.btn_chat_expand = gr.Button(
value="",
icon=f"{ASSETS_DIR}/expand.svg",
scale=1,
size="sm",
elem_classes=["no-background", "body-text-color"],
elem_id="chat-expand-button",
)
self.btn_info_expand = gr.Button(
value="",
icon=f"{ASSETS_DIR}/sidebar.svg",
icon=f"{ASSETS_DIR}/expand.svg",
min_width=2,
scale=1,
size="sm",
Expand Down
2 changes: 1 addition & 1 deletion libs/ktem/ktem/pages/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def index_tab(self):
id2name = {k: v.name for k, v in self._app.index_manager.info().items()}
with gr.Tab("Retrieval settings", visible=self._render_index_tab):
for pn, sig in self._default_settings.index.options.items():
name = "{} Collection".format(id2name.get(pn, f"<id {pn}>"))
name = id2name.get(pn, f"<id {pn}>")
with gr.Tab(name):
for n, si in sig.settings.items():
obj = render_setting_item(si, si.value)
Expand Down
Loading

0 comments on commit 5e57cf3

Please sign in to comment.