Skip to content

Commit

Permalink
fix: knowledge base bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
kangfenmao committed Dec 25, 2024
1 parent 34ebab0 commit c50ac44
Show file tree
Hide file tree
Showing 14 changed files with 142 additions and 47 deletions.
6 changes: 5 additions & 1 deletion electron.vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ export default defineConfig({
build: {
rollupOptions: {
external: ['@lancedb/lancedb']
}
},
minify: true
}
},
preload: {
Expand All @@ -51,6 +52,9 @@ export default defineConfig({
},
optimizeDeps: {
exclude: []
},
build: {
minify: true
}
}
})
2 changes: 1 addition & 1 deletion src/main/services/KnowledgeService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import * as fs from 'node:fs'
import path from 'node:path'

import { LocalPathLoader, RAGApplication, RAGApplicationBuilder, TextLoader } from '@llm-tools/embedjs'
import { AddLoaderReturn, ExtractChunkData } from '@llm-tools/embedjs-interfaces'
import type { AddLoaderReturn, ExtractChunkData } from '@llm-tools/embedjs-interfaces'
import { LanceDb } from '@llm-tools/embedjs-lancedb'
import { MarkdownLoader } from '@llm-tools/embedjs-loader-markdown'
import { DocxLoader, ExcelLoader, PptLoader } from '@llm-tools/embedjs-loader-msoffice'
Expand Down
7 changes: 4 additions & 3 deletions src/main/services/WindowService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { isLinux, isWin } from '@main/constant'
import { app, BrowserWindow, Menu, MenuItem, shell } from 'electron'
import Logger from 'electron-log'
import windowStateKeeper from 'electron-window-state'
import { join } from 'path'
import path, { join } from 'path'

import icon from '../../../build/icon.png?asset'
import { titleBarOverlayDark, titleBarOverlayLight } from '../config'
Expand Down Expand Up @@ -137,8 +137,9 @@ export class WindowService {
const { url } = details

if (url.includes('http://file/')) {
const fileUrl = url.replace('http://file/', '')
const filePath = decodeURIComponent(fileUrl)
const fileName = url.replace('http://file/', '')
const storageDir = path.join(app.getPath('userData'), 'Data', 'Files')
const filePath = storageDir + '/' + fileName
shell.openPath(filePath).catch((err) => Logger.error('Failed to open file:', err))
} else {
shell.openExternal(details.url)
Expand Down
1 change: 1 addition & 0 deletions src/renderer/src/config/prompts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ export const REFERENCE_PROMPT = `请根据参考资料回答问题,并使用
1. **脚注标记**:在正文中使用 [^数字] 的形式标记脚注,例如 [^1]。
2. **脚注内容**:在文档末尾使用 [^数字]: 脚注内容 的形式定义脚注的具体内容
3. **脚注内容**:应该尽量简洁
## 我的问题是:
Expand Down
2 changes: 1 addition & 1 deletion src/renderer/src/config/providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ export const PROVIDER_CONFIG = {
url: 'https://aihubmix.com?aff=SJyh'
},
websites: {
official: 'https://aihubmix.com/',
official: 'https://aihubmix.com?aff=SJyh',
apiKey: 'https://aihubmix.com?aff=SJyh',
docs: 'https://doc.aihubmix.com/',
models: 'https://aihubmix.com/models'
Expand Down
6 changes: 4 additions & 2 deletions src/renderer/src/i18n/locales/en-us.json
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@
"error.enter.api.host": "Please enter your API host first",
"error.enter.api.key": "Please enter your API key first",
"error.enter.model": "Please select a model first",
"error.enter.name": "Please enter the name of the knowledge base",
"error.invalid.proxy.url": "Invalid proxy URL",
"error.invalid.webdav": "Invalid WebDAV settings",
"message.code_style": "Code style",
Expand All @@ -249,7 +250,7 @@
"upgrade.success.title": "Upgrade successfully",
"regenerate.confirm": "Regenerating will replace current message",
"copy.success": "Copied!",
"get_embedding_dimensions": "Failed to get embedding dimensions"
"error.get_embedding_dimensions": "Failed to get embedding dimensions"
},
"minapp": {
"title": "MinApp"
Expand Down Expand Up @@ -562,7 +563,8 @@
"add_directory": "Add Directory",
"directory_placeholder": "Enter Directory Path",
"model_info": "Model Info",
"not_support": "Knowledge base database engine updated, the knowledge base will no longer be supported, please create a new knowledge base"
"not_support": "Knowledge base database engine updated, the knowledge base will no longer be supported, please create a new knowledge base",
"source": "Source"
},
"models": {
"pinned": "Pinned",
Expand Down
6 changes: 4 additions & 2 deletions src/renderer/src/i18n/locales/ru-ru.json
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@
"error.enter.api.host": "Пожалуйста, введите ваш API хост",
"error.enter.api.key": "Пожалуйста, введите ваш API ключ",
"error.enter.model": "Пожалуйста, выберите модель",
"error.enter.name": "Пожалуйста, введите название базы знаний",
"error.invalid.proxy.url": "Неверный URL прокси",
"error.invalid.webdav": "Неверные настройки WebDAV",
"message.code_style": "Стиль кода",
Expand All @@ -249,7 +250,7 @@
"upgrade.success.title": "Обновление успешно",
"regenerate.confirm": "Перегенерация заменит текущее сообщение",
"copy.success": "Скопировано!",
"get_embedding_dimensions": "Не удалось получить размерность встраивания"
"error.get_embedding_dimensions": "Не удалось получить размерность встраивания"
},
"minapp": {
"title": "Встроенные приложения"
Expand Down Expand Up @@ -562,7 +563,8 @@
"add_directory": "Добавить директорию",
"directory_placeholder": "Введите путь к директории",
"model_info": "Модель информации",
"not_support": "База знаний базы данных движок обновлен, база знаний больше не поддерживается, пожалуйста, создайте новую базу знаний"
"not_support": "База знаний базы данных движок обновлен, база знаний больше не поддерживается, пожалуйста, создайте новую базу знаний",
"source": "Источник"
},
"models": {
"pinned": "Закреплено",
Expand Down
6 changes: 4 additions & 2 deletions src/renderer/src/i18n/locales/zh-cn.json
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@
"error.enter.api.host": "请输入您的 API 地址",
"error.enter.api.key": "请输入您的 API 密钥",
"error.enter.model": "请选择一个模型",
"error.enter.name": "请输入知识库名称",
"error.invalid.proxy.url": "无效的代理地址",
"error.invalid.webdav": "无效的 WebDAV 设置",
"message.code_style": "代码风格",
Expand All @@ -250,7 +251,7 @@
"upgrade.success.title": "升级成功",
"regenerate.confirm": "重新生成会覆盖当前消息",
"copy.success": "复制成功",
"get_embedding_dimensions": "获取嵌入维度失败"
"error.get_embedding_dimensions": "获取嵌入维度失败"
},
"minapp": {
"title": "小程序"
Expand Down Expand Up @@ -551,7 +552,8 @@
"add_directory": "添加目录",
"directory_placeholder": "请输入目录路径",
"model_info": "模型信息",
"not_support": "知识库数据库引擎已更新,该知识库将不再支持,请重新创建知识库"
"not_support": "知识库数据库引擎已更新,该知识库将不再支持,请重新创建知识库",
"source": "来源"
},
"models": {
"pinned": "已固定",
Expand Down
6 changes: 4 additions & 2 deletions src/renderer/src/i18n/locales/zh-tw.json
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@
"error.enter.api.host": "請先輸入您的 API 主機地址",
"error.enter.api.key": "請先輸入您的 API 密鑰",
"error.enter.model": "請先選擇一個模型",
"error.enter.name": "請先輸入知識庫名稱",
"error.invalid.proxy.url": "無效的代理 URL",
"error.invalid.webdav": "無效的 WebDAV 設定",
"message.code_style": "程式碼風格",
Expand All @@ -249,7 +250,7 @@
"upgrade.success.title": "升級成功",
"regenerate.confirm": "重新生成會覆蓋當前訊息",
"copy.success": "複製成功",
"get_embedding_dimensions": "獲取嵌入維度失敗"
"error.get_embedding_dimensions": "獲取嵌入維度失敗"
},
"minapp": {
"title": "小程序"
Expand Down Expand Up @@ -550,7 +551,8 @@
"add_directory": "添加目錄",
"directory_placeholder": "請輸入目錄路徑",
"model_info": "模型信息",
"not_support": "知識庫數據庫引擎已更新,該知識庫將不再支持,請重新創建知識庫"
"not_support": "知識庫數據庫引擎已更新,該知識庫將不再支持,請重新創建知識庫",
"source": "來源"
},
"models": {
"pinned": "已固定",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { ExtractChunkData } from '@llm-tools/embedjs-interfaces'
import { TopView } from '@renderer/components/TopView'
import { getKnowledgeBaseParams } from '@renderer/services/KnowledgeService'
import { KnowledgeBase } from '@renderer/types'
import { getFileFromUrl, getKnowledgeBaseParams } from '@renderer/services/KnowledgeService'
import { FileType, KnowledgeBase } from '@renderer/types'
import { Input, List, Modal, Spin, Typography } from 'antd'
import { useState } from 'react'
import { useTranslation } from 'react-i18next'
Expand All @@ -21,7 +21,7 @@ interface Props extends ShowParams {
const PopupContainer: React.FC<Props> = ({ base, resolve }) => {
const [open, setOpen] = useState(true)
const [loading, setLoading] = useState(false)
const [results, setResults] = useState<ExtractChunkData[]>([])
const [results, setResults] = useState<Array<ExtractChunkData & { file: FileType | null }>>([])
const [searchKeyword, setSearchKeyword] = useState('')
const { t } = useTranslation()

Expand All @@ -39,7 +39,13 @@ const PopupContainer: React.FC<Props> = ({ base, resolve }) => {
search: value,
base: getKnowledgeBaseParams(base)
})
setResults(searchResults)
const results = await Promise.all(
searchResults.map(async (item) => {
const file = await getFileFromUrl(item.metadata.source)
return { ...item, file }
})
)
setResults(results)
} catch (error) {
console.error('Search failed:', error)
} finally {
Expand Down Expand Up @@ -102,7 +108,16 @@ const PopupContainer: React.FC<Props> = ({ base, resolve }) => {
<ScoreTag>Score: {(item.score * 100).toFixed(1)}%</ScoreTag>
<Paragraph>{highlightText(item.pageContent)}</Paragraph>
<MetadataContainer>
<Text type="secondary">Source: {item.metadata.source}</Text>
<Text type="secondary">
{t('knowledge_base.source')}:{' '}
{item.file ? (
<a href={`http://file/${item.file.name}`} target="_blank" rel="noreferrer">
{item.file.origin_name}
</a>
) : (
item.metadata.source
)}
</Text>
</MetadataContainer>
</ResultItem>
</List.Item>
Expand Down
24 changes: 3 additions & 21 deletions src/renderer/src/providers/BaseProvider.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import { REFERENCE_PROMPT } from '@renderer/config/prompts'
import { getOllamaKeepAliveTime } from '@renderer/hooks/useOllama'
import { getKnowledgeBaseParams } from '@renderer/services/KnowledgeService'
import { getKnowledgeReferences } from '@renderer/services/KnowledgeService'
import store from '@renderer/store'
import { Assistant, Message, Model, Provider, Suggestion } from '@renderer/types'
import { delay } from '@renderer/utils'
import { take } from 'lodash'
import OpenAI from 'openai'

import { CompletionsParams } from '.'
Expand Down Expand Up @@ -95,25 +94,8 @@ export default abstract class BaseProvider {
return message.content
}

const searchResults = await window.api.knowledgeBase.search({
search: message.content,
base: getKnowledgeBaseParams(base)
})
const references = await getKnowledgeReferences(base, message)

const references = take(searchResults, 6).map((item, index) => {
const sourceUrl = item.metadata.source
const baseItem = base.items.find((i) => i.uniqueId === item.metadata.uniqueLoaderId)

return {
id: index,
content: item.pageContent,
sourceUrl: sourceUrl.startsWith('http') ? sourceUrl : encodeURIComponent(sourceUrl),
type: baseItem?.type
}
})

const referencesContent = `\`\`\`json\n${JSON.stringify(references, null, 2)}\n\`\`\``

return REFERENCE_PROMPT.replace('{question}', message.content).replace('{references}', referencesContent)
return REFERENCE_PROMPT.replace('{question}', message.content).replace('{references}', references)
}
}
4 changes: 2 additions & 2 deletions src/renderer/src/services/FileManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class FileManager {

if (file) {
const filesPath = store.getState().runtime.filesPath
file.path = filesPath + file.id
file.path = filesPath + '/' + file.id + file.ext
}

return file
Expand Down Expand Up @@ -91,7 +91,7 @@ class FileManager {

static getFileUrl(file: FileType) {
const filesPath = store.getState().runtime.filesPath
return 'file://' + filesPath + '/' + file.id + file.ext
return 'file://' + filesPath + '/' + file.name
}
}

Expand Down
74 changes: 71 additions & 3 deletions src/renderer/src/services/KnowledgeService.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import type { ExtractChunkData } from '@llm-tools/embedjs-interfaces'
import AiProvider from '@renderer/providers/AiProvider'
import { KnowledgeBase, KnowledgeBaseParams } from '@renderer/types'
import { isEmpty } from 'lodash'
import { FileType, KnowledgeBase, KnowledgeBaseParams, Message } from '@renderer/types'
import { isEmpty, take } from 'lodash'

import { getProviderByModel } from './AssistantService'
import FileManager from './FileManager'

export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams => {
const provider = getProviderByModel(base.model)
Expand All @@ -14,7 +16,7 @@ export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams

let host = aiProvider.getBaseURL()

if (host.includes('generativelanguage.googleapis.com')) {
if (provider.type === 'gemini') {
host = host + '/v1beta/openai/'
}

Expand All @@ -26,3 +28,69 @@ export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams
baseURL: host
}
}

/**
 * Resolves a knowledge-base source path to the stored file record it points at.
 *
 * Only paths that mention `CherryStudio` and contain a `Data/Files` segment
 * (with either `/` or `\` separators) are considered. The file id is taken to
 * be everything before the first `.` of the text that follows that segment.
 *
 * @param url - Source path or URL recorded in a search hit's metadata.
 * @returns The record returned by `FileManager.getFile`, or `null` when the
 *   path does not match or no record is found.
 */
export const getFileFromUrl = async (url: string): Promise<FileType | null> => {
  // Anything outside the app's own data directory cannot map to a stored file.
  if (!url || !url.includes('CherryStudio')) {
    return null
  }

  let storedName = ''

  // POSIX-style separators.
  if (url.includes('/Data/Files')) {
    storedName = url.split('/Data/Files/')[1]
  }

  // Windows-style separators; evaluated second, so it wins if both forms match.
  if (url.includes('\\Data\\Files')) {
    storedName = url.split('\\Data\\Files\\')[1]
  }

  if (!storedName) {
    return null
  }

  const [fileId] = storedName.split('.')
  const record = await FileManager.getFile(fileId)

  return record ? record : null
}

/**
 * Builds the source reference string for a single search hit.
 *
 * - A source starting with `http` is returned unchanged.
 * - Otherwise, when a stored file is attached, a markdown link is returned whose
 *   label is the file's original name and whose target is `http://file/<name>`.
 * - Otherwise the raw source string is returned.
 *
 * @param item - Search hit together with its resolved stored file, if any.
 * @returns The source reference string.
 */
export const getKnowledgeSourceUrl = async (item: ExtractChunkData & { file: FileType | null }) => {
  const { file, metadata } = item
  const isRemote = metadata.source.startsWith('http')

  // Remote sources always win over an attached file.
  return !isRemote && file ? `[${file.origin_name}](http://file/${file.name})` : metadata.source
}

/** Maximum number of search results forwarded as references. */
const MAX_KNOWLEDGE_REFERENCES = 6

/**
 * Searches a knowledge base with the message content and formats the first
 * results as a fenced JSON block.
 *
 * Each reference carries its index as `id`, the chunk text as `content`, the
 * value produced by `getKnowledgeSourceUrl` as `sourceUrl`, and the `type` of
 * the knowledge-base item whose `uniqueId` matches the chunk's
 * `uniqueLoaderId` (omitted from the JSON when no item matches).
 *
 * @param base - Knowledge base to search; its `items` are used to resolve `type`.
 * @param message - Message whose `content` is used as the search query.
 * @returns A markdown code fence tagged `json` containing the pretty-printed
 *   array of references.
 */
export const getKnowledgeReferences = async (base: KnowledgeBase, message: Message): Promise<string> => {
  const searchResults = await window.api.knowledgeBase.search({
    search: message.content,
    base: getKnowledgeBaseParams(base)
  })

  // Trim first: results beyond the limit are discarded, so they should neither
  // trigger a stored-file lookup nor be able to fail the whole call.
  const references = await Promise.all(
    take(searchResults, MAX_KNOWLEDGE_REFERENCES).map(async (item, index) => {
      const file = await getFileFromUrl(item.metadata.source)
      const baseItem = base.items.find((i) => i.uniqueId === item.metadata.uniqueLoaderId)

      return {
        id: index,
        content: item.pageContent,
        sourceUrl: await getKnowledgeSourceUrl({ ...item, file }),
        type: baseItem?.type
      }
    })
  )

  return `\`\`\`json\n${JSON.stringify(references, null, 2)}\n\`\`\``
}
Loading

0 comments on commit c50ac44

Please sign in to comment.