Skip to content

Commit

Permalink
refactor: knowledge base database engine
Browse files Browse the repository at this point in the history
  • Loading branch information
kangfenmao committed Dec 25, 2024
1 parent b857659 commit 34ebab0
Show file tree
Hide file tree
Showing 31 changed files with 611 additions and 380 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
- name: Build Linux
if: matrix.os == 'ubuntu-latest'
run: |
yarn download:npm linux
yarn build:npm linux
yarn build:linux
env:
Expand All @@ -58,7 +58,7 @@ jobs:
- name: Build Mac
if: matrix.os == 'macos-latest'
run: |
yarn download:npm darwin
yarn build:npm mac
yarn build:mac
env:
CSC_LINK: ${{ secrets.CSC_LINK }}
Expand Down

This file was deleted.

17 changes: 17 additions & 0 deletions .yarn/patches/@llm-tools-embedjs-npm-0.1.25-ec5645cf36.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
diff --git a/src/core/rag-embedding.js b/src/core/rag-embedding.js
index 50c3c4064af17bc4c7c46554d8f2419b3afceb0e..632c9b2e04d2e0e3bb09ef1cd8f29d2560e6afc1 100644
--- a/src/core/rag-embedding.js
+++ b/src/core/rag-embedding.js
@@ -1,10 +1,8 @@
export class RAGEmbedding {
static singleton;
static async init(embeddingModel) {
- if (!this.singleton) {
- await embeddingModel.init();
- this.singleton = new RAGEmbedding(embeddingModel);
- }
+ await embeddingModel.init();
+ this.singleton = new RAGEmbedding(embeddingModel);
}
static getInstance() {
return RAGEmbedding.singleton;
12 changes: 4 additions & 8 deletions electron.vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ export default defineConfig({
'@llm-tools/embedjs-loader-xml',
'@llm-tools/embedjs-loader-pdf',
'@llm-tools/embedjs-loader-sitemap',
'@llm-tools/embedjs-libsql'
'@llm-tools/embedjs-lancedb'
]
}),
...visualizerPlugin('main')
Expand All @@ -34,9 +34,8 @@ export default defineConfig({
},
build: {
rollupOptions: {
external: ['@libsql/client']
},
minify: true
external: ['@lancedb/lancedb']
}
}
},
preload: {
Expand All @@ -51,10 +50,7 @@ export default defineConfig({
}
},
optimizeDeps: {
exclude: ['chunk-7UIZINC5.js', 'chunk-7OJJKI46.js']
},
build: {
minify: true
exclude: []
}
}
})
7 changes: 4 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
"typecheck": "npm run typecheck:node && npm run typecheck:web",
"start": "electron-vite preview",
"dev": "electron-vite dev",
"download:npm": "node scripts/download-npm.js",
"build": "npm run typecheck && electron-vite build",
"postinstall": "electron-builder install-app-deps",
"build:unpack": "dotenv npm run build && electron-builder --dir",
Expand All @@ -37,6 +36,7 @@
"build:linux": "dotenv electron-vite build && electron-builder --linux",
"build:linux:arm64": "dotenv electron-vite build && electron-builder --linux --arm64",
"build:linux:x64": "dotenv electron-vite build && electron-builder --linux --x64",
"build:npm": "node scripts/build-npm.js",
"release": "node scripts/version.js",
"publish": "yarn release patch push",
"pulish:artifacts": "cd packages/artifacts && npm publish && cd -",
Expand All @@ -49,8 +49,8 @@
"@electron-toolkit/preload": "^3.0.0",
"@electron-toolkit/utils": "^3.0.0",
"@electron/notarize": "^2.5.0",
"@llm-tools/embedjs": "^0.1.25",
"@llm-tools/embedjs-libsql": "patch:@llm-tools/embedjs-libsql@npm%3A0.1.25#~/.yarn/patches/@llm-tools-embedjs-libsql-npm-0.1.25-fad000d74c.patch",
"@llm-tools/embedjs": "patch:@llm-tools/embedjs@npm%3A0.1.25#~/.yarn/patches/@llm-tools-embedjs-npm-0.1.25-ec5645cf36.patch",
"@llm-tools/embedjs-lancedb": "^0.1.25",
"@llm-tools/embedjs-loader-csv": "^0.1.25",
"@llm-tools/embedjs-loader-markdown": "^0.1.25",
"@llm-tools/embedjs-loader-msoffice": "^0.1.25",
Expand All @@ -61,6 +61,7 @@
"@llm-tools/embedjs-openai": "^0.1.25",
"@types/react-infinite-scroll-component": "^5.0.0",
"adm-zip": "^0.5.16",
"apache-arrow": "^18.1.0",
"docx": "^9.0.2",
"electron-log": "^5.1.5",
"electron-store": "^8.2.0",
Expand Down
25 changes: 15 additions & 10 deletions scripts/after-pack.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,34 @@ exports.default = async function (context) {
const arch = context.arch

if (platform === 'mac') {
const nodeModulesPath = path.join(
const node_modules_path = path.join(
context.appOutDir,
'Cherry Studio.app',
'Contents',
'Resources',
'app.asar.unpacked',
'node_modules',
'@libsql'
'node_modules'
)

keepLibsqlNodeModules(nodeModulesPath, arch === Arch.arm64 ? ['darwin-arm64'] : ['darwin-x64'])
removeDifferentArchNodeFiles(
node_modules_path,
'@lancedb',
arch === Arch.arm64 ? ['lancedb-darwin-arm64'] : ['lancedb-darwin-x64']
)
}

if (platform === 'linux') {
const nodeModulesPath = path.join(context.appOutDir, 'resources', 'app.asar.unpacked', 'node_modules', '@libsql')
keepLibsqlNodeModules(
nodeModulesPath,
arch === Arch.arm64 ? ['linux-arm64-gnu', 'linux-arm64-musl'] : ['linux-x64-gnu', 'linux-x64-musl']
)
const node_modules_path = path.join(context.appOutDir, 'resources', 'app.asar.unpacked', 'node_modules')
const _arch =
arch === Arch.arm64
? ['lancedb-linux-arm64-gnu', 'lancedb-linux-arm64-musl']
: ['lancedb-linux-x64-gnu', 'lancedb-linux-x64-musl']
removeDifferentArchNodeFiles(node_modules_path, '@lancedb', _arch)
}
}

function keepLibsqlNodeModules(modulePath, arch) {
function removeDifferentArchNodeFiles(nodeModulesPath, packageName, arch) {
const modulePath = path.join(nodeModulesPath, packageName)
const dirs = fs.readdirSync(modulePath)
dirs
.filter((dir) => !arch.includes(dir))
Expand Down
28 changes: 28 additions & 0 deletions scripts/build-npm.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
const { downloadNpmPackage } = require('./utils')

/**
 * Version of the prebuilt `@lancedb/lancedb-*` native packages to fetch.
 * Kept in one place so the package set cannot drift between platforms.
 */
const LANCEDB_VERSION = '0.14.0'

/**
 * Prebuilt LanceDB package suffixes, keyed by the platform name accepted on
 * the command line. Insertion order is the download order.
 */
const LANCEDB_TARGETS = new Map([
  ['mac', ['darwin-arm64', 'darwin-x64']],
  ['linux', ['linux-arm64-gnu', 'linux-x64-gnu']]
])

/**
 * Downloads the prebuilt LanceDB native packages for the given platform
 * via `downloadNpmPackage`.
 *
 * @param {string} [platform] 'mac' or 'linux'. 'darwin' (Node's
 *   `process.platform` name for macOS) is accepted as an alias for 'mac'.
 *   When omitted or empty, packages for every known platform are downloaded.
 * @returns {Promise<void>} Resolves once every download call has returned;
 *   rejects with whatever `downloadNpmPackage` throws.
 */
async function downloadNpm(platform) {
  const requested = platform === 'darwin' ? 'mac' : platform

  // An unknown platform used to be a silent no-op; make the mistake visible.
  if (requested && !LANCEDB_TARGETS.has(requested)) {
    const known = Array.from(LANCEDB_TARGETS.keys()).join(', ')
    console.warn(`Unknown platform "${platform}", nothing to download (expected one of: ${known})`)
    return
  }

  for (const [name, targets] of LANCEDB_TARGETS) {
    if (requested && requested !== name) {
      continue
    }

    for (const target of targets) {
      const packageName = `@lancedb/lancedb-${target}`
      downloadNpmPackage(
        packageName,
        `https://registry.npmjs.org/${packageName}/-/lancedb-${target}-${LANCEDB_VERSION}.tgz`
      )
    }
  }
}

// Optional first CLI argument selects the platform ('mac' or 'linux');
// when it is omitted, packages for every platform are downloaded.
const platformArg = process.argv[2]

// downloadNpm is async, so handle its rejection explicitly: log the failure
// and mark the process as failed instead of leaving a floating promise.
downloadNpm(platformArg).catch((error) => {
  console.error(error)
  process.exitCode = 1
})
14 changes: 0 additions & 14 deletions scripts/download-npm.js

This file was deleted.

48 changes: 22 additions & 26 deletions scripts/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,33 @@ const fs = require('fs')
const path = require('path')
const os = require('os')

function downloadNpmPackage(package, version, platform, architectures = ['x64', 'arm64']) {
function downloadNpmPackage(packageName, url) {
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'npm-download-'))

for (const arch of architectures) {
const targetDir = path.join('./node_modules/', package, `${platform}-${arch}`)
const targetDir = path.join('./node_modules/', packageName)
const filename = packageName.replace('/', '-') + '.tgz'

// Skip if directory already exists
if (fs.existsSync(targetDir)) {
console.log(`${targetDir} already exists, skipping download...`)
continue
}

const filename = path.join(tempDir, `${platform}-${arch}-${version}.tgz`)
const url = `https://registry.npmjs.org/${package}/${platform}-${arch}/-/${platform}-${arch}-${version}.tgz`

try {
console.log(`Downloading ${filename}...`, url)
const { execSync } = require('child_process')
execSync(`curl --fail -o ${filename} ${url}`)
// Skip if directory already exists
if (fs.existsSync(targetDir)) {
console.log(`${targetDir} already exists, skipping download...`)
return
}

console.log(`Extracting ${filename}...`)
execSync(`tar -xvf ${filename}`)
execSync(`rm -rf ${filename}`)
execSync(`mv package ${targetDir}`)
} catch (error) {
console.error(`Error processing ${filename}: ${error.message}`)
if (fs.existsSync(filename)) {
fs.unlinkSync(filename)
}
throw error
try {
console.log(`Downloading ${packageName}...`, url)
const { execSync } = require('child_process')
execSync(`curl --fail -o ${filename} ${url}`)

console.log(`Extracting ${filename}...`)
execSync(`tar -xvf ${filename}`)
execSync(`rm -rf ${filename}`)
execSync(`mv package ${targetDir}`)
} catch (error) {
console.error(`Error processing ${packageName}: ${error.message}`)
if (fs.existsSync(filename)) {
fs.unlinkSync(filename)
}
throw error
}

fs.rmSync(tempDir, { recursive: true, force: true })
Expand Down
2 changes: 0 additions & 2 deletions src/main/services/FileStorage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ class FileStorage {
const storedFilePath = path.join(this.storageDir, file)
const storedStats = fs.statSync(storedFilePath)

console.debug('storedFilePath', storedFilePath)

if (storedStats.size === fileSize) {
const [originalHash, storedHash] = await Promise.all([
this.getFileHash(filePath),
Expand Down
24 changes: 13 additions & 11 deletions src/main/services/KnowledgeService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import path from 'node:path'

import { LocalPathLoader, RAGApplication, RAGApplicationBuilder, TextLoader } from '@llm-tools/embedjs'
import { AddLoaderReturn, ExtractChunkData } from '@llm-tools/embedjs-interfaces'
import { LibSqlDb } from '@llm-tools/embedjs-libsql'
import { LanceDb } from '@llm-tools/embedjs-lancedb'
import { MarkdownLoader } from '@llm-tools/embedjs-loader-markdown'
import { DocxLoader, ExcelLoader, PptLoader } from '@llm-tools/embedjs-loader-msoffice'
import { PdfLoader } from '@llm-tools/embedjs-loader-pdf'
Expand All @@ -26,28 +26,30 @@ class KnowledgeService {
}
}

private getRagApplication = async ({ id, model, apiKey, baseURL }: KnowledgeBaseParams): Promise<RAGApplication> => {
console.debug('getRagApplication', path.join(this.storageDir, id))
private getRagApplication = async ({
id,
model,
apiKey,
baseURL,
dimensions
}: KnowledgeBaseParams): Promise<RAGApplication> => {
return new RAGApplicationBuilder()
.setModel('NO_MODEL')
.setEmbeddingModel(
new OpenAiEmbeddings({
model,
apiKey,
configuration: { baseURL },
dimensions: 1024,
batchSize: 10
dimensions,
batchSize: 20
})
)
.setVectorDatabase(new LibSqlDb({ path: path.join(this.storageDir, id) }))
.setVectorDatabase(new LanceDb({ path: path.join(this.storageDir, id) }))
.build()
}

public create = async (
_: Electron.IpcMainInvokeEvent,
{ id, model, apiKey, baseURL }: KnowledgeBaseParams
): Promise<void> => {
this.getRagApplication({ id, model, apiKey, baseURL })
public create = async (_: Electron.IpcMainInvokeEvent, base: KnowledgeBaseParams): Promise<void> => {
this.getRagApplication(base)
}

public reset = async (_: Electron.IpcMainInvokeEvent, { base }: { base: KnowledgeBaseParams }): Promise<void> => {
Expand Down
Loading

0 comments on commit 34ebab0

Please sign in to comment.