Skip to content

Commit

Permalink
Merge pull request #484 from ItzCrazyKns/feat/uploads
Browse files Browse the repository at this point in the history
Add file uploads
  • Loading branch information
ItzCrazyKns authored Nov 24, 2024
2 parents c650d1c + 4b89008 commit aa176c1
Showing 25 changed files with 1,035 additions and 86 deletions.
5 changes: 4 additions & 1 deletion .prettierignore
Original file line number Diff line number Diff line change
@@ -35,4 +35,7 @@ coverage
*.swp

# Ignore all files with the .DS_Store extension (macOS specific)
.DS_Store
.DS_Store

# Ignore all files in uploads directory
uploads
1 change: 1 addition & 0 deletions backend.dockerfile
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@ COPY package.json /home/perplexica/
COPY yarn.lock /home/perplexica/

RUN mkdir /home/perplexica/data
RUN mkdir /home/perplexica/uploads

RUN yarn install --frozen-lockfile --network-timeout 600000
RUN yarn build
2 changes: 2 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -22,6 +22,7 @@ services:
- 3001:3001
volumes:
- backend-dbstore:/home/perplexica/data
- uploads:/home/perplexica/uploads
- ./config.toml:/home/perplexica/config.toml
extra_hosts:
- 'host.docker.internal:host-gateway'
@@ -50,3 +51,4 @@ networks:

volumes:
backend-dbstore:
uploads:
5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
"scripts": {
"start": "npm run db:push && node dist/app.js",
"build": "tsc",
"dev": "nodemon src/app.ts",
"dev": "nodemon --ignore uploads/ src/app.ts ",
"db:push": "drizzle-kit push sqlite",
"format": "prettier . --check",
"format:write": "prettier . --write"
@@ -16,6 +16,7 @@
"@types/cors": "^2.8.17",
"@types/express": "^4.17.21",
"@types/html-to-text": "^9.0.4",
"@types/multer": "^1.4.12",
"@types/pdf-parse": "^1.1.4",
"@types/readable-stream": "^4.0.11",
"@types/ws": "^8.5.12",
@@ -41,6 +42,8 @@
"express": "^4.19.2",
"html-to-text": "^9.0.5",
"langchain": "^0.1.30",
"mammoth": "^1.8.0",
"multer": "^1.4.5-lts.1",
"pdf-parse": "^1.1.1",
"winston": "^3.13.0",
"ws": "^8.17.1",
87 changes: 84 additions & 3 deletions src/agents/webSearchAgent.ts
Original file line number Diff line number Diff line change
@@ -20,10 +20,12 @@ import eventEmitter from 'events';
import computeSimilarity from '../utils/computeSimilarity';
import logger from '../utils/logger';
import LineListOutputParser from '../lib/outputParsers/listLineOutputParser';
import { getDocumentsFromLinks } from '../lib/linkDocument';
import LineOutputParser from '../lib/outputParsers/lineOutputParser';
import { IterableReadableStream } from '@langchain/core/utils/stream';
import { ChatOpenAI } from '@langchain/openai';
import path from 'path';
import fs from 'fs';
import { getDocumentsFromLinks } from '../utils/documents';

const basicSearchRetrieverPrompt = `
You are an AI question rephraser. You will be given a conversation and a follow-up question, you will have to rephrase the follow up question so it is a standalone question and can be used by another LLM to search the web for information to answer it.
@@ -316,6 +318,7 @@ const createBasicWebSearchAnsweringChain = (
llm: BaseChatModel,
embeddings: Embeddings,
optimizationMode: 'speed' | 'balanced' | 'quality',
fileIds: string[],
) => {
const basicWebSearchRetrieverChain = createBasicWebSearchRetrieverChain(llm);

@@ -336,16 +339,76 @@ const createBasicWebSearchAnsweringChain = (
return docs;
}

// Load the pre-extracted text chunks and their stored embeddings for every
// uploaded file ID, flattened into a single list with one entry per chunk.
//
// For each ID two JSON files are read from <cwd>/uploads:
//   <id>-extracted.json   -> read as { title, contents: [...] }
//   <id>-embeddings.json  -> read as { embeddings: [...] }, indexed in step
//                            with `contents`
//
// Reads are synchronous; a missing file or invalid JSON throws.
// NOTE(review): `file` is joined into the path unchecked. If file IDs can
// originate from a client, validate them upstream (e.g. reject path
// separators) so they cannot escape the uploads directory — confirm.
const filesData = fileIds.flatMap((file) => {
  const filePath = path.join(process.cwd(), 'uploads', file);

  const contentPath = filePath + '-extracted.json';
  const embeddingsPath = filePath + '-embeddings.json';

  const content = JSON.parse(fs.readFileSync(contentPath, 'utf8'));
  const embeddings = JSON.parse(fs.readFileSync(embeddingsPath, 'utf8'));

  // `content` is `any` (JSON.parse), so the callback receives no contextual
  // typing; the index is annotated explicitly to avoid an implicit `any`.
  return content.contents.map((c: string, i: number) => ({
    fileName: content.title,
    content: c,
    embeddings: embeddings.embeddings[i],
  }));
});

if (query.toLocaleLowerCase() === 'summarize') {
return docs.slice(0, 15)
return docs.slice(0, 15);
}

const docsWithContent = docs.filter(
(doc) => doc.pageContent && doc.pageContent.length > 0,
);

if (optimizationMode === 'speed') {
return docsWithContent.slice(0, 15);
if (filesData.length > 0) {
const [queryEmbedding] = await Promise.all([
embeddings.embedQuery(query),
]);

const fileDocs = filesData.map((fileData) => {
return new Document({
pageContent: fileData.content,
metadata: {
title: fileData.fileName,
url: `File`,
},
});
});

const similarity = filesData.map((fileData, i) => {
const sim = computeSimilarity(queryEmbedding, fileData.embeddings);

return {
index: i,
similarity: sim,
};
});

const sortedDocs = similarity
.filter((sim) => sim.similarity > 0.3)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, 8)
.map((sim) => fileDocs[sim.index]);

return [
...sortedDocs,
...docsWithContent.slice(0, 15 - sortedDocs.length),
];
} else {
return docsWithContent.slice(0, 15);
}
} else if (optimizationMode === 'balanced') {
const [docEmbeddings, queryEmbedding] = await Promise.all([
embeddings.embedDocuments(
@@ -354,6 +417,20 @@ const createBasicWebSearchAnsweringChain = (
embeddings.embedQuery(query),
]);

docsWithContent.push(
...filesData.map((fileData) => {
return new Document({
pageContent: fileData.content,
metadata: {
title: fileData.fileName,
url: `File`,
},
});
}),
);

docEmbeddings.push(...filesData.map((fileData) => fileData.embeddings));

const similarity = docEmbeddings.map((docEmbedding, i) => {
const sim = computeSimilarity(queryEmbedding, docEmbedding);

@@ -408,6 +485,7 @@ const basicWebSearch = (
llm: BaseChatModel,
embeddings: Embeddings,
optimizationMode: 'speed' | 'balanced' | 'quality',
fileIds: string[],
) => {
const emitter = new eventEmitter();

@@ -416,6 +494,7 @@ const basicWebSearch = (
llm,
embeddings,
optimizationMode,
fileIds,
);

const stream = basicWebSearchAnsweringChain.streamEvents(
@@ -446,13 +525,15 @@ const handleWebSearch = (
llm: BaseChatModel,
embeddings: Embeddings,
optimizationMode: 'speed' | 'balanced' | 'quality',
fileIds: string[],
) => {
const emitter = basicWebSearch(
message,
history,
llm,
embeddings,
optimizationMode,
fileIds,
);
return emitter;
};
9 changes: 9 additions & 0 deletions src/db/schema.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { sql } from 'drizzle-orm';
import { text, integer, sqliteTable } from 'drizzle-orm/sqlite-core';

export const messages = sqliteTable('messages', {
@@ -11,9 +12,17 @@ export const messages = sqliteTable('messages', {
}),
});

/**
 * Shape of one entry in the JSON array stored in the `files` column of the
 * `chats` table.
 *
 * NOTE(review): this local name shadows any global `File` type inside this
 * module — confirm that is intended.
 */
interface File {
// NOTE(review): presumably the display/original name of the uploaded file — confirm against the uploads route.
name: string;
// NOTE(review): presumably the identifier used to locate the stored upload — confirm against the uploads route.
fileId: string;
}

/**
 * Drizzle definition of the SQLite `chats` table.
 *
 * All columns are text. `id` is the primary key; `title`, `createdAt` and
 * `focusMode` are declared NOT NULL.
 */
export const chats = sqliteTable('chats', {
id: text('id').primaryKey(),
title: text('title').notNull(),
// Stored as text. NOTE(review): the timestamp format is not visible here — confirm with the writers of this column.
createdAt: text('createdAt').notNull(),
focusMode: text('focusMode').notNull(),
// JSON-mode text column typed as File[]. It is not declared NOT NULL and
// defaults to the SQL string literal '[]' (an empty JSON array).
files: text('files', { mode: 'json' })
.$type<File[]>()
.default(sql`'[]'`),
});
4 changes: 2 additions & 2 deletions src/lib/providers/ollama.ts
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@ import { ChatOllama } from '@langchain/community/chat_models/ollama';
export const loadOllamaChatModels = async () => {
const ollamaEndpoint = getOllamaApiEndpoint();
const keepAlive = getKeepAlive();

if (!ollamaEndpoint) return {};

try {
@@ -25,7 +25,7 @@ export const loadOllamaChatModels = async () => {
baseUrl: ollamaEndpoint,
model: model.model,
temperature: 0.7,
keepAlive: keepAlive
keepAlive: keepAlive,
}),
};

2 changes: 2 additions & 0 deletions src/routes/index.ts
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ import suggestionsRouter from './suggestions';
import chatsRouter from './chats';
import searchRouter from './search';
import discoverRouter from './discover';
import uploadsRouter from './uploads';

const router = express.Router();

@@ -18,5 +19,6 @@ router.use('/suggestions', suggestionsRouter);
router.use('/chats', chatsRouter);
router.use('/search', searchRouter);
router.use('/discover', discoverRouter);
router.use('/uploads', uploadsRouter);

export default router;
Loading

0 comments on commit aa176c1

Please sign in to comment.