diff --git a/frontend/src/app/chat/chat-controls/chat-controls.component.ts b/frontend/src/app/chat/chat-controls/chat-controls.component.ts
index 57ccb8bd..181de3ad 100644
--- a/frontend/src/app/chat/chat-controls/chat-controls.component.ts
+++ b/frontend/src/app/chat/chat-controls/chat-controls.component.ts
@@ -87,7 +87,7 @@ export class ChatControlsComponent implements OnInit {
         this.scrollBottom();
         this.messageSent.emit([
           { role: 'user', text: msg, index: -1 },
-          { role: 'assistant', text: data, index: -1 },
+          { role: 'assistant', text: data, index: -1, llmId: selectedLlmId },
         ]);
       },
       error: (err: Error) => {
diff --git a/frontend/src/app/chat/model/chat.ts b/frontend/src/app/chat/model/chat.ts
index c33a702f..3e49421a 100644
--- a/frontend/src/app/chat/model/chat.ts
+++ b/frontend/src/app/chat/model/chat.ts
@@ -1,6 +1,8 @@
+
 export interface LlmMessage {
   role: 'system' | 'user' | 'assistant';
   text: string;
+  llmId?: string;
   /** Set the cache_control flag with Claude models */
   cache?: 'ephemeral';
   index: number;
diff --git a/frontend/src/app/shared/services/llm.service.ts b/frontend/src/app/shared/services/llm.service.ts
index f38e0551..7faccb7e 100644
--- a/frontend/src/app/shared/services/llm.service.ts
+++ b/frontend/src/app/shared/services/llm.service.ts
@@ -33,7 +33,7 @@ export class LlmService {
   }
 
   private fetchLlms(): Observable<LLM[]> {
-    return this.http.get<{ data: LLM[] }>(`${environment.serverUrl}/api/llms/list`)
+    return this.http.get<{ data: LLM[] }>(`${environment.serverUrl}/llms/list`)
       .pipe(
         map(response => response.data),
         retry(3),
diff --git a/src/cli/easy.ts b/src/cli/easy.ts
index 87479187..7a25aa54 100644
--- a/src/cli/easy.ts
+++ b/src/cli/easy.ts
@@ -10,7 +10,7 @@ import { Blueberry } from '#llm/multi-agent/blueberry.ts';
 import { initFirestoreApplicationContext } from '../app';
 import { parseProcessArgs } from './cli';
 
-// See https://github.com/autogenai/easy-problems-that-llms-get-wrong
+// See https://arxiv.org/html/2405.19616v1 https://github.com/autogenai/easy-problems-that-llms-get-wrong
 
 // Usage:
 // npm run easy
diff --git a/src/cli/research.ts b/src/cli/research.ts
index cf212463..f3ac4617 100644
--- a/src/cli/research.ts
+++ b/src/cli/research.ts
@@ -3,20 +3,19 @@ import '#fastify/trace-init/trace-init'; // leave an empty line next so this doe
 import { readFileSync } from 'fs';
 
 import { AgentLLMs } from '#agent/agentContextTypes';
-import { startAgent, startAgentAndWait } from '#agent/agentRunner';
+import { startAgentAndWait } from '#agent/agentRunner';
 import { Perplexity } from '#functions/web/perplexity';
 import { PublicWeb } from '#functions/web/web';
 import { ClaudeVertexLLMs } from '#llm/models/anthropic-vertex';
 import { fireworksLlama3_70B } from '#llm/models/fireworks';
-import { groqMixtral8x7b } from '#llm/models/groq';
+
 import { Ollama_LLMs } from '#llm/models/ollama';
 import { togetherLlama3_70B } from '#llm/models/together';
-import { CliOptions, getLastRunAgentId, parseProcessArgs, saveAgentId } from './cli';
+import { parseProcessArgs, saveAgentId } from './cli';
 
 // Usage:
 // npm run research
 
-const groqMixtral = groqMixtral8x7b();
 let llama3 = togetherLlama3_70B();
 llama3 = fireworksLlama3_70B();
 
diff --git a/src/functions/storage/chroma.ts b/src/functions/storage/chroma.ts
index e69de29b..816c7a31 100644
--- a/src/functions/storage/chroma.ts
+++ b/src/functions/storage/chroma.ts
@@ -0,0 +1,35 @@
+// import { ChromaClient } from 'chromadb';
+// import { funcClass } from '#functionSchema/functionDecorators';
+//
+// @funcClass(__filename)
+// export class Chroma {
+// 	client = new ChromaClient();
+//
+// 	private async createCollection(name: string, metadata?: Record<string, any>): Promise<Collection> {
+// 		try {
+// 			return await this.client.createCollection({ name, metadata });
+// 		} catch (error) {
+// 			console.error(`Error creating collection ${name}:`, error);
+// 			throw error;
+// 		}
+// 	}
+//
+// 	private async getCollection(name: string): Promise<Collection> {
+// 		try {
+// 			return await this.client.getCollection({ name });
+// 		} catch (error) {
+// 			console.error(`Error getting collection ${name}:`, error);
+// 			throw error;
+// 		}
+// 	}
+//
+// 	private async deleteCollection(name: string): Promise<void> {
+// 		try {
+// 			await this.client.deleteCollection({ name });
+// 			console.log(`Collection ${name} deleted successfully`);
+// 		} catch (error) {
+// 			console.error(`Error deleting collection ${name}:`, error);
+// 			throw error;
+// 		}
+// 	}
+// }
diff --git a/src/llm/llmFactory.ts b/src/llm/llmFactory.ts
index f87de26e..c34fcfd5 100644
--- a/src/llm/llmFactory.ts
+++ b/src/llm/llmFactory.ts
@@ -10,6 +10,7 @@ import { ollamaLLMRegistry } from '#llm/models/ollama';
 import { openAiLLMRegistry } from '#llm/models/openai';
 import { togetherLLMRegistry } from '#llm/models/together';
 import { vertexLLMRegistry } from '#llm/models/vertexai';
+import { blueberryLLMRegistry } from '#llm/multi-agent/blueberry.ts';
 import { MultiLLM } from '#llm/multi-llm';
 import { logger } from '#o11y/logger';
 
@@ -23,6 +24,7 @@ export const LLM_FACTORY: Record<string, () => LLM> = {
 	...vertexLLMRegistry(),
 	...deepseekLLMRegistry(),
 	...ollamaLLMRegistry(),
+	...blueberryLLMRegistry(),
 	...{ 'mock:mock': () => mockLLM },
 };
 
diff --git a/src/llm/models/deepseek.ts b/src/llm/models/deepseek.ts
index 40e45d94..31fb5299 100644
--- a/src/llm/models/deepseek.ts
+++ b/src/llm/models/deepseek.ts
@@ -15,21 +15,10 @@ export const DEEPSEEK_SERVICE = 'deepseek';
 
 export function deepseekLLMRegistry(): Record<string, () => LLM> {
 	return {
-		[`${DEEPSEEK_SERVICE}:deepseek-coder`]: () => deepseekChat(),
 		[`${DEEPSEEK_SERVICE}:deepseek-chat`]: () => deepseekChat(),
 	};
 }
 
-// export function deepseekCoder(): LLM {
-// 	return new DeepseekLLM(
-// 		'DeepSeek Coder',
-// 		'deepseek-coder',
-// 		32000,
-// 		(input: string) => (input.length * 0.14) / (1_000_000 * 3.5),
-// 		(output: string) => (output.length * 0.28) / (1_000_000 * 3.5),
-// 	);
-// }
-
 export function deepseekChat(): LLM {
 	return new DeepseekLLM(
 		'DeepSeek Chat',
@@ -73,7 +62,6 @@ export class DeepseekLLM extends BaseLLM {
 		super(displayName, DEEPSEEK_SERVICE, model, maxTokens, inputCostPerToken, outputCostPerToken);
 	}
 
-	@logTextGeneration
 	async generateText(userPrompt: string, systemPrompt?: string, opts?: GenerateTextOptions): Promise<string> {
 		return withSpan(`generateText ${opts?.id ?? ''}`, async (span) => {
 			const prompt = combinePrompts(userPrompt, systemPrompt);
diff --git a/src/llm/models/llm.int.ts b/src/llm/models/llm.int.ts
index 6762442e..09cfa5ca 100644
--- a/src/llm/models/llm.int.ts
+++ b/src/llm/models/llm.int.ts
@@ -5,7 +5,7 @@ import { Claude3_Haiku_Vertex } from '#llm/models/anthropic-vertex';
 import { cerebrasLlama3_8b } from '#llm/models/cerebras';
 import { deepseekChat } from '#llm/models/deepseek';
 import { fireworksLlama3_70B } from '#llm/models/fireworks';
-import { groqGemma7bIt } from '#llm/models/groq';
+import { groqLlama3_1_8b } from '#llm/models/groq.ts';
 import { Ollama_Phi3 } from '#llm/models/ollama';
 import { GPT4oMini } from '#llm/models/openai';
 import { togetherLlama3_70B } from '#llm/models/together';
@@ -84,7 +84,7 @@ describe('LLMs', () => {
 	});
 
 	describe('Groq', () => {
-		const llm = groqGemma7bIt();
+		const llm = groqLlama3_1_8b();
 
 		it('should generateText', async () => {
 			const response = await llm.generateText(SKY_PROMPT, null, { temperature: 0 });
diff --git a/src/llm/multi-agent/blueberry.ts b/src/llm/multi-agent/blueberry.ts
index 03bbfcff..3734e9d4 100644
--- a/src/llm/multi-agent/blueberry.ts
+++ b/src/llm/multi-agent/blueberry.ts
@@ -1,11 +1,22 @@
 import { BaseLLM } from '#llm/base-llm.ts';
 import { GenerateTextOptions, LLM } from '#llm/llm.ts';
+import { getLLM } from '#llm/llmFactory.ts';
 import { Claude3_5_Sonnet_Vertex } from '#llm/models/anthropic-vertex.ts';
 import { fireworksLlama3_405B } from '#llm/models/fireworks.ts';
 import { GPT4o } from '#llm/models/openai.ts';
 import { Gemini_1_5_Pro } from '#llm/models/vertexai.ts';
 import { logger } from '#o11y/logger.ts';
 
+// sparse multi-agent debate https://arxiv.org/abs/2406.11776
+// self-refine https://arxiv.org/pdf/2303.17651
+// https://www.academia.edu/123745078/Mind_over_Data_Elevating_LLMs_from_Memorization_to_Cognition
+
+export function blueberryLLMRegistry(): Record<string, () => LLM> {
+	return {
+		'blueberry:': () => new Blueberry(),
+	};
+}
+
 const MIND_OVER_DATA_SYS_PROMPT = `When addressing a problem, employ "Comparative Problem Analysis and Direct Reasoning" as follows:
 
 1. Problem Transcription:
@@ -66,18 +77,40 @@ const MIND_OVER_DATA_SYS_PROMPT = `When addressing a problem, employ "Comparativ
 `;
 
 export class Blueberry extends BaseLLM {
-	llms: LLM[] = [Claude3_5_Sonnet_Vertex(), GPT4o(), Gemini_1_5_Pro(), Claude3_5_Sonnet_Vertex(), fireworksLlama3_405B()];
-	mediator: LLM = Claude3_5_Sonnet_Vertex();
+	llms: LLM[];
+	mediator: LLM;
 
-	constructor() {
+	constructor(model = 'default') {
 		super(
 			'Blueberry',
-			'MAD',
 			'blueberry',
+			model,
 			200_000,
 			() => 0,
 			() => 0,
 		);
+		if (model !== 'default') {
+			try {
+				const parts = model.split('|');
+				if (parts.length > 1) {
+					// Set the mediator
+					this.mediator = getLLM(parts[0]);
+
+					// Set the LLMs
+					this.llms = parts.slice(1).map((llmId) => getLLM(llmId));
+				} else {
+					logger.error(`Invalid model string format for Blueberry ${model}`);
+				}
+			} catch (e) {
+				logger.error(e, `Invalid model string format for Blueberry ${model}`);
+			}
+		}
+		if (!this.llms) this.llms = [Claude3_5_Sonnet_Vertex(), GPT4o(), Gemini_1_5_Pro(), Claude3_5_Sonnet_Vertex(), fireworksLlama3_405B()];
+		if (!this.mediator) this.mediator = Claude3_5_Sonnet_Vertex();
+	}
+
+	getModel(): string {
+		return `${this.mediator.getId()}|${this.llms.map((llm) => llm.getId()).join('|')}`;
 	}
 
 	async generateText(userPrompt: string, systemPrompt?: string, opts?: GenerateTextOptions): Promise<string> {
diff --git a/src/llm/multi-llm.ts b/src/llm/multi-llm.ts
index 9bce14db..0454cfef 100644
--- a/src/llm/multi-llm.ts
+++ b/src/llm/multi-llm.ts
@@ -1,7 +1,7 @@
 import { llms } from '#agent/agentContextLocalStorage';
 import { logger } from '#o11y/logger';
 import { BaseLLM } from './base-llm';
-import { LLM } from './llm';
+import { GenerateTextOptions, LLM } from './llm';
 
 /*
 https://news.ycombinator.com/item?id=39955725
@@ -33,17 +33,17 @@ export class MultiLLM extends BaseLLM {
 		this.maxTokens = Math.min(...llms.map((llm) => llm.getMaxInputTokens()));
 	}
 
-	async generateText(prompt: string, systemPrompt?: string): Promise<string> {
+	async generateText(userPrompt: string, systemPrompt?: string, opts?: GenerateTextOptions): Promise<string> {
 		const calls: Array<{ model: string; call: Promise<string> }> = [];
 		for (const llm of this.llms) {
 			for (let i = 0; i < this.callsPerLLM; i++) {
-				calls.push({ model: llm.getModel(), call: llm.generateText(prompt, systemPrompt) });
+				calls.push({ model: llm.getModel(), call: llm.generateText(userPrompt, systemPrompt) });
 			}
 		}
 		const settled = await Promise.allSettled(calls.map((call) => call.call));
 		const responses = settled.filter((result) => result.status === 'fulfilled').map((result) => (result as PromiseFulfilledResult<string>).value);
-		const response = await llms().hard.generateTextWithResult(selectBestResponsePrompt(responses, prompt, systemPrompt));
+		const response = await llms().hard.generateTextWithResult(selectBestResponsePrompt(responses, userPrompt, systemPrompt));
 		const index = Number.parseInt(response) - 1; // sub 1 as responses are indexed from 1 in the prompt
 		logger.info(`Best response was from ${calls[index].model}`);
 		return responses[index];
diff --git a/src/routes/chat/chat-routes.ts b/src/routes/chat/chat-routes.ts
index 2401f906..8a5e82fe 100644
--- a/src/routes/chat/chat-routes.ts
+++ b/src/routes/chat/chat-routes.ts
@@ -2,8 +2,10 @@ import { randomUUID } from 'crypto';
 import { Type } from '@sinclair/typebox';
 import { Chat, ChatList } from '#chat/chatTypes.ts';
 import { send, sendBadRequest } from '#fastify/index';
+import { LLM } from '#llm/llm.ts';
 import { getLLM } from '#llm/llmFactory.ts';
 import { Claude3_5_Sonnet_Vertex } from '#llm/models/anthropic-vertex.ts';
+import { logger } from '#o11y/logger.ts';
 import { currentUser } from '#user/userService/userContext.ts';
 import { AppFastifyInstance } from '../../app';
 
@@ -58,9 +60,13 @@ export async function chatRoutes(fastify: AppFastifyInstance) {
 				  }
 				: await fastify.chatService.loadChat(chatId);
 
-			// const llm = getLLM(llmId)
-			const llm = getLLM(Claude3_5_Sonnet_Vertex().getId());
-			if (!llm.isConfigured()) return sendBadRequest(reply, 'LLM is not configured');
+			let llm: LLM = getLLM(Claude3_5_Sonnet_Vertex().getId());
+			try {
+				llm = getLLM(llmId);
+			} catch (e) {
+				logger.error(`No LLM for ${llmId}`);
+			}
+			if (!llm.isConfigured()) return sendBadRequest(reply, `LLM ${llm.getId()} is not configured`);
 
 			const titlePromise: Promise<string> | undefined = isNew
 				? llm.generateText(
diff --git a/src/routes/llms/llm-routes.ts b/src/routes/llms/llm-routes.ts
index 86d512f0..59e56b6a 100644
--- a/src/routes/llms/llm-routes.ts
+++ b/src/routes/llms/llm-routes.ts
@@ -1,13 +1,11 @@
 import { send } from '#fastify/index';
-import { LLM_FACTORY, LLM_TYPES, getLLM } from '#llm/llmFactory';
+import { LLM_TYPES, getLLM } from '#llm/llmFactory';
 import { AppFastifyInstance } from '../../app';
 
 const basePath = '/api/llms';
 
 export async function llmRoutes(fastify: AppFastifyInstance) {
 	fastify.get(`${basePath}/list`, async (req, reply) => {
-		console.log(Object.keys(LLM_FACTORY));
-		console.log(Object.values(LLM_TYPES));
 		const configuredLLMs = LLM_TYPES.map((llm) => getLLM(llm.id))
 			.filter((llm) => llm.isConfigured())
 			.map((llm) => ({ id: llm.getId(), name: llm.getDisplayName(), isConfigured: true }));