Skip to content

Commit

Permalink
Add LiveFiles. Add defaultLLMs. Update GitLab.cloneProject. In SWE ag…
Browse files Browse the repository at this point in the history
…ent handle scmFullProjectPath arg not existing
  • Loading branch information
danielcampagnolitg committed Jan 20, 2025
1 parent 165579c commit 805d6b3
Show file tree
Hide file tree
Showing 37 changed files with 212 additions and 196 deletions.
16 changes: 8 additions & 8 deletions src/agent/agentContextTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { User } from '#user/user';

/**
* The difficulty of a LLM generative task. Used to select an appropriate model for the cost vs capability.
* xeasy LLama 8b
* xeasy LLama 8b/Flash 8b
* easy Haiku 3.5/GPT4-mini/Llama 70b/Gemini Flash
* medium Sonnet 3.5/GPT4-o/Llama 405b
* hard Opus 3.5/OpenAI o1
Expand All @@ -22,18 +22,18 @@ export interface AgentCompleted {
}

/**
* agent - waiting for the agent LLM call(s) to generate control loop update
* functions - waiting for the planned function call(s) to complete
* error - the agent control loop has errored
* agent - autonomous agent waiting for the agent LLM call(s) to generate control loop update
* functions - waiting for function call(s) to complete
* error - the agent control loop has errored or force stopped
* hil - deprecated for humanInLoop_agent and humanInLoop_tool
* hitl_threshold - If the agent has reached budget or iteration thresholds. At this point the agent is not executing any LLM/function calls.
* hitl_tool - When a function has request HITL in the function calling part of the control loop
 * hitl_tool - When a function has requested real-time HITL in the function calling part of the control loop
* hitl_feedback - the agent has requested human feedback for a decision. At this point the agent is not executing any LLM/function calls.
* hil - deprecated version of hitl_feedback
* feedback - deprecated version of hitl_feedback
* child_agents - waiting for child agents to complete
* child_agents - stopped waiting for child agents to complete
* completed - the agent has called the completed function.
* shutdown - if the agent has been instructed by the system to pause (e.g. for server shutdown)
* shutdown - if the agent has stopped after being instructed by the system to pause (e.g. for server shutdown)
* timeout - for chat agents when there hasn't been a user input for a configured amount of time
*/
export type AgentRunningState =
Expand All @@ -56,7 +56,7 @@ export type AgentRunningState =
* @returns if the agent has a live execution thread
*/
export function isExecuting(agent: AgentContext): boolean {
return agent.state !== 'completed' && agent.state !== 'feedback' && agent.state !== 'hil' && agent.state !== 'error';
return agent.state === 'workflow' || agent.state === 'agent' || agent.state === 'functions' || agent.state === 'hitl_tool';
}

/**
Expand Down
21 changes: 19 additions & 2 deletions src/agent/agentPromptUtils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { agentContext, getFileSystem } from '#agent/agentContextLocalStorage';
import { LiveFiles } from '#agent/liveFiles';
import { FileSystemService } from '#functions/storage/fileSystemService';
import { FileMetadata, FileStore } from '#functions/storage/filestore';
import { FunctionCallResult } from '#llm/llm';
Expand All @@ -22,7 +23,7 @@ export function buildMemoryPrompt(): string {
* TODO move the string generation into the tool classes
*/
export async function buildToolStatePrompt(): Promise<string> {
return (await buildFileStorePrompt()) + buildFileSystemPrompt();
return (await buildLiveFilesPrompt()) + (await buildFileStorePrompt()) + buildFileSystemPrompt();
}
/**
* @return An XML representation of the FileSystem tool state
Expand All @@ -39,7 +40,7 @@ function buildFileSystemPrompt(): string {
}

/**
* @returnAn XML representation of the FileStore tool if one exists in the agents functions
* @return An XML representation of the FileStore tool if one exists in the agents functions
*/
async function buildFileStorePrompt(): Promise<string> {
const fileStore = agentContext().functions.getFunctionType('filestore') as FileStore;
Expand All @@ -52,6 +53,22 @@ ${JSON.stringify(files)}
`;
}

/**
 * @return An XML representation of the Live Files tool if one exists in the agent's functions
*/
async function buildLiveFilesPrompt(): Promise<string> {
const agent = agentContext();
if (!agent.functions.getFunctionClassNames().includes(LiveFiles.name)) return '';

const liveFiles = agentContext().liveFiles;
if (!liveFiles?.length) return '';

return `\n<live_files>
${await getFileSystem().readFilesAsXml(liveFiles)}
</live_files>
`;
}

/**
* @param maxLength {number} The maximum length of the returned string
* @param fromIndex {number} The index of the function calls history to build from. Defaults from the start of the array.
Expand Down
2 changes: 1 addition & 1 deletion src/agent/agentWorkflowRunner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ export async function runAgentWorkflow(config: RunAgentConfig, workflow: (agent:
context.state = 'completed';
const duration = Date.now() - start;

logger.info(`Completed. Cost $${context.cost.toFixed(2)}. Time: ${formatMillisDuration(duration)}`);
logger.info(`Completed. Cost $${context.cost.toFixed(context.cost > 1 ? 2 : 3)}. Time: ${formatMillisDuration(duration)}`);
} catch (e) {
logger.error(e);
context = agentContext();
Expand Down
22 changes: 12 additions & 10 deletions src/agent/codeGenAgentRunner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import {
} from '#agent/agentFunctions';
import { buildFunctionCallHistoryPrompt, buildMemoryPrompt, buildToolStatePrompt, updateFunctionSchemas } from '#agent/agentPromptUtils';
import { AgentExecution, formatFunctionError, formatFunctionResult } from '#agent/agentRunner';
import { convertJsonToPythonDeclaration, extractPythonCode } from '#agent/codeGenAgentUtils';
import { convertJsonToPythonDeclaration, extractPythonCode, removePythonMarkdownWrapper } from '#agent/codeGenAgentUtils';
import { humanInTheLoop } from '#agent/humanInTheLoop';
import { getServiceName } from '#fastify/trace-init/trace-init';
import { FUNC_SEP, FunctionSchema, getAllFunctionSchemas } from '#functionSchema/functions';
Expand Down Expand Up @@ -144,7 +144,7 @@ export async function runCodeGenAgent(agent: AgentContext): Promise<AgentExecuti
logger.warn(e, 'Error with Codegen agent plan');
// One re-try if the generate fails or the code can't be extracted
agentPlanResponse = await agentLLM.generateText(systemPromptWithFunctions, initialPrompt, {
id: 'Codegen agent plan',
id: 'Codegen agent plan retry',
stopSequences,
temperature: 0.5,
});
Expand All @@ -154,17 +154,19 @@ export async function runCodeGenAgent(agent: AgentContext): Promise<AgentExecuti
agent.state = 'functions';
await agentStateService.save(agent);

// The XML formatted results of the function call(s)
const functionResults: string[] = [];
let pythonScriptResult: any;
let pythonScript = '';

const functionInstances: Record<string, object> = agent.functions.getFunctionInstanceMap();
const schemas: FunctionSchema[] = getAllFunctionSchemas(Object.values(functionInstances));
const funcSchemas: FunctionSchema[] = getAllFunctionSchemas(Object.values(functionInstances));
const jsGlobals = {};
for (const schema of schemas) {
for (const schema of funcSchemas) {
const [className, method] = schema.name.split(FUNC_SEP);
jsGlobals[schema.name] = async (...args) => {
// The system prompt instructs the generated code to use positional arguments.
// If the generated code mistakenly uses named arguments then there will an arg
// which is an object with the property names matching the parameter names. This will cause an error

// Un-proxy any JsProxy objects. https://pyodide.org/en/stable/usage/type-conversions.html
args = args.map((arg) => (typeof arg?.toJs === 'function' ? arg.toJs() : arg));

Expand All @@ -190,10 +192,9 @@ export async function runCodeGenAgent(agent: AgentContext): Promise<AgentExecuti
stdout,
// stdoutSummary: outputSummary, TODO
});
functionResults.push(formatFunctionResult(schema.name, functionResponse));
return functionResponse;
} catch (e) {
functionResults.push(formatFunctionError(schema.name, e));
logger.warn(e, 'Error calling function');

agent.functionCallHistory.push({
function_name: schema.name,
Expand Down Expand Up @@ -252,10 +253,11 @@ main()`.trim();
} catch (e) {
// Attempt to fix Syntax/indentation errors and retry
// Otherwise let execution errors re-throw.
if (e.type === 'IndentationError' || e.type !== 'SyntaxError') {
if (e.type === 'IndentationError' || e.type === 'SyntaxError') {
// Fix the compile issues in the script
const prompt = `${functionsXml}\n<python>\n${pythonScript}</python>\n<error>${e.message}</error>\nPlease adjust/reformat the Python script to fix the issue. Output only the updated code. Do no chat, do not output markdown ticks. Only the updated code.`;
pythonScript = await llms().hard.generateText(prompt, { id: 'Fix python script error' });
pythonScript = removePythonMarkdownWrapper(pythonScript);

// Re-try execution of fixed syntax/indentation error
const result = await pyodide.runPythonAsync(pythonScript, { globals });
Expand Down Expand Up @@ -296,7 +298,7 @@ main()`.trim();
agent.invoking = [];
const currentFunctionCallHistory = buildFunctionCallHistoryPrompt('results', 10000, currentFunctionHistorySize);

currentPrompt = `${oldFunctionCallHistory}${buildMemoryPrompt()}${toolStatePrompt}\n${userRequestXml}\n${agentPlanResponse}\n${currentFunctionCallHistory}\n<script-result>${pythonScriptResult}</script-result>\nReview the results of the scripts and make any observations about the output/errors, then proceed with the response.`;
currentPrompt = `${oldFunctionCallHistory}\n${currentFunctionCallHistory}${buildMemoryPrompt()}${toolStatePrompt}\n${userRequestXml}\n${agentPlanResponse}\n<script-result>${pythonScriptResult}</script-result>\nReview the results of the script and make any observations about the output/errors, then proceed with the response.`;
currentFunctionHistorySize = agent.functionCallHistory.length;
} catch (e) {
span.setStatus({ code: SpanStatusCode.ERROR, message: e.toString() });
Expand Down
15 changes: 14 additions & 1 deletion src/agent/codeGenAgentUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,18 @@ export function extractPythonCode(llmResponse: string): string {

if (!matchXml) throw new Error(`Could not find <python-code></python-code> in the response \n${resultText}`);

return matchXml[1].trim();
const xmlContents = matchXml[1].trim();
return removePythonMarkdownWrapper(xmlContents);
}

/**
 * Sometimes an LLM will wrap the reformatted code in Markdown tags, remove them if present.
* @param code
*/
export function removePythonMarkdownWrapper(code: string): string {
if (code.startsWith('```python') && code.endsWith('```')) {
// Remove the markdown lines
code = code.slice(9, -3).trim();
}
return code;
}
15 changes: 9 additions & 6 deletions src/agent/codegen-agent-system-prompt
Original file line number Diff line number Diff line change
Expand Up @@ -192,15 +192,17 @@ Suggested function(s):
Example_getPage(url: str) -> str:
"""
Get the contents of a web page
url: str The web page URL
url: str The web page URL, beginning with https://
"""

Example_processText(text: str, descriptionOfChanges: str) -> str:
"""
Transforms text given the described changes
text: the input text
descriptionOfChanges: a description of the changes/processing to apply to the text
text: The input text to process
descriptionOfChanges: A detailed description of the changes/processing to apply to the text
Returns the processed text
"""

Example_PublicWeb_getPage is suitable as the URLs are on publicly available documentation and blog pages.
We can retrieve the two pages, and then create a report by processing the combined contents.
</next_step_details>
Expand All @@ -227,7 +229,7 @@ await Agent_requestFeedback("I have collated a report on the new registration pr

# Response format

Your response must be in the following format:
Your response must be in the following format (including the <response></response> tags):

<response>
<expanded_user_request>
Expand All @@ -242,8 +244,8 @@ Instructions:
- Make observations on the current state:
- Analysing the new function-call-results
- Considering the Task Execution Phases for Problem-Solving
- Applying the Reasoning Techniques
- Taking into account the Interpreting User Requests items
- List all of the "Reasoning Techniques" and detail which ones are relevant
- List all of the "Interpreting User Requests" items and identify which ones are relevant
- List the reasoning techniques relevant to the user request and the plan tasks to complete next.
- Rephrase the selected reasoning techniques to be more specific to the next plan tasks.
-->
Expand Down Expand Up @@ -299,6 +301,7 @@ Otherwise return any values to analyse further.
# Do NOT assume anything about the structure of the results from functions. Return values that require further analysis
# The script can return a Dict with any values you want to have available to view/process next. You don't need to do everything here.
# If you do process an object/collection and the result is unexpected (empty or otherwise), then return the original object/collection so it can be inspected in the next step.
# Always use positional arguments when calling functions
# Example:
# Check if the desired content is in memory from a previous step. Result: (None found/Found ...)
# Get the two lists asked for in the next step details
Expand Down
2 changes: 1 addition & 1 deletion src/agent/liveFileFunctions.ts → src/agent/liveFiles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export const LIVE_FILES_REMOVE = 'LiveFiles_removeFiles';
* Functions for the agent to add/remove files which always displays the current file contents in the agent control prompt
*/
@funcClass(__filename)
export class LiveFileFunctions {
export class LiveFiles {
/**
* Add files which will always have their current contents displayed in the <live-files> section (increasing LLM token costs)
* @param {string[]} files the files to always include the current contents of in the prompt
Expand Down
5 changes: 5 additions & 0 deletions src/agent/xmlAgentRunner.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ describe.skip('xmlAgentRunner', () => {
hilBudget: 0,
hilCount: 0,
id: '',
chat: {

Check failure on line 61 in src/agent/xmlAgentRunner.test.ts

View workflow job for this annotation

GitHub Actions / backend

Object literal may only specify known properties, and 'chat' does not exist in type 'User'.
enabledLLMs: {},
defaultLLM: '',
temperature: 1,
},
llmConfig: {},
functionConfig: {},
createdAt: new Date(),
Expand Down
12 changes: 4 additions & 8 deletions src/cli/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,16 @@ import { provideFeedback, resumeCompleted, resumeError, resumeHil, startAgentAnd
import { FileSystemRead } from '#functions/storage/FileSystemRead';
import { Perplexity } from '#functions/web/perplexity';
import { PublicWeb } from '#functions/web/web';
import { ClaudeLLMs } from '#llm/services/anthropic';
import { ClaudeVertexLLMs } from '#llm/services/anthropic-vertex';
import { defaultLLMs } from '#llm/services/defaultLlms';
import { logger } from '#o11y/logger';
import { CodeEditingAgent } from '#swe/codeEditingAgent';
import { SoftwareDeveloperAgent } from '#swe/softwareDeveloperAgent';
import { appContext, initFirestoreApplicationContext } from '../applicationContext';
import { appContext, initApplicationContext } from '../applicationContext';
import { parseProcessArgs, saveAgentId } from './cli';

export async function main() {
let llms = ClaudeLLMs();
if (process.env.GCLOUD_PROJECT) {
await initFirestoreApplicationContext();
llms = ClaudeVertexLLMs();
}
const llms = defaultLLMs();
await initApplicationContext();

let functions: Array<any>;
functions = [FileSystemRead, SoftwareDeveloperAgent, Perplexity, PublicWeb];
Expand Down
5 changes: 2 additions & 3 deletions src/cli/blueberry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@ import { agentContext, agentContextStorage, createContext } from '#agent/agentCo
import { AgentContext } from '#agent/agentContextTypes';
import { Blueberry } from '#llm/multi-agent/blueberry';
import { mockLLMs } from '#llm/services/mock-llm';
import { initFirestoreApplicationContext } from '../applicationContext';
import { initApplicationContext } from '../applicationContext';
import { parseProcessArgs, saveAgentId } from './cli';

// Usage:
// npm run blueberry

async function main() {
if (process.env.GCLOUD_PROJECT) await initFirestoreApplicationContext();

await initApplicationContext();
const { initialPrompt } = parseProcessArgs();

const context: AgentContext = createContext({
Expand Down
12 changes: 4 additions & 8 deletions src/cli/code.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,14 @@ import { RunAgentConfig } from '#agent/agentRunner';
import { runAgentWorkflow } from '#agent/agentWorkflowRunner';
import { shutdownTrace } from '#fastify/trace-init/trace-init';
import { GitLab } from '#functions/scm/gitlab';
import { ClaudeLLMs } from '#llm/services/anthropic';
import { ClaudeVertexLLMs } from '#llm/services/anthropic-vertex';
import { defaultLLMs } from '#llm/services/defaultLlms';
import { CodeEditingAgent } from '#swe/codeEditingAgent';
import { initFirestoreApplicationContext } from '../applicationContext';
import { initApplicationContext } from '../applicationContext';
import { parseProcessArgs, saveAgentId } from './cli';

async function main() {
let agentLlms: AgentLLMs = ClaudeLLMs();
if (process.env.GCLOUD_PROJECT) {
await initFirestoreApplicationContext();
agentLlms = ClaudeVertexLLMs();
}
const agentLlms: AgentLLMs = defaultLLMs();
await initApplicationContext();

const { initialPrompt, resumeAgentId } = parseProcessArgs();

Expand Down
19 changes: 6 additions & 13 deletions src/cli/docs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,16 @@ import { AgentLLMs } from '#agent/agentContextTypes';
import { RunAgentConfig } from '#agent/agentRunner';
import { runAgentWorkflow } from '#agent/agentWorkflowRunner';
import { shutdownTrace } from '#fastify/trace-init/trace-init';
import { ClaudeLLMs } from '#llm/services/anthropic';
import { ClaudeVertexLLMs } from '#llm/services/anthropic-vertex';
import { Gemini_1_5_Flash } from '#llm/services/vertexai';
import { buildSummaryDocs } from '#swe/documentationBuilder';
import { defaultLLMs } from '#llm/services/defaultLlms';
import { detectProjectInfo } from '#swe/projectDetection';
import { buildIndexDocs } from '#swe/repoIndexDocBuilder';
import { generateRepositoryMaps } from '#swe/repositoryMap';
import { initFirestoreApplicationContext } from '../applicationContext';
import { initApplicationContext } from '../applicationContext';
import { parseProcessArgs, saveAgentId } from './cli';

async function main() {
let agentLlms: AgentLLMs = ClaudeLLMs();
if (process.env.GCLOUD_PROJECT) {
await initFirestoreApplicationContext();
agentLlms = ClaudeVertexLLMs();
}
agentLlms.easy = Gemini_1_5_Flash();
const agentLlms: AgentLLMs = defaultLLMs();
await initApplicationContext();

const { initialPrompt, resumeAgentId } = parseProcessArgs();

Expand All @@ -41,12 +35,11 @@ async function main() {
console.log(`languageProjectMap ${maps.languageProjectMap.tokens}`);
console.log(`fileSystemTree ${maps.fileSystemTree.tokens}`);
console.log(`folderSystemTreeWithSummaries ${maps.folderSystemTreeWithSummaries.tokens}`);
console.log(`fileSystemTreeWithSummaries ${maps.fileSystemTreeWithSummaries.tokens}`);

if (console.log) return;

const agentId = await runAgentWorkflow(config, async () => {
await buildSummaryDocs();
await buildIndexDocs();
});

if (agentId) {
Expand Down
4 changes: 2 additions & 2 deletions src/cli/easy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ import { AgentContext } from '#agent/agentContextTypes';
import { Blueberry } from '#llm/multi-agent/blueberry';
import { mockLLMs } from '#llm/services/mock-llm';
import { Gemini_1_5_Flash } from '#llm/services/vertexai';
import { initFirestoreApplicationContext } from '../applicationContext';
import { initApplicationContext, initFirestoreApplicationContext } from '../applicationContext';
import { parseProcessArgs } from './cli';

// See https://arxiv.org/html/2405.19616v1 https://github.com/autogenai/easy-problems-that-llms-get-wrong
// Usage:
// npm run easy

async function main() {
if (process.env.GCLOUD_PROJECT) await initFirestoreApplicationContext();
await initApplicationContext();

const context: AgentContext = createContext({
initialPrompt: '',
Expand Down
Loading

0 comments on commit 805d6b3

Please sign in to comment.