From 9361119636c40871ae129c1e5bd984b26097c069 Mon Sep 17 00:00:00 2001 From: Daniel Campagnoli Date: Tue, 3 Sep 2024 09:53:39 +0800 Subject: [PATCH] Add files for a new autonomous agent --- src/agent/caching-python-agent-system-prompt | 310 ++++++++++++++++++ src/agent/cachingPythonAgentRunner.ts | 313 +++++++++++++++++++ 2 files changed, 623 insertions(+) create mode 100644 src/agent/caching-python-agent-system-prompt create mode 100644 src/agent/cachingPythonAgentRunner.ts diff --git a/src/agent/caching-python-agent-system-prompt b/src/agent/caching-python-agent-system-prompt new file mode 100644 index 00000000..3e831092 --- /dev/null +++ b/src/agent/caching-python-agent-system-prompt @@ -0,0 +1,310 @@ +You are an advanced autonomous AI agent who will complete tasks requested by the user to the best of your ability, ethics and intelligence. + +You will be given a user request in the following format: + +{{USER_REQUEST}} + + +# Task Execution Phases for Problem-Solving + +Apply these phases dynamically as you approach tasks, moving forward or backward as needed: + +- Problem Definition: Clearly articulate the issue or task at hand +- Requirements Gathering: Collect and document all necessary specifications +- Discovery and Research: Explore existing solutions and gather relevant information +- Ideation: Generate potential solutions through brainstorming +- Planning: Outline steps, allocate resources +- Assumption Verification: Identify and validate any assumptions made +- Design: Create a detailed blueprint or model of the solution +- Implementation: Execute the plan and create the actual solution +- Testing: Check if the solution works as intended +- Validation: Ensure the solution meets the original requirements +- Iteration: Refine and improve based on feedback and results to meet validation + +# Reasoning Techniques + +Apply these techniques throughout your problem-solving process, adapting and combining them as the task evolves: + +### Problem Analysis +- 
Articulate the core issue and its context +- Identify key factors, dependencies, and assumptions +- Break down complex problems into manageable components +- Clarify all context-dependent language in the problem description by rewriting relative or ambiguous descriptors with absolute, specific terms. + +### Solution Generation +- List potential solutions, including established and novel approaches exploring unconventional angles +- Adapt known solutions from related domains and best practices +- Generate the most optimal plan from the options + +### Critical Evaluation +- Analyze proposed solutions from multiple perspectives +- Evaluate evidence and assumptions. +- Establish clear, measurable goals + +### Data Analysis +- Identify and collect relevant data +- Interpret results in the problem's context + +### Systems Thinking +- Consider problems within their larger system context +- Analyze interactions between components + +### Planning and Implementation +- Develop step-by-step plans with clear, actionable tasks, applying critical path analysis +- Implement solutions methodically, tracking progress + +### Reflection +- Assess the recent outputs/errors in the function call history and the memory contents +- Adapt improvement strategies based on outcomes and new insights + +# Functions + +To complete the task, you will have access to the following functions: + + + +The FileSystem is an interface to the temporary local computer filesystem. + +The FileStore should be used for storage of large content (500 words or more) related to the request to reduce memory size. + +# Instructions + +## Overall Approach +- Apply the Task Execution Phases and Reasoning Techniques to transform the user request into a hierarchical plan which can be completed by the functions. +- Focus on completing the request efficiently. +- Continuously reassess and update your approach as new information becomes available. 
+ +## Interpreting User Requests + +- Approach user inputs with flexibility, recognizing that information may be inexact, contextual, or assume shared knowledge. +- Apply the problem analysis technique of clarifying context-dependent language by rewriting relative or ambiguous descriptors with absolute, specific terms. +- For all types of identifiers, names, descriptors, or instructions: + - Consider case-insensitive matches unless precision is explicitly required. + - Explore the broader context or structure where the item or concept might exist. + - Be prepared to search for close matches, variations, or related concepts. + +- When faced with potentially imprecise or contextual information, consider: + 1. Date and Time: Clarify specific dates, times, or periods (e.g., "next Friday", "the 60s"). + 2. Names and Titles: Verify full names, official titles, or specific roles. + 3. Measurements and Quantities: Confirm units, systems of measurement, or exact numbers. + 4. Locations: Specify exact places, addresses, or coordinate systems. + 5. Technical Terms: Ensure shared understanding of domain-specific language. + 6. Version Numbers: Clarify exact versions, release dates, or update status. + 7. Cultural or Temporal References: Explain or contextualize references that may not be universally understood. + +- When interpreting user requests: + - Utilize available context to infer the most likely intent. + - Consider multiple interpretations if the intent is unclear. + - Clearly state any assumptions made in your interpretation. + +- Use the Agent_requestFeedback function when: + - Clarification is necessary, or + - To verify assumptions when confidence is low + +Example scenario: +If a user requests an action involving a specific item (e.g., "read the file agentcontext.ts"): +- Don't assume the exact naming or location. 
+- Look for variations (e.g., "agentContext.ts", "AgentContext.ts") and extend the search to relevant areas (e.g., current directory and subdirectories).
+- If there is a single match, then assume it's the one, and verify yourself if possible.
+- If there are zero matches when one or more is definitely expected, then request feedback.
+- If there are multiple matches, then return the values to analyse yourself first. Select one if it's clear from the overall context, otherwise request feedback.
+
+Remember: The goal is to transform ambiguous or contextual information into clear, specific, and actionable instructions or queries.
+
+## Key Steps
+
+1. Analyze the overall user request applying the reasoning techniques.
+2. Select relevant reasoning techniques for the immediate tasks to be completed.
+3. Rephrase the selected reasoning techniques to be more specific to the immediate tasks.
+4. Create a hierarchical plan to progress the user request task, utilising the rephrased reasoning techniques.
+5. Implement the plan by calling appropriate functions.
+6. Store crucial information from function results in memory for future use.
+
+## Iterative Process
+As you progress through the task, you may need to:
+
+1. Update the task understanding:
+   Output an updated task understanding that expands on the original user_request, incorporating new information and applying relevant reasoning techniques.
+
+2. Refine the plan:
+   Produce an updated plan keeping all completed/attempted items, and updating the anticipated future items based on new information and previous function call results/errors.
+   You may compress descriptions of successfully completed item hierarchies while keeping key details.
+
+3. Manage memory:
+   Call Agent_addMemory to store key information (e.g., identifiers, summaries) required for future steps or function calls.
+   Call Agent_deleteMemory to delete entries no longer required for future steps or completing the user request.
+ Complex analysis of data can be performed in subsequent iterations if necessary. + +4. Seek clarification: + Call Agent_requestFeedback when facing uncertainty or needing additional information to proceed. + +## Important Considerations +- Information retention: After updating the plan, you won't have access to complete function results, so include all necessary information in the plan or memory. +- Continuous application of reasoning: Apply relevant reasoning techniques at each step of the process. +- Step-by-step processing: Avoid trying to complete complex tasks in a single step. Instead, focus on retrieving necessary data and returning it for further analysis in subsequent steps. + +# Example request/response + + +USER: + +GitHub is changing the process for registration of CI/CD runner which is described at https://docs.github.com/docs/new-runner-token-design and https://blog.github.com/new-runner-registration +Our runner registration is currently in the VM metadata startup script in the compute instance terraform. + +Research the new registration process and provide an outline of the new process. +Provide a design proposal of what changes we will need to make (dont do any implementation) + + +ASSISTANT: + + + + +Relevant reasoning: +- Articulate the core issue and its context +- Identify and collect relevant data +- Interpret results in the problem's context +- Adapt known solutions from related domains and best practices +Rephrased reasoning: +- We need to generate a report on how to migrate from the VM metadata startup script process to the new process. 
+- Collect the content from the provided URLs +- Interpret the context in the context of migrating to the new process +- Research online for known solutions to the migration process + + + +🔄 Understand what the new registration process is: + 🔄 Get the webpage at https://docs.github.com/docs/new-runner-token-design + 🔄 Get the webpage at https://blog.github.com/new-runner-registration + 🔄 Generate a report of the registration process: + 🔄 Create a combined report of the new registration process combining the information from the two pages, saving to memory +- Compare the current process of the VM metadata startup script to the new process: + - Research online for information or case studies of implementing the new process + - Ask for feedback if more details are required on the current process. + - Write the comparison details to memory +- Propose a design of the changes required for the new process: + - Complete the task with the proposed design synthesized from the report and comparison + + + +Read the webpages at https://docs.github.com/docs/new-runner-token-design and https://blog.github.com/new-runner-registration using PublicWeb.getPage, as we have public https URLs to read. +Suggested function(s): +Example_getPage(url: str) -> str: + """ + Get the contents of a web page + url: str The web page URL + """ +Example_processText(text: str, descriptionOfChanges: str) -> str: + """ + Transforms text given the described changes + text: the input text + descriptionOfChanges: a description of the changes/processing to apply to the text + Returns the processed text + """ +Example_PublicWeb_getPage is suitable as the URLs are on publicly available documentation and blog pages. +We can retrieve the two pages, and then create a report by processing the combined contents. + + + +# Do not use Example_xxx functions in your code +# Check if the content is in memory from a previous step. 
Result: None found +tokenDesignPage: str = await Example_getPage("https://docs.github.com/docs/new-runner-token-design") +runnerRegistrationPage: str = await Example_getPage("https://blog.github.com/new-runner-registration") +webPages: str = f'${tokenDesignPage}${runnerRegistrationPage}' +newProcessReport: str = await Example_processText(webPages, "Provide a detailed report of the new token registration process") +# Store the work we have done so far +await Agent_setMemory("new_registration_process", newProcessReport) +current_process_knowledge = f''' + +''' +await Agent_setMemory("current_process_knowledge", current_process_knowledge) +# The current process knowledge is minimal, request feedback for more +await Agent_requestFeedback("I have collated a report on the new registration process. My understanding of the current process is limited. Could you provide more details?") + + + + + +# Response format + +Your response must be in the following format: + + + + + + + + + + + + + + + + + +# Instructions: +# The built-in packages json, re, math and datetime are already imported in the script. Including additional imports is forbidden. +# await on every call to functions defined previously in the block. +# Keep the code as simple as possible. Do not manipulate the function return values unless absolutely necessary. Prefer returning the values returned from the functions directly. +# Add comments with your reasoning. +# Add print calls throughout your code +# If defining new variables then add typings from the value being assigned. +# If you save a variable to memory then do not return it. +# You don't need to re-save existing memory values +# Always code defensively, checking values are the type and format as expected +# For any operation involving user-specified items, refer to 'Interpreting User Requests' items to code defensively, ensuring flexible and context-aware handling. 
+# The script should return a Dict with any values you want to have available to view/process next. You don't need to do everything here.
+# When calling Agent_completed or Agent_requestFeedback you must directly return its result. (Ensure any required information has already been stored to memory)
+# This script may be running on repositories where the source code files are TypeScript, Java, Terraform, PHP, C#, C++, Ruby etc. Do not assume Python files.
+# The files projectInfo.json and CONVENTIONS.md may exist to tell you more about a code project.
+# You can directly analyze contents in memory. If you need to analyze unstructured data then include it in a return Dict value to view in the next step.
+# All maths must be done in Python code
+# Do NOT assume anything about the structure of the results from functions. Return values that require further analysis
+# Example:
+# Check if the desired content is in memory from a previous step. Result: (None found/Found ...)
+# Get the two lists asked for in the next step details
+list1str: str = await Agent_getMemory("list1-json")
+list1: List[str] = json.loads(list1str)
+print(f"list1.length {len(list1)}")
+# list1 is unchanged so do not re-save to memory
+result2: List[str] = await FunctionClass2_returnStringList()
+print(f"result2.length {len(result2)}")
+# Do not assume the structure/style of the values, return the values for further analysis
+return { "list1": list1, "list2": result2 }

diff --git a/src/agent/cachingPythonAgentRunner.ts b/src/agent/cachingPythonAgentRunner.ts
new file mode 100644
index 00000000..0a6dbc7e
--- /dev/null
+++ b/src/agent/cachingPythonAgentRunner.ts
@@ -0,0 +1,313 @@
+import { readFileSync } from 'fs';
+import { Span, SpanStatusCode } from '@opentelemetry/api';
+import { PyodideInterface, loadPyodide } from 'pyodide';
+import { AGENT_COMPLETED_NAME, AGENT_REQUEST_FEEDBACK, AGENT_SAVE_MEMORY_CONTENT_PARAM_NAME } from '#agent/agentFunctions';
+import { buildFunctionCallHistoryPrompt,
buildMemoryPrompt, buildToolStatePrompt, updateFunctionSchemas } from '#agent/agentPromptUtils'; +import { AgentExecution, formatFunctionError, formatFunctionResult, notificationMessage } from '#agent/agentRunner'; +import { agentHumanInTheLoop, notifySupervisor } from '#agent/humanInTheLoop'; +import { convertJsonToPythonDeclaration, extractPythonCode } from '#agent/pythonAgentUtils'; +import { getServiceName } from '#fastify/trace-init/trace-init'; +import { FUNC_SEP, FunctionParameter, FunctionSchema, getAllFunctionSchemas } from '#functionSchema/functions'; +import { logger } from '#o11y/logger'; +import { withActiveSpan } from '#o11y/trace'; +import { envVar } from '#utils/env-var'; +import { errorToString } from '#utils/errors'; +import { appContext } from '../app'; +import { AgentContext, agentContext, agentContextStorage, llms } from './agentContext'; + +const stopSequences = ['']; + +export const DYNAMIC_AGENT_SPAN = 'DynamicAgent'; + +let pyodide: PyodideInterface; + +/* + * The aim of the cachingPython agent compared to the pythonAgent is to utilise the context caching in Claude. + * This will require using the new methods on the LLM interface which have a message history. This message history + * will be treated in some ways like a stack. 
+ * + * Message stack: + * system - system prompt + * system - function definitions + * user - user request + * assistant - memory + * user - function call history + * assistant - response + * ----------------------- + * user - function call results + * assistant - observations/actions/memory ops + * + */ +/** + * + * @param agent + */ +export async function runCachingPythonAgent(agent: AgentContext): Promise { + if (!pyodide) pyodide = await loadPyodide(); + + // Hot reload (TODO only when not deployed) + const pythonSystemPrompt = readFileSync('src/agent/caching-python-agent-system-prompt').toString(); + + const agentStateService = appContext().agentStateService; + agent.state = 'agent'; + + agentContextStorage.enterWith(agent); + + const agentLLM = llms().hard; + + const userRequestXml = `\n${agent.userPrompt}\n`; + let currentPrompt = agent.inputPrompt; + // logger.info(`userRequestXml ${userRequestXml}`) + logger.info(`currentPrompt ${currentPrompt}`); + + // Human in the loop settings + // How often do we require human input to avoid misguided actions and wasting money + let hilBudget = agent.hilBudget; + const hilCount = agent.hilCount; + + // Default to $2 budget to avoid accidents + if (!hilCount && !hilBudget) { + logger.info('Default Human in the Loop budget to $2'); + hilBudget = 2; + } + + let countSinceHil = 0; + + await agentStateService.save(agent); + + const execution: Promise = withActiveSpan(agent.name, async (span: Span) => { + agent.traceId = span.spanContext().traceId; + span.setAttributes({ + initialPrompt: agent.inputPrompt, + 'service.name': getServiceName(), + agentId: agent.agentId, + executionId: agent.executionId, + parentId: agent.parentAgentId, + functions: agent.functions.getFunctionClassNames(), + }); + + let functionErrorCount = 0; + + let currentFunctionHistorySize = agent.functionCallHistory.length; + + let shouldContinue = true; + while (shouldContinue) { + shouldContinue = await withActiveSpan(DYNAMIC_AGENT_SPAN, async (span) 
=> { + agent.callStack = []; + // Might need to reload the agent for dynamic updating of the tools + const functionsXml = convertJsonToPythonDeclaration(getAllFunctionSchemas(agent.functions.getFunctionInstances())); + const systemPromptWithFunctions = updateFunctionSchemas(pythonSystemPrompt, functionsXml); + + let completed = false; + let requestFeedback = false; + const anyFunctionCallErrors = false; + let controlError = false; + try { + if (hilCount && countSinceHil === hilCount) { + await agentHumanInTheLoop(`Agent control loop has performed ${hilCount} iterations`); + countSinceHil = 0; + } + countSinceHil++; + + logger.debug(`Budget remaining $${agent.budgetRemaining.toFixed(2)}. Total cost $${agentContextStorage.getStore().cost.toFixed(2)}`); + if (hilBudget && agent.budgetRemaining <= 0) { + // HITL happens once budget is exceeded, which may be more than the allocated budget + const increase = agent.hilBudget - agent.budgetRemaining; + await agentHumanInTheLoop(`Agent cost has increased by USD\$${increase.toFixed(2)}. Increase budget by $${agent.hilBudget}`); + agent.budgetRemaining = agent.hilBudget; + } + + const toolStatePrompt = await buildToolStatePrompt(); + + // If the last function was requestFeedback then we'll remove it from function history add it as function results + let historyToIndex = agent.functionCallHistory.length ? 
agent.functionCallHistory.length - 1 : 0; + let requestFeedbackCallResult = ''; + if (agent.functionCallHistory.length && agent.functionCallHistory.at(-1).function_name === AGENT_REQUEST_FEEDBACK) { + historyToIndex--; + requestFeedbackCallResult = buildFunctionCallHistoryPrompt('results', 10000, historyToIndex + 1, historyToIndex + 2); + } + const oldFunctionCallHistory = buildFunctionCallHistoryPrompt('history', 10000, 0, historyToIndex); + + const isNewAgent = agent.iterations === 0 && agent.functionCallHistory.length === 0; + // For the initial prompt we create the empty memory, functional calls and default tool state content. Subsequent iterations already have it + const initialPrompt = isNewAgent + ? oldFunctionCallHistory + buildMemoryPrompt() + toolStatePrompt + currentPrompt + : currentPrompt + requestFeedbackCallResult; + + const agentPlanResponse: string = await agentLLM.generateText(initialPrompt, systemPromptWithFunctions, { + id: 'dynamicAgentPlan', + stopSequences, + temperature: 0.5, + }); + + const llmPythonCode = extractPythonCode(agentPlanResponse); + + agent.state = 'functions'; + await agentStateService.save(agent); + + // The XML formatted results of the function call(s) + const functionResults: string[] = []; + let pythonScriptResult: any; + let pythonScript = ''; + + const functionInstances = agent.functions.getFunctionInstanceMap(); + const schemas: FunctionSchema[] = getAllFunctionSchemas(Object.values(functionInstances)); + const jsGlobals = {}; + for (const schema of schemas) { + const [className, method] = schema.name.split(FUNC_SEP); + jsGlobals[schema.name] = async (...args) => { + // Convert arg array to parameters name/value map + const parameters: { [key: string]: any } = {}; + for (let index = 0; index < args.length; index++) parameters[schema.parameters[index].name] = args[index]; + + try { + const functionResponse = await functionInstances[className][method](...args); + // To minimise the function call history size becoming too 
large (i.e. expensive) + // we'll create a summary for responses which are quite long + // const outputSummary = await summariseLongFunctionOutput(functionResponse) + + // Don't need to duplicate the content in the function call history + // TODO Would be nice to save over-written memory keys for history/debugging + let stdout = JSON.stringify(functionResponse); + if (className === 'Agent' && method === 'saveMemory') parameters[AGENT_SAVE_MEMORY_CONTENT_PARAM_NAME] = '(See entry)'; + if (className === 'Agent' && method === 'getMemory') stdout = '(See entry)'; + + agent.functionCallHistory.push({ + function_name: schema.name, + parameters, + stdout, + // stdoutSummary: outputSummary, TODO + }); + functionResults.push(formatFunctionResult(schema.name, functionResponse)); + return functionResponse; + } catch (e) { + functionResults.push(formatFunctionError(schema.name, e)); + + agent.functionCallHistory.push({ + function_name: schema.name, + parameters, + stderr: errorToString(e, false), + // stderrSummary: outputSummary, TODO + }); + throw e; + } + }; + } + const globals = pyodide.toPy(jsGlobals); + + pyodide.setStdout({ + batched: (output) => { + logger.info(`Script stdout: ${JSON.stringify(output)}`); + }, + }); + pyodide.setStderr({ + batched: (output) => { + logger.info(`Script stderr: ${JSON.stringify(output)}`); + }, + }); + logger.info(`llmPythonCode: ${llmPythonCode}`); + const allowedImports = ['json', 're', 'math', 'datetime']; + // Add the imports from the allowed packages being used in the script + pythonScript = allowedImports + .filter((pkg) => llmPythonCode.includes(`${pkg}.`)) + .map((pkg) => `import ${pkg}\n`) + .join(); + logger.info(`Allowed imports: ${pythonScript}`); + pythonScript += ` +from typing import Any, List, Dict, Tuple, Optional, Union + +async def main(): +${llmPythonCode + .split('\n') + .map((line) => ` ${line}`) + .join('\n')} + +str(await main())`.trim(); + + try { + try { + // Initial execution attempt + const result = await 
pyodide.runPythonAsync(pythonScript, { globals }); + pythonScriptResult = result?.toJs ? result.toJs() : result; + pythonScriptResult = pythonScriptResult?.toString ? pythonScriptResult.toString() : pythonScriptResult; + logger.info(pythonScriptResult, 'Script result'); + if (result?.destroy) result.destroy(); + } catch (e) { + // Attempt to fix Syntax/indentation errors and retry + // Otherwise let execution errors re-throw. + if (e.type !== 'IndentationError' && e.type !== 'SyntaxError') throw e; + + // Fix the compile issues in the script + const prompt = `${functionsXml}\n\n${pythonScript}\n${e.message}\nPlease adjust/reformat the Python script to fix the issue. Output only the updated code. Do no chat, do not output markdown ticks. Only the updated code.`; + pythonScript = await llms().hard.generateText(prompt, null, { id: 'Fix python script error' }); + + // Re-try execution of fixed syntax/indentation error + const result = await pyodide.runPythonAsync(pythonScript, { globals }); + pythonScriptResult = result?.toJs ? result.toJs() : result; + pythonScriptResult = pythonScriptResult?.toString ? 
pythonScriptResult.toString() : pythonScriptResult; + + if (result?.destroy) result.destroy(); + logger.info(pythonScriptResult, 'Script result'); + } + + const lastFunctionCall = agent.functionCallHistory[agent.functionCallHistory.length - 1]; + + // Should force completed/requestFeedback to exit the script - throw a particular Error class + if (lastFunctionCall.function_name === AGENT_COMPLETED_NAME) { + logger.info('Task completed'); + agent.state = 'completed'; + completed = true; + } else if (lastFunctionCall.function_name === AGENT_REQUEST_FEEDBACK) { + logger.info('Feedback requested'); + agent.state = 'feedback'; + requestFeedback = true; + } else { + if (!anyFunctionCallErrors && !completed && !requestFeedback) agent.state = 'agent'; + } + } catch (e) { + logger.info(`Caught function error ${e.message}`); + functionErrorCount++; + } + // Function invocations are complete + // span.setAttribute('functionCalls', pythonCode.map((functionCall) => functionCall.function_name).join(', ')); + + // The agent should store important values in memory + // functionResults + + // This section is duplicated in the provideFeedback function + agent.invoking = []; + const currentFunctionCallHistory = buildFunctionCallHistoryPrompt('results', 10000, currentFunctionHistorySize); + + currentPrompt = `${oldFunctionCallHistory}${buildMemoryPrompt()}${toolStatePrompt}\n${userRequestXml}\n${agentPlanResponse}\n${currentFunctionCallHistory}\n${pythonScriptResult}\nReview the results of the scripts and make any observations about the output/errors, then proceed with the response.`; + currentFunctionHistorySize = agent.functionCallHistory.length; + } catch (e) { + span.setStatus({ code: SpanStatusCode.ERROR, message: e.toString() }); + logger.error(e, 'Control loop error'); + controlError = true; + agent.state = 'error'; + agent.error = errorToString(e); + } finally { + agent.inputPrompt = currentPrompt; + agent.callStack = []; + agent.iterations++; + await 
agentStateService.save(agent); + } + // return if the control loop should continue + return !(completed || requestFeedback || anyFunctionCallErrors || controlError); + }); + } + + // Send notification message + const uiUrl = envVar('UI_URL'); + let message = notificationMessage(agent); + message += `\n${uiUrl}/agent/${agent.agentId}`; + logger.info(message); + + try { + await notifySupervisor(agent, message); + } catch (e) { + logger.warn(e`Failed to send supervisor notification message ${message}`); + } + }); + return { agentId: agent.agentId, execution }; +}