diff --git a/examples/appworld/run_appworld_agent.py b/examples/appworld/run_appworld_agent.py index 5602359f7..f84c06f3b 100644 --- a/examples/appworld/run_appworld_agent.py +++ b/examples/appworld/run_appworld_agent.py @@ -24,18 +24,55 @@ async def main(num_tasks=10, max_turns=40, split="dev"): os.environ["TOKENIZERS_PARALLELISM"] = "true" # Check API key - if not os.getenv("OPENAI_API_KEY"): - print("No OPENAI_API_KEY") - return + # if not os.getenv("OPENAI_API_KEY"): + # print("No OPENAI_API_KEY") + # return + + # n_parallel_agents = 4 + + # model_name = "gpt-4o-mini" + # # Use a tokenizer with chat template (only for formatting messages and calculating token counts in the engine) + # # Qwen2-0.5B is small and fast to download + # tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct") + + # sampling_params = {"temperature": 0.6, "top_p": 0.95, "model": model_name} + # agent_args = {} + # env_args = {"max_turns": max_turns} + + # # Create engine + # engine = AgentExecutionEngine( + # agent_class=AppWorldReactAgent, + # agent_args=agent_args, + # env_class=AppWorldEnv, + # env_args=env_args, + # engine_name="openai", + # tokenizer=tokenizer, + # sampling_params=sampling_params, + # rollout_engine_args={"base_url": "https://api.openai.com/v1", "api_key": os.getenv("OPENAI_API_KEY")}, + # n_parallel_agents=n_parallel_agents, + # max_response_length=16384, + # max_prompt_length=4096, + # max_steps=max_turns, + # ) + + if not os.getenv("SAMBANOVA_API_KEY"): + print("No SAMBANOVA_API_KEY") + exit(1) n_parallel_agents = 4 - model_name = "gpt-4o-mini" - # Use a tokenizer with chat template (only for formatting messages and calculating token counts in the engine) - # Qwen2-0.5B is small and fast to download + # Use SAMBANOVA_API_KEY's latest Llama 3.3 70B Turbo instruct model + model_name = "Meta-Llama-3.3-70B-Instruct" + + # Tokenizer can remain lightweight just for formatting / token counting tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct") - sampling_params = {"temperature": 0.6, "top_p": 0.95, "model": model_name} + sampling_params = { + "temperature": 0.6, + "top_p": 0.95, + "model": model_name, + } + agent_args = {} env_args = {"max_turns": max_turns} @@ -48,10 +85,13 @@ async def main(num_tasks=10, max_turns=40, split="dev"): engine_name="openai", tokenizer=tokenizer, sampling_params=sampling_params, - rollout_engine_args={"base_url": "https://api.openai.com/v1", "api_key": os.getenv("OPENAI_API_KEY")}, + rollout_engine_args={ + "base_url": "https://api.sambanova.ai/v1", + "api_key": os.getenv("SAMBANOVA_API_KEY"), + }, n_parallel_agents=n_parallel_agents, - max_response_length=16384, - max_prompt_length=4096, + max_response_length=40000, + max_prompt_length=80000, max_steps=max_turns, ) @@ -127,9 +167,9 @@ def load_appworld_official_tasks(split="dev", num_tasks=10): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run AppWorld Agent with rLLM", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("-n", "--num-tasks", type=int, default=10, help="Number of tasks to run (use -1 for all tasks)") + parser.add_argument("-n", "--num-tasks", type=int, default=1, help="Number of tasks to run (use -1 for all tasks)") parser.add_argument("-t", "--max-turns", type=int, default=40, help="Maximum number of turns per task") - parser.add_argument("-s", "--split", type=str, default="dev", choices=["train", "dev", "test_normal", "test_challenge"], help="Which split to use") + parser.add_argument("-s", "--split", type=str, default="test_normal", choices=["train", "dev", "test_normal", "test_challenge"], help="Which split to use") args = parser.parse_args() diff --git a/rllm/agents/appworld_react_agents.py b/rllm/agents/appworld_react_agents.py index a1eae9d70..49eba24cd 100644 --- a/rllm/agents/appworld_react_agents.py +++ b/rllm/agents/appworld_react_agents.py @@ -2,6 +2,7 @@ from jinja2 import Template +import re from rllm.agents.agent import Action, BaseAgent, Step, Trajectory # logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(filename)s:%(lineno)d] %(message)s") @@ -22,7 +23,7 @@ class AppWorldReactAgent(BaseAgent): 5. Environment executes the code and returns the result """ - SYSTEM_PROMPT: str = """USER: + REACT_PROMPT: str = """USER: I am your supervisor and you are a super intelligent AI Assistant whose job is to achieve my day-to-day tasks completely autonomously. To do this, you will need to interact with app/s (e.g., spotify, venmo etc) using their associated APIs on my behalf. For this you will undertake a *multi-step conversation* using a python REPL environment. That is, you will write the python code and the environment will execute it and show you the result, based on which, you will write python code for the next step and so on, until you've achieved the goal. This environment will let you interact with app/s using their associated APIs on my behalf. @@ -433,6 +434,9 @@ def update_from_model(self, response: str, **kwargs) -> Action: Returns: Action: Action (string) containing the Python code to execute """ + # import pdb + # pdb.set_trace() + # Extract the Python code from the response python_code = self._extract_code_from_response(response) # Append assistant message to history @@ -471,11 +475,29 @@ def _initialize_from_task(self, observation: dict, **kwargs): app_descriptions = "[List of available apps will be shown here]" # Format the system prompt with user info and task - template = Template(self.SYSTEM_PROMPT) - system_prompt = template.render(main_user=self.user_info, app_descriptions=app_descriptions, input_str=self.task_instruction) + template = Template(self.REACT_PROMPT) + react_prompt = template.render(main_user=self.user_info, app_descriptions=app_descriptions, input_str=self.task_instruction) # Set the system message - self.messages = [{"role": "system", "content": system_prompt}] + self.messages = self.text_to_messages(react_prompt) + + def text_to_messages(self, input_str: str) -> list[dict]: + messages_json = [] + last_start = 0 + for m in re.finditer("(USER|ASSISTANT|SYSTEM):\n", input_str, flags=re.IGNORECASE): + last_end = m.span()[0] + if len(messages_json) == 0: + if last_end != 0: + raise ValueError( + f"Start of the prompt has no assigned role: {input_str[:last_end]}" + ) + else: + messages_json[-1]["content"] = input_str[last_start:last_end] + role = m.group(1).lower() + messages_json.append({"role": role, "content": None}) + last_start = m.span()[1] + messages_json[-1]["content"] = input_str[last_start:] + return messages_json def _format_execution_result(self, observation: dict) -> str: """Format code execution result as user message.""" diff --git a/rllm/engine/agent_execution_engine.py b/rllm/engine/agent_execution_engine.py index 9ac1f0646..18a06723b 100644 --- a/rllm/engine/agent_execution_engine.py +++ b/rllm/engine/agent_execution_engine.py @@ -231,7 +231,7 @@ async def run_agent_trajectory_async(self, idx, application_id, seed=0, mode="Te agent = self.agents[idx] env = self.envs[idx] # env_id = env.env_id - + termination_reason = None prompt_token_len = 0 prompt_tokens = [] diff --git a/rllm/environments/appworld/appworld_env.py b/rllm/environments/appworld/appworld_env.py index f35519f56..ecb441cdb 100644 --- a/rllm/environments/appworld/appworld_env.py +++ b/rllm/environments/appworld/appworld_env.py @@ -1,8 +1,8 @@ +import json import logging import threading from appworld import AppWorld as _AppWorld - from rllm.environments.base.base_env import BaseEnv logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(filename)s:%(lineno)d] %(message)s") @@ -97,7 +97,13 @@ def reset(self): # Default user info if not available user_info = {"first_name": "User", "last_name": "Test", "email": "user@example.com", "phone_number": "+1234567890"} + app_descriptions = json.dumps( + [{"name": k, "description": v} for (k, v) in self.world.task.app_descriptions.items()], + indent=1, + ) + observation = {"instruction": instruction, "user_info": user_info, "available_apps": ["spotify", "gmail", "calendar", "contacts", "messages", "notes", "todo", "files", "banking"], "helper_apis": {"show_app_descriptions": "apis.api_docs.show_app_descriptions()", "show_api_descriptions": "apis.api_docs.show_api_descriptions(app_name='app')", "show_api_doc": "apis.api_docs.show_api_doc(app_name='app', api_name='api')", "complete_task": "apis.supervisor.complete_task(answer='your_answer')"}} + observation["app_descriptions"] = app_descriptions return observation, {}