From 5b758f43aadf78cb5200da66700030bf4679582e Mon Sep 17 00:00:00 2001 From: mengmeng-bot Date: Sat, 11 Oct 2025 17:33:31 +0000 Subject: [PATCH 1/2] SN api with bug fix --- examples/appworld/run_appworld_agent.py | 62 ++++++++++++++++++---- rllm/environments/appworld/appworld_env.py | 5 +- 2 files changed, 54 insertions(+), 13 deletions(-) diff --git a/examples/appworld/run_appworld_agent.py b/examples/appworld/run_appworld_agent.py index 5602359f7..c25f7db1f 100644 --- a/examples/appworld/run_appworld_agent.py +++ b/examples/appworld/run_appworld_agent.py @@ -24,18 +24,55 @@ async def main(num_tasks=10, max_turns=40, split="dev"): os.environ["TOKENIZERS_PARALLELISM"] = "true" # Check API key - if not os.getenv("OPENAI_API_KEY"): - print("No OPENAI_API_KEY") - return + # if not os.getenv("OPENAI_API_KEY"): + # print("No OPENAI_API_KEY") + # return + + # n_parallel_agents = 4 + + # model_name = "gpt-4o-mini" + # # Use a tokenizer with chat template (only for formatting messages and calculating token counts in the engine) + # # Qwen2-0.5B is small and fast to download + # tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct") + + # sampling_params = {"temperature": 0.6, "top_p": 0.95, "model": model_name} + # agent_args = {} + # env_args = {"max_turns": max_turns} + + # # Create engine + # engine = AgentExecutionEngine( + # agent_class=AppWorldReactAgent, + # agent_args=agent_args, + # env_class=AppWorldEnv, + # env_args=env_args, + # engine_name="openai", + # tokenizer=tokenizer, + # sampling_params=sampling_params, + # rollout_engine_args={"base_url": "https://api.openai.com/v1", "api_key": os.getenv("OPENAI_API_KEY")}, + # n_parallel_agents=n_parallel_agents, + # max_response_length=16384, + # max_prompt_length=4096, + # max_steps=max_turns, + # ) + + if not os.getenv("SAMBANOVA_API_KEY"): + print("No SAMBANOVA_API_KEY") + exit(1) n_parallel_agents = 4 - model_name = "gpt-4o-mini" - # Use a tokenizer with chat template (only for formatting messages and calculating token counts in the engine) - # Qwen2-0.5B is small and fast to download + # Use SAMBANOVA_API_KEY's latest Llama 3.3 70B Turbo instruct model + model_name = "Meta-Llama-3.3-70B-Instruct" + + # Tokenizer can remain lightweight just for formatting / token counting tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct") - sampling_params = {"temperature": 0.6, "top_p": 0.95, "model": model_name} + sampling_params = { + "temperature": 0.6, + "top_p": 0.95, + "model": model_name, + } + agent_args = {} env_args = {"max_turns": max_turns} @@ -48,10 +85,13 @@ async def main(num_tasks=10, max_turns=40, split="dev"): engine_name="openai", tokenizer=tokenizer, sampling_params=sampling_params, - rollout_engine_args={"base_url": "https://api.openai.com/v1", "api_key": os.getenv("OPENAI_API_KEY")}, + rollout_engine_args={ + "base_url": "https://api.sambanova.ai/v1", + "api_key": os.getenv("SAMBANOVA_API_KEY"), + }, n_parallel_agents=n_parallel_agents, - max_response_length=16384, - max_prompt_length=4096, + max_response_length=40000, + max_prompt_length=80000, max_steps=max_turns, ) @@ -127,7 +167,7 @@ def load_appworld_official_tasks(split="dev", num_tasks=10): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run AppWorld Agent with rLLM", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("-n", "--num-tasks", type=int, default=10, help="Number of tasks to run (use -1 for all tasks)") + parser.add_argument("-n", "--num-tasks", type=int, default=1, help="Number of tasks to run (use -1 for all tasks)") parser.add_argument("-t", "--max-turns", type=int, default=40, help="Maximum number of turns per task") parser.add_argument("-s", "--split", type=str, default="dev", choices=["train", "dev", "test_normal", "test_challenge"], help="Which split to use") diff --git a/rllm/environments/appworld/appworld_env.py b/rllm/environments/appworld/appworld_env.py index 6911e0f07..2bfe04ec5 100644 --- a/rllm/environments/appworld/appworld_env.py +++ b/rllm/environments/appworld/appworld_env.py @@ -3,6 +3,8 @@ from rllm.environments.base.base_env import BaseEnv +from appworld import AppWorld as _AppWorld + logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(filename)s:%(lineno)d] %(message)s") # AppWorld is not thread-safe, so we need to use a lock to synchronize access to the AppWorld instance @@ -62,12 +64,11 @@ def reset(self): # Initialize AppWorld based on unique task_id with _appworld_lock: try: - from appworld import AppWorld # get the task id task_id = self.task.get("task_id") if self.task else None if task_id: - self.world = AppWorld(task_id=task_id) + self.world = _AppWorld(task_id=task_id) self.world_id = task_id # Get instruction from AppWorld if not provided in task From d064ca4dec13520fcdb048b91ddd64e704169e09 Mon Sep 17 00:00:00 2001 From: mengmeng-bot Date: Sat, 11 Oct 2025 22:46:06 +0000 Subject: [PATCH 2/2] add app_descriptions to input prompt --- rllm/engine/agent_execution_engine.py | 2 +- rllm/environments/appworld/appworld_env.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/rllm/engine/agent_execution_engine.py b/rllm/engine/agent_execution_engine.py index 9ac1f0646..18a06723b 100644 --- a/rllm/engine/agent_execution_engine.py +++ b/rllm/engine/agent_execution_engine.py @@ -231,7 +231,7 @@ async def run_agent_trajectory_async(self, idx, application_id, seed=0, mode="Te agent = self.agents[idx] env = self.envs[idx] # env_id = env.env_id - + termination_reason = None prompt_token_len = 0 prompt_tokens = [] diff --git a/rllm/environments/appworld/appworld_env.py b/rllm/environments/appworld/appworld_env.py index f0b8acf1b..ecb441cdb 100644 --- a/rllm/environments/appworld/appworld_env.py +++ b/rllm/environments/appworld/appworld_env.py @@ -1,3 +1,4 @@ +import json import logging import threading @@ -96,7 +97,13 @@ def reset(self): # Default user info if not available user_info = {"first_name": "User", "last_name": "Test", "email": "user@example.com", "phone_number": "+1234567890"} + app_descriptions = json.dumps( + [{"name": k, "description": v} for (k, v) in self.world.task.app_descriptions.items()], + indent=1, + ) + observation = {"instruction": instruction, "user_info": user_info, "available_apps": ["spotify", "gmail", "calendar", "contacts", "messages", "notes", "todo", "files", "banking"], "helper_apis": {"show_app_descriptions": "apis.api_docs.show_app_descriptions()", "show_api_descriptions": "apis.api_docs.show_api_descriptions(app_name='app')", "show_api_doc": "apis.api_docs.show_api_doc(app_name='app', api_name='api')", "complete_task": "apis.supervisor.complete_task(answer='your_answer')"}} + observation["app_descriptions"] = app_descriptions return observation, {}