Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 52 additions & 12 deletions examples/appworld/run_appworld_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,55 @@ async def main(num_tasks=10, max_turns=40, split="dev"):
os.environ["TOKENIZERS_PARALLELISM"] = "true"

# Check API key
if not os.getenv("OPENAI_API_KEY"):
print("No OPENAI_API_KEY")
return
# if not os.getenv("OPENAI_API_KEY"):
# print("No OPENAI_API_KEY")
# return

# n_parallel_agents = 4

# model_name = "gpt-4o-mini"
# # Use a tokenizer with chat template (only for formatting messages and calculating token counts in the engine)
# # Qwen2-0.5B is small and fast to download
# tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

# sampling_params = {"temperature": 0.6, "top_p": 0.95, "model": model_name}
# agent_args = {}
# env_args = {"max_turns": max_turns}

# # Create engine
# engine = AgentExecutionEngine(
# agent_class=AppWorldReactAgent,
# agent_args=agent_args,
# env_class=AppWorldEnv,
# env_args=env_args,
# engine_name="openai",
# tokenizer=tokenizer,
# sampling_params=sampling_params,
# rollout_engine_args={"base_url": "https://api.openai.com/v1", "api_key": os.getenv("OPENAI_API_KEY")},
# n_parallel_agents=n_parallel_agents,
# max_response_length=16384,
# max_prompt_length=4096,
# max_steps=max_turns,
# )

if not os.getenv("SAMBANOVA_API_KEY"):
print("No SAMBANOVA_API_KEY")
exit(1)

n_parallel_agents = 4

model_name = "gpt-4o-mini"
# Use a tokenizer with chat template (only for formatting messages and calculating token counts in the engine)
# Qwen2-0.5B is small and fast to download
# Use SAMBANOVA_API_KEY's latest Llama 3.3 70B Turbo instruct model
model_name = "Meta-Llama-3.3-70B-Instruct"

# Tokenizer can remain lightweight just for formatting / token counting
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

sampling_params = {"temperature": 0.6, "top_p": 0.95, "model": model_name}
sampling_params = {
"temperature": 0.6,
"top_p": 0.95,
"model": model_name,
}

agent_args = {}
env_args = {"max_turns": max_turns}

Expand All @@ -48,10 +85,13 @@ async def main(num_tasks=10, max_turns=40, split="dev"):
engine_name="openai",
tokenizer=tokenizer,
sampling_params=sampling_params,
rollout_engine_args={"base_url": "https://api.openai.com/v1", "api_key": os.getenv("OPENAI_API_KEY")},
rollout_engine_args={
"base_url": "https://api.sambanova.ai/v1",
"api_key": os.getenv("SAMBANOVA_API_KEY"),
},
n_parallel_agents=n_parallel_agents,
max_response_length=16384,
max_prompt_length=4096,
max_response_length=40000,
max_prompt_length=80000,
max_steps=max_turns,
)

Expand Down Expand Up @@ -127,9 +167,9 @@ def load_appworld_official_tasks(split="dev", num_tasks=10):

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run AppWorld Agent with rLLM", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("-n", "--num-tasks", type=int, default=10, help="Number of tasks to run (use -1 for all tasks)")
parser.add_argument("-n", "--num-tasks", type=int, default=1, help="Number of tasks to run (use -1 for all tasks)")
parser.add_argument("-t", "--max-turns", type=int, default=40, help="Maximum number of turns per task")
parser.add_argument("-s", "--split", type=str, default="dev", choices=["train", "dev", "test_normal", "test_challenge"], help="Which split to use")
parser.add_argument("-s", "--split", type=str, default="test_normal", choices=["train", "dev", "test_normal", "test_challenge"], help="Which split to use")

args = parser.parse_args()

Expand Down
30 changes: 26 additions & 4 deletions rllm/agents/appworld_react_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from jinja2 import Template

import re
from rllm.agents.agent import Action, BaseAgent, Step, Trajectory

# logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(filename)s:%(lineno)d] %(message)s")
Expand All @@ -22,7 +23,7 @@ class AppWorldReactAgent(BaseAgent):
5. Environment executes the code and returns the result
"""

SYSTEM_PROMPT: str = """USER:
REACT_PROMPT: str = """USER:
I am your supervisor and you are a super intelligent AI Assistant whose job is to achieve my day-to-day tasks completely autonomously.

To do this, you will need to interact with app/s (e.g., spotify, venmo etc) using their associated APIs on my behalf. For this you will undertake a *multi-step conversation* using a python REPL environment. That is, you will write the python code and the environment will execute it and show you the result, based on which, you will write python code for the next step and so on, until you've achieved the goal. This environment will let you interact with app/s using their associated APIs on my behalf.
Expand Down Expand Up @@ -433,6 +434,9 @@ def update_from_model(self, response: str, **kwargs) -> Action:
Returns:
Action: Action (string) containing the Python code to execute
"""
# import pdb
# pdb.set_trace()

# Extract the Python code from the response
python_code = self._extract_code_from_response(response)
# Append assistant message to history
Expand Down Expand Up @@ -471,11 +475,29 @@ def _initialize_from_task(self, observation: dict, **kwargs):
app_descriptions = "[List of available apps will be shown here]"

# Format the system prompt with user info and task
template = Template(self.SYSTEM_PROMPT)
system_prompt = template.render(main_user=self.user_info, app_descriptions=app_descriptions, input_str=self.task_instruction)
template = Template(self.REACT_PROMPT)
react_prompt = template.render(main_user=self.user_info, app_descriptions=app_descriptions, input_str=self.task_instruction)

# Set the system message
self.messages = [{"role": "system", "content": system_prompt}]
self.messages = self.text_to_messages(react_prompt)

def text_to_messages(self, input_str: str) -> list[dict]:
    """Split a role-tagged prompt string into chat-style message dicts.

    The input is expected to be a transcript where each turn is introduced
    by a line consisting of ``USER:``, ``ASSISTANT:`` or ``SYSTEM:`` (case
    insensitive) followed by a newline. Everything between one marker and
    the next (including trailing newlines) becomes that message's content.

    Args:
        input_str: Prompt text beginning with a role marker.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts, with roles
        lower-cased, in the order they appear in the input.

    Raises:
        ValueError: If text precedes the first role marker, or if the
            input contains no role marker at all.
    """
    messages_json: list[dict] = []
    last_start = 0
    # Raw string for the regex; IGNORECASE accepts e.g. "User:" as well.
    for m in re.finditer(r"(USER|ASSISTANT|SYSTEM):\n", input_str, flags=re.IGNORECASE):
        last_end = m.span()[0]
        if len(messages_json) == 0:
            # The very first marker must sit at the start of the string;
            # otherwise some leading text has no role assigned to it.
            if last_end != 0:
                raise ValueError(
                    f"Start of the prompt has no assigned role: {input_str[:last_end]}"
                )
        else:
            # Close out the previous message with the text up to this marker.
            messages_json[-1]["content"] = input_str[last_start:last_end]
        role = m.group(1).lower()
        messages_json.append({"role": role, "content": None})
        last_start = m.span()[1]
    if not messages_json:
        # Without this guard the assignment below would raise a bare
        # IndexError on marker-less input, which hides the real problem.
        raise ValueError("Prompt contains no USER/ASSISTANT/SYSTEM role markers")
    # The final message runs to the end of the input.
    messages_json[-1]["content"] = input_str[last_start:]
    return messages_json

def _format_execution_result(self, observation: dict) -> str:
"""Format code execution result as user message."""
Expand Down
2 changes: 1 addition & 1 deletion rllm/engine/agent_execution_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ async def run_agent_trajectory_async(self, idx, application_id, seed=0, mode="Te
agent = self.agents[idx]
env = self.envs[idx]
# env_id = env.env_id

termination_reason = None
prompt_token_len = 0
prompt_tokens = []
Expand Down
8 changes: 7 additions & 1 deletion rllm/environments/appworld/appworld_env.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import json
import logging
import threading

from appworld import AppWorld as _AppWorld

from rllm.environments.base.base_env import BaseEnv

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(filename)s:%(lineno)d] %(message)s")
Expand Down Expand Up @@ -97,7 +97,13 @@ def reset(self):
# Default user info if not available
user_info = {"first_name": "User", "last_name": "Test", "email": "user@example.com", "phone_number": "+1234567890"}

app_descriptions = json.dumps(
[{"name": k, "description": v} for (k, v) in self.world.task.app_descriptions.items()],
indent=1,
)

observation = {"instruction": instruction, "user_info": user_info, "available_apps": ["spotify", "gmail", "calendar", "contacts", "messages", "notes", "todo", "files", "banking"], "helper_apis": {"show_app_descriptions": "apis.api_docs.show_app_descriptions()", "show_api_descriptions": "apis.api_docs.show_api_descriptions(app_name='app')", "show_api_doc": "apis.api_docs.show_api_doc(app_name='app', api_name='api')", "complete_task": "apis.supervisor.complete_task(answer='your_answer')"}}
observation["app_descriptions"] = app_descriptions

return observation, {}

Expand Down