Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 52 additions & 12 deletions examples/appworld/run_appworld_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,55 @@ async def main(num_tasks=10, max_turns=40, split="dev"):
os.environ["TOKENIZERS_PARALLELISM"] = "true"

# Check API key
if not os.getenv("OPENAI_API_KEY"):
print("No OPENAI_API_KEY")
return
# if not os.getenv("OPENAI_API_KEY"):
# print("No OPENAI_API_KEY")
# return

# n_parallel_agents = 4

# model_name = "gpt-4o-mini"
# # Use a tokenizer with chat template (only for formatting messages and calculating token counts in the engine)
# # Qwen2-0.5B is small and fast to download
# tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

# sampling_params = {"temperature": 0.6, "top_p": 0.95, "model": model_name}
# agent_args = {}
# env_args = {"max_turns": max_turns}

# # Create engine
# engine = AgentExecutionEngine(
# agent_class=AppWorldReactAgent,
# agent_args=agent_args,
# env_class=AppWorldEnv,
# env_args=env_args,
# engine_name="openai",
# tokenizer=tokenizer,
# sampling_params=sampling_params,
# rollout_engine_args={"base_url": "https://api.openai.com/v1", "api_key": os.getenv("OPENAI_API_KEY")},
# n_parallel_agents=n_parallel_agents,
# max_response_length=16384,
# max_prompt_length=4096,
# max_steps=max_turns,
# )

if not os.getenv("SAMBANOVA_API_KEY"):
print("No SAMBANOVA_API_KEY")
exit(1)

n_parallel_agents = 4

model_name = "gpt-4o-mini"
# Use a tokenizer with chat template (only for formatting messages and calculating token counts in the engine)
# Qwen2-0.5B is small and fast to download
# Use SAMBANOVA_API_KEY's latest Llama 3.3 70B Turbo instruct model
model_name = "Meta-Llama-3.3-70B-Instruct"

# Tokenizer can remain lightweight just for formatting / token counting
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

sampling_params = {"temperature": 0.6, "top_p": 0.95, "model": model_name}
sampling_params = {
"temperature": 0.6,
"top_p": 0.95,
"model": model_name,
}

agent_args = {}
env_args = {"max_turns": max_turns}

Expand All @@ -48,10 +85,13 @@ async def main(num_tasks=10, max_turns=40, split="dev"):
engine_name="openai",
tokenizer=tokenizer,
sampling_params=sampling_params,
rollout_engine_args={"base_url": "https://api.openai.com/v1", "api_key": os.getenv("OPENAI_API_KEY")},
rollout_engine_args={
"base_url": "https://api.sambanova.ai/v1",
"api_key": os.getenv("SAMBANOVA_API_KEY"),
},
n_parallel_agents=n_parallel_agents,
max_response_length=16384,
max_prompt_length=4096,
max_response_length=40000,
max_prompt_length=80000,
max_steps=max_turns,
)

Expand Down Expand Up @@ -127,9 +167,9 @@ def load_appworld_official_tasks(split="dev", num_tasks=10):

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run AppWorld Agent with rLLM", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("-n", "--num-tasks", type=int, default=10, help="Number of tasks to run (use -1 for all tasks)")
parser.add_argument("-n", "--num-tasks", type=int, default=1, help="Number of tasks to run (use -1 for all tasks)")
parser.add_argument("-t", "--max-turns", type=int, default=40, help="Maximum number of turns per task")
parser.add_argument("-s", "--split", type=str, default="dev", choices=["train", "dev", "test_normal", "test_challenge"], help="Which split to use")
parser.add_argument("-s", "--split", type=str, default="test_normal", choices=["train", "dev", "test_normal", "test_challenge"], help="Which split to use")

args = parser.parse_args()

Expand Down
30 changes: 26 additions & 4 deletions rllm/agents/appworld_react_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from jinja2 import Template

import re
from rllm.agents.agent import Action, BaseAgent, Step, Trajectory

# logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(filename)s:%(lineno)d] %(message)s")
Expand All @@ -22,7 +23,7 @@ class AppWorldReactAgent(BaseAgent):
5. Environment executes the code and returns the result
"""

SYSTEM_PROMPT: str = """USER:
REACT_PROMPT: str = """USER:
I am your supervisor and you are a super intelligent AI Assistant whose job is to achieve my day-to-day tasks completely autonomously.

To do this, you will need to interact with app/s (e.g., spotify, venmo etc) using their associated APIs on my behalf. For this you will undertake a *multi-step conversation* using a python REPL environment. That is, you will write the python code and the environment will execute it and show you the result, based on which, you will write python code for the next step and so on, until you've achieved the goal. This environment will let you interact with app/s using their associated APIs on my behalf.
Expand Down Expand Up @@ -433,6 +434,9 @@ def update_from_model(self, response: str, **kwargs) -> Action:
Returns:
Action: Action (string) containing the Python code to execute
"""
# import pdb
# pdb.set_trace()

# Extract the Python code from the response
python_code = self._extract_code_from_response(response)
# Append assistant message to history
Expand Down Expand Up @@ -471,11 +475,29 @@ def _initialize_from_task(self, observation: dict, **kwargs):
app_descriptions = "[List of available apps will be shown here]"

# Format the system prompt with user info and task
template = Template(self.SYSTEM_PROMPT)
system_prompt = template.render(main_user=self.user_info, app_descriptions=app_descriptions, input_str=self.task_instruction)
template = Template(self.REACT_PROMPT)
react_prompt = template.render(main_user=self.user_info, app_descriptions=app_descriptions, input_str=self.task_instruction)

# Set the system message
self.messages = [{"role": "system", "content": system_prompt}]
self.messages = self.text_to_messages(react_prompt)

def text_to_messages(self, input_str: str) -> list[dict]:
    """Split a role-tagged prompt string into chat-style message dicts.

    The input is expected to be a transcript where each turn is introduced
    by a line consisting of ``USER:``, ``ASSISTANT:`` or ``SYSTEM:`` (case
    insensitive) followed by a newline. Everything between one marker and
    the next (including trailing newlines) becomes that message's content.

    Args:
        input_str: Prompt text beginning with a role marker.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts, with roles
        lower-cased, in the order they appear in the input.

    Raises:
        ValueError: If text precedes the first role marker, or if the
            input contains no role marker at all.
    """
    messages_json: list[dict] = []
    last_start = 0
    # Raw string for the regex; IGNORECASE accepts e.g. "User:" as well.
    for m in re.finditer(r"(USER|ASSISTANT|SYSTEM):\n", input_str, flags=re.IGNORECASE):
        last_end = m.span()[0]
        if len(messages_json) == 0:
            # The very first marker must sit at the start of the string;
            # otherwise some leading text has no role assigned to it.
            if last_end != 0:
                raise ValueError(
                    f"Start of the prompt has no assigned role: {input_str[:last_end]}"
                )
        else:
            # Close out the previous message with the text up to this marker.
            messages_json[-1]["content"] = input_str[last_start:last_end]
        role = m.group(1).lower()
        messages_json.append({"role": role, "content": None})
        last_start = m.span()[1]
    if not messages_json:
        # Without this guard the assignment below would raise a bare
        # IndexError on marker-less input, which hides the real problem.
        raise ValueError("Prompt contains no USER/ASSISTANT/SYSTEM role markers")
    # The final message runs to the end of the input.
    messages_json[-1]["content"] = input_str[last_start:]
    return messages_json

def _format_execution_result(self, observation: dict) -> str:
"""Format code execution result as user message."""
Expand Down
2 changes: 1 addition & 1 deletion rllm/engine/agent_execution_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ async def run_agent_trajectory_async(self, idx, application_id, seed=0, mode="Te
agent = self.agents[idx]
env = self.envs[idx]
# env_id = env.env_id

termination_reason = None
prompt_token_len = 0
prompt_tokens = []
Expand Down
8 changes: 7 additions & 1 deletion rllm/environments/appworld/appworld_env.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import json
import logging
import threading

from appworld import AppWorld as _AppWorld

from rllm.environments.base.base_env import BaseEnv

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(filename)s:%(lineno)d] %(message)s")
Expand Down Expand Up @@ -97,7 +97,13 @@ def reset(self):
# Default user info if not available
user_info = {"first_name": "User", "last_name": "Test", "email": "user@example.com", "phone_number": "+1234567890"}

app_descriptions = json.dumps(
[{"name": k, "description": v} for (k, v) in self.world.task.app_descriptions.items()],
indent=1,
)

observation = {"instruction": instruction, "user_info": user_info, "available_apps": ["spotify", "gmail", "calendar", "contacts", "messages", "notes", "todo", "files", "banking"], "helper_apis": {"show_app_descriptions": "apis.api_docs.show_app_descriptions()", "show_api_descriptions": "apis.api_docs.show_api_descriptions(app_name='app')", "show_api_doc": "apis.api_docs.show_api_doc(app_name='app', api_name='api')", "complete_task": "apis.supervisor.complete_task(answer='your_answer')"}}
observation["app_descriptions"] = app_descriptions

return observation, {}

Expand Down