Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ coverage.xml
.env
.venv
venv/
myenv/
ENV/
env.bak/
venv.bak/
Expand Down
134 changes: 134 additions & 0 deletions examples/browsergym_custom_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""Example usage of custom BrowserGym tasks.

This script demonstrates how to create and use custom tasks with the
BrowserGym environment wrapper in OpenEnv.
"""

import sys
import os
import time

# Add the repository's src/ directory to the import path so the ``envs``
# package imported below resolves. This file lives in examples/, so src/ is
# one level up and then down into src/ (not three levels up).
sys.path.insert(
    0,
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'src'),
)

from envs.browsergym_env.server.browsergym_environment import BrowserGymEnvironment
from envs.browsergym_env.models import BrowserGymAction


def multi_tab_copy_paste_example():
    """Run the multi-tab copy-paste example.

    Creates a visible (non-headless) ``BrowserGymEnvironment`` for the custom
    ``copy-paste-multitab`` task, prints the goal, then replays a fixed list
    of actions. After each action the reward, done flag and current page are
    printed. The loop stops early once the observation reports ``done``.

    The environment is closed in a ``finally`` clause so the browser is
    released even when ``reset()`` or ``step()`` raises.
    """
    print("Multi-Tab Copy-Paste Task Example")
    print("-" * 80)

    # Create environment
    env = BrowserGymEnvironment(
        benchmark="custom",
        task_name="copy-paste-multitab",
        headless=False,
        viewport_width=1280,
        viewport_height=720,
        timeout=10000.0,
    )

    try:
        # Reset environment
        obs = env.reset()
        print(f"Goal: {obs.goal}\n")

        # Solve the multi-tab task by simulating the user's actions.
        steps = [
            ("Select source text", "click('#source-text')"),
            ("Select all text", "press('Control+A')"),
            ("Copy text", "press('Control+C')"),
            ("Navigate to target page", "click('#open-target-btn')"),
            ("Click target input field", "click('#target-text')"),
            ("Paste text", "press('Control+V')"),
            ("Submit form", "click('#submit-btn')"),
        ]

        for i, (description, action_str) in enumerate(steps, 1):
            print(f"Step {i}: {description}")
            action = BrowserGymAction(action_str=action_str)
            obs = env.step(action)

            # Show which page we're on; fall back to "unknown" when the
            # observation carries no custom_data entry.
            current_page = "unknown"
            if obs.metadata and 'custom_data' in obs.metadata:
                current_page = obs.metadata['custom_data'].get('current_page', 'unknown')

            print(f"  Reward: {obs.reward}, Done: {obs.done}, Page: {current_page}")

            # Add delay to see the browser actions
            time.sleep(1)

            if obs.done:
                print(f"\n✓ Task completed! Total reward: {env.state.cum_reward}")
                break
    finally:
        # Always release the browser, including on errors.
        env.close()

    print("-" * 80)

def single_tab_copy_paste_example():
    """Run the single-tab copy-paste example.

    Creates a visible (non-headless) ``BrowserGymEnvironment`` for the custom
    ``copy-paste`` task, prints the goal, then replays a fixed list of
    actions, printing the reward and done flag after each one. The loop stops
    early once the observation reports ``done``.

    The environment is closed in a ``finally`` clause so the browser is
    released even when ``reset()`` or ``step()`` raises.
    """
    print("Custom BrowserGym Task Example: Copy-Paste")
    print("-" * 80)

    # Create environment
    env = BrowserGymEnvironment(
        benchmark="custom",
        task_name="copy-paste",
        headless=False,
        viewport_width=1280,
        viewport_height=720,
        timeout=10000.0,
    )

    try:
        # Reset environment
        obs = env.reset()
        print(f"Goal: {obs.goal}\n")

        # Solve the task
        steps = [
            ("Click source text field", "click('#source-text')"),
            ("Select all text", "press('Control+A')"),
            ("Copy text", "press('Control+C')"),
            ("Click target field", "click('#target-text')"),
            ("Paste text", "press('Control+V')"),
            ("Click submit button", "click('#submit-btn')"),
        ]

        for i, (description, action_str) in enumerate(steps, 1):
            print(f"Step {i}: {description}")
            action = BrowserGymAction(action_str=action_str)
            obs = env.step(action)
            print(f"  Reward: {obs.reward}, Done: {obs.done}")

            # Add delay to see the browser actions
            time.sleep(1)

            if obs.done:
                print(f"\n✓ Task completed! Total reward: {env.state.cum_reward}")
                break
    finally:
        # Always release the browser, including on errors.
        env.close()

    print("-" * 80)

def main():
    """Run the single-tab example, then the multi-tab example.

    Each run is preceded by its banner line, and the script sleeps for three
    seconds between the two runs.
    """
    runs = (
        ("Single-Tab Copy-Paste", single_tab_copy_paste_example),
        ("\nMulti-Tab Copy-Paste", multi_tab_copy_paste_example),
    )

    for position, (banner, run_example) in enumerate(runs):
        # Pause before every run except the first.
        if position:
            time.sleep(3)
        print(banner)
        run_example()


if __name__ == "__main__":
    # Run both examples only when this file is executed as a script,
    # not when it is imported.
    main()

82 changes: 82 additions & 0 deletions rfcs/005-generic-task-integration.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# RFC: Generic Custom Task Integration

**Status**: Draft
**Created**: 06/12/2025
**Authors**: @atchudhansg
**RFC ID**: 005

## Summary
This RFC proposes a standardized mechanism for injecting custom task logic into OpenEnv environments at runtime. This allows users to define domain-specific tasks, rewards, and termination conditions without modifying the core environment code or rebuilding Docker images.

## Motivation
Currently, adding new tasks to an OpenEnv environment (like BrowserGym) typically requires:
1. Modifying the source code of the environment.
2. Rebuilding the Docker image.
3. Waiting for upstream PRs to be merged for official benchmarks.

This slows down research and prototyping. Users often need to:
- Test agents on proprietary or local tasks.
- Rapidly iterate on reward functions.
- Create custom curricula of tasks.

We need a way to "plug in" task definitions dynamically.

## Proposal

### 1. Task Interface
We define a standard protocol that any custom task must implement. This should be generic enough for various environments (Browser, Terminal, etc.).

```python
from typing import Any


class CustomTask:
    """Interface that every custom task implements.

    The methods here are placeholders that return ``None``; a concrete task
    overrides each of them.
    """

    def setup(self, config: dict) -> None:
        """Initialize task resources from ``config``."""

    def get_observation(self, env_state: Any) -> dict:
        """Transform raw environment state into agent observation."""

    def calculate_reward(self, state: Any, action: Any, result: Any) -> float:
        """Compute reward for the transition."""

    def check_done(self, state: Any) -> bool:
        """Determine if the episode should terminate."""
```

### 2. Injection Mechanism
Environments should support loading these tasks from a specific directory or module path at runtime.

- **Volume Mount**: Users mount their task code to a standard path (e.g., `/opt/openenv/custom_tasks`).
- **Dynamic Loading**: The environment server scans this directory and registers valid task classes.
- **Configuration**: A standard environment variable (e.g., `OPENENV_CUSTOM_TASK_DIR`) tells the server where to look.

### 3. Usage Example (BrowserGym)
In the context of BrowserGym (as implemented in the pull request that accompanies this RFC), this looks like:

```python
# User defines a task locally by subclassing the CustomTask interface
class MyTask(CustomTask):
    ...

# User runs the environment with the custom task
env = BrowserGymEnv(
    environment={
        "BROWSERGYM_BENCHMARK": "custom",
        "BROWSERGYM_TASK_NAME": "my-task"
    },
    # Mount the local task directory (key) at the standard custom-task
    # path inside the container (value).
    volumes={
        "/local/path/to/tasks": "/opt/openenv/custom_tasks"
    }
)
```

## Benefits
- **Decoupling**: Task logic is separate from environment infrastructure.
- **Speed**: No rebuilds required to change a reward function.
- **Flexibility**: Supports private/proprietary tasks that cannot be upstreamed.

## Future Work
- Define schemas for task configuration.
- Support remote task definitions (loading from a URL or a Git repository).
Loading