Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
247 changes: 2 additions & 245 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "scrapybara"
version = "2.2.9"
version = "2.3.0"
description = ""
readme = "README.md"
authors = []
Expand Down Expand Up @@ -32,9 +32,7 @@ Repository = 'https://github.com/scrapybara/scrapybara-python'

[tool.poetry.dependencies]
python = "^3.8"
anthropic = "^0.47.2"
httpx = ">=0.21.2"
playwright = "^1.48.0"
pydantic = ">= 1.9.2"
pydantic-core = "^2.18.2"
typing_extensions = ">= 4.0.0"
Expand Down
23 changes: 5 additions & 18 deletions reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -336,13 +336,16 @@ client.instance.get_stream_url(

```python
from scrapybara import Scrapybara
from scrapybara.instance import Request_MoveMouse

client = Scrapybara(
api_key="YOUR_API_KEY",
)
client.instance.computer(
instance_id="instance_id",
action="key",
request=Request_MoveMouse(
coordinates=[1],
),
)

```
Expand All @@ -367,23 +370,7 @@ client.instance.computer(
<dl>
<dd>

**action:** `Action`

</dd>
</dl>

<dl>
<dd>

**coordinate:** `typing.Optional[typing.Sequence[int]]`

</dd>
</dl>

<dl>
<dd>

**text:** `typing.Optional[str]`
**request:** `Request`

</dd>
</dl>
Expand Down
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
httpx>=0.21.2
pydantic>= 1.9.2
pydantic-core>=2.18.2,<3.0.0
typing_extensions>= 4.0.0
53 changes: 51 additions & 2 deletions src/scrapybara/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,72 +2,121 @@

from .types import (
AuthStateResponse,
BashResponse,
BrowserAuthenticateResponse,
BrowserGetCdpUrlResponse,
Button,
CellType,
ClickMouseAction,
ClickMouseActionClickType,
ComputerResponse,
DeploymentConfigInstanceType,
DragMouseAction,
EditResponse,
EnvGetResponse,
EnvResponse,
ExecuteCellRequest,
FileDownloadResponse,
FileReadResponse,
GetCursorPositionAction,
GetInstanceResponse,
GetInstanceResponseInstanceType,
HttpValidationError,
InstanceGetStreamUrlResponse,
InstanceScreenshotResponse,
KernelInfo,
ModifyBrowserAuthResponse,
MoveMouseAction,
Notebook,
NotebookCell,
PressKeyAction,
SaveBrowserAuthResponse,
ScrollAction,
StartBrowserResponse,
Status,
StopBrowserResponse,
StopInstanceResponse,
TakeScreenshotAction,
TypeTextAction,
ValidationError,
ValidationErrorLocItem,
WaitAction,
)
from .errors import UnprocessableEntityError
from . import browser, code, env, file, instance, notebook
from .client import AsyncScrapybara, Scrapybara
from .environment import ScrapybaraEnvironment
from .instance import Action, Command
from .instance import (
Command,
Request,
Request_ClickMouse,
Request_DragMouse,
Request_GetCursorPosition,
Request_MoveMouse,
Request_PressKey,
Request_Scroll,
Request_TakeScreenshot,
Request_TypeText,
Request_Wait,
)
from .version import __version__

__all__ = [
"Action",
"AsyncScrapybara",
"AuthStateResponse",
"BashResponse",
"BrowserAuthenticateResponse",
"BrowserGetCdpUrlResponse",
"Button",
"CellType",
"ClickMouseAction",
"ClickMouseActionClickType",
"Command",
"ComputerResponse",
"DeploymentConfigInstanceType",
"DragMouseAction",
"EditResponse",
"EnvGetResponse",
"EnvResponse",
"ExecuteCellRequest",
"FileDownloadResponse",
"FileReadResponse",
"GetCursorPositionAction",
"GetInstanceResponse",
"GetInstanceResponseInstanceType",
"HttpValidationError",
"InstanceGetStreamUrlResponse",
"InstanceScreenshotResponse",
"KernelInfo",
"ModifyBrowserAuthResponse",
"MoveMouseAction",
"Notebook",
"NotebookCell",
"PressKeyAction",
"Request",
"Request_ClickMouse",
"Request_DragMouse",
"Request_GetCursorPosition",
"Request_MoveMouse",
"Request_PressKey",
"Request_Scroll",
"Request_TakeScreenshot",
"Request_TypeText",
"Request_Wait",
"SaveBrowserAuthResponse",
"Scrapybara",
"ScrapybaraEnvironment",
"ScrollAction",
"StartBrowserResponse",
"Status",
"StopBrowserResponse",
"StopInstanceResponse",
"TakeScreenshotAction",
"TypeTextAction",
"UnprocessableEntityError",
"ValidationError",
"ValidationErrorLocItem",
"WaitAction",
"__version__",
"browser",
"code",
Expand Down
188 changes: 2 additions & 186 deletions src/scrapybara/anthropic/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,10 @@
from typing import Literal, Optional, TypedDict, Any, Dict
from anthropic.types.beta import (
BetaToolComputerUse20241022Param,
BetaToolTextEditor20241022Param,
BetaToolBash20241022Param,
)
import asyncio
from typing import Literal, Optional

from pydantic import Field

from ..client import BaseInstance, UbuntuInstance
from ..types.act import Model
from .base import BaseAnthropicTool, CLIResult, ToolError, ToolResult


# New: universal act API
class Anthropic(Model):
"""Model adapter for Anthropic.

Expand All @@ -37,179 +29,3 @@ def __init__(
api_key: Optional[str] = None,
) -> None:
super().__init__(provider="anthropic", name=name, api_key=api_key)


# Legacy: Anthropic SDK-compatible tools
class ComputerToolOptions(TypedDict):
    """Typed dictionary describing the display settings of a computer tool.

    This is the shape returned by ``ComputerTool.options``.
    """

    # Display height; the ``_px`` suffix indicates pixels.
    display_height_px: int
    # Display width; the ``_px`` suffix indicates pixels.
    display_width_px: int
    # Display number, or None when no display number is set.
    display_number: Optional[int]


class ComputerTool(BaseAnthropicTool):
    """A computer interaction tool that allows the agent to control mouse and keyboard.

    Available for Ubuntu, Browser, and Windows instances."""

    api_type: Literal["computer_20241022"] = "computer_20241022"
    name: Literal["computer"] = "computer"
    width: int = 1024
    height: int = 768
    display_num: Optional[int] = 1
    _instance: BaseInstance

    def __init__(self, instance: BaseInstance):
        """Bind the tool to the instance whose ``computer`` method it will call."""
        self._instance = instance
        super().__init__()

    @property
    def options(self) -> ComputerToolOptions:
        """Return the display settings (width, height, display number) as a dict."""
        return {
            "display_width_px": self.width,
            "display_height_px": self.height,
            "display_number": self.display_num,
        }

    def to_params(self) -> BetaToolComputerUse20241022Param:
        """Return the tool definition: name, API type and display settings."""
        return {
            "name": self.name,
            "type": self.api_type,
            "display_width_px": self.width,
            "display_height_px": self.height,
            "display_number": self.display_num,
        }

    async def __call__(self, **kwargs: Any) -> ToolResult:
        """Run a computer action on the bound instance.

        Keyword arguments:
            action: Required; forwarded unchanged to ``instance.computer``.
            coordinate: Optional sequence; converted to a tuple when truthy,
                otherwise passed as None.
            text: Optional; forwarded unchanged.

        Returns:
            A ``CLIResult`` built from the ``output``, ``error``,
            ``base64_image`` and ``system`` entries of the call result.

        Raises:
            KeyError: If ``action`` is missing (raised before the call is made).
            ToolError: If the instance call or the result conversion fails.
        """
        action = kwargs.pop("action")
        coordinate = kwargs.pop("coordinate", None)
        text = kwargs.pop("text", None)

        try:
            # We are inside a coroutine, so a loop is guaranteed to be running;
            # get_running_loop() is the recommended accessor in that context.
            loop = asyncio.get_running_loop()
            # The instance call is synchronous; run it in the loop's default
            # executor so the event loop is not blocked while it completes.
            result = await loop.run_in_executor(
                None,
                lambda: self._instance.computer(
                    action=action,
                    coordinate=tuple(coordinate) if coordinate else None,
                    text=text,
                ),
            )
            # NOTE(review): assumes the result exposes a dict-style ``get`` --
            # confirm against the return type of ``instance.computer``.
            return CLIResult(
                output=result.get("output") if result else "",
                error=result.get("error") if result else None,
                base64_image=result.get("base64_image") if result else None,
                system=result.get("system") if result else None,
            )
        except Exception as e:
            # Surface every failure as ToolError; the original traceback is
            # deliberately suppressed with ``from None``.
            raise ToolError(str(e)) from None


class EditTool(BaseAnthropicTool):
    """A filesystem editor tool that allows the agent to view, create, and edit files.

    Available for Ubuntu instances."""

    api_type: Literal["text_editor_20241022"] = "text_editor_20241022"
    name: Literal["str_replace_editor"] = "str_replace_editor"
    _instance: UbuntuInstance

    def __init__(self, instance: UbuntuInstance):
        """Bind the tool to the instance whose ``edit`` method it will call."""
        self._instance = instance
        super().__init__()

    def to_params(self) -> BetaToolTextEditor20241022Param:
        """Return the tool definition: name and API type."""
        return {
            "name": self.name,
            "type": self.api_type,
        }

    async def __call__(self, **kwargs: Any) -> ToolResult:
        """Run an editor command on the bound instance.

        Keyword arguments:
            command: Required; forwarded unchanged to ``instance.edit``.
            path: Required; forwarded unchanged.
            file_text, view_range, old_str, new_str, insert_line: Optional;
                each defaults to None and is forwarded unchanged.

        Returns:
            A ``CLIResult`` built from the ``output``, ``error``,
            ``base64_image`` and ``system`` entries of the call result.

        Raises:
            KeyError: If ``command`` or ``path`` is missing (raised before the
                call is made).
            ToolError: If the instance call or the result conversion fails.
        """
        command = kwargs.pop("command")
        path = kwargs.pop("path")
        file_text = kwargs.pop("file_text", None)
        view_range = kwargs.pop("view_range", None)
        old_str = kwargs.pop("old_str", None)
        new_str = kwargs.pop("new_str", None)
        insert_line = kwargs.pop("insert_line", None)
        try:
            # We are inside a coroutine, so a loop is guaranteed to be running;
            # get_running_loop() is the recommended accessor in that context.
            loop = asyncio.get_running_loop()
            # The instance call is synchronous; run it in the loop's default
            # executor so the event loop is not blocked while it completes.
            result = await loop.run_in_executor(
                None,
                lambda: self._instance.edit(
                    command=command,
                    path=path,
                    file_text=file_text,
                    view_range=view_range,
                    old_str=old_str,
                    new_str=new_str,
                    insert_line=insert_line,
                ),
            )
            # NOTE(review): assumes the result exposes a dict-style ``get`` --
            # confirm against the return type of ``instance.edit``.
            return CLIResult(
                output=result.get("output") if result else "",
                error=result.get("error") if result else None,
                base64_image=result.get("base64_image") if result else None,
                system=result.get("system") if result else None,
            )
        except Exception as e:
            # Surface every failure as ToolError; the original traceback is
            # deliberately suppressed with ``from None``.
            raise ToolError(str(e)) from None


class BashTool(BaseAnthropicTool):
    """A shell execution tool that allows the agent to run bash commands.

    Available for Ubuntu instances."""

    api_type: Literal["bash_20241022"] = "bash_20241022"
    name: Literal["bash"] = "bash"
    _instance: UbuntuInstance

    def __init__(self, instance: UbuntuInstance):
        """Bind the tool to the instance whose ``bash`` method it will call."""
        self._instance = instance
        super().__init__()

    def to_params(self) -> BetaToolBash20241022Param:
        """Return the tool definition: name and API type."""
        return {
            "name": self.name,
            "type": self.api_type,
        }

    async def __call__(self, **kwargs: Any) -> ToolResult:
        """Run a bash command on the bound instance.

        Keyword arguments:
            command: Required; forwarded unchanged to ``instance.bash``.
            restart: Optional, defaults to False; forwarded unchanged.

        Returns:
            A ``CLIResult`` built from the ``output``, ``error``,
            ``base64_image`` and ``system`` entries of the call result.

        Raises:
            KeyError: If ``command`` is missing (raised before the call is made).
            ToolError: If the instance call or the result conversion fails.
        """
        command = kwargs.pop("command")
        restart = kwargs.pop("restart", False)
        try:
            # We are inside a coroutine, so a loop is guaranteed to be running;
            # get_running_loop() is the recommended accessor in that context.
            loop = asyncio.get_running_loop()
            # The instance call is synchronous; run it in the loop's default
            # executor so the event loop is not blocked while it completes.
            result = await loop.run_in_executor(
                None,
                lambda: self._instance.bash(command=command, restart=restart),
            )
            # NOTE(review): assumes the result exposes a dict-style ``get`` --
            # confirm against the return type of ``instance.bash``.
            return CLIResult(
                output=result.get("output") if result else "",
                error=result.get("error") if result else None,
                base64_image=result.get("base64_image") if result else None,
                system=result.get("system") if result else None,
            )
        except Exception as e:
            # Surface every failure as ToolError; the original traceback is
            # deliberately suppressed with ``from None``.
            raise ToolError(str(e)) from None


class ToolCollection:
    """A collection of anthropic-defined tools."""

    def __init__(self, *tools):
        """Store the tools and index them by the ``name`` in their params."""
        self.tools = tools
        by_name = {}
        for member in tools:
            by_name[member.to_params()["name"]] = member
        self.tool_map = by_name

    def to_params(self) -> list:
        """Return the params of every tool, in the order the tools were given."""
        collected = []
        for member in self.tools:
            collected.append(member.to_params())
        return collected

    async def run(self, *, name: str, tool_input: Dict[str, Any]) -> ToolResult:
        """Look up the tool called ``name`` and await it with ``tool_input``.

        Returns the tool's result when it is truthy, an empty ``ToolResult``
        when it is falsy, and a ``ToolResult`` carrying an error message when
        the tool is unknown or raises. Exceptions are printed, not re-raised.
        """
        selected = self.tool_map.get(name)
        if not selected:
            return ToolResult(error=f"Tool {name} not found")
        try:
            outcome = await selected(**tool_input)
            if outcome:
                return outcome
            return ToolResult()
        except Exception as e:
            print(f"Error running tool {name}: {e}")
            return ToolResult(error=str(e))
Loading
Loading