From ffcde9f729e0aee379c2ce4fc5cda80568ec636e Mon Sep 17 00:00:00 2001 From: fern-api <115122769+fern-api[bot]@users.noreply.github.com> Date: Sun, 26 Jan 2025 02:26:03 +0000 Subject: [PATCH 1/7] SDK regeneration --- poetry.lock | 6 +++--- pyproject.toml | 2 +- src/scrapybara/__init__.py | 2 ++ src/scrapybara/core/client_wrapper.py | 20 ++++--------------- src/scrapybara/types/__init__.py | 2 ++ .../types/deployment_config_instance_type.py | 2 +- src/scrapybara/types/get_instance_response.py | 3 ++- .../get_instance_response_instance_type.py | 5 +++++ 8 files changed, 20 insertions(+), 22 deletions(-) create mode 100644 src/scrapybara/types/get_instance_response_instance_type.py diff --git a/poetry.lock b/poetry.lock index 659e929..8c03bbe 100644 --- a/poetry.lock +++ b/poetry.lock @@ -464,13 +464,13 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "pydantic" -version = "2.10.5" +version = "2.10.6" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.10.5-py3-none-any.whl", hash = "sha256:4dd4e322dbe55472cb7ca7e73f4b63574eecccf2835ffa2af9021ce113c83c53"}, - {file = "pydantic-2.10.5.tar.gz", hash = "sha256:278b38dbbaec562011d659ee05f63346951b3a248a6f3642e1bc68894ea2b4ff"}, + {file = "pydantic-2.10.6-py3-none-any.whl", hash = "sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584"}, + {file = "pydantic-2.10.6.tar.gz", hash = "sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236"}, ] [package.dependencies] diff --git a/pyproject.toml b/pyproject.toml index 9158a74..c44c4eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "scrapybara" -version = "2.1.3" +version = "2.2.0" description = "" readme = "README.md" authors = [] diff --git a/src/scrapybara/__init__.py b/src/scrapybara/__init__.py index 03f549c..4921e4c 100644 --- a/src/scrapybara/__init__.py +++ b/src/scrapybara/__init__.py @@ -12,6 +12,7 @@ 
FileDownloadResponse, FileReadResponse, GetInstanceResponse, + GetInstanceResponseInstanceType, HttpValidationError, InstanceGetStreamUrlResponse, InstanceScreenshotResponse, @@ -48,6 +49,7 @@ "FileDownloadResponse", "FileReadResponse", "GetInstanceResponse", + "GetInstanceResponseInstanceType", "HttpValidationError", "InstanceGetStreamUrlResponse", "InstanceScreenshotResponse", diff --git a/src/scrapybara/core/client_wrapper.py b/src/scrapybara/core/client_wrapper.py index cb823df..2de7010 100644 --- a/src/scrapybara/core/client_wrapper.py +++ b/src/scrapybara/core/client_wrapper.py @@ -7,9 +7,7 @@ class BaseClientWrapper: - def __init__( - self, *, api_key: str, base_url: str, timeout: typing.Optional[float] = None - ): + def __init__(self, *, api_key: str, base_url: str, timeout: typing.Optional[float] = None): self.api_key = api_key self._base_url = base_url self._timeout = timeout @@ -18,7 +16,7 @@ def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { "X-Fern-Language": "Python", "X-Fern-SDK-Name": "scrapybara", - "X-Fern-SDK-Version": "2.1.3", + "X-Fern-SDK-Version": "2.2.0", } headers["x-api-key"] = self.api_key return headers @@ -32,12 +30,7 @@ def get_timeout(self) -> typing.Optional[float]: class SyncClientWrapper(BaseClientWrapper): def __init__( - self, - *, - api_key: str, - base_url: str, - timeout: typing.Optional[float] = None, - httpx_client: httpx.Client + self, *, api_key: str, base_url: str, timeout: typing.Optional[float] = None, httpx_client: httpx.Client ): super().__init__(api_key=api_key, base_url=base_url, timeout=timeout) self.httpx_client = HttpClient( @@ -50,12 +43,7 @@ def __init__( class AsyncClientWrapper(BaseClientWrapper): def __init__( - self, - *, - api_key: str, - base_url: str, - timeout: typing.Optional[float] = None, - httpx_client: httpx.AsyncClient + self, *, api_key: str, base_url: str, timeout: typing.Optional[float] = None, httpx_client: httpx.AsyncClient ): super().__init__(api_key=api_key, 
base_url=base_url, timeout=timeout) self.httpx_client = AsyncHttpClient( diff --git a/src/scrapybara/types/__init__.py b/src/scrapybara/types/__init__.py index 40bc910..7998c7b 100644 --- a/src/scrapybara/types/__init__.py +++ b/src/scrapybara/types/__init__.py @@ -11,6 +11,7 @@ from .file_download_response import FileDownloadResponse from .file_read_response import FileReadResponse from .get_instance_response import GetInstanceResponse +from .get_instance_response_instance_type import GetInstanceResponseInstanceType from .http_validation_error import HttpValidationError from .instance_get_stream_url_response import InstanceGetStreamUrlResponse from .instance_screenshot_response import InstanceScreenshotResponse @@ -37,6 +38,7 @@ "FileDownloadResponse", "FileReadResponse", "GetInstanceResponse", + "GetInstanceResponseInstanceType", "HttpValidationError", "InstanceGetStreamUrlResponse", "InstanceScreenshotResponse", diff --git a/src/scrapybara/types/deployment_config_instance_type.py b/src/scrapybara/types/deployment_config_instance_type.py index 5e872c2..128f5bf 100644 --- a/src/scrapybara/types/deployment_config_instance_type.py +++ b/src/scrapybara/types/deployment_config_instance_type.py @@ -2,4 +2,4 @@ import typing -DeploymentConfigInstanceType = typing.Union[typing.Literal["small", "medium", "large"], typing.Any] +DeploymentConfigInstanceType = typing.Union[typing.Literal["ubuntu", "browser", "windows"], typing.Any] diff --git a/src/scrapybara/types/get_instance_response.py b/src/scrapybara/types/get_instance_response.py index ddc79ac..a0b60c4 100644 --- a/src/scrapybara/types/get_instance_response.py +++ b/src/scrapybara/types/get_instance_response.py @@ -2,6 +2,7 @@ from ..core.pydantic_utilities import UniversalBaseModel import datetime as dt +from .get_instance_response_instance_type import GetInstanceResponseInstanceType from .status import Status from ..core.pydantic_utilities import IS_PYDANTIC_V2 import typing @@ -11,7 +12,7 @@ class 
GetInstanceResponse(UniversalBaseModel): id: str launch_time: dt.datetime - instance_type: str + instance_type: GetInstanceResponseInstanceType status: Status if IS_PYDANTIC_V2: diff --git a/src/scrapybara/types/get_instance_response_instance_type.py b/src/scrapybara/types/get_instance_response_instance_type.py new file mode 100644 index 0000000..95c40a0 --- /dev/null +++ b/src/scrapybara/types/get_instance_response_instance_type.py @@ -0,0 +1,5 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +GetInstanceResponseInstanceType = typing.Union[typing.Literal["ubuntu", "browser", "windows"], typing.Any] From 555b8ac4a600094bd6879022bbeca8cd969289e1 Mon Sep 17 00:00:00 2001 From: Justin Sun Date: Sun, 26 Jan 2025 21:08:31 -0800 Subject: [PATCH 2/7] ubuntu/browser/windows instances + prompts + tool compatability + import act + tool types --- .fernignore | 1 + src/scrapybara/anthropic/__init__.py | 44 +-- src/scrapybara/client.py | 428 ++++++++++++++++++------ src/scrapybara/instance/types/action.py | 2 + src/scrapybara/prompts/__init__.py | 57 +++- src/scrapybara/tools/__init__.py | 43 ++- src/scrapybara/types/__init__.py | 35 +- tests/custom/test_client.py | 85 +++-- 8 files changed, 529 insertions(+), 166 deletions(-) diff --git a/.fernignore b/.fernignore index ced6301..f77513a 100644 --- a/.fernignore +++ b/.fernignore @@ -4,6 +4,7 @@ src/scrapybara/client.py src/scrapybara/anthropic/ src/scrapybara/prompts/ src/scrapybara/tools/ +src/scrapybara/types/__init__.py src/scrapybara/types/act.py src/scrapybara/types/tool.py tests/custom/test_client.py diff --git a/src/scrapybara/anthropic/__init__.py b/src/scrapybara/anthropic/__init__.py index a3cfda2..8e32b7a 100644 --- a/src/scrapybara/anthropic/__init__.py +++ b/src/scrapybara/anthropic/__init__.py @@ -7,7 +7,7 @@ import asyncio from pydantic import Field -from ..client import Instance +from ..client import BaseInstance, UbuntuInstance from ..types.act import Model from .base 
import BaseAnthropicTool, CLIResult, ToolError, ToolResult @@ -47,19 +47,19 @@ class ComputerToolOptions(TypedDict): class ComputerTool(BaseAnthropicTool): - """ - A computer interaction tool that allows the agent to control mouse and keyboard. - The tool parameters are defined by Anthropic and are not editable. - """ + """A computer interaction tool that allows the agent to control mouse and keyboard. + + Available for Ubuntu, Browser, and Windows instances.""" api_type: Literal["computer_20241022"] = "computer_20241022" name: Literal["computer"] = "computer" width: int = 1024 height: int = 768 display_num: Optional[int] = 1 + _instance: BaseInstance - def __init__(self, instance: Instance): - self.instance = instance + def __init__(self, instance: BaseInstance): + self._instance = instance super().__init__() @property @@ -88,7 +88,7 @@ async def __call__(self, **kwargs: Any) -> ToolResult: loop = asyncio.get_event_loop() result = await loop.run_in_executor( None, - lambda: self.instance.computer( + lambda: self._instance.computer( action=action, coordinate=tuple(coordinate) if coordinate else None, text=text, @@ -105,16 +105,16 @@ async def __call__(self, **kwargs: Any) -> ToolResult: class EditTool(BaseAnthropicTool): - """ - A filesystem editor tool that allows the agent to view, create, and edit files. - The tool parameters are defined by Anthropic and are not editable. - """ + """A filesystem editor tool that allows the agent to view, create, and edit files. 
+ + Available for Ubuntu instances.""" api_type: Literal["text_editor_20241022"] = "text_editor_20241022" name: Literal["str_replace_editor"] = "str_replace_editor" + _instance: UbuntuInstance - def __init__(self, instance: Instance): - self.instance = instance + def __init__(self, instance: UbuntuInstance): + self._instance = instance super().__init__() def to_params(self) -> BetaToolTextEditor20241022Param: @@ -135,7 +135,7 @@ async def __call__(self, **kwargs: Any) -> ToolResult: loop = asyncio.get_event_loop() result = await loop.run_in_executor( None, - lambda: self.instance.edit( + lambda: self._instance.edit( command=command, path=path, file_text=file_text, @@ -156,16 +156,16 @@ async def __call__(self, **kwargs: Any) -> ToolResult: class BashTool(BaseAnthropicTool): - """ - A shell execution tool that allows the agent to run bash commands. - The tool parameters are defined by Anthropic and are not editable. - """ + """A shell execution tool that allows the agent to run bash commands. 
+ + Available for Ubuntu instances.""" api_type: Literal["bash_20241022"] = "bash_20241022" name: Literal["bash"] = "bash" + _instance: UbuntuInstance - def __init__(self, instance: Instance): - self.instance = instance + def __init__(self, instance: UbuntuInstance): + self._instance = instance super().__init__() def to_params(self) -> BetaToolBash20241022Param: @@ -181,7 +181,7 @@ async def __call__(self, **kwargs: Any) -> ToolResult: loop = asyncio.get_event_loop() result = await loop.run_in_executor( None, - lambda: self.instance.bash(command=command, restart=restart), + lambda: self._instance.bash(command=command, restart=restart), ) return CLIResult( output=result.get("output") if result else "", diff --git a/src/scrapybara/client.py b/src/scrapybara/client.py index bce5a69..945e978 100644 --- a/src/scrapybara/client.py +++ b/src/scrapybara/client.py @@ -8,7 +8,6 @@ Type, TypeVar, Union, - Literal, Generator, Callable, AsyncGenerator, @@ -29,7 +28,6 @@ BrowserAuthenticateResponse, BrowserGetCdpUrlResponse, CellType, - DeploymentConfigInstanceType, EnvGetResponse, EnvResponse, FileDownloadResponse, @@ -572,25 +570,18 @@ async def delete( ) -class Instance: +class BaseInstance: def __init__( self, id: str, launch_time: datetime, - instance_type: str, status: str, client: BaseClient, ): self.id = id self.launch_time = launch_time - self.instance_type = instance_type self.status = status self._client = client - self.browser = Browser(self.id, self._client) - self.code = Code(self.id, self._client) - self.notebook = Notebook(self.id, self._client) - self.file = File(self.id, self._client) - self.env = Env(self.id, self._client) def screenshot( self, request_options: Optional[RequestOptions] = None @@ -622,6 +613,44 @@ def computer( request_options=request_options, ) + def stop( + self, request_options: Optional[RequestOptions] = None + ) -> StopInstanceResponse: + return self._client.instance.stop(self.id, request_options=request_options) + + def pause( + self, 
request_options: Optional[RequestOptions] = None + ) -> StopInstanceResponse: + return self._client.instance.pause(self.id, request_options=request_options) + + def resume( + self, + *, + timeout_hours: Optional[float] = None, + request_options: Optional[RequestOptions] = None, + ) -> GetInstanceResponse: + return self._client.instance.resume( + self.id, + timeout_hours=timeout_hours, + request_options=request_options, + ) + + +class UbuntuInstance(BaseInstance): + def __init__( + self, + id: str, + launch_time: datetime, + status: str, + client: BaseClient, + ): + super().__init__(id, launch_time, status, client) + self.browser = Browser(self.id, self._client) + self.code = Code(self.id, self._client) + self.notebook = Notebook(self.id, self._client) + self.file = File(self.id, self._client) + self.env = Env(self.id, self._client) + def bash( self, *, @@ -657,48 +686,57 @@ def edit( request_options=request_options, ) - def stop( - self, request_options: Optional[RequestOptions] = None - ) -> StopInstanceResponse: - return self._client.instance.stop(self.id, request_options=request_options) - def pause( +class BrowserInstance(BaseInstance): + def __init__( + self, + id: str, + launch_time: datetime, + status: str, + client: BaseClient, + ): + super().__init__(id, launch_time, status, client) + + def get_cdp_url( self, request_options: Optional[RequestOptions] = None - ) -> StopInstanceResponse: - return self._client.instance.pause(self.id, request_options=request_options) + ) -> BrowserGetCdpUrlResponse: + return self._client.browser.get_cdp_url( + self.id, request_options=request_options + ) - def resume( - self, - *, - timeout_hours: Optional[float] = None, - request_options: Optional[RequestOptions] = None, - ) -> GetInstanceResponse: - return self._client.instance.resume( + def authenticate( + self, *, auth_state_id: str, request_options: Optional[RequestOptions] = None + ) -> BrowserAuthenticateResponse: + return self._client.browser.authenticate( self.id, - 
timeout_hours=timeout_hours, + auth_state_id=auth_state_id, request_options=request_options, ) -class AsyncInstance: +class WindowsInstance(BaseInstance): + def __init__( + self, + id: str, + launch_time: datetime, + status: str, + client: BaseClient, + ): + super().__init__(id, launch_time, status, client) + + +class AsyncBaseInstance: def __init__( self, id: str, launch_time: datetime, - instance_type: str, status: str, client: AsyncBaseClient, ): self.id = id self.launch_time = launch_time - self.instance_type = instance_type self.status = status self._client = client - self.browser = AsyncBrowser(self.id, self._client) - self.code = AsyncCode(self.id, self._client) - self.notebook = AsyncNotebook(self.id, self._client) - self.file = AsyncFile(self.id, self._client) - self.env = AsyncEnv(self.id, self._client) async def screenshot( self, request_options: Optional[RequestOptions] = None @@ -730,6 +768,48 @@ async def computer( request_options=request_options, ) + async def stop( + self, request_options: Optional[RequestOptions] = None + ) -> StopInstanceResponse: + return await self._client.instance.stop( + self.id, request_options=request_options + ) + + async def pause( + self, request_options: Optional[RequestOptions] = None + ) -> StopInstanceResponse: + return await self._client.instance.pause( + self.id, request_options=request_options + ) + + async def resume( + self, + *, + timeout_hours: Optional[float] = None, + request_options: Optional[RequestOptions] = None, + ) -> GetInstanceResponse: + return await self._client.instance.resume( + self.id, + timeout_hours=timeout_hours, + request_options=request_options, + ) + + +class AsyncUbuntuInstance(AsyncBaseInstance): + def __init__( + self, + id: str, + launch_time: datetime, + status: str, + client: AsyncBaseClient, + ): + super().__init__(id, launch_time, status, client) + self.browser = AsyncBrowser(self.id, self._client) + self.code = AsyncCode(self.id, self._client) + self.notebook = 
AsyncNotebook(self.id, self._client) + self.file = AsyncFile(self.id, self._client) + self.env = AsyncEnv(self.id, self._client) + async def bash( self, *, @@ -765,33 +845,45 @@ async def edit( request_options=request_options, ) - async def stop( - self, request_options: Optional[RequestOptions] = None - ) -> StopInstanceResponse: - return await self._client.instance.stop( - self.id, request_options=request_options - ) - async def pause( +class AsyncBrowserInstance(AsyncBaseInstance): + def __init__( + self, + id: str, + launch_time: datetime, + status: str, + client: AsyncBaseClient, + ): + super().__init__(id, launch_time, status, client) + + async def get_cdp_url( self, request_options: Optional[RequestOptions] = None - ) -> StopInstanceResponse: - return await self._client.instance.pause( + ) -> BrowserGetCdpUrlResponse: + return await self._client.browser.get_cdp_url( self.id, request_options=request_options ) - async def resume( - self, - *, - timeout_hours: Optional[float] = None, - request_options: Optional[RequestOptions] = None, - ) -> GetInstanceResponse: - return await self._client.instance.resume( + async def authenticate( + self, *, auth_state_id: str, request_options: Optional[RequestOptions] = None + ) -> BrowserAuthenticateResponse: + return await self._client.browser.authenticate( self.id, - timeout_hours=timeout_hours, + auth_state_id=auth_state_id, request_options=request_options, ) +class AsyncWindowsInstance(AsyncBaseInstance): + def __init__( + self, + id: str, + launch_time: datetime, + status: str, + client: AsyncBaseClient, + ): + super().__init__(id, launch_time, status, client) + + class Scrapybara: def __init__( self, @@ -816,56 +908,122 @@ def __init__( def httpx_client(self) -> HttpClient: return self._base_client._client_wrapper.httpx_client - def start( + def start_ubuntu( self, *, - instance_type: Optional[ - Union[DeploymentConfigInstanceType, Literal["small", "medium", "large"]] - ] = OMIT, timeout_hours: Optional[float] = OMIT, 
request_options: Optional[RequestOptions] = None, - ) -> Instance: + ) -> UbuntuInstance: response = self._base_client.start( - instance_type=instance_type, + instance_type="ubuntu", timeout_hours=timeout_hours, request_options=request_options, ) - return Instance( + return UbuntuInstance( response.id, response.launch_time, - response.instance_type, response.status, self._base_client, ) - def get( - self, instance_id: str, *, request_options: Optional[RequestOptions] = None - ) -> Instance: - response = self._base_client.get(instance_id, request_options=request_options) - return Instance( + def start_browser( + self, + *, + timeout_hours: Optional[float] = OMIT, + request_options: Optional[RequestOptions] = None, + ) -> BrowserInstance: + response = self._base_client.start( + instance_type="browser", + timeout_hours=timeout_hours, + request_options=request_options, + ) + return BrowserInstance( response.id, response.launch_time, - response.instance_type, response.status, self._base_client, ) - def get_instances( + def start_windows( self, *, + timeout_hours: Optional[float] = OMIT, request_options: Optional[RequestOptions] = None, - ) -> List[Instance]: - response = self._base_client.get_instances(request_options=request_options) - return [ - Instance( - instance.id, - instance.launch_time, - instance.instance_type, - instance.status, + ) -> WindowsInstance: + response = self._base_client.start( + instance_type="windows", + timeout_hours=timeout_hours, + request_options=request_options, + ) + return WindowsInstance( + response.id, + response.launch_time, + response.status, + self._base_client, + ) + + def get( + self, instance_id: str, *, request_options: Optional[RequestOptions] = None + ) -> Union[UbuntuInstance, BrowserInstance, WindowsInstance]: + response = self._base_client.get(instance_id, request_options=request_options) + if response.instance_type == "ubuntu": + return UbuntuInstance( + response.id, + response.launch_time, + response.status, + 
self._base_client, + ) + elif response.instance_type == "browser": + return BrowserInstance( + response.id, + response.launch_time, + response.status, + self._base_client, + ) + else: + return WindowsInstance( + response.id, + response.launch_time, + response.status, self._base_client, ) - for instance in response - ] + + def get_instances( + self, + *, + request_options: Optional[RequestOptions] = None, + ) -> List[Union[UbuntuInstance, BrowserInstance, WindowsInstance]]: + response = self._base_client.get_instances(request_options=request_options) + instances: List[Union[UbuntuInstance, BrowserInstance, WindowsInstance]] = [] + for instance in response: + if instance.instance_type == "ubuntu": + instances.append( + UbuntuInstance( + instance.id, + instance.launch_time, + instance.status, + self._base_client, + ) + ) + elif instance.instance_type == "browser": + instances.append( + BrowserInstance( + instance.id, + instance.launch_time, + instance.status, + self._base_client, + ) + ) + else: + instances.append( + WindowsInstance( + instance.id, + instance.launch_time, + instance.status, + self._base_client, + ) + ) + return instances def get_auth_states( self, @@ -1121,60 +1279,128 @@ def __init__( def httpx_client(self) -> AsyncHttpClient: return self._base_client._client_wrapper.httpx_client - async def start( + async def start_ubuntu( self, *, - instance_type: Optional[ - Union[DeploymentConfigInstanceType, Literal["small", "medium", "large"]] - ] = OMIT, timeout_hours: Optional[float] = OMIT, request_options: Optional[RequestOptions] = None, - ) -> AsyncInstance: + ) -> AsyncUbuntuInstance: response = await self._base_client.start( - instance_type=instance_type, + instance_type="ubuntu", timeout_hours=timeout_hours, request_options=request_options, ) - return AsyncInstance( + return AsyncUbuntuInstance( response.id, response.launch_time, - response.instance_type, response.status, self._base_client, ) - async def get( - self, instance_id: str, *, 
request_options: Optional[RequestOptions] = None - ) -> AsyncInstance: - response = await self._base_client.get( - instance_id, request_options=request_options + async def start_browser( + self, + *, + timeout_hours: Optional[float] = OMIT, + request_options: Optional[RequestOptions] = None, + ) -> AsyncBrowserInstance: + response = await self._base_client.start( + instance_type="browser", + timeout_hours=timeout_hours, + request_options=request_options, + ) + return AsyncBrowserInstance( + response.id, + response.launch_time, + response.status, + self._base_client, + ) + + async def start_windows( + self, + *, + timeout_hours: Optional[float] = OMIT, + request_options: Optional[RequestOptions] = None, + ) -> AsyncWindowsInstance: + response = await self._base_client.start( + instance_type="windows", + timeout_hours=timeout_hours, + request_options=request_options, ) - return AsyncInstance( + return AsyncWindowsInstance( response.id, response.launch_time, - response.instance_type, response.status, self._base_client, ) + async def get( + self, instance_id: str, *, request_options: Optional[RequestOptions] = None + ) -> Union[AsyncUbuntuInstance, AsyncBrowserInstance, AsyncWindowsInstance]: + response = await self._base_client.get( + instance_id, request_options=request_options + ) + if response.instance_type == "ubuntu": + return AsyncUbuntuInstance( + response.id, + response.launch_time, + response.status, + self._base_client, + ) + elif response.instance_type == "browser": + return AsyncBrowserInstance( + response.id, + response.launch_time, + response.status, + self._base_client, + ) + else: + return AsyncWindowsInstance( + response.id, + response.launch_time, + response.status, + self._base_client, + ) + async def get_instances( self, *, request_options: Optional[RequestOptions] = None, - ) -> List[AsyncInstance]: + ) -> List[Union[AsyncUbuntuInstance, AsyncBrowserInstance, AsyncWindowsInstance]]: response = await self._base_client.get_instances( 
request_options=request_options ) - return [ - AsyncInstance( - instance.id, - instance.launch_time, - instance.instance_type, - instance.status, - self._base_client, - ) - for instance in response - ] + instances: List[ + Union[AsyncUbuntuInstance, AsyncBrowserInstance, AsyncWindowsInstance] + ] = [] + for instance in response: + if instance.instance_type == "ubuntu": + instances.append( + AsyncUbuntuInstance( + instance.id, + instance.launch_time, + instance.status, + self._base_client, + ) + ) + elif instance.instance_type == "browser": + instances.append( + AsyncBrowserInstance( + instance.id, + instance.launch_time, + instance.status, + self._base_client, + ) + ) + else: + instances.append( + AsyncWindowsInstance( + instance.id, + instance.launch_time, + instance.status, + self._base_client, + ) + ) + return instances async def get_auth_states( self, diff --git a/src/scrapybara/instance/types/action.py b/src/scrapybara/instance/types/action.py index 049d28a..f3fa17f 100644 --- a/src/scrapybara/instance/types/action.py +++ b/src/scrapybara/instance/types/action.py @@ -14,6 +14,8 @@ "double_click", "screenshot", "cursor_position", + "scroll", + "wait", ], typing.Any, ] diff --git a/src/scrapybara/prompts/__init__.py b/src/scrapybara/prompts/__init__.py index 3d9668b..16321a7 100644 --- a/src/scrapybara/prompts/__init__.py +++ b/src/scrapybara/prompts/__init__.py @@ -1,13 +1,12 @@ from datetime import datetime -SYSTEM_PROMPT = f""" +UBUNTU_SYSTEM_PROMPT = f""" * You have access to an Ubuntu virtual machine with internet connectivity * You can install Ubuntu applications using the bash tool (use curl over wget) * To run GUI applications with the bash tool: - Use a subshell, e.g. 
"(DISPLAY=:1 xterm &)", make sure to include the parantheses - GUI apps will appear but may take time to load - confirm with an extra screenshot * Start Chromium (default browser) via the bash tool "(DISPLAY=:1 chromium &)", but interact with it visually via the computer tool -* In Chromium, click the address bar directly to enter URLs/searches * If you need to read a full PDF after initial screenshot - Download with curl - Convert to text using pdftotext @@ -20,24 +19,70 @@ * When viewing pages: - Zoom out to see full content, or - Scroll to ensure you see everything +* When interacting with a field, always clear the field first using "ctrl+A" and "delete" + - Take an extra screenshot after clicking "enter" to confirm the field is properly submitted and move the mouse to the next field * Computer function calls take time, string together calls when possible * You are allowed to take actions on behalf of the user on sites that are authenticated * If the user asks you to access a site, assume that the user has already authenticated -* For accessibility, you can use and create accounts for communication tools such as email, Discord, Telegram on behalf of the user - e.g. join Discord channels, send emails, etc. * To login additional sites, ask the user to use Auth Contexts or the Interactive Desktop +* If first screenshot shows black screen: + - Click mouse in screen center + - Take another screenshot * Today's date is {datetime.today().strftime('%A, %B %-d, %Y')} +* If given a complex task, break down into smaller steps and ask the user for details only if necessary +* Read through web pages thoroughly by scrolling down till the end +* Be concise! 
+""" + + +BROWSER_SYSTEM_PROMPT = f""" +* You have access to a browser instance with internet connectivity +* You can interact with web pages using the computer tool and browser tool +* When viewing pages: + - Zoom out to see full content, or + - Scroll to ensure you see everything +* When interacting with a field, always clear the field first using "ctrl+A" and "delete" + - Take an extra screenshot after clicking "enter" to confirm the field is properly submitted and move the mouse to the next field +* Computer function calls take time, string together calls when possible +* You are allowed to take actions on behalf of the user on sites that are authenticated +* If the user asks you to access a site, assume that the user has already authenticated +* To login additional sites, ask the user to use Auth Contexts * If first screenshot shows black screen: - Click mouse in screen center - Take another screenshot +* Today's date is {datetime.today().strftime('%A, %B %-d, %Y')} + + + +* If given a complex task, break down into smaller steps and ask the user for details only if necessary +* Read through web pages thoroughly by scrolling down till the end +* Be concise! 
+""" + + +WINDOWS_SYSTEM_PROMPT = f""" +* You have access to a Windows virtual machine with internet connectivity +* You can interact with the Windows desktop using the computer tool +* When viewing pages: + - Zoom out to see full content, or + - Scroll to ensure you see everything * When interacting with a field, always clear the field first using "ctrl+A" and "delete" - Take an extra screenshot after clicking "enter" to confirm the field is properly submitted and move the mouse to the next field +* Computer function calls take time, string together calls when possible +* You are allowed to take actions on behalf of the user on sites that are authenticated +* If the user asks you to access a site, assume that the user has already authenticated +* To login additional sites, ask the user to use Auth Contexts or the Interactive Desktop +* If first screenshot shows black screen: + - Click mouse in screen center + - Take another screenshot +* Today's date is {datetime.today().strftime('%A, %B %-d, %Y')} + + + * If given a complex task, break down into smaller steps and ask the user for details only if necessary -* Research facts with Google searches in Chromium * Read through web pages thoroughly by scrolling down till the end -* Use more generalized websites during research, e.g. use Google Flights instead of United when searching for flights, only use United when finalizing bookings -* Wait for actions to complete (examine previous screenshots) before taking another action * Be concise! 
""" diff --git a/src/scrapybara/tools/__init__.py b/src/scrapybara/tools/__init__.py index 6365b67..b50bb21 100644 --- a/src/scrapybara/tools/__init__.py +++ b/src/scrapybara/tools/__init__.py @@ -1,11 +1,11 @@ import base64 import json -from typing import Any, Literal, Optional, Sequence, Tuple +from typing import Any, Literal, Optional, Sequence, Tuple, Union, cast from pydantic import BaseModel, Field from playwright.sync_api import sync_playwright from ..types.tool import Tool -from ..client import Instance +from ..client import BaseInstance, UbuntuInstance, BrowserInstance from ..instance.types import Action, Command @@ -32,11 +32,13 @@ class ComputerToolParameters(BaseModel): class ComputerTool(Tool): - """A computer interaction tool that allows the agent to control mouse and keyboard.""" + """A computer interaction tool that allows the agent to control mouse and keyboard. - _instance: Instance + Available for Ubuntu, Browser, and Windows instances.""" - def __init__(self, instance: Instance) -> None: + _instance: BaseInstance + + def __init__(self, instance: BaseInstance) -> None: super().__init__( name="computer", description="Control mouse and keyboard for computer interaction", @@ -74,11 +76,13 @@ class EditToolParameters(BaseModel): class EditTool(Tool): - """A filesystem editor tool that allows the agent to view, create, and edit files.""" + """A filesystem editor tool that allows the agent to view, create, and edit files. + + Available for Ubuntu instances.""" - _instance: Instance + _instance: UbuntuInstance - def __init__(self, instance: Instance) -> None: + def __init__(self, instance: UbuntuInstance) -> None: super().__init__( name="str_replace_editor", description="View, create, and edit files in the filesystem", @@ -107,11 +111,13 @@ class BashToolParameters(BaseModel): class BashTool(Tool): - """A shell execution tool that allows the agent to run bash commands.""" + """A shell execution tool that allows the agent to run bash commands. 
+ + Available for Ubuntu instances.""" - _instance: Instance + _instance: UbuntuInstance - def __init__(self, instance: Instance) -> None: + def __init__(self, instance: UbuntuInstance) -> None: super().__init__( name="bash", description="Execute bash commands in the shell", @@ -170,11 +176,13 @@ class BrowserToolParameters(BaseModel): class BrowserTool(Tool): - """A browser interaction tool that allows the agent to interact with a browser.""" + """A browser interaction tool that allows the agent to interact with a browser. - _instance: Instance + Available for Ubuntu and Browser instances.""" - def __init__(self, instance: Instance) -> None: + _instance: Union[UbuntuInstance, BrowserInstance] + + def __init__(self, instance: Union[UbuntuInstance, BrowserInstance]) -> None: super().__init__( name="browser", description="Interact with a browser for web scraping and automation", @@ -192,7 +200,12 @@ def __call__(self, **kwargs: Any) -> Any: timeout = params.timeout or 30000 attribute = params.attribute - cdp_url = self._instance.browser.get_cdp_url().cdp_url + # Get CDP URL based on instance type + if isinstance(self._instance, UbuntuInstance): + cdp_url = self._instance.browser.get_cdp_url().cdp_url + else: + cdp_url = self._instance.get_cdp_url().cdp_url + if cdp_url is None: raise ValueError("CDP URL is not available, start the browser first") diff --git a/src/scrapybara/types/__init__.py b/src/scrapybara/types/__init__.py index 7998c7b..5c318a2 100644 --- a/src/scrapybara/types/__init__.py +++ b/src/scrapybara/types/__init__.py @@ -1,5 +1,3 @@ -# This file was auto-generated by Fern from our API Definition. 
- from .auth_state_response import AuthStateResponse from .browser_authenticate_response import BrowserAuthenticateResponse from .browser_get_cdp_url_response import BrowserGetCdpUrlResponse @@ -25,6 +23,23 @@ from .stop_instance_response import StopInstanceResponse from .validation_error import ValidationError from .validation_error_loc_item import ValidationErrorLocItem +from .act import ( + TextPart, + ImagePart, + ToolCallPart, + ToolResultPart, + UserMessage, + AssistantMessage, + ToolMessage, + Message, + Model, + SingleActRequest, + TokenUsage, + SingleActResponse, + Step, + ActResponse, +) +from .tool import Tool, ApiTool __all__ = [ "AuthStateResponse", @@ -52,4 +67,20 @@ "StopInstanceResponse", "ValidationError", "ValidationErrorLocItem", + "TextPart", + "ImagePart", + "ToolCallPart", + "ToolResultPart", + "UserMessage", + "AssistantMessage", + "ToolMessage", + "Message", + "Model", + "SingleActRequest", + "TokenUsage", + "SingleActResponse", + "Step", + "ActResponse", + "Tool", + "ApiTool", ] diff --git a/tests/custom/test_client.py b/tests/custom/test_client.py index 4747ba0..70cb242 100644 --- a/tests/custom/test_client.py +++ b/tests/custom/test_client.py @@ -3,48 +3,93 @@ import os from scrapybara.anthropic import Anthropic -from scrapybara.prompts import SYSTEM_PROMPT -from scrapybara.tools import BashTool, BrowserTool, ComputerTool, EditTool +from scrapybara.prompts import ( + BROWSER_SYSTEM_PROMPT, + UBUNTU_SYSTEM_PROMPT, + WINDOWS_SYSTEM_PROMPT, +) +from scrapybara.tools import BashTool, ComputerTool, EditTool + + +class YCStats(BaseModel): + number_of_startups: int + combined_valuation: int def test_client() -> None: if os.getenv("SCRAPYBARA_API_KEY") is None: raise ValueError("SCRAPYBARA_API_KEY is not set") - client = Scrapybara() - instance = client.start() - assert instance.id is not None + client = Scrapybara( + base_url="https://scrapybara-api-alpha-47247185186.us-central1.run.app" + ) + + # Ubuntu test + ubuntu_instance = 
client.start_ubuntu() + assert ubuntu_instance.id is not None instances = client.get_instances() assert len(instances) > 0 - screenshot_response = instance.screenshot() + screenshot_response = ubuntu_instance.screenshot() assert screenshot_response.base_64_image is not None - instance.browser.start() - cdp_url = instance.browser.get_cdp_url() + ubuntu_instance.browser.start() + cdp_url = ubuntu_instance.browser.get_cdp_url() assert cdp_url is not None - - class YCStats(BaseModel): - number_of_startups: int - combined_valuation: int - response = client.act( model=Anthropic(), - system=SYSTEM_PROMPT, + system=UBUNTU_SYSTEM_PROMPT, prompt="Go to the YC website and get the number of funded startups and combined valuation", tools=[ - ComputerTool(instance), - BashTool(instance), - EditTool(instance), - BrowserTool(instance), + ComputerTool(ubuntu_instance), + BashTool(ubuntu_instance), + EditTool(ubuntu_instance), ], schema=YCStats, ) print(response) + assert response.output is not None + assert response.output.number_of_startups is not None + assert response.output.combined_valuation is not None + ubuntu_instance.browser.stop() + ubuntu_instance.stop() + # Browser test + browser_instance = client.start_browser() + assert browser_instance.id is not None + screenshot_response = browser_instance.screenshot() + assert screenshot_response.base_64_image is not None + cdp_url = browser_instance.get_cdp_url() + assert cdp_url is not None + response = client.act( + model=Anthropic(), + system=BROWSER_SYSTEM_PROMPT, + prompt="Go to the YC website and get the number of funded startups and combined valuation", + tools=[ + ComputerTool(browser_instance), + ], + ) + print(response) assert response.output is not None assert response.output.number_of_startups is not None assert response.output.combined_valuation is not None + browser_instance.stop() - instance.browser.stop() - instance.stop() + # Windows test + windows_instance = client.start_windows() + assert windows_instance.id is 
not None + screenshot_response = windows_instance.screenshot() + assert screenshot_response.base_64_image is not None + response = client.act( + model=Anthropic(), + system=WINDOWS_SYSTEM_PROMPT, + prompt="Go to the YC website and get the number of funded startups and combined valuation", + tools=[ + ComputerTool(windows_instance), + ], + ) + print(response) + assert response.output is not None + assert response.output.number_of_startups is not None + assert response.output.combined_valuation is not None + windows_instance.stop() if __name__ == "__main__": From 5581df34c0446445149509aae92ad103a5bcc4ea Mon Sep 17 00:00:00 2001 From: Justin Sun Date: Sun, 26 Jan 2025 23:10:27 -0800 Subject: [PATCH 3/7] break tests into 3 functions --- tests/custom/test_client.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/tests/custom/test_client.py b/tests/custom/test_client.py index 70cb242..84d7c37 100644 --- a/tests/custom/test_client.py +++ b/tests/custom/test_client.py @@ -16,15 +16,19 @@ class YCStats(BaseModel): combined_valuation: int -def test_client() -> None: +def _check_api_key() -> None: if os.getenv("SCRAPYBARA_API_KEY") is None: raise ValueError("SCRAPYBARA_API_KEY is not set") + + +def test_ubuntu() -> None: + _check_api_key() client = Scrapybara( base_url="https://scrapybara-api-alpha-47247185186.us-central1.run.app" ) - # Ubuntu test ubuntu_instance = client.start_ubuntu() + print(ubuntu_instance.get_stream_url().stream_url) assert ubuntu_instance.id is not None instances = client.get_instances() assert len(instances) > 0 @@ -43,6 +47,7 @@ def test_client() -> None: EditTool(ubuntu_instance), ], schema=YCStats, + on_step=lambda step: print(step), ) print(response) assert response.output is not None @@ -51,8 +56,15 @@ def test_client() -> None: ubuntu_instance.browser.stop() ubuntu_instance.stop() - # Browser test + +def test_browser() -> None: + _check_api_key() + client = Scrapybara( + 
base_url="https://scrapybara-api-alpha-47247185186.us-central1.run.app" + ) + browser_instance = client.start_browser() + print(browser_instance.get_stream_url().stream_url) assert browser_instance.id is not None screenshot_response = browser_instance.screenshot() assert screenshot_response.base_64_image is not None @@ -65,6 +77,7 @@ def test_client() -> None: tools=[ ComputerTool(browser_instance), ], + on_step=lambda step: print(step), ) print(response) assert response.output is not None @@ -72,8 +85,15 @@ def test_client() -> None: assert response.output.combined_valuation is not None browser_instance.stop() - # Windows test + +def test_windows() -> None: + _check_api_key() + client = Scrapybara( + base_url="https://scrapybara-api-alpha-47247185186.us-central1.run.app" + ) + windows_instance = client.start_windows() + print(windows_instance.get_stream_url().stream_url) assert windows_instance.id is not None screenshot_response = windows_instance.screenshot() assert screenshot_response.base_64_image is not None @@ -84,6 +104,7 @@ def test_client() -> None: tools=[ ComputerTool(windows_instance), ], + on_step=lambda step: print(step), ) print(response) assert response.output is not None @@ -93,4 +114,6 @@ def test_client() -> None: if __name__ == "__main__": - test_client() + test_ubuntu() + test_browser() + test_windows() From 16205191b3be5dbdec7a02252f466bc0db9f2bd3 Mon Sep 17 00:00:00 2001 From: Justin Sun Date: Sun, 26 Jan 2025 23:55:57 -0800 Subject: [PATCH 4/7] skip windows test --- tests/custom/test_client.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/custom/test_client.py b/tests/custom/test_client.py index 84d7c37..8297803 100644 --- a/tests/custom/test_client.py +++ b/tests/custom/test_client.py @@ -1,6 +1,7 @@ from pydantic import BaseModel from scrapybara import Scrapybara import os +import pytest from scrapybara.anthropic import Anthropic from scrapybara.prompts import ( @@ -47,7 +48,7 @@ def test_ubuntu() -> 
None: EditTool(ubuntu_instance), ], schema=YCStats, - on_step=lambda step: print(step), + on_step=lambda step: print(step.text, step.tool_calls), ) print(response) assert response.output is not None @@ -77,7 +78,8 @@ def test_browser() -> None: tools=[ ComputerTool(browser_instance), ], - on_step=lambda step: print(step), + schema=YCStats, + on_step=lambda step: print(step.text, step.tool_calls), ) print(response) assert response.output is not None @@ -86,6 +88,7 @@ def test_browser() -> None: browser_instance.stop() +@pytest.mark.skip() def test_windows() -> None: _check_api_key() client = Scrapybara( @@ -104,7 +107,8 @@ def test_windows() -> None: tools=[ ComputerTool(windows_instance), ], - on_step=lambda step: print(step), + schema=YCStats, + on_step=lambda step: print(step.text, step.tool_calls), ) print(response) assert response.output is not None @@ -116,4 +120,4 @@ def test_windows() -> None: if __name__ == "__main__": test_ubuntu() test_browser() - test_windows() + # test_windows() From f9ed48f69c7deeadb07634da8362ac5b8ce1f1fc Mon Sep 17 00:00:00 2001 From: Justin Sun Date: Mon, 27 Jan 2025 00:04:36 -0800 Subject: [PATCH 5/7] update prompts --- src/scrapybara/prompts/__init__.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/scrapybara/prompts/__init__.py b/src/scrapybara/prompts/__init__.py index 16321a7..e1cdf6e 100644 --- a/src/scrapybara/prompts/__init__.py +++ b/src/scrapybara/prompts/__init__.py @@ -3,10 +3,10 @@ UBUNTU_SYSTEM_PROMPT = f""" * You have access to an Ubuntu virtual machine with internet connectivity * You can install Ubuntu applications using the bash tool (use curl over wget) -* To run GUI applications with the bash tool: - Use a subshell, e.g.
"(DISPLAY=:1 xterm &)", make sure to include the parantheses - GUI apps will appear but may take time to load - confirm with an extra screenshot -* Start Chromium (default browser) via the bash tool "(DISPLAY=:1 chromium &)", but interact with it visually via the computer tool +* To run GUI applications with the bash tool, use a subshell, e.g. "(DISPLAY=:1 xterm &)", make sure to include the parentheses +* GUI apps will appear but may take time to load - confirm with an extra screenshot +* Chromium is the default browser +* Start Chromium via the bash tool "(DISPLAY=:1 chromium &)", but interact with it visually via the computer tool * If you need to read a full PDF after initial screenshot - Download with curl - Convert to text using pdftotext @@ -39,8 +39,9 @@ BROWSER_SYSTEM_PROMPT = f""" -* You have access to a browser instance with internet connectivity -* You can interact with web pages using the computer tool and browser tool +* You have access to a Chromium browser instance with internet connectivity +* Chromium should already be open and running +* You can interact with web pages using the computer tool * When viewing pages: - Zoom out to see full content, or - Scroll to ensure you see everything @@ -66,6 +67,8 @@ WINDOWS_SYSTEM_PROMPT = f""" * You have access to a Windows virtual machine with internet connectivity * You can interact with the Windows desktop using the computer tool +* GUI apps will appear but may take time to load - confirm with an extra screenshot +* Edge is the default browser * When viewing pages: - Zoom out to see full content, or - Scroll to ensure you see everything From 5b03917cd679baa7061a9dcc573807054754336b Mon Sep 17 00:00:00 2001 From: Justin Sun Date: Mon, 27 Jan 2025 00:13:14 -0800 Subject: [PATCH 6/7] update readme --- .fernignore | 3 ++- README.md | 17 +++++++---------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/.fernignore b/.fernignore index f77513a..e666a72 100644 --- a/.fernignore +++ b/.fernignore
@@ -8,4 +8,5 @@ src/scrapybara/types/__init__.py src/scrapybara/types/act.py src/scrapybara/types/tool.py tests/custom/test_client.py -.github/workflows/ci.yml \ No newline at end of file +.github/workflows/ci.yml +README.md \ No newline at end of file diff --git a/README.md b/README.md index b279a32..69065d4 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ pip install scrapybara ## Reference -A full reference for this library is available [here](./reference.md). +Please refer to [docs](https://docs.scrapybara.com) for more information. ## Requirements @@ -22,10 +22,6 @@ A full reference for this library is available [here](./reference.md). - `anthropic` ^0.39.0 - `pydantic` ^2.0.0 -## License - -This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. - ## Usage Instantiate and use the client with the following: @@ -36,7 +32,7 @@ from scrapybara import Scrapybara client = Scrapybara( api_key="YOUR_API_KEY", ) -client.start() +client.start_ubuntu() ``` ## Async Client @@ -54,7 +50,7 @@ client = AsyncScrapybara( async def main() -> None: - await client.start() + await client.start_ubuntu() asyncio.run(main()) @@ -69,7 +65,7 @@ will be thrown. from scrapybara.core.api_error import ApiError try: - client.start(...) + client.start_ubuntu() except ApiError as e: print(e.status_code) print(e.body) @@ -92,7 +88,7 @@ A request is deemed retriable when any of the following HTTP status codes is ret Use the `max_retries` request option to configure this behavior. ```python -client.start(..., request_options={ +client.start_ubuntu(..., request_options={ "max_retries": 1 }) ``` @@ -112,7 +108,7 @@ client = Scrapybara( # Override timeout for a specific method -client.start(..., request_options={ +client.start_ubuntu(..., request_options={ "timeout_in_seconds": 1 }) ``` @@ -121,6 +117,7 @@ client.start(..., request_options={ You can override the `httpx` client to customize it for your use-case. 
Some common use-cases include support for proxies and transports. + ```python import httpx from scrapybara import Scrapybara From 7e41ca6c781fd1c7c85329d528cd62f8551465f0 Mon Sep 17 00:00:00 2001 From: Justin Sun Date: Mon, 27 Jan 2025 00:55:38 -0800 Subject: [PATCH 7/7] reset base url --- tests/custom/test_client.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/custom/test_client.py b/tests/custom/test_client.py index 8297803..ab01f81 100644 --- a/tests/custom/test_client.py +++ b/tests/custom/test_client.py @@ -24,9 +24,7 @@ def _check_api_key() -> None: def test_ubuntu() -> None: _check_api_key() - client = Scrapybara( - base_url="https://scrapybara-api-alpha-47247185186.us-central1.run.app" - ) + client = Scrapybara() ubuntu_instance = client.start_ubuntu() print(ubuntu_instance.get_stream_url().stream_url) @@ -60,9 +58,7 @@ def test_ubuntu() -> None: def test_browser() -> None: _check_api_key() - client = Scrapybara( - base_url="https://scrapybara-api-alpha-47247185186.us-central1.run.app" - ) + client = Scrapybara() browser_instance = client.start_browser() print(browser_instance.get_stream_url().stream_url) @@ -91,9 +87,7 @@ def test_browser() -> None: @pytest.mark.skip() def test_windows() -> None: _check_api_key() - client = Scrapybara( - base_url="https://scrapybara-api-alpha-47247185186.us-central1.run.app" - ) + client = Scrapybara() windows_instance = client.start_windows() print(windows_instance.get_stream_url().stream_url)