From b6a7e6554928816a40419211966ed0be9a50d8a5 Mon Sep 17 00:00:00 2001 From: fern-api <115122769+fern-api[bot]@users.noreply.github.com> Date: Mon, 13 Jan 2025 22:42:15 +0000 Subject: [PATCH 1/6] SDK regeneration --- pyproject.toml | 2 +- src/scrapybara/core/client_wrapper.py | 2 +- src/scrapybara/types/tool.py | 11 ----------- 3 files changed, 2 insertions(+), 13 deletions(-) delete mode 100644 src/scrapybara/types/tool.py diff --git a/pyproject.toml b/pyproject.toml index 178562f..ee5d06a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "scrapybara" -version = "2.1.0" +version = "2.1.1" description = "" readme = "README.md" authors = [] diff --git a/src/scrapybara/core/client_wrapper.py b/src/scrapybara/core/client_wrapper.py index fe993b6..a2eb2f4 100644 --- a/src/scrapybara/core/client_wrapper.py +++ b/src/scrapybara/core/client_wrapper.py @@ -16,7 +16,7 @@ def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { "X-Fern-Language": "Python", "X-Fern-SDK-Name": "scrapybara", - "X-Fern-SDK-Version": "2.1.0", + "X-Fern-SDK-Version": "2.1.1", } headers["x-api-key"] = self.api_key return headers diff --git a/src/scrapybara/types/tool.py b/src/scrapybara/types/tool.py deleted file mode 100644 index 0a79b94..0000000 --- a/src/scrapybara/types/tool.py +++ /dev/null @@ -1,11 +0,0 @@ -from typing import Any, Dict, Optional -from pydantic import BaseModel - - -class Tool(BaseModel): - name: str - description: Optional[str] = None - parameters: Optional[Dict[str, Any]] = None - - def __call__(self, **kwargs: Any) -> Any: - raise NotImplementedError("Tool.__call__ must be implemented by subclasses") From 3d6cc1f1cb3feb892bd5f43105c42fa9495f98a7 Mon Sep 17 00:00:00 2001 From: Justin Sun Date: Mon, 13 Jan 2025 14:56:49 -0800 Subject: [PATCH 2/6] add SYSTEM_PROMPT + image_result --- .fernignore | 2 ++ src/scrapybara/prompts/__init__.py | 43 +++++++++++++++++++++++++++++ src/scrapybara/tools/__init__.py | 18 +++++++++++- src/scrapybara/types/tool.py | 11 ++++++++ tests/custom/test_client.py | 44 +----------------------------- 5 files changed, 74 insertions(+), 44 deletions(-) create mode 100644 src/scrapybara/prompts/__init__.py create mode 100644 src/scrapybara/types/tool.py diff --git a/.fernignore b/.fernignore index 17fad2b..ced6301 100644 --- a/.fernignore +++ b/.fernignore @@ -2,7 +2,9 @@ src/scrapybara/client.py src/scrapybara/anthropic/ +src/scrapybara/prompts/ src/scrapybara/tools/ src/scrapybara/types/act.py +src/scrapybara/types/tool.py tests/custom/test_client.py .github/workflows/ci.yml \ No newline at end of file diff --git a/src/scrapybara/prompts/__init__.py b/src/scrapybara/prompts/__init__.py new file mode 100644 index 0000000..3d9668b --- /dev/null +++ b/src/scrapybara/prompts/__init__.py @@ -0,0 +1,43 @@ +from datetime import datetime + +SYSTEM_PROMPT = f""" +* You have access to an Ubuntu virtual machine with internet connectivity +* You can install Ubuntu applications using the bash tool (use curl over wget) +* To run GUI applications with the bash tool: + - Use a subshell, e.g. "(DISPLAY=:1 xterm &)", make sure to include the parantheses + - GUI apps will appear but may take time to load - confirm with an extra screenshot +* Start Chromium (default browser) via the bash tool "(DISPLAY=:1 chromium &)", but interact with it visually via the computer tool +* In Chromium, click the address bar directly to enter URLs/searches +* If you need to read a full PDF after initial screenshot + - Download with curl + - Convert to text using pdftotext + - Read the text file with StrReplaceEditTool +* If you need to read a HTML file: + - Open with the address bar in Chromium +* For commands with large text output: + - Redirect to a temp file + - Use str_replace_editor or grep with context (-B and -A flags) to view output +* When viewing pages: + - Zoom out to see full content, or + - Scroll to ensure you see everything +* Computer function calls take time, string together calls when possible +* You are allowed to take actions on behalf of the user on sites that are authenticated +* If the user asks you to access a site, assume that the user has already authenticated +* For accessibility, you can use and create accounts for communication tools such as email, Discord, Telegram on behalf of the user - e.g. join Discord channels, send emails, etc. +* To login additional sites, ask the user to use Auth Contexts or the Interactive Desktop +* Today's date is {datetime.today().strftime('%A, %B %-d, %Y')} + + + +* If first screenshot shows black screen: + - Click mouse in screen center + - Take another screenshot +* When interacting with a field, always clear the field first using "ctrl+A" and "delete" + - Take an extra screenshot after clicking "enter" to confirm the field is properly submitted and move the mouse to the next field +* If given a complex task, break down into smaller steps and ask the user for details only if necessary +* Research facts with Google searches in Chromium +* Read through web pages thoroughly by scrolling down till the end +* Use more generalized websites during research, e.g. use Google Flights instead of United when searching for flights, only use United when finalizing bookings +* Wait for actions to complete (examine previous screenshots) before taking another action +* Be concise! +""" diff --git a/src/scrapybara/tools/__init__.py b/src/scrapybara/tools/__init__.py index 31e6110..6ceb10b 100644 --- a/src/scrapybara/tools/__init__.py +++ b/src/scrapybara/tools/__init__.py @@ -1,3 +1,5 @@ +import base64 +import json from typing import Any from playwright.sync_api import sync_playwright @@ -179,7 +181,9 @@ def __call__(self, **kwargs: Any) -> Any: return True elif command == "screenshot": - return page.screenshot(type="png") + return image_result( + base64.b64encode(page.screenshot(type="png")).decode("utf-8") + ) elif command == "get_text": element = page.wait_for_selector(selector, timeout=timeout) @@ -201,3 +205,15 @@ def __call__(self, **kwargs: Any) -> Any: finally: browser.close() + + +def image_result(base64: str) -> str: + """Return an image result that is interpretable by the model.""" + return json.dumps( + { + "output": "", + "error": "", + "base64_image": base64, + "system": None, + } + ) diff --git a/src/scrapybara/types/tool.py b/src/scrapybara/types/tool.py new file mode 100644 index 0000000..0a79b94 --- /dev/null +++ b/src/scrapybara/types/tool.py @@ -0,0 +1,11 @@ +from typing import Any, Dict, Optional +from pydantic import BaseModel + + +class Tool(BaseModel): + name: str + description: Optional[str] = None + parameters: Optional[Dict[str, Any]] = None + + def __call__(self, **kwargs: Any) -> Any: + raise NotImplementedError("Tool.__call__ must be implemented by subclasses") diff --git a/tests/custom/test_client.py b/tests/custom/test_client.py index 3e3208d..70f026a 100644 --- a/tests/custom/test_client.py +++ b/tests/custom/test_client.py @@ -2,50 +2,8 @@ import os from scrapybara.anthropic import Anthropic +from scrapybara.prompts import SYSTEM_PROMPT from scrapybara.tools import BashTool, BrowserTool, ComputerTool, EditTool -from datetime import datetime - -SYSTEM_PROMPT = f""" -* You have access to an Ubuntu virtual machine with internet connectivity -* You can install Ubuntu applications using the bash tool (use curl over wget) -* To run GUI applications with the bash tool: - - Use a subshell, e.g. "(DISPLAY=:1 xterm &)", make sure to include the parantheses - - GUI apps will appear but may take time to load - confirm with an extra screenshot -* Start Chromium (default browser) via the bash tool "(DISPLAY=:1 chromium &)", but interact with it visually via the computer tool -* In Chromium, click the address bar directly to enter URLs/searches -* If you need to read a full PDF after initial screenshot - - Download with curl - - Convert to text using pdftotext - - Read the text file with StrReplaceEditTool -* If you need to read a HTML file: - - Open with the address bar in Chromium -* For commands with large text output: - - Redirect to a temp file - - Use str_replace_editor or grep with context (-B and -A flags) to view output -* When viewing pages: - - Zoom out to see full content, or - - Scroll to ensure you see everything -* Computer function calls take time, string together calls when possible -* You are allowed to take actions on behalf of the user on sites that are authenticated -* If the user asks you to access a site, assume that the user has already authenticated -* For accessibility, you can use and create accounts for communication tools such as email, Discord, Telegram on behalf of the user - e.g. join Discord channels, send emails, etc. -* To login additional sites, ask the user to use Auth Contexts or the Interactive Desktop -* Today's date is {datetime.today().strftime('%A, %B %-d, %Y')} - - - -* If first screenshot shows black screen: - - Click mouse in screen center - - Take another screenshot -* When interacting with a field, always clear the field first using "ctrl+A" and "delete" - - Take an extra screenshot after clicking "enter" to confirm the field is properly submitted and move the mouse to the next field -* If given a complex task, break down into smaller steps and ask the user for details only if necessary -* Research facts with Google searches in Chromium -* Read through web pages thoroughly by scrolling down till the end -* Use more generalized websites during research, e.g. use Google Flights instead of United when searching for flights, only use United when finalizing bookings -* Wait for actions to complete (examine previous screenshots) before taking another action -* Be concise! -""" def test_client() -> None: From c6a9deb143ae8b8382613c154e68789c0b319b2e Mon Sep 17 00:00:00 2001 From: Justin Sun Date: Thu, 16 Jan 2025 15:21:14 -0800 Subject: [PATCH 3/6] structured outputs + ActResponse --- src/scrapybara/client.py | 159 ++++++++++++++++++++++++++----- src/scrapybara/tools/__init__.py | 24 ++--- src/scrapybara/types/act.py | 17 +++- tests/custom/test_client.py | 17 +++- 4 files changed, 173 insertions(+), 44 deletions(-) diff --git a/src/scrapybara/client.py b/src/scrapybara/client.py index 7db66c6..8f5f7e6 100644 --- a/src/scrapybara/client.py +++ b/src/scrapybara/client.py @@ -5,6 +5,8 @@ Dict, List, Sequence, + Type, + TypeVar, Union, Literal, Generator, @@ -16,6 +18,7 @@ import asyncio import httpx +from pydantic import BaseModel, ConfigDict from scrapybara.core.http_client import AsyncHttpClient, HttpClient from scrapybara.environment import ScrapybaraEnvironment @@ -43,8 +46,8 @@ StopInstanceResponse, ) from .types.act import ( - ActRequest, - ActResponse, + SingleActRequest, + SingleActResponse, Message, Model, TextPart, @@ -55,11 +58,38 @@ UserMessage, AssistantMessage, Step, + ActResponse, + TokenUsage, ) from .base_client import BaseClient, AsyncBaseClient from .instance.types import Action, Command OMIT = typing.cast(typing.Any, ...) +SchemaT = TypeVar("SchemaT", bound=BaseModel) + + +class StructuredOutputTool(Tool): + """A tool that allows the agent to output structured data.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + _model: Type[BaseModel] + + def __init__(self, model: Type[BaseModel]): + schema = model.model_json_schema() + super().__init__( + name="structured_output", + description="Output structured data according to the provided schema parameters. Only use this tool at the end of your task. The output data is final and will be passed directly back to the user.", + parameters={ + "type": "object", + "properties": schema.get("properties", {}), + "required": schema.get("required", []), + }, + ) + self._model = model + + def __call__(self, **kwargs: Any) -> Dict[str, Any]: + validated = self._model.model_validate(kwargs) + return validated.model_dump() class Browser: @@ -853,46 +883,56 @@ def act( self, *, model: Model, + tools: Optional[List[Tool]] = None, system: Optional[str] = None, prompt: Optional[str] = None, messages: Optional[List[Message]] = None, - tools: Optional[List[Tool]] = None, + schema: Optional[Type[SchemaT]] = None, on_step: Optional[Callable[[Step], None]] = None, temperature: Optional[float] = None, max_tokens: Optional[int] = None, request_options: Optional[RequestOptions] = None, - ) -> List[Message]: + ) -> ActResponse[SchemaT]: """ Run an agent loop with the given tools and model, returning all messages at the end. Args: - tools: List of tools available to the agent model: The model to use for generating responses + tools: List of tools available to the agent system: System prompt for the agent prompt: Initial user prompt messages: List of messages to start with + schema: Optional Pydantic model class to structure the final output on_step: Callback for each step of the conversation temperature: Optional temperature parameter for the model max_tokens: Optional max tokens parameter for the model request_options: Optional request configuration Returns: - List of all messages from the conversation + ActResponse containing all messages, steps, final output (structured according to schema if provided), and token usage """ result_messages: List[Message] = [] + steps: List[Step] = [] + total_prompt_tokens = 0 + total_completion_tokens = 0 + total_tokens = 0 + if messages: result_messages.extend(messages) + for step in self.act_stream( - tools=tools, model=model, + tools=tools, system=system, prompt=prompt, messages=messages, + schema=schema, + on_step=on_step, temperature=temperature, max_tokens=max_tokens, - on_step=on_step, request_options=request_options, ): + steps.append(step) assistant_msg = AssistantMessage( content=[TextPart(text=step.text)] + (step.tool_calls or []) ) @@ -900,16 +940,40 @@ def act( if step.tool_results: tool_msg = ToolMessage(content=step.tool_results) result_messages.append(tool_msg) - return result_messages + + if step.usage: + total_prompt_tokens += step.usage.prompt_tokens + total_completion_tokens += step.usage.completion_tokens + total_tokens += step.usage.total_tokens + + text = steps[-1].text if steps else None + if schema: + output = ( + steps[-1].tool_results[-1].result if steps[-1].tool_results else None + ) + output = schema.model_validate(output) + + usage = None + if total_tokens > 0: + usage = TokenUsage( + prompt_tokens=total_prompt_tokens, + completion_tokens=total_completion_tokens, + total_tokens=total_tokens, + ) + + return ActResponse( + messages=result_messages, steps=steps, text=text, output=output, usage=usage + ) def act_stream( self, *, model: Model, + tools: Optional[List[Tool]] = None, system: Optional[str] = None, prompt: Optional[str] = None, messages: Optional[List[Message]] = None, - tools: Optional[List[Tool]] = None, + schema: Optional[Type[BaseModel]] = None, on_step: Optional[Callable[[Step], None]] = None, temperature: Optional[float] = None, max_tokens: Optional[int] = None, @@ -919,11 +983,12 @@ def act_stream( Run an interactive agent loop with the given tools and model. Args: - tools: List of tools available to the agent model: The model to use for generating responses + tools: List of tools available to the agent system: System prompt for the agent prompt: Initial user prompt messages: List of messages to start with + schema: Optional Pydantic model class to structure the final output on_step: Callback for each step of the conversation temperature: Optional temperature parameter for the model max_tokens: Optional max tokens parameter for the model @@ -942,8 +1007,11 @@ def act_stream( current_tools = [] if tools is None else list(tools) + if schema: + current_tools.append(StructuredOutputTool(schema)) + while True: - request = ActRequest( + request = SingleActRequest( model=model, system=system, messages=current_messages, @@ -963,7 +1031,7 @@ def act_stream( if not 200 <= response.status_code < 300: raise ApiError(status_code=response.status_code, body=response.json()) - act_response = ActResponse.model_validate(response.json()) + act_response = SingleActResponse.model_validate(response.json()) current_messages.append(act_response.message) # Extract text from assistant message @@ -988,14 +1056,17 @@ def act_stream( usage=act_response.usage, ) - # Check if we should continue the loop + # Check if there are tool calls has_tool_calls = bool(tool_calls) + has_structured_output = False if has_tool_calls: tool_results: List[ToolResultPart] = [] for part in tool_calls: tool = next(t for t in current_tools if t.name == part.tool_name) try: + if tool.name == "structured_output" and schema: + has_structured_output = True result = tool(**part.args) tool_results.append( ToolResultPart( @@ -1021,7 +1092,7 @@ def act_stream( on_step(step) yield step - if not has_tool_calls: + if not has_tool_calls or has_structured_output: break @@ -1117,16 +1188,17 @@ async def get_auth_states( async def act( self, *, + tools: Optional[List[Tool]] = None, model: Model, system: Optional[str] = None, prompt: Optional[str] = None, messages: Optional[List[Message]] = None, - tools: Optional[List[Tool]] = None, + schema: Optional[Type[SchemaT]] = None, on_step: Optional[Callable[[Step], None]] = None, temperature: Optional[float] = None, max_tokens: Optional[int] = None, request_options: Optional[RequestOptions] = None, - ) -> List[Message]: + ) -> ActResponse[SchemaT]: """ Run an agent loop with the given tools and model, returning all messages at the end. @@ -1136,28 +1208,37 @@ async def act( system: System prompt for the agent prompt: Initial user prompt messages: List of messages to start with + schema: Optional Pydantic model class to structure the final output on_step: Callback for each step of the conversation temperature: Optional temperature parameter for the model max_tokens: Optional max tokens parameter for the model request_options: Optional request configuration Returns: - List of all messages from the conversation + ActResponse containing all messages, steps, final output (structured according to schema if provided), and token usage """ result_messages: List[Message] = [] + steps: List[Step] = [] + total_prompt_tokens = 0 + total_completion_tokens = 0 + total_tokens = 0 + if messages: result_messages.extend(messages) + async for step in self.act_stream( tools=tools, model=model, system=system, prompt=prompt, messages=messages, + schema=schema, temperature=temperature, max_tokens=max_tokens, on_step=on_step, request_options=request_options, ): + steps.append(step) assistant_msg = AssistantMessage( content=[TextPart(text=step.text)] + (step.tool_calls or []) ) @@ -1165,16 +1246,40 @@ async def act( if step.tool_results: tool_msg = ToolMessage(content=step.tool_results) result_messages.append(tool_msg) - return result_messages + + if step.usage: + total_prompt_tokens += step.usage.prompt_tokens + total_completion_tokens += step.usage.completion_tokens + total_tokens += step.usage.total_tokens + + text = steps[-1].text if steps else None + if schema: + output = ( + steps[-1].tool_results[-1].result if steps[-1].tool_results else None + ) + output = schema.model_validate(output) + + usage = None + if total_tokens > 0: + usage = TokenUsage( + prompt_tokens=total_prompt_tokens, + completion_tokens=total_completion_tokens, + total_tokens=total_tokens, + ) + + return ActResponse( + messages=result_messages, steps=steps, text=text, output=output, usage=usage + ) async def act_stream( self, *, model: Model, + tools: Optional[List[Tool]] = None, system: Optional[str] = None, prompt: Optional[str] = None, messages: Optional[List[Message]] = None, - tools: Optional[List[Tool]] = None, + schema: Optional[Type[SchemaT]] = None, on_step: Optional[Callable[[Step], None]] = None, temperature: Optional[float] = None, max_tokens: Optional[int] = None, @@ -1184,11 +1289,12 @@ async def act_stream( Run an interactive agent loop with the given tools and model. Args: - tools: List of tools available to the agent model: The model to use for generating responses + tools: List of tools available to the agent system: System prompt for the agent prompt: Initial user prompt messages: List of messages to start with + schema: Optional Pydantic model class to structure the final output on_step: Callback for each step of the conversation temperature: Optional temperature parameter for the model max_tokens: Optional max tokens parameter for the model @@ -1208,7 +1314,7 @@ async def act_stream( current_tools = [] if tools is None else list(tools) while True: - request = ActRequest( + request = SingleActRequest( model=model, system=system, messages=current_messages, @@ -1228,7 +1334,7 @@ async def act_stream( if not 200 <= response.status_code < 300: raise ApiError(status_code=response.status_code, body=response.json()) - act_response = ActResponse.model_validate(response.json()) + act_response = SingleActResponse.model_validate(response.json()) current_messages.append(act_response.message) # Extract text from assistant message @@ -1253,14 +1359,17 @@ async def act_stream( usage=act_response.usage, ) - # Check if we should continue the loop + # Check if there are tool calls has_tool_calls = bool(tool_calls) + has_structured_output = False if has_tool_calls: tool_results: List[ToolResultPart] = [] for part in tool_calls: tool = next(t for t in current_tools if t.name == part.tool_name) try: + if tool.name == "structured_output" and schema: + has_structured_output = True loop = asyncio.get_event_loop() result = await loop.run_in_executor( None, lambda: tool(**part.args) @@ -1289,5 +1398,5 @@ async def act_stream( on_step(step) yield step - if not has_tool_calls: + if not has_tool_calls or has_structured_output: break diff --git a/src/scrapybara/tools/__init__.py b/src/scrapybara/tools/__init__.py index 6ceb10b..f31379b 100644 --- a/src/scrapybara/tools/__init__.py +++ b/src/scrapybara/tools/__init__.py @@ -7,6 +7,18 @@ from ..client import Instance +def image_result(base64: str) -> str: + """Return an image result that is interpretable by the model.""" + return json.dumps( + { + "output": "", + "error": "", + "base64_image": base64, + "system": None, + } + ) + + class ComputerTool(Tool): """A computer interaction tool that allows the agent to control mouse and keyboard.""" @@ -205,15 +217,3 @@ def __call__(self, **kwargs: Any) -> Any: finally: browser.close() - - -def image_result(base64: str) -> str: - """Return an image result that is interpretable by the model.""" - return json.dumps( - { - "output": "", - "error": "", - "base64_image": base64, - "system": None, - } - ) diff --git a/src/scrapybara/types/act.py b/src/scrapybara/types/act.py index c70899b..6d7c052 100644 --- a/src/scrapybara/types/act.py +++ b/src/scrapybara/types/act.py @@ -1,7 +1,9 @@ -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Dict, List, Literal, Optional, Union, Generic, TypeVar from pydantic import BaseModel from .tool import Tool +OutputT = TypeVar("OutputT") + # Message part types class TextPart(BaseModel): @@ -55,7 +57,7 @@ class Model(BaseModel): api_key: Optional[str] = None -class ActRequest(BaseModel): +class SingleActRequest(BaseModel): model: Model system: Optional[str] = None messages: Optional[List[Message]] = None @@ -70,7 +72,7 @@ class TokenUsage(BaseModel): total_tokens: int -class ActResponse(BaseModel): +class SingleActResponse(BaseModel): message: AssistantMessage finish_reason: Literal[ "stop", "length", "content-filter", "tool-calls", "error", "other", "unknown" @@ -95,3 +97,12 @@ class Step(BaseModel): ] ] = None usage: Optional[TokenUsage] = None + + +# Act response +class ActResponse(BaseModel, Generic[OutputT]): + messages: List[Message] + steps: List[Step] + text: Optional[str] = None + output: OutputT + usage: Optional[TokenUsage] = None diff --git a/tests/custom/test_client.py b/tests/custom/test_client.py index 70f026a..4747ba0 100644 --- a/tests/custom/test_client.py +++ b/tests/custom/test_client.py @@ -1,3 +1,4 @@ +from pydantic import BaseModel from scrapybara import Scrapybara import os @@ -20,19 +21,27 @@ def test_client() -> None: cdp_url = instance.browser.get_cdp_url() assert cdp_url is not None - messages = client.act( + class YCStats(BaseModel): + number_of_startups: int + combined_valuation: int + + response = client.act( model=Anthropic(), system=SYSTEM_PROMPT, - prompt="Go to the YC website and fetch the HTML", + prompt="Go to the YC website and get the number of funded startups and combined valuation", tools=[ ComputerTool(instance), BashTool(instance), EditTool(instance), BrowserTool(instance), ], - on_step=lambda step: print(f"{step}\n"), + schema=YCStats, ) - assert len(messages) > 0 + print(response) + + assert response.output is not None + assert response.output.number_of_startups is not None + assert response.output.combined_valuation is not None instance.browser.stop() instance.stop() From 9f1c19ebfae2c62f6925653babaed1a519cba976 Mon Sep 17 00:00:00 2001 From: Justin Sun Date: Thu, 16 Jan 2025 15:26:39 -0800 Subject: [PATCH 4/6] bump version --- pyproject.toml | 2 +- src/scrapybara/core/client_wrapper.py | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ee5d06a..9552bec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "scrapybara" -version = "2.1.1" +version = "2.1.2" description = "" readme = "README.md" authors = [] diff --git a/src/scrapybara/core/client_wrapper.py b/src/scrapybara/core/client_wrapper.py index a2eb2f4..e3442ae 100644 --- a/src/scrapybara/core/client_wrapper.py +++ b/src/scrapybara/core/client_wrapper.py @@ -7,7 +7,9 @@ class BaseClientWrapper: - def __init__(self, *, api_key: str, base_url: str, timeout: typing.Optional[float] = None): + def __init__( + self, *, api_key: str, base_url: str, timeout: typing.Optional[float] = None + ): self.api_key = api_key self._base_url = base_url self._timeout = timeout @@ -16,7 +18,7 @@ def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { "X-Fern-Language": "Python", "X-Fern-SDK-Name": "scrapybara", - "X-Fern-SDK-Version": "2.1.1", + "X-Fern-SDK-Version": "2.1.2", } headers["x-api-key"] = self.api_key return headers @@ -30,7 +32,12 @@ def get_timeout(self) -> typing.Optional[float]: class SyncClientWrapper(BaseClientWrapper): def __init__( - self, *, api_key: str, base_url: str, timeout: typing.Optional[float] = None, httpx_client: httpx.Client + self, + *, + api_key: str, + base_url: str, + timeout: typing.Optional[float] = None, + httpx_client: httpx.Client ): super().__init__(api_key=api_key, base_url=base_url, timeout=timeout) self.httpx_client = HttpClient( @@ -43,7 +50,12 @@ def __init__( class AsyncClientWrapper(BaseClientWrapper): def __init__( - self, *, api_key: str, base_url: str, timeout: typing.Optional[float] = None, httpx_client: httpx.AsyncClient + self, + *, + api_key: str, + base_url: str, + timeout: typing.Optional[float] = None, + httpx_client: httpx.AsyncClient ): super().__init__(api_key=api_key, base_url=base_url, timeout=timeout) self.httpx_client = AsyncHttpClient( From 2ad0f64eefe20da232260767d17f25bb7a849fa6 Mon Sep 17 00:00:00 2001 From: Justin Sun Date: Thu, 16 Jan 2025 15:41:49 -0800 Subject: [PATCH 5/6] improve docstrings --- src/scrapybara/anthropic/__init__.py | 15 +++++++++++++++ src/scrapybara/client.py | 4 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/scrapybara/anthropic/__init__.py b/src/scrapybara/anthropic/__init__.py index b99c7c2..a3cfda2 100644 --- a/src/scrapybara/anthropic/__init__.py +++ b/src/scrapybara/anthropic/__init__.py @@ -14,6 +14,21 @@ # New: universal act API class Anthropic(Model): + """Model adapter for Anthropic. + + Supported models: + - claude-3-5-sonnet-20241022 (with computer use beta) + + If an API key is not provided, each call will cost 1 agent credit. + + Args: + name: Anthropic model name, defaults to "claude-3-5-sonnet-20241022" + api_key: Optional Anthropic API key + + Returns: + A Model configuration object + """ + provider: Literal["anthropic"] = Field(default="anthropic", frozen=True) def __init__( diff --git a/src/scrapybara/client.py b/src/scrapybara/client.py index 8f5f7e6..a4d8f3d 100644 --- a/src/scrapybara/client.py +++ b/src/scrapybara/client.py @@ -909,7 +909,7 @@ def act( request_options: Optional request configuration Returns: - ActResponse containing all messages, steps, final output (structured according to schema if provided), and token usage + ActResponse containing all messages, steps, text, output (if schema is provided), and token usage """ result_messages: List[Message] = [] steps: List[Step] = [] @@ -1215,7 +1215,7 @@ async def act( request_options: Optional request configuration Returns: - ActResponse containing all messages, steps, final output (structured according to schema if provided), and token usage + ActResponse containing all messages, steps, text, output (if schema is provided), and token usage """ result_messages: List[Message] = [] steps: List[Step] = [] From c7171444e5bfa9fe6a1e71a9a63b600db8c99c37 Mon Sep 17 00:00:00 2001 From: justin sun <33591641+justinsunyt@users.noreply.github.com> Date: Thu, 16 Jan 2025 19:09:53 -0500 Subject: [PATCH 6/6] remove duplicate image_result --- src/scrapybara/tools/__init__.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/scrapybara/tools/__init__.py b/src/scrapybara/tools/__init__.py index 890c152..f31379b 100644 --- a/src/scrapybara/tools/__init__.py +++ b/src/scrapybara/tools/__init__.py @@ -217,15 +217,3 @@ def __call__(self, **kwargs: Any) -> Any: finally: browser.close() - - -def image_result(base64: str) -> str: - """Return an image result that is interpretable by the model.""" - return json.dumps( - { - "output": "", - "error": "", - "base64_image": base64, - "system": None, - } - )