diff --git a/pyproject.toml b/pyproject.toml
index a47562e..25b78d7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@ name = "scrapybara"
 
 [tool.poetry]
 name = "scrapybara"
-version = "2.4.1"
+version = "2.4.2"
 description = ""
 readme = "README.md"
 authors = []
diff --git a/src/scrapybara/anthropic/__init__.py b/src/scrapybara/anthropic/__init__.py
index 8c83a18..d9c9f0f 100644
--- a/src/scrapybara/anthropic/__init__.py
+++ b/src/scrapybara/anthropic/__init__.py
@@ -120,3 +120,12 @@ def __init__(
 * Be concise!
 """
 """Recommended Anthropic system prompt for Windows instances"""
+
+STRUCTURED_OUTPUT_SECTION = """
+* When you have completed your task and are ready to provide the final result to the user, use the 'structured_output' tool
+* This tool allows you to output structured data according to the provided schema
+* Ensure that your output matches the expected schema by providing the correct fields and data types
+* The output from this tool will be passed directly back to the user as the final result
+* Do not present the final result in plain text; always use the 'structured_output' tool for the final output
+"""
+"""Section to add to system prompt when structured output is being used"""
\ No newline at end of file
diff --git a/src/scrapybara/client.py b/src/scrapybara/client.py
index 7d4d55c..acca6f7 100644
--- a/src/scrapybara/client.py
+++ b/src/scrapybara/client.py
@@ -1697,6 +1697,8 @@ def act(
         prompt: Optional[str] = None,
         messages: Optional[List[Message]] = None,
         schema: Optional[Type[SchemaT]] = None,
+        on_assistant_message: Optional[Callable[[AssistantMessage], None]] = None,
+        on_tool_message: Optional[Callable[[ToolMessage], None]] = None,
         on_step: Optional[Callable[[Step], None]] = None,
         temperature: Optional[float] = None,
         max_tokens: Optional[int] = None,
@@ -1713,6 +1715,8 @@ def act(
             prompt: Initial user prompt
             messages: List of messages to start with
             schema: Optional Pydantic model class to structure the final output
+            on_assistant_message: Callback for each assistant message
+            on_tool_message: Callback for each tool message
             on_step: Callback for each step of the conversation
             temperature: Optional temperature parameter for the model
             max_tokens: Optional max tokens parameter for the model
@@ -1738,6 +1742,8 @@ def act(
             prompt=prompt,
             messages=messages,
             schema=schema,
+            on_assistant_message=on_assistant_message,
+            on_tool_message=on_tool_message,
             on_step=on_step,
             temperature=temperature,
             max_tokens=max_tokens,
@@ -1795,6 +1801,8 @@ def act_stream(
         prompt: Optional[str] = None,
         messages: Optional[List[Message]] = None,
         schema: Optional[Type[BaseModel]] = None,
+        on_assistant_message: Optional[Callable[[AssistantMessage], None]] = None,
+        on_tool_message: Optional[Callable[[ToolMessage], None]] = None,
         on_step: Optional[Callable[[Step], None]] = None,
         temperature: Optional[float] = None,
         max_tokens: Optional[int] = None,
@@ -1811,6 +1819,8 @@ def act_stream(
             prompt: Initial user prompt
             messages: List of messages to start with
             schema: Optional Pydantic model class to structure the final output
+            on_assistant_message: Callback for each assistant message
+            on_tool_message: Callback for each tool message
             on_step: Callback for each step of the conversation
             temperature: Optional temperature parameter for the model
             max_tokens: Optional max tokens parameter for the model
@@ -1850,6 +1860,32 @@ def act_stream(
                 raise ValueError("Schema is not supported with ui-tars-72b model.")
             else:
                 current_tools.append(StructuredOutputTool(schema))
+
+            # If a schema is provided and system matches one of our default prompts, add structured output section
+            if system is not None:
+                if model.provider == "anthropic":
+                    from .anthropic import (
+                        UBUNTU_SYSTEM_PROMPT,
+                        BROWSER_SYSTEM_PROMPT,
+                        WINDOWS_SYSTEM_PROMPT,
+                        STRUCTURED_OUTPUT_SECTION
+                    )
+
+                    if system in (UBUNTU_SYSTEM_PROMPT, BROWSER_SYSTEM_PROMPT, WINDOWS_SYSTEM_PROMPT):
+                        # For Anthropic prompts, add inside the system capability section
+                        system = system.replace("</SYSTEM_CAPABILITY>", f"{STRUCTURED_OUTPUT_SECTION}\n</SYSTEM_CAPABILITY>")
+
+                elif model.provider == "openai":
+                    from .openai import (
+                        UBUNTU_SYSTEM_PROMPT,
+                        BROWSER_SYSTEM_PROMPT,
+                        WINDOWS_SYSTEM_PROMPT,
+                        STRUCTURED_OUTPUT_SECTION
+                    )
+
+                    if system in (UBUNTU_SYSTEM_PROMPT, BROWSER_SYSTEM_PROMPT, WINDOWS_SYSTEM_PROMPT):
+                        # For OpenAI prompts, simply append the structured output section
+                        system = system + STRUCTURED_OUTPUT_SECTION
 
         while True:
             # Convert tools to ApiTools
@@ -1879,6 +1915,8 @@ def act_stream(
             act_response = SingleActResponse.model_validate(response.json())
 
             current_messages.append(act_response.message)
+            if on_assistant_message:
+                on_assistant_message(act_response.message)
 
             # Extract text from assistant message
             text = "\n".join(
@@ -1942,6 +1980,8 @@ def act_stream(
                 step.tool_results = tool_results
                 tool_message = ToolMessage(content=tool_results)
                 current_messages.append(tool_message)
+                if on_tool_message:
+                    on_tool_message(tool_message)
 
             if on_step:
                 on_step(step)
@@ -2121,6 +2161,8 @@ async def act(
         prompt: Optional[str] = None,
         messages: Optional[List[Message]] = None,
         schema: Optional[Type[SchemaT]] = None,
+        on_assistant_message: Optional[Callable[[AssistantMessage], None]] = None,
+        on_tool_message: Optional[Callable[[ToolMessage], None]] = None,
         on_step: Optional[Callable[[Step], None]] = None,
         temperature: Optional[float] = None,
         max_tokens: Optional[int] = None,
@@ -2137,6 +2179,8 @@ async def act(
             prompt: Initial user prompt
             messages: List of messages to start with
             schema: Optional Pydantic model class to structure the final output
+            on_assistant_message: Callback for each assistant message
+            on_tool_message: Callback for each tool message
             on_step: Callback for each step of the conversation
             temperature: Optional temperature parameter for the model
             max_tokens: Optional max tokens parameter for the model
@@ -2162,6 +2206,8 @@ async def act(
             prompt=prompt,
             messages=messages,
             schema=schema,
+            on_assistant_message=on_assistant_message,
+            on_tool_message=on_tool_message,
             on_step=on_step,
             temperature=temperature,
             max_tokens=max_tokens,
@@ -2219,6 +2265,8 @@ async def act_stream(
         prompt: Optional[str] = None,
         messages: Optional[List[Message]] = None,
         schema: Optional[Type[SchemaT]] = None,
+        on_assistant_message: Optional[Callable[[AssistantMessage], None]] = None,
+        on_tool_message: Optional[Callable[[ToolMessage], None]] = None,
         on_step: Optional[Callable[[Step], None]] = None,
         temperature: Optional[float] = None,
         max_tokens: Optional[int] = None,
@@ -2235,6 +2283,8 @@ async def act_stream(
             prompt: Initial user prompt
             messages: List of messages to start with
             schema: Optional Pydantic model class to structure the final output
+            on_assistant_message: Callback for each assistant message
+            on_tool_message: Callback for each tool message
             on_step: Callback for each step of the conversation
             temperature: Optional temperature parameter for the model
             max_tokens: Optional max tokens parameter for the model
@@ -2274,6 +2324,32 @@ async def act_stream(
                 raise ValueError("Schema is not supported with ui-tars-72b model.")
             else:
                 current_tools.append(StructuredOutputTool(schema))
+
+            # If a schema is provided and system matches one of our default prompts, add structured output section
+            if system is not None:
+                if model.provider == "anthropic":
+                    from .anthropic import (
+                        UBUNTU_SYSTEM_PROMPT,
+                        BROWSER_SYSTEM_PROMPT,
+                        WINDOWS_SYSTEM_PROMPT,
+                        STRUCTURED_OUTPUT_SECTION
+                    )
+
+                    if system in (UBUNTU_SYSTEM_PROMPT, BROWSER_SYSTEM_PROMPT, WINDOWS_SYSTEM_PROMPT):
+                        # For Anthropic prompts, add inside the system capability section
+                        system = system.replace("</SYSTEM_CAPABILITY>", f"{STRUCTURED_OUTPUT_SECTION}\n</SYSTEM_CAPABILITY>")
+
+                elif model.provider == "openai":
+                    from .openai import (
+                        UBUNTU_SYSTEM_PROMPT,
+                        BROWSER_SYSTEM_PROMPT,
+                        WINDOWS_SYSTEM_PROMPT,
+                        STRUCTURED_OUTPUT_SECTION
+                    )
+
+                    if system in (UBUNTU_SYSTEM_PROMPT, BROWSER_SYSTEM_PROMPT, WINDOWS_SYSTEM_PROMPT):
+                        # For OpenAI prompts, simply append the structured output section
+                        system = system + STRUCTURED_OUTPUT_SECTION
 
         while True:
             # Convert tools to ApiTools
@@ -2303,6 +2379,10 @@ async def act_stream(
             act_response = SingleActResponse.model_validate(response.json())
 
             current_messages.append(act_response.message)
+            if on_assistant_message:
+                result = on_assistant_message(act_response.message)
+                if inspect.isawaitable(result):
+                    await result
 
             # Extract text from assistant message
             text = "\n".join(
@@ -2370,6 +2450,10 @@ async def act_stream(
                 step.tool_results = tool_results
                 tool_message = ToolMessage(content=tool_results)
                 current_messages.append(tool_message)
+                if on_tool_message:
+                    result = on_tool_message(tool_message)
+                    if inspect.isawaitable(result):
+                        await result
 
             if on_step:
                 result = on_step(step)
diff --git a/src/scrapybara/core/client_wrapper.py b/src/scrapybara/core/client_wrapper.py
index 4b9f004..4bbd3e6 100644
--- a/src/scrapybara/core/client_wrapper.py
+++ b/src/scrapybara/core/client_wrapper.py
@@ -16,7 +16,7 @@ def get_headers(self) -> typing.Dict[str, str]:
         headers: typing.Dict[str, str] = {
             "X-Fern-Language": "Python",
             "X-Fern-SDK-Name": "scrapybara",
-            "X-Fern-SDK-Version": "2.4.1",
+            "X-Fern-SDK-Version": "2.4.2",
         }
         headers["x-api-key"] = self.api_key
         return headers
diff --git a/src/scrapybara/openai/__init__.py b/src/scrapybara/openai/__init__.py
index 6d1d1d3..e4bdde2 100644
--- a/src/scrapybara/openai/__init__.py
+++ b/src/scrapybara/openai/__init__.py
@@ -137,4 +137,13 @@ def __init__(
 - If a user's request implies the need for external information, assume they want you to search for it and provide the answer directly.
 
 ### Date Context
-Today's date is {datetime.today().strftime('%A, %B %d, %Y')}."""
\ No newline at end of file
+Today's date is {datetime.today().strftime('%A, %B %d, %Y')}."""
+
+STRUCTURED_OUTPUT_SECTION = """
+### Final Output
+- When you have completed your task and are ready to provide the final result to the user, use the 'structured_output' tool.
+- This tool allows you to output structured data according to the provided schema.
+- Ensure that your output matches the expected schema by providing the correct fields and data types as specified in the tool's parameters.
+- The output from this tool will be passed directly back to the user as the final result.
+- Do not present the final result in plain text; always use the 'structured_output' tool for the final output.
+"""
\ No newline at end of file
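A minimal sketch of how the new `on_assistant_message` and `on_tool_message` callbacks might be used. The prompt text and instance setup are illustrative, not from this diff; the imports follow the SDK's documented layout:

```python
# Sketch: the message callbacks added in 2.4.2 (illustrative usage).
from scrapybara import Scrapybara
from scrapybara.anthropic import Anthropic, UBUNTU_SYSTEM_PROMPT
from scrapybara.tools import BashTool, ComputerTool

client = Scrapybara()
instance = client.start_ubuntu()

response = client.act(
    model=Anthropic(),
    tools=[ComputerTool(instance), BashTool(instance)],
    system=UBUNTU_SYSTEM_PROMPT,
    prompt="Open the browser and read the page title",  # hypothetical task
    # New in 2.4.2: fires as soon as each assistant message is appended,
    # before its tool calls execute -- earlier than the per-step on_step.
    on_assistant_message=lambda msg: print("assistant:", msg),
    # New in 2.4.2: fires once the step's tool results are wrapped in a ToolMessage.
    on_tool_message=lambda msg: print("tool results:", msg),
    on_step=lambda step: print("step:", step.text),
)

instance.stop()
```

In the async client the same callbacks may also be coroutines; the `inspect.isawaitable` check above awaits their result when one is returned.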
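And a sketch of the structured-output path this diff wires up: when `schema` is set and `system` is one of the stock prompts, the SDK injects `STRUCTURED_OUTPUT_SECTION` so the model finishes via the `structured_output` tool. The schema fields below are hypothetical, and `response.output` holding the parsed model follows the SDK's documented behavior:

```python
# Sketch: structured output with a Pydantic schema (illustrative fields).
from pydantic import BaseModel

class PageInfo(BaseModel):
    title: str
    url: str

response = client.act(
    model=Anthropic(),
    tools=[ComputerTool(instance), BashTool(instance)],
    system=UBUNTU_SYSTEM_PROMPT,  # matches a stock prompt, so the section is injected
    prompt="Get the title and URL of the current page",
    schema=PageInfo,
)
page = response.output  # parsed PageInfo instance
print(page.title, page.url)
```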