Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "scrapybara"

[tool.poetry]
name = "scrapybara"
version = "2.4.1"
version = "2.4.2"
description = ""
readme = "README.md"
authors = []
Expand Down
9 changes: 9 additions & 0 deletions src/scrapybara/anthropic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,12 @@ def __init__(
* Be concise!
</IMPORTANT>"""
"""Recommended Anthropic system prompt for Windows instances"""

# Prompt fragment injected into the default Anthropic system prompts when the
# caller supplies a structured-output schema; per the client code in this
# change set it is spliced in just before the closing </SYSTEM_CAPABILITY>
# tag, directing the model to deliver its final answer via the
# 'structured_output' tool instead of plain text.
STRUCTURED_OUTPUT_SECTION = """
* When you have completed your task and are ready to provide the final result to the user, use the 'structured_output' tool
* This tool allows you to output structured data according to the provided schema
* Ensure that your output matches the expected schema by providing the correct fields and data types
* The output from this tool will be passed directly back to the user as the final result
* Do not present the final result in plain text; always use the 'structured_output' tool for the final output
"""
"""Section to add to system prompt when structured output is being used"""
84 changes: 84 additions & 0 deletions src/scrapybara/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1697,6 +1697,8 @@ def act(
prompt: Optional[str] = None,
messages: Optional[List[Message]] = None,
schema: Optional[Type[SchemaT]] = None,
on_assistant_message: Optional[Callable[[AssistantMessage], None]] = None,
on_tool_message: Optional[Callable[[ToolMessage], None]] = None,
on_step: Optional[Callable[[Step], None]] = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
Expand All @@ -1713,6 +1715,8 @@ def act(
prompt: Initial user prompt
messages: List of messages to start with
schema: Optional Pydantic model class to structure the final output
on_assistant_message: Callback for each assistant message
on_tool_message: Callback for each tool message
on_step: Callback for each step of the conversation
temperature: Optional temperature parameter for the model
max_tokens: Optional max tokens parameter for the model
Expand All @@ -1738,6 +1742,8 @@ def act(
prompt=prompt,
messages=messages,
schema=schema,
on_assistant_message=on_assistant_message,
on_tool_message=on_tool_message,
on_step=on_step,
temperature=temperature,
max_tokens=max_tokens,
Expand Down Expand Up @@ -1795,6 +1801,8 @@ def act_stream(
prompt: Optional[str] = None,
messages: Optional[List[Message]] = None,
schema: Optional[Type[BaseModel]] = None,
on_assistant_message: Optional[Callable[[AssistantMessage], None]] = None,
on_tool_message: Optional[Callable[[ToolMessage], None]] = None,
on_step: Optional[Callable[[Step], None]] = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
Expand All @@ -1811,6 +1819,8 @@ def act_stream(
prompt: Initial user prompt
messages: List of messages to start with
schema: Optional Pydantic model class to structure the final output
on_assistant_message: Callback for each assistant message
on_tool_message: Callback for each tool message
on_step: Callback for each step of the conversation
temperature: Optional temperature parameter for the model
max_tokens: Optional max tokens parameter for the model
Expand Down Expand Up @@ -1850,6 +1860,32 @@ def act_stream(
raise ValueError("Schema is not supported with ui-tars-72b model.")
else:
current_tools.append(StructuredOutputTool(schema))

# If a schema is provided and system matches one of our default prompts, add structured output section
if system is not None:
if model.provider == "anthropic":
from .anthropic import (
UBUNTU_SYSTEM_PROMPT,
BROWSER_SYSTEM_PROMPT,
WINDOWS_SYSTEM_PROMPT,
STRUCTURED_OUTPUT_SECTION
)

if system in (UBUNTU_SYSTEM_PROMPT, BROWSER_SYSTEM_PROMPT, WINDOWS_SYSTEM_PROMPT):
# For Anthropic prompts, add inside the system capability section
system = system.replace("</SYSTEM_CAPABILITY>", f"{STRUCTURED_OUTPUT_SECTION}\n</SYSTEM_CAPABILITY>")

elif model.provider == "openai":
from .openai import (
UBUNTU_SYSTEM_PROMPT,
BROWSER_SYSTEM_PROMPT,
WINDOWS_SYSTEM_PROMPT,
STRUCTURED_OUTPUT_SECTION
)

if system in (UBUNTU_SYSTEM_PROMPT, BROWSER_SYSTEM_PROMPT, WINDOWS_SYSTEM_PROMPT):
# For OpenAI prompts, simply append the structured output section
system = system + STRUCTURED_OUTPUT_SECTION

while True:
# Convert tools to ApiTools
Expand Down Expand Up @@ -1879,6 +1915,8 @@ def act_stream(

act_response = SingleActResponse.model_validate(response.json())
current_messages.append(act_response.message)
if on_assistant_message:
on_assistant_message(act_response.message)

# Extract text from assistant message
text = "\n".join(
Expand Down Expand Up @@ -1942,6 +1980,8 @@ def act_stream(
step.tool_results = tool_results
tool_message = ToolMessage(content=tool_results)
current_messages.append(tool_message)
if on_tool_message:
on_tool_message(tool_message)

if on_step:
on_step(step)
Expand Down Expand Up @@ -2121,6 +2161,8 @@ async def act(
prompt: Optional[str] = None,
messages: Optional[List[Message]] = None,
schema: Optional[Type[SchemaT]] = None,
on_assistant_message: Optional[Callable[[AssistantMessage], None]] = None,
on_tool_message: Optional[Callable[[ToolMessage], None]] = None,
on_step: Optional[Callable[[Step], None]] = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
Expand All @@ -2137,6 +2179,8 @@ async def act(
prompt: Initial user prompt
messages: List of messages to start with
schema: Optional Pydantic model class to structure the final output
on_assistant_message: Callback for each assistant message
on_tool_message: Callback for each tool message
on_step: Callback for each step of the conversation
temperature: Optional temperature parameter for the model
max_tokens: Optional max tokens parameter for the model
Expand All @@ -2162,6 +2206,8 @@ async def act(
prompt=prompt,
messages=messages,
schema=schema,
on_assistant_message=on_assistant_message,
on_tool_message=on_tool_message,
on_step=on_step,
temperature=temperature,
max_tokens=max_tokens,
Expand Down Expand Up @@ -2219,6 +2265,8 @@ async def act_stream(
prompt: Optional[str] = None,
messages: Optional[List[Message]] = None,
schema: Optional[Type[SchemaT]] = None,
on_assistant_message: Optional[Callable[[AssistantMessage], None]] = None,
on_tool_message: Optional[Callable[[ToolMessage], None]] = None,
on_step: Optional[Callable[[Step], None]] = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
Expand All @@ -2235,6 +2283,8 @@ async def act_stream(
prompt: Initial user prompt
messages: List of messages to start with
schema: Optional Pydantic model class to structure the final output
on_assistant_message: Callback for each assistant message
on_tool_message: Callback for each tool message
on_step: Callback for each step of the conversation
temperature: Optional temperature parameter for the model
max_tokens: Optional max tokens parameter for the model
Expand Down Expand Up @@ -2274,6 +2324,32 @@ async def act_stream(
raise ValueError("Schema is not supported with ui-tars-72b model.")
else:
current_tools.append(StructuredOutputTool(schema))

# If a schema is provided and system matches one of our default prompts, add structured output section
if system is not None:
if model.provider == "anthropic":
from .anthropic import (
UBUNTU_SYSTEM_PROMPT,
BROWSER_SYSTEM_PROMPT,
WINDOWS_SYSTEM_PROMPT,
STRUCTURED_OUTPUT_SECTION
)

if system in (UBUNTU_SYSTEM_PROMPT, BROWSER_SYSTEM_PROMPT, WINDOWS_SYSTEM_PROMPT):
# For Anthropic prompts, add inside the system capability section
system = system.replace("</SYSTEM_CAPABILITY>", f"{STRUCTURED_OUTPUT_SECTION}\n</SYSTEM_CAPABILITY>")

elif model.provider == "openai":
from .openai import (
UBUNTU_SYSTEM_PROMPT,
BROWSER_SYSTEM_PROMPT,
WINDOWS_SYSTEM_PROMPT,
STRUCTURED_OUTPUT_SECTION
)

if system in (UBUNTU_SYSTEM_PROMPT, BROWSER_SYSTEM_PROMPT, WINDOWS_SYSTEM_PROMPT):
# For OpenAI prompts, simply append the structured output section
system = system + STRUCTURED_OUTPUT_SECTION

while True:
# Convert tools to ApiTools
Expand Down Expand Up @@ -2303,6 +2379,10 @@ async def act_stream(

act_response = SingleActResponse.model_validate(response.json())
current_messages.append(act_response.message)
if on_assistant_message:
result = on_assistant_message(act_response.message)
if inspect.isawaitable(result):
await result

# Extract text from assistant message
text = "\n".join(
Expand Down Expand Up @@ -2370,6 +2450,10 @@ async def act_stream(
step.tool_results = tool_results
tool_message = ToolMessage(content=tool_results)
current_messages.append(tool_message)
if on_tool_message:
result = on_tool_message(tool_message)
if inspect.isawaitable(result):
await result

if on_step:
result = on_step(step)
Expand Down
2 changes: 1 addition & 1 deletion src/scrapybara/core/client_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def get_headers(self) -> typing.Dict[str, str]:
headers: typing.Dict[str, str] = {
"X-Fern-Language": "Python",
"X-Fern-SDK-Name": "scrapybara",
"X-Fern-SDK-Version": "2.4.1",
"X-Fern-SDK-Version": "2.4.2",
}
headers["x-api-key"] = self.api_key
return headers
Expand Down
11 changes: 10 additions & 1 deletion src/scrapybara/openai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,4 +137,13 @@ def __init__(
- If a user's request implies the need for external information, assume they want you to search for it and provide the answer directly.

### Date Context
Today's date is {datetime.today().strftime('%A, %B %d, %Y')}."""
Today's date is {datetime.today().strftime('%A, %B %d, %Y')}."""

# Prompt fragment for the default OpenAI system prompts when the caller
# supplies a structured-output schema; per the client code in this change set
# it is appended verbatim to the end of the system prompt, directing the model
# to deliver its final answer via the 'structured_output' tool instead of
# plain text. Kept in the markdown heading/bullet style of the surrounding
# OpenAI prompt (vs. the Anthropic variant's tag-based style).
STRUCTURED_OUTPUT_SECTION = """
### Final Output
- When you have completed your task and are ready to provide the final result to the user, use the 'structured_output' tool.
- This tool allows you to output structured data according to the provided schema.
- Ensure that your output matches the expected schema by providing the correct fields and data types as specified in the tool's parameters.
- The output from this tool will be passed directly back to the user as the final result.
- Do not present the final result in plain text; always use the 'structured_output' tool for the final output.
"""