From 54bc7674d148dc89d87923068049c946dd6ba34c Mon Sep 17 00:00:00 2001 From: fern-api <115122769+fern-api[bot]@users.noreply.github.com> Date: Fri, 7 Mar 2025 04:53:02 +0000 Subject: [PATCH 1/6] SDK regeneration --- pyproject.toml | 2 +- src/scrapybara/core/client_wrapper.py | 2 +- src/scrapybara/instance/types/request.py | 2 +- src/scrapybara/types/scroll_action.py | 2 +- src/scrapybara/types/status.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ccb1c22..a8190da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "scrapybara" [tool.poetry] name = "scrapybara" -version = "2.3.1" +version = "2.3.2" description = "" readme = "README.md" authors = [] diff --git a/src/scrapybara/core/client_wrapper.py b/src/scrapybara/core/client_wrapper.py index eb85936..de6a855 100644 --- a/src/scrapybara/core/client_wrapper.py +++ b/src/scrapybara/core/client_wrapper.py @@ -16,7 +16,7 @@ def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { "X-Fern-Language": "Python", "X-Fern-SDK-Name": "scrapybara", - "X-Fern-SDK-Version": "2.3.1", + "X-Fern-SDK-Version": "2.3.2", } headers["x-api-key"] = self.api_key return headers diff --git a/src/scrapybara/instance/types/request.py b/src/scrapybara/instance/types/request.py index bf94fec..141e199 100644 --- a/src/scrapybara/instance/types/request.py +++ b/src/scrapybara/instance/types/request.py @@ -59,7 +59,7 @@ class Config: class Request_Scroll(UniversalBaseModel): action: typing.Literal["scroll"] = "scroll" - coordinates: typing.List[int] + coordinates: typing.Optional[typing.List[int]] = None delta_x: typing.Optional[float] = None delta_y: typing.Optional[float] = None hold_keys: typing.Optional[typing.List[str]] = None diff --git a/src/scrapybara/types/scroll_action.py b/src/scrapybara/types/scroll_action.py index b03bf80..9bee45f 100644 --- a/src/scrapybara/types/scroll_action.py +++ b/src/scrapybara/types/scroll_action.py @@ -7,7 +7,7 @@ class ScrollAction(UniversalBaseModel): - coordinates: typing.List[int] + coordinates: typing.Optional[typing.List[int]] = None delta_x: typing.Optional[float] = None delta_y: typing.Optional[float] = None hold_keys: typing.Optional[typing.List[str]] = None diff --git a/src/scrapybara/types/status.py b/src/scrapybara/types/status.py index 0c98034..3fb3c9c 100644 --- a/src/scrapybara/types/status.py +++ b/src/scrapybara/types/status.py @@ -2,4 +2,4 @@ import typing -Status = typing.Union[typing.Literal["deploying", "running", "paused", "terminated", "error", "warm_pool"], typing.Any] +Status = typing.Union[typing.Literal["deploying", "running", "paused", "terminated", "error"], typing.Any] From 4bd8719888f08f23539bb7ddb69d3f586156e7a6 Mon Sep 17 00:00:00 2001 From: Justin Sun Date: Thu, 6 Mar 2025 21:30:38 -0800 Subject: [PATCH 2/6] feat: make coordinates optional for scroll --- src/scrapybara/client.py | 4 ++-- src/scrapybara/tools/__init__.py | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/scrapybara/client.py b/src/scrapybara/client.py index 1c9e4bc..f7e162f 100644 --- a/src/scrapybara/client.py +++ b/src/scrapybara/client.py @@ -684,7 +684,7 @@ def computer( self, *, action: Literal["scroll"], - coordinates: List[int], + coordinates: Optional[List[int]] = None, delta_x: Optional[float] = 0, delta_y: Optional[float] = 0, hold_keys: Optional[List[str]] = None, @@ -992,7 +992,7 @@ async def computer( self, *, action: Literal["scroll"], - coordinates: List[int], + coordinates: Optional[List[int]] = None, delta_x: Optional[float] = 0, delta_y: Optional[float] = 0, hold_keys: Optional[List[str]] = None, diff --git a/src/scrapybara/tools/__init__.py b/src/scrapybara/tools/__init__.py index 7efd2cf..0bd8744 100644 --- a/src/scrapybara/tools/__init__.py +++ b/src/scrapybara/tools/__init__.py @@ -75,8 +75,6 @@ def __call__(self, **kwargs: Any) -> Any: hold_keys=params.hold_keys, ) elif params.action == "scroll": - if not params.coordinates: - raise ValueError("coordinates is required for scroll action") return self._instance.computer( action=params.action, coordinates=params.coordinates, From c74438d199e8099a110743c1361ce2397bb5e7e5 Mon Sep 17 00:00:00 2001 From: justin sun <33591641+justinsunyt@users.noreply.github.com> Date: Thu, 6 Mar 2025 21:32:08 -0800 Subject: [PATCH 3/6] merge action parsing (#33) * feat: pass action objects directly to computer() * bring back the overloads --------- Co-authored-by: Cooper Miller --- src/scrapybara/client.py | 436 +++++++++++++++++++++++++++++++++------ 1 file changed, 376 insertions(+), 60 deletions(-) diff --git a/src/scrapybara/client.py b/src/scrapybara/client.py index f7e162f..3b333e2 100644 --- a/src/scrapybara/client.py +++ b/src/scrapybara/client.py @@ -83,6 +83,17 @@ Request_TakeScreenshot, Request_GetCursorPosition, ) +from .types import ( + MoveMouseAction, + ClickMouseAction, + DragMouseAction, + ScrollAction, + PressKeyAction, + TypeTextAction, + WaitAction, + TakeScreenshotAction, + GetCursorPositionAction, +) OMIT = typing.cast(typing.Any, ...) SchemaT = TypeVar("SchemaT", bound=BaseModel) @@ -646,6 +657,78 @@ def get_stream_url( self.id, request_options=request_options ) + @overload + def computer( + self, + *, + action: MoveMouseAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: ClickMouseAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: DragMouseAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: ScrollAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: PressKeyAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: TypeTextAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: WaitAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: TakeScreenshotAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: GetCursorPositionAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + @overload def computer( self, @@ -739,7 +822,18 @@ def computer( def computer( self, *, - action: Action, + action: Union[ + Action, + MoveMouseAction, + ClickMouseAction, + DragMouseAction, + ScrollAction, + PressKeyAction, + TypeTextAction, + WaitAction, + TakeScreenshotAction, + GetCursorPositionAction, + ], button: Optional[Button] = None, click_type: Optional[ClickMouseActionClickType] = "click", coordinates: Optional[List[int]] = None, @@ -753,37 +847,82 @@ def computer( duration: Optional[float] = None, request_options: Optional[RequestOptions] = None, ) -> ComputerResponse: + """Control computer actions like mouse movements, clicks, and keyboard input. + + This method supports two ways of specifying actions: + + 1. Using action objects (recommended): + ```python + click_action = ClickMouseAction( + button="left", + coordinates=[500, 500] + ) + instance.computer(action=click_action) + ``` + + 2. Using string action types with parameters (legacy): + ```python + instance.computer( + action="click_mouse", + button="left", + coordinates=[500, 500] + ) + ``` + + Args: + action: Either a string action type or an action object + button: The mouse button to use (for click actions) + click_type: The type of click to perform + coordinates: Coordinates for mouse actions + delta_x: X delta for scroll actions + delta_y: Y delta for scroll actions + num_clicks: Number of clicks to perform + hold_keys: Keys to hold during the action + path: Path for drag mouse actions + keys: Keys to press + text: Text to type + duration: Duration for wait actions + request_options: Options for the request + + Returns: + ComputerResponse: Response from the action + """ request: Any = None - if action == "move_mouse": - request = Request_MoveMouse(coordinates=coordinates, hold_keys=hold_keys) - elif action == "click_mouse": - request = Request_ClickMouse( - button=button, - click_type=click_type, - coordinates=coordinates, - num_clicks=num_clicks, - hold_keys=hold_keys, - ) - elif action == "drag_mouse": - request = Request_DragMouse(path=path, hold_keys=hold_keys) - elif action == "scroll": - request = Request_Scroll( - coordinates=coordinates, - delta_x=delta_x, - delta_y=delta_y, - hold_keys=hold_keys, - ) - elif action == "press_key": - request = Request_PressKey(keys=keys, duration=duration) - elif action == "type_text": - request = Request_TypeText(text=text, hold_keys=hold_keys) - elif action == "wait": - request = Request_Wait(duration=duration) - elif action == "take_screenshot": - request = Request_TakeScreenshot() - elif action == "get_cursor_position": - request = Request_GetCursorPosition() + # Check if action is an action object + request = _create_request_from_action(action) + + # If it wasn't an object or the object wasn't recognized, use the legacy string-based approach + if request is None: + if action == "move_mouse": + request = Request_MoveMouse(coordinates=coordinates, hold_keys=hold_keys) + elif action == "click_mouse": + request = Request_ClickMouse( + button=button, + click_type=click_type, + coordinates=coordinates, + num_clicks=num_clicks, + hold_keys=hold_keys, + ) + elif action == "drag_mouse": + request = Request_DragMouse(path=path, hold_keys=hold_keys) + elif action == "scroll": + request = Request_Scroll( + coordinates=coordinates, + delta_x=delta_x, + delta_y=delta_y, + hold_keys=hold_keys, + ) + elif action == "press_key": + request = Request_PressKey(keys=keys, duration=duration) + elif action == "type_text": + request = Request_TypeText(text=text, hold_keys=hold_keys) + elif action == "wait": + request = Request_Wait(duration=duration) + elif action == "take_screenshot": + request = Request_TakeScreenshot() + elif action == "get_cursor_position": + request = Request_GetCursorPosition() return self._client.instance.computer( self.id, @@ -954,6 +1093,78 @@ async def get_stream_url( self.id, request_options=request_options ) + @overload + async def computer( + self, + *, + action: MoveMouseAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: ClickMouseAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: DragMouseAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: ScrollAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: PressKeyAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: TypeTextAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: WaitAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: TakeScreenshotAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: GetCursorPositionAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + @overload async def computer( self, @@ -1047,7 +1258,18 @@ async def computer( async def computer( self, *, - action: Action, + action: Union[ + Action, + MoveMouseAction, + ClickMouseAction, + DragMouseAction, + ScrollAction, + PressKeyAction, + TypeTextAction, + WaitAction, + TakeScreenshotAction, + GetCursorPositionAction, + ], button: Optional[Button] = None, click_type: Optional[ClickMouseActionClickType] = "click", coordinates: Optional[List[int]] = None, @@ -1061,37 +1283,82 @@ async def computer( duration: Optional[float] = None, request_options: Optional[RequestOptions] = None, ) -> ComputerResponse: + """Control computer actions like mouse movements, clicks, and keyboard input. + + This method supports two ways of specifying actions: + + 1. Using action objects (recommended): + ```python + click_action = ClickMouseAction( + button="left", + coordinates=[500, 500] + ) + await instance.computer(action=click_action) + ``` + + 2. Using string action types with parameters (legacy): + ```python + await instance.computer( + action="click_mouse", + button="left", + coordinates=[500, 500] + ) + ``` + + Args: + action: Either a string action type or an action object + button: The mouse button to use (for click actions) + click_type: The type of click to perform + coordinates: Coordinates for mouse actions + delta_x: X delta for scroll actions + delta_y: Y delta for scroll actions + num_clicks: Number of clicks to perform + hold_keys: Keys to hold during the action + path: Path for drag mouse actions + keys: Keys to press + text: Text to type + duration: Duration for wait actions + request_options: Options for the request + + Returns: + ComputerResponse: Response from the action + """ request: Any = None - if action == "move_mouse": - request = Request_MoveMouse(coordinates=coordinates, hold_keys=hold_keys) - elif action == "click_mouse": - request = Request_ClickMouse( - button=button, - click_type=click_type, - coordinates=coordinates, - num_clicks=num_clicks, - hold_keys=hold_keys, - ) - elif action == "drag_mouse": - request = Request_DragMouse(path=path, hold_keys=hold_keys) - elif action == "scroll": - request = Request_Scroll( - coordinates=coordinates, - delta_x=delta_x, - delta_y=delta_y, - hold_keys=hold_keys, - ) - elif action == "press_key": - request = Request_PressKey(keys=keys, duration=duration) - elif action == "type_text": - request = Request_TypeText(text=text, hold_keys=hold_keys) - elif action == "wait": - request = Request_Wait(duration=duration) - elif action == "take_screenshot": - request = Request_TakeScreenshot() - elif action == "get_cursor_position": - request = Request_GetCursorPosition() + # Check if action is an action object + request = _create_request_from_action(action) + + # If it wasn't an object or the object wasn't recognized, use the legacy string-based approach + if request is None: + if action == "move_mouse": + request = Request_MoveMouse(coordinates=coordinates, hold_keys=hold_keys) + elif action == "click_mouse": + request = Request_ClickMouse( + button=button, + click_type=click_type, + coordinates=coordinates, + num_clicks=num_clicks, + hold_keys=hold_keys, + ) + elif action == "drag_mouse": + request = Request_DragMouse(path=path, hold_keys=hold_keys) + elif action == "scroll": + request = Request_Scroll( + coordinates=coordinates, + delta_x=delta_x, + delta_y=delta_y, + hold_keys=hold_keys, + ) + elif action == "press_key": + request = Request_PressKey(keys=keys, duration=duration) + elif action == "type_text": + request = Request_TypeText(text=text, hold_keys=hold_keys) + elif action == "wait": + request = Request_Wait(duration=duration) + elif action == "take_screenshot": + request = Request_TakeScreenshot() + elif action == "get_cursor_position": + request = Request_GetCursorPosition() return await self._client.instance.computer( self.id, @@ -2053,3 +2320,52 @@ async def act_stream( if not has_tool_calls or has_structured_output: break + + +def _create_request_from_action(action): + """Helper function to create a request object from an action object.""" + if isinstance(action, MoveMouseAction): + return Request_MoveMouse( + coordinates=action.coordinates, + hold_keys=action.hold_keys + ) + elif isinstance(action, ClickMouseAction): + return Request_ClickMouse( + button=action.button, + click_type=action.click_type, + coordinates=action.coordinates, + num_clicks=action.num_clicks, + hold_keys=action.hold_keys, + ) + elif isinstance(action, DragMouseAction): + return Request_DragMouse( + path=action.path, + hold_keys=action.hold_keys + ) + elif isinstance(action, ScrollAction): + return Request_Scroll( + coordinates=action.coordinates, + delta_x=action.delta_x, + delta_y=action.delta_y, + hold_keys=action.hold_keys, + ) + elif isinstance(action, PressKeyAction): + return Request_PressKey( + keys=action.keys, + duration=action.duration + ) + elif isinstance(action, TypeTextAction): + return Request_TypeText( + text=action.text, + hold_keys=action.hold_keys + ) + elif isinstance(action, WaitAction): + return Request_Wait( + duration=action.duration + ) + elif isinstance(action, TakeScreenshotAction): + return Request_TakeScreenshot() + elif isinstance(action, GetCursorPositionAction): + return Request_GetCursorPosition() + else: + return None From 9c92238f87c0b84ff28ba4d06d186e063801c9db Mon Sep 17 00:00:00 2001 From: Justin Sun Date: Thu, 6 Mar 2025 21:53:52 -0800 Subject: [PATCH 4/6] feat: filter images --- src/scrapybara/client.py | 49 +++++++++++++++++++++++++++++++++++-- tests/custom/test_client.py | 4 +-- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/src/scrapybara/client.py b/src/scrapybara/client.py index 3b333e2..23bebc7 100644 --- a/src/scrapybara/client.py +++ b/src/scrapybara/client.py @@ -1667,6 +1667,7 @@ def act( on_step: Optional[Callable[[Step], None]] = None, temperature: Optional[float] = None, max_tokens: Optional[int] = None, + images_to_keep: Optional[int] = 4, request_options: Optional[RequestOptions] = None, ) -> ActResponse[SchemaT]: """ @@ -1682,6 +1683,7 @@ def act( on_step: Callback for each step of the conversation temperature: Optional temperature parameter for the model max_tokens: Optional max tokens parameter for the model + images_to_keep: Optional maximum number of most recent images to retain in messages and model call, defaults to 4 request_options: Optional request configuration Returns: @@ -1706,6 +1708,7 @@ def act( on_step=on_step, temperature=temperature, max_tokens=max_tokens, + images_to_keep=images_to_keep, request_options=request_options, ): steps.append(step) @@ -1743,6 +1746,8 @@ def act( total_tokens=total_tokens, ) + _filter_images(result_messages, images_to_keep) + return ActResponse( messages=result_messages, steps=steps, text=text, output=output, usage=usage ) @@ -1759,6 +1764,7 @@ def act_stream( on_step: Optional[Callable[[Step], None]] = None, temperature: Optional[float] = None, max_tokens: Optional[int] = None, + images_to_keep: Optional[int] = 4, request_options: Optional[RequestOptions] = None, ) -> Generator[Step, None, None]: """ @@ -1774,6 +1780,7 @@ def act_stream( on_step: Callback for each step of the conversation temperature: Optional temperature parameter for the model max_tokens: Optional max tokens parameter for the model + images_to_keep: Optional maximum number of most recent images to retain in messages and model call, defaults to 4 request_options: Optional request configuration Yields: @@ -1813,6 +1820,8 @@ def act_stream( while True: # Convert tools to ApiTools api_tools = [ApiTool.from_tool(tool) for tool in current_tools] + + _filter_images(current_messages, images_to_keep) request = SingleActRequest( model=model, @@ -2076,6 +2085,7 @@ async def act( on_step: Optional[Callable[[Step], None]] = None, temperature: Optional[float] = None, max_tokens: Optional[int] = None, + images_to_keep: Optional[int] = 4, request_options: Optional[RequestOptions] = None, ) -> ActResponse[SchemaT]: """ @@ -2091,6 +2101,7 @@ async def act( on_step: Callback for each step of the conversation temperature: Optional temperature parameter for the model max_tokens: Optional max tokens parameter for the model + images_to_keep: Optional maximum number of most recent images to retain in messages and model call, defaults to 4 request_options: Optional request configuration Returns: @@ -2112,9 +2123,10 @@ async def act( prompt=prompt, messages=messages, schema=schema, + on_step=on_step, temperature=temperature, max_tokens=max_tokens, - on_step=on_step, + images_to_keep=images_to_keep, request_options=request_options, ): steps.append(step) @@ -2152,6 +2164,8 @@ async def act( total_tokens=total_tokens, ) + _filter_images(result_messages, images_to_keep) + return ActResponse( messages=result_messages, steps=steps, text=text, output=output, usage=usage ) @@ -2168,6 +2182,7 @@ async def act_stream( on_step: Optional[Callable[[Step], None]] = None, temperature: Optional[float] = None, max_tokens: Optional[int] = None, + images_to_keep: Optional[int] = 4, request_options: Optional[RequestOptions] = None, ) -> AsyncGenerator[Step, None]: """ @@ -2183,6 +2198,7 @@ async def act_stream( on_step: Callback for each step of the conversation temperature: Optional temperature parameter for the model max_tokens: Optional max tokens parameter for the model + images_to_keep: Optional maximum number of most recent images to retain in messages and model call, defaults to 4 request_options: Optional request configuration Yields: @@ -2223,6 +2239,8 @@ async def act_stream( # Convert tools to ApiTools api_tools = [ApiTool.from_tool(tool) for tool in current_tools] + _filter_images(current_messages, images_to_keep) + request = SingleActRequest( model=model, system=system, @@ -2321,7 +2339,6 @@ async def act_stream( if not has_tool_calls or has_structured_output: break - def _create_request_from_action(action): """Helper function to create a request object from an action object.""" if isinstance(action, MoveMouseAction): @@ -2369,3 +2386,31 @@ def _create_request_from_action(action): return Request_GetCursorPosition() else: return None + +def _filter_images(messages: List[Message], images_to_keep: int): + """ + Helper function to filter base64 images in messages, keeping only the latest ones up to specified limit. + + Args: + messages: List of messages to filter + images_to_keep: Maximum number of images to keep + """ + images_kept = 0 + + for i in range(len(messages) - 1, -1, -1): + msg = messages[i] + + if isinstance(msg, ToolMessage) and msg.content: + for j in range(len(msg.content) - 1, -1, -1): + tool_result = msg.content[j] + + if (tool_result and + hasattr(tool_result, "result") and + tool_result.result and + isinstance(tool_result.result, dict) and + "base_64_image" in tool_result.result): + + if images_kept < images_to_keep: + images_kept += 1 + else: + del tool_result.result["base_64_image"] \ No newline at end of file diff --git a/tests/custom/test_client.py b/tests/custom/test_client.py index 78e296f..533d843 100644 --- a/tests/custom/test_client.py +++ b/tests/custom/test_client.py @@ -178,6 +178,6 @@ def test_browser_thinking() -> None: if __name__ == "__main__": test_ubuntu() test_browser() - test_ubuntu_thinking() - test_browser_thinking() + # test_ubuntu_thinking() + # test_browser_thinking() # test_windows() From 0743dce2a79f94597a18ac6e21d950bdcc520fc3 Mon Sep 17 00:00:00 2001 From: Justin Sun Date: Thu, 6 Mar 2025 21:56:49 -0800 Subject: [PATCH 5/6] fix: images_to_keep type --- src/scrapybara/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scrapybara/client.py b/src/scrapybara/client.py index 23bebc7..cb24321 100644 --- a/src/scrapybara/client.py +++ b/src/scrapybara/client.py @@ -2387,7 +2387,7 @@ def _create_request_from_action(action): else: return None -def _filter_images(messages: List[Message], images_to_keep: int): +def _filter_images(messages: List[Message], images_to_keep: Optional[int] = 4): """ Helper function to filter base64 images in messages, keeping only the latest ones up to specified limit. From 20230bcfc07373a55c7a1cc185559dca3ba15229 Mon Sep 17 00:00:00 2001 From: Justin Sun Date: Thu, 6 Mar 2025 22:02:00 -0800 Subject: [PATCH 6/6] fix: images_to_keep bruh --- src/scrapybara/client.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/scrapybara/client.py b/src/scrapybara/client.py index cb24321..5546ce3 100644 --- a/src/scrapybara/client.py +++ b/src/scrapybara/client.py @@ -1746,7 +1746,7 @@ def act( total_tokens=total_tokens, ) - _filter_images(result_messages, images_to_keep) + _filter_images(result_messages, images_to_keep or 4) return ActResponse( messages=result_messages, steps=steps, text=text, output=output, usage=usage @@ -1821,7 +1821,7 @@ def act_stream( # Convert tools to ApiTools api_tools = [ApiTool.from_tool(tool) for tool in current_tools] - _filter_images(current_messages, images_to_keep) + _filter_images(current_messages, images_to_keep or 4) request = SingleActRequest( model=model, @@ -2164,7 +2164,7 @@ async def act( total_tokens=total_tokens, ) - _filter_images(result_messages, images_to_keep) + _filter_images(result_messages, images_to_keep or 4) return ActResponse( messages=result_messages, steps=steps, text=text, output=output, usage=usage @@ -2239,7 +2239,7 @@ async def act_stream( # Convert tools to ApiTools api_tools = [ApiTool.from_tool(tool) for tool in current_tools] - _filter_images(current_messages, images_to_keep) + _filter_images(current_messages, images_to_keep or 4) request = SingleActRequest( model=model, @@ -2387,7 +2387,7 @@ def _create_request_from_action(action): else: return None -def _filter_images(messages: List[Message], images_to_keep: Optional[int] = 4): +def _filter_images(messages: List[Message], images_to_keep: int): """ Helper function to filter base64 images in messages, keeping only the latest ones up to specified limit.