From 78fde4a029c817febcfe9951fa759953ed012a83 Mon Sep 17 00:00:00 2001 From: Cooper Miller Date: Tue, 4 Mar 2025 13:26:35 -0600 Subject: [PATCH 1/2] feat: pass action objects directly to computer() --- src/scrapybara/client.py | 452 ++++++++++++++++++--------------------- 1 file changed, 210 insertions(+), 242 deletions(-) diff --git a/src/scrapybara/client.py b/src/scrapybara/client.py index 446f2b6..9798bc6 100644 --- a/src/scrapybara/client.py +++ b/src/scrapybara/client.py @@ -12,8 +12,6 @@ Generator, Callable, AsyncGenerator, - Literal, - overload, ) import typing import os @@ -83,6 +81,17 @@ Request_TakeScreenshot, Request_GetCursorPosition, ) +from .types import ( + MoveMouseAction, + ClickMouseAction, + DragMouseAction, + ScrollAction, + PressKeyAction, + TypeTextAction, + WaitAction, + TakeScreenshotAction, + GetCursorPositionAction, +) OMIT = typing.cast(typing.Any, ...) SchemaT = TypeVar("SchemaT", bound=BaseModel) @@ -646,100 +655,10 @@ def get_stream_url( self.id, request_options=request_options ) - @overload - def computer( - self, - *, - action: Literal["move_mouse"], - coordinates: List[int], - hold_keys: Optional[List[str]] = None, - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - def computer( - self, - *, - action: Literal["click_mouse"], - button: Button, - click_type: Optional[ClickMouseActionClickType] = None, - coordinates: Optional[List[int]] = None, - num_clicks: Optional[int] = None, - hold_keys: Optional[List[str]] = None, - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload def computer( self, *, - action: Literal["drag_mouse"], - path: List[List[int]], - hold_keys: Optional[List[str]] = None, - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - def computer( - self, - *, - action: Literal["scroll"], - coordinates: List[int], - delta_x: Optional[float] = None, - delta_y: Optional[float] = None, - hold_keys: Optional[List[str]] = None, - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - def computer( - self, - *, - action: Literal["press_key"], - keys: List[str], - duration: Optional[float] = None, - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - def computer( - self, - *, - action: Literal["type_text"], - text: str, - hold_keys: Optional[List[str]] = None, - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - def computer( - self, - *, - action: Literal["wait"], - duration: float, - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - def computer( - self, - *, - action: Literal["take_screenshot"], - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - def computer( - self, - *, - action: Literal["get_cursor_position"], - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - def computer( - self, - *, - action: Action, + action: Union[Action, MoveMouseAction, ClickMouseAction, DragMouseAction, ScrollAction, PressKeyAction, TypeTextAction, WaitAction, TakeScreenshotAction, GetCursorPositionAction], button: Optional[Button] = None, click_type: Optional[ClickMouseActionClickType] = None, coordinates: Optional[List[int]] = None, @@ -753,37 +672,82 @@ def computer( duration: Optional[float] = None, request_options: Optional[RequestOptions] = None, ) -> ComputerResponse: + """Control computer actions like mouse movements, clicks, and keyboard input. + + This method supports two ways of specifying actions: + + 1. Using action objects (recommended): + ```python + click_action = ClickMouseAction( + button="left", + coordinates=[500, 500] + ) + instance.computer(action=click_action) + ``` + + 2. Using string action types with parameters (legacy): + ```python + instance.computer( + action="click_mouse", + button="left", + coordinates=[500, 500] + ) + ``` + + Args: + action: Either a string action type or an action object + button: The mouse button to use (for click actions) + click_type: The type of click to perform + coordinates: Coordinates for mouse actions + delta_x: X delta for scroll actions + delta_y: Y delta for scroll actions + num_clicks: Number of clicks to perform + hold_keys: Keys to hold during the action + path: Path for drag mouse actions + keys: Keys to press + text: Text to type + duration: Duration for wait actions + request_options: Options for the request + + Returns: + ComputerResponse: Response from the action + """ request: Any = None - if action == "move_mouse": - request = Request_MoveMouse(coordinates=coordinates, hold_keys=hold_keys) - elif action == "click_mouse": - request = Request_ClickMouse( - button=button, - click_type=click_type, - coordinates=coordinates, - num_clicks=num_clicks, - hold_keys=hold_keys, - ) - elif action == "drag_mouse": - request = Request_DragMouse(path=path, hold_keys=hold_keys) - elif action == "scroll": - request = Request_Scroll( - coordinates=coordinates, - delta_x=delta_x, - delta_y=delta_y, - hold_keys=hold_keys, - ) - elif action == "press_key": - request = Request_PressKey(keys=keys, duration=duration) - elif action == "type_text": - request = Request_TypeText(text=text, hold_keys=hold_keys) - elif action == "wait": - request = Request_Wait(duration=duration) - elif action == "take_screenshot": - request = Request_TakeScreenshot() - elif action == "get_cursor_position": - request = Request_GetCursorPosition() + # Check if action is an action object + request = _create_request_from_action(action) + + # If it wasn't an object or the object wasn't recognized, use the legacy string-based approach + if request is None: + if action == "move_mouse": + request = Request_MoveMouse(coordinates=coordinates, hold_keys=hold_keys) + elif action == "click_mouse": + request = Request_ClickMouse( + button=button, + click_type=click_type, + coordinates=coordinates, + num_clicks=num_clicks, + hold_keys=hold_keys, + ) + elif action == "drag_mouse": + request = Request_DragMouse(path=path, hold_keys=hold_keys) + elif action == "scroll": + request = Request_Scroll( + coordinates=coordinates, + delta_x=delta_x, + delta_y=delta_y, + hold_keys=hold_keys, + ) + elif action == "press_key": + request = Request_PressKey(keys=keys, duration=duration) + elif action == "type_text": + request = Request_TypeText(text=text, hold_keys=hold_keys) + elif action == "wait": + request = Request_Wait(duration=duration) + elif action == "take_screenshot": + request = Request_TakeScreenshot() + elif action == "get_cursor_position": + request = Request_GetCursorPosition() return self._client.instance.computer( self.id, @@ -954,100 +918,10 @@ async def get_stream_url( self.id, request_options=request_options ) - @overload async def computer( self, *, - action: Literal["move_mouse"], - coordinates: List[int], - hold_keys: Optional[List[str]] = None, - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - async def computer( - self, - *, - action: Literal["click_mouse"], - button: Button, - click_type: Optional[ClickMouseActionClickType] = None, - coordinates: Optional[List[int]] = None, - num_clicks: Optional[int] = None, - hold_keys: Optional[List[str]] = None, - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - async def computer( - self, - *, - action: Literal["drag_mouse"], - path: List[List[int]], - hold_keys: Optional[List[str]] = None, - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - async def computer( - self, - *, - action: Literal["scroll"], - coordinates: List[int], - delta_x: Optional[float] = None, - delta_y: Optional[float] = None, - hold_keys: Optional[List[str]] = None, - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - async def computer( - self, - *, - action: Literal["press_key"], - keys: List[str], - duration: Optional[float] = None, - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - async def computer( - self, - *, - action: Literal["type_text"], - text: str, - hold_keys: Optional[List[str]] = None, - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - async def computer( - self, - *, - action: Literal["wait"], - duration: float, - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - async def computer( - self, - *, - action: Literal["take_screenshot"], - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - @overload - async def computer( - self, - *, - action: Literal["get_cursor_position"], - request_options: Optional[RequestOptions] = None, - ) -> ComputerResponse: ... - - async def computer( - self, - *, - action: Action, + action: Union[Action, MoveMouseAction, ClickMouseAction, DragMouseAction, ScrollAction, PressKeyAction, TypeTextAction, WaitAction, TakeScreenshotAction, GetCursorPositionAction], button: Optional[Button] = None, click_type: Optional[ClickMouseActionClickType] = None, coordinates: Optional[List[int]] = None, @@ -1061,37 +935,82 @@ async def computer( duration: Optional[float] = None, request_options: Optional[RequestOptions] = None, ) -> ComputerResponse: + """Control computer actions like mouse movements, clicks, and keyboard input. + + This method supports two ways of specifying actions: + + 1. Using action objects (recommended): + ```python + click_action = ClickMouseAction( + button="left", + coordinates=[500, 500] + ) + await instance.computer(action=click_action) + ``` + + 2. Using string action types with parameters (legacy): + ```python + await instance.computer( + action="click_mouse", + button="left", + coordinates=[500, 500] + ) + ``` + + Args: + action: Either a string action type or an action object + button: The mouse button to use (for click actions) + click_type: The type of click to perform + coordinates: Coordinates for mouse actions + delta_x: X delta for scroll actions + delta_y: Y delta for scroll actions + num_clicks: Number of clicks to perform + hold_keys: Keys to hold during the action + path: Path for drag mouse actions + keys: Keys to press + text: Text to type + duration: Duration for wait actions + request_options: Options for the request + + Returns: + ComputerResponse: Response from the action + """ request: Any = None - if action == "move_mouse": - request = Request_MoveMouse(coordinates=coordinates, hold_keys=hold_keys) - elif action == "click_mouse": - request = Request_ClickMouse( - button=button, - click_type=click_type, - coordinates=coordinates, - num_clicks=num_clicks, - hold_keys=hold_keys, - ) - elif action == "drag_mouse": - request = Request_DragMouse(path=path, hold_keys=hold_keys) - elif action == "scroll": - request = Request_Scroll( - coordinates=coordinates, - delta_x=delta_x, - delta_y=delta_y, - hold_keys=hold_keys, - ) - elif action == "press_key": - request = Request_PressKey(keys=keys, duration=duration) - elif action == "type_text": - request = Request_TypeText(text=text, hold_keys=hold_keys) - elif action == "wait": - request = Request_Wait(duration=duration) - elif action == "take_screenshot": - request = Request_TakeScreenshot() - elif action == "get_cursor_position": - request = Request_GetCursorPosition() + # Check if action is an action object + request = _create_request_from_action(action) + + # If it wasn't an object or the object wasn't recognized, use the legacy string-based approach + if request is None: + if action == "move_mouse": + request = Request_MoveMouse(coordinates=coordinates, hold_keys=hold_keys) + elif action == "click_mouse": + request = Request_ClickMouse( + button=button, + click_type=click_type, + coordinates=coordinates, + num_clicks=num_clicks, + hold_keys=hold_keys, + ) + elif action == "drag_mouse": + request = Request_DragMouse(path=path, hold_keys=hold_keys) + elif action == "scroll": + request = Request_Scroll( + coordinates=coordinates, + delta_x=delta_x, + delta_y=delta_y, + hold_keys=hold_keys, + ) + elif action == "press_key": + request = Request_PressKey(keys=keys, duration=duration) + elif action == "type_text": + request = Request_TypeText(text=text, hold_keys=hold_keys) + elif action == "wait": + request = Request_Wait(duration=duration) + elif action == "take_screenshot": + request = Request_TakeScreenshot() + elif action == "get_cursor_position": + request = Request_GetCursorPosition() return await self._client.instance.computer( self.id, @@ -2053,3 +1972,52 @@ async def act_stream( if not has_tool_calls or has_structured_output: break + + +def _create_request_from_action(action): + """Helper function to create a request object from an action object.""" + if isinstance(action, MoveMouseAction): + return Request_MoveMouse( + coordinates=action.coordinates, + hold_keys=action.hold_keys + ) + elif isinstance(action, ClickMouseAction): + return Request_ClickMouse( + button=action.button, + click_type=action.click_type, + coordinates=action.coordinates, + num_clicks=action.num_clicks, + hold_keys=action.hold_keys, + ) + elif isinstance(action, DragMouseAction): + return Request_DragMouse( + path=action.path, + hold_keys=action.hold_keys + ) + elif isinstance(action, ScrollAction): + return Request_Scroll( + coordinates=action.coordinates, + delta_x=action.delta_x, + delta_y=action.delta_y, + hold_keys=action.hold_keys, + ) + elif isinstance(action, PressKeyAction): + return Request_PressKey( + keys=action.keys, + duration=action.duration + ) + elif isinstance(action, TypeTextAction): + return Request_TypeText( + text=action.text, + hold_keys=action.hold_keys + ) + elif isinstance(action, WaitAction): + return Request_Wait( + duration=action.duration + ) + elif isinstance(action, TakeScreenshotAction): + return Request_TakeScreenshot() + elif isinstance(action, GetCursorPositionAction): + return Request_GetCursorPosition() + else: + return None From 703a109b9b3b64792cf43a519a8b1c3083fbbc0f Mon Sep 17 00:00:00 2001 From: Cooper Miller Date: Tue, 4 Mar 2025 13:34:46 -0600 Subject: [PATCH 2/2] bring back the overloads --- src/scrapybara/client.py | 352 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 350 insertions(+), 2 deletions(-) diff --git a/src/scrapybara/client.py b/src/scrapybara/client.py index 9798bc6..20c7489 100644 --- a/src/scrapybara/client.py +++ b/src/scrapybara/client.py @@ -12,6 +12,8 @@ Generator, Callable, AsyncGenerator, + Literal, + overload, ) import typing import os @@ -655,10 +657,183 @@ def get_stream_url( self.id, request_options=request_options ) + @overload def computer( self, *, - action: Union[Action, MoveMouseAction, ClickMouseAction, DragMouseAction, ScrollAction, PressKeyAction, TypeTextAction, WaitAction, TakeScreenshotAction, GetCursorPositionAction], + action: MoveMouseAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: ClickMouseAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: DragMouseAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: ScrollAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: PressKeyAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: TypeTextAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: WaitAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: TakeScreenshotAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: GetCursorPositionAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: Literal["move_mouse"], + coordinates: List[int], + hold_keys: Optional[List[str]] = None, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: Literal["click_mouse"], + button: Button, + click_type: Optional[ClickMouseActionClickType] = None, + coordinates: Optional[List[int]] = None, + num_clicks: Optional[int] = None, + hold_keys: Optional[List[str]] = None, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: Literal["drag_mouse"], + path: List[List[int]], + hold_keys: Optional[List[str]] = None, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: Literal["scroll"], + coordinates: Optional[List[int]] = None, + delta_x: Optional[float] = None, + delta_y: Optional[float] = None, + hold_keys: Optional[List[str]] = None, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: Literal["press_key"], + keys: List[str], + duration: Optional[float] = None, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: Literal["type_text"], + text: str, + hold_keys: Optional[List[str]] = None, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: Literal["wait"], + duration: float, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: Literal["take_screenshot"], + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + def computer( + self, + *, + action: Literal["get_cursor_position"], + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + def computer( + self, + *, + action: Union[ + Action, + MoveMouseAction, + ClickMouseAction, + DragMouseAction, + ScrollAction, + PressKeyAction, + TypeTextAction, + WaitAction, + TakeScreenshotAction, + GetCursorPositionAction, + ], button: Optional[Button] = None, click_type: Optional[ClickMouseActionClickType] = None, coordinates: Optional[List[int]] = None, @@ -918,10 +1093,183 @@ async def get_stream_url( self.id, request_options=request_options ) + @overload + async def computer( + self, + *, + action: MoveMouseAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: ClickMouseAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: DragMouseAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: ScrollAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: PressKeyAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: TypeTextAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: WaitAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: TakeScreenshotAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: GetCursorPositionAction, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: Literal["move_mouse"], + coordinates: List[int], + hold_keys: Optional[List[str]] = None, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: Literal["click_mouse"], + button: Button, + click_type: Optional[ClickMouseActionClickType] = None, + coordinates: Optional[List[int]] = None, + num_clicks: Optional[int] = None, + hold_keys: Optional[List[str]] = None, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: Literal["drag_mouse"], + path: List[List[int]], + hold_keys: Optional[List[str]] = None, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: Literal["scroll"], + coordinates: Optional[List[int]] = None, + delta_x: Optional[float] = None, + delta_y: Optional[float] = None, + hold_keys: Optional[List[str]] = None, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: Literal["press_key"], + keys: List[str], + duration: Optional[float] = None, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: Literal["type_text"], + text: str, + hold_keys: Optional[List[str]] = None, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: Literal["wait"], + duration: float, + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: Literal["take_screenshot"], + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + + @overload + async def computer( + self, + *, + action: Literal["get_cursor_position"], + request_options: Optional[RequestOptions] = None, + ) -> ComputerResponse: ... + async def computer( self, *, - action: Union[Action, MoveMouseAction, ClickMouseAction, DragMouseAction, ScrollAction, PressKeyAction, TypeTextAction, WaitAction, TakeScreenshotAction, GetCursorPositionAction], + action: Union[ + Action, + MoveMouseAction, + ClickMouseAction, + DragMouseAction, + ScrollAction, + PressKeyAction, + TypeTextAction, + WaitAction, + TakeScreenshotAction, + GetCursorPositionAction, + ], button: Optional[Button] = None, click_type: Optional[ClickMouseActionClickType] = None, coordinates: Optional[List[int]] = None,