diff --git a/src/scrapybara/client.py b/src/scrapybara/client.py
index e02a3e1..334c8b6 100644
--- a/src/scrapybara/client.py
+++ b/src/scrapybara/client.py
@@ -4,6 +4,8 @@
 import httpx
 import os
 
+import typing
+from pydantic import BaseModel, ValidationError
 from scrapybara.environment import ScrapybaraEnvironment
 from .core.request_options import RequestOptions
 from .types import (
@@ -31,6 +33,8 @@
 
 OMIT = typing.cast(typing.Any, ...)
 
+PydanticModelT = typing.TypeVar("PydanticModelT", bound=BaseModel)
+
 
 class Agent:
     def __init__(self, instance_id: str, client: BaseClient):
@@ -71,6 +75,53 @@ def scrape(
             request_options=request_options,
         )
 
+    def scrape_to_pydantic(
+        self,
+        *,
+        cmd: typing.Optional[str] = OMIT,
+        schema: typing.Type[PydanticModelT],
+        model: typing.Optional[typing.Literal["claude"]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> PydanticModelT:
+        """Scrape with the agent and validate the result into ``schema``.
+
+        Args:
+            cmd: Instruction sent to the agent. When omitted or empty, the
+                docstring of ``schema`` is used instead.
+            schema: Pydantic model class; its JSON schema is sent with the
+                request and ``response.data`` is validated against it.
+            model: Forwarded unchanged to ``agent.scrape``.
+            request_options: Forwarded unchanged to ``agent.scrape``.
+
+        Returns:
+            An instance of ``schema`` built from ``response.data``.
+
+        Raises:
+            ValueError: If neither ``cmd`` nor a schema docstring is given.
+            pydantic.ValidationError: If ``response.data`` does not match
+                ``schema``.
+        """
+        # OMIT is ``...``, which is truthy, so it must be tested by identity
+        # or the docstring fallback would never trigger for an omitted cmd.
+        if cmd is OMIT or not cmd:
+            cmd = str(schema.__doc__) if schema.__doc__ else None
+        if cmd is None:
+            raise ValueError(
+                "No command provided, please provide a 'cmd' parameter or docstring in schema class."
+            )
+
+        response = self._client.agent.scrape(
+            self.instance_id,
+            cmd=cmd,
+            schema=schema.model_json_schema(),
+            model=model,
+            request_options=request_options,
+        )
+
+        # Let pydantic's ValidationError propagate unchanged: it cannot be
+        # constructed from a plain message, so re-wrapping raises TypeError.
+        return schema.model_validate(response.data)
+
 
 class AsyncAgent:
     def __init__(self, instance_id: str, client: AsyncBaseClient):
@@ -111,6 +162,53 @@ async def scrape(
             request_options=request_options,
         )
 
+    async def scrape_to_pydantic(
+        self,
+        *,
+        cmd: typing.Optional[str] = OMIT,
+        schema: typing.Type[PydanticModelT],
+        model: typing.Optional[typing.Literal["claude"]] = OMIT,
+        request_options: typing.Optional[RequestOptions] = None,
+    ) -> PydanticModelT:
+        """Asynchronously scrape and validate the result into ``schema``.
+
+        Args:
+            cmd: Instruction sent to the agent. When omitted or empty, the
+                docstring of ``schema`` is used instead.
+            schema: Pydantic model class; its JSON schema is sent with the
+                request and ``response.data`` is validated against it.
+            model: Forwarded unchanged to ``agent.scrape``.
+            request_options: Forwarded unchanged to ``agent.scrape``.
+
+        Returns:
+            An instance of ``schema`` built from ``response.data``.
+
+        Raises:
+            ValueError: If neither ``cmd`` nor a schema docstring is given.
+            pydantic.ValidationError: If ``response.data`` does not match
+                ``schema``.
+        """
+        # OMIT is ``...``, which is truthy, so it must be tested by identity
+        # or the docstring fallback would never trigger for an omitted cmd.
+        if cmd is OMIT or not cmd:
+            cmd = str(schema.__doc__) if schema.__doc__ else None
+        if cmd is None:
+            raise ValueError(
+                "No command provided, please provide a 'cmd' parameter or docstring in schema class."
+            )
+
+        response = await self._client.agent.scrape(
+            self.instance_id,
+            cmd=cmd,
+            schema=schema.model_json_schema(),
+            model=model,
+            request_options=request_options,
+        )
+
+        # Let pydantic's ValidationError propagate unchanged: it cannot be
+        # constructed from a plain message, so re-wrapping raises TypeError.
+        return schema.model_validate(response.data)
+
 
 class Browser:
     def __init__(self, instance_id: str, client: BaseClient):