|
| 1 | +""" |
| 2 | +Asynchronous HTTP client for the ScrapeGraphAI API. |
| 3 | +
|
| 4 | +This module provides an asynchronous client for interacting with all ScrapeGraphAI |
| 5 | +API endpoints including smartscraper, searchscraper, crawl, agentic scraper, |
| 6 | +markdownify, schema generation, scheduled jobs, and utility functions. |
| 7 | +
|
| 8 | +The AsyncClient class supports: |
| 9 | +- API key authentication |
| 10 | +- SSL verification configuration |
| 11 | +- Request timeout configuration |
| 12 | +- Automatic retry logic with exponential backoff |
| 13 | +- Mock mode for testing |
| 14 | +- Async context manager support for proper resource cleanup |
| 15 | +- Concurrent requests using asyncio |
| 16 | +
|
| 17 | +Example: |
| 18 | + Basic usage with environment variables: |
| 19 | + >>> import asyncio |
| 20 | + >>> from scrapegraph_py import AsyncClient |
| 21 | + >>> async def main(): |
| 22 | + ... client = AsyncClient.from_env() |
| 23 | + ... result = await client.smartscraper( |
| 24 | + ... website_url="https://example.com", |
| 25 | + ... user_prompt="Extract product information" |
| 26 | + ... ) |
| 27 | + ... await client.close() |
| 28 | + >>> asyncio.run(main()) |
| 29 | +
|
| 30 | + Using async context manager: |
| 31 | + >>> async def main(): |
| 32 | + ... async with AsyncClient(api_key="sgai-...") as client: |
| 33 | + ... result = await client.scrape(website_url="https://example.com") |
| 34 | + >>> asyncio.run(main()) |
| 35 | +""" |
1 | 36 | import asyncio |
2 | 37 | from typing import Any, Dict, Optional, Callable |
3 | 38 |
|
|
45 | 80 |
|
46 | 81 |
|
47 | 82 | class AsyncClient: |
| 83 | + """ |
| 84 | + Asynchronous client for the ScrapeGraphAI API. |
| 85 | +
|
| 86 | + This class provides asynchronous methods for all ScrapeGraphAI API endpoints. |
| 87 | + It handles authentication, request management, error handling, and supports |
| 88 | + mock mode for testing. Uses aiohttp for efficient async HTTP requests. |
| 89 | +
|
| 90 | + Attributes: |
| 91 | + api_key (str): The API key for authentication |
| 92 | + headers (dict): Default headers including API key |
| 93 | + timeout (ClientTimeout): Request timeout configuration |
| 94 | + max_retries (int): Maximum number of retry attempts |
| 95 | + retry_delay (float): Base delay between retries in seconds |
| 96 | + mock (bool): Whether mock mode is enabled |
| 97 | + session (ClientSession): Aiohttp session for connection pooling |
| 98 | +
|
| 99 | + Example: |
| 100 | + >>> async def example(): |
| 101 | + ... async with AsyncClient.from_env() as client: |
| 102 | + ... result = await client.smartscraper( |
| 103 | + ... website_url="https://example.com", |
| 104 | + ... user_prompt="Extract all products" |
| 105 | + ... ) |
| 106 | + """ |
48 | 107 | @classmethod |
49 | 108 | def from_env( |
50 | 109 | cls, |
@@ -145,7 +204,25 @@ def __init__( |
145 | 204 | logger.info("✅ AsyncClient initialized successfully") |
146 | 205 |
|
147 | 206 | async def _make_request(self, method: str, url: str, **kwargs) -> Any: |
148 | | - """Make HTTP request with retry logic.""" |
| 207 | + """ |
| 208 | + Make an asynchronous HTTP request with retry logic and error handling. |
| 209 | +
|
| 210 | + Args: |
| 211 | + method: HTTP method (GET, POST, etc.) |
| 212 | + url: Full URL for the request |
| 213 | + **kwargs: Additional arguments to pass to aiohttp |
| 214 | +
|
| 215 | + Returns: |
| 216 | + Parsed JSON response data |
| 217 | +
|
| 218 | + Raises: |
| 219 | + APIError: If the API returns an error response |
| 220 | + ConnectionError: If unable to connect after all retries |
| 221 | +
|
| 222 | + Note: |
| 223 | + In mock mode, this method returns deterministic responses without |
| 224 | + making actual HTTP requests. |
| 225 | + """ |
149 | 226 | # Short-circuit when mock mode is enabled |
150 | 227 | if getattr(self, "mock", False): |
151 | 228 | return self._mock_response(method, url, **kwargs) |
|
0 commit comments