Skip to content

Commit c305238

Browse files
committed
Delegate public url related functionality to client
1 parent 15bb6e0 commit c305238

File tree

6 files changed

+58
-112
lines changed

6 files changed

+58
-112
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ keywords = [
3434
"scraping",
3535
]
3636
dependencies = [
37-
"apify-client>=2.0.0,<3.0.0",
37+
"apify-client>=2.2.0,<3.0.0",
3838
"apify-shared>=2.0.0,<3.0.0",
3939
"crawlee>=1.0.2,<2.0.0",
4040
"cachetools>=5.5.0",

src/apify/storage_clients/_apify/_dataset_client.py

Lines changed: 12 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,19 @@
11
from __future__ import annotations
22

33
import asyncio
4+
import warnings
45
from logging import getLogger
56
from typing import TYPE_CHECKING, Any
67

78
from typing_extensions import override
89

9-
from apify_client import ApifyClientAsync
1010
from crawlee._utils.byte_size import ByteSize
1111
from crawlee._utils.file import json_dumps
1212
from crawlee.storage_clients._base import DatasetClient
1313
from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
1414
from crawlee.storages import Dataset
1515

16-
from ._utils import AliasResolver
16+
from ._utils import AliasResolver, create_apify_client
1717

1818
if TYPE_CHECKING:
1919
from collections.abc import AsyncIterator
@@ -52,12 +52,17 @@ def __init__(
5252
self._api_client = api_client
5353
"""The Apify dataset client for API operations."""
5454

55-
self._api_public_base_url = api_public_base_url
56-
"""The public base URL for accessing the key-value store records."""
57-
5855
self._lock = lock
5956
"""A lock to ensure that only one operation is performed at a time."""
6057

58+
if api_public_base_url:
59+
# Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
60+
warnings.warn(
61+
'api_public_base_url argument is deprecated and will be removed in version 4.0.0',
62+
DeprecationWarning,
63+
stacklevel=2,
64+
)
65+
6166
@override
6267
async def get_metadata(self) -> DatasetMetadata:
6368
metadata = await self._api_client.get()
@@ -99,29 +104,7 @@ async def open(
99104
if sum(1 for param in [id, name, alias] if param is not None) > 1:
100105
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
101106

102-
token = configuration.token
103-
if not token:
104-
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
105-
106-
api_url = configuration.api_base_url
107-
if not api_url:
108-
raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
109-
110-
api_public_base_url = configuration.api_public_base_url
111-
if not api_public_base_url:
112-
raise ValueError(
113-
'Apify storage client requires a valid API public base URL in Configuration '
114-
f'(api_public_base_url={api_public_base_url}).'
115-
)
116-
117-
# Create Apify client with the provided token and API URL.
118-
apify_client_async = ApifyClientAsync(
119-
token=token,
120-
api_url=api_url,
121-
max_retries=8,
122-
min_delay_between_retries_millis=500,
123-
timeout_secs=360,
124-
)
107+
apify_client_async = create_apify_client(configuration)
125108
apify_datasets_client = apify_client_async.datasets()
126109

127110
# Normalize unnamed default storage in cases where not defined in `configuration.default_dataset_id` to unnamed
@@ -178,7 +161,7 @@ async def open(
178161

179162
return cls(
180163
api_client=apify_dataset_client,
181-
api_public_base_url=api_public_base_url,
164+
api_public_base_url='', # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
182165
lock=asyncio.Lock(),
183166
)
184167

src/apify/storage_clients/_apify/_key_value_store_client.py

Lines changed: 12 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,18 @@
11
from __future__ import annotations
22

33
import asyncio
4+
import warnings
45
from logging import getLogger
56
from typing import TYPE_CHECKING, Any
67

78
from typing_extensions import override
8-
from yarl import URL
99

10-
from apify_client import ApifyClientAsync
1110
from crawlee.storage_clients._base import KeyValueStoreClient
1211
from crawlee.storage_clients.models import KeyValueStoreRecord, KeyValueStoreRecordMetadata
1312
from crawlee.storages import KeyValueStore
1413

1514
from ._models import ApifyKeyValueStoreMetadata, KeyValueStoreListKeysPage
16-
from ._utils import AliasResolver
17-
from apify._crypto import create_hmac_signature
15+
from ._utils import AliasResolver, create_apify_client
1816

1917
if TYPE_CHECKING:
2018
from collections.abc import AsyncIterator
@@ -43,12 +41,17 @@ def __init__(
4341
self._api_client = api_client
4442
"""The Apify KVS client for API operations."""
4543

46-
self._api_public_base_url = api_public_base_url
47-
"""The public base URL for accessing the key-value store records."""
48-
4944
self._lock = lock
5045
"""A lock to ensure that only one operation is performed at a time."""
5146

47+
if api_public_base_url:
48+
# Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
49+
warnings.warn(
50+
'api_public_base_url argument is deprecated and will be removed in version 4.0.0',
51+
DeprecationWarning,
52+
stacklevel=2,
53+
)
54+
5255
@override
5356
async def get_metadata(self) -> ApifyKeyValueStoreMetadata:
5457
metadata = await self._api_client.get()
@@ -90,29 +93,7 @@ async def open(
9093
if sum(1 for param in [id, name, alias] if param is not None) > 1:
9194
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
9295

93-
token = configuration.token
94-
if not token:
95-
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
96-
97-
api_url = configuration.api_base_url
98-
if not api_url:
99-
raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
100-
101-
api_public_base_url = configuration.api_public_base_url
102-
if not api_public_base_url:
103-
raise ValueError(
104-
'Apify storage client requires a valid API public base URL in Configuration '
105-
f'(api_public_base_url={api_public_base_url}).'
106-
)
107-
108-
# Create Apify client with the provided token and API URL.
109-
apify_client_async = ApifyClientAsync(
110-
token=token,
111-
api_url=api_url,
112-
max_retries=8,
113-
min_delay_between_retries_millis=500,
114-
timeout_secs=360,
115-
)
96+
apify_client_async = create_apify_client(configuration)
11697
apify_kvss_client = apify_client_async.key_value_stores()
11798

11899
# Normalize unnamed default storage in cases where not defined in `configuration.default_key_value_store_id` to
@@ -170,7 +151,7 @@ async def open(
170151

171152
return cls(
172153
api_client=apify_kvs_client,
173-
api_public_base_url=api_public_base_url,
154+
api_public_base_url='', # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
174155
lock=asyncio.Lock(),
175156
)
176157

@@ -241,25 +222,3 @@ async def iterate_keys(
241222
@override
242223
async def record_exists(self, key: str) -> bool:
243224
return await self._api_client.record_exists(key=key)
244-
245-
async def get_public_url(self, key: str) -> str:
246-
"""Get a URL for the given key that may be used to publicly access the value in the remote key-value store.
247-
248-
Args:
249-
key: The key for which the URL should be generated.
250-
251-
Returns:
252-
A public URL that can be used to access the value of the given key in the KVS.
253-
"""
254-
if self._api_client.resource_id is None:
255-
raise ValueError('resource_id cannot be None when generating a public URL')
256-
257-
public_url = (
258-
URL(self._api_public_base_url) / 'v2' / 'key-value-stores' / self._api_client.resource_id / 'records' / key
259-
)
260-
metadata = await self.get_metadata()
261-
262-
if metadata.url_signing_secret_key is not None:
263-
public_url = public_url.with_query(signature=create_hmac_signature(metadata.url_signing_secret_key, key))
264-
265-
return str(public_url)

src/apify/storage_clients/_apify/_request_queue_client.py

Lines changed: 2 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55

66
from typing_extensions import override
77

8-
from apify_client import ApifyClientAsync
98
from crawlee._utils.crypto import crypto_random_object_id
109
from crawlee.storage_clients._base import RequestQueueClient
1110
from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata
@@ -14,7 +13,7 @@
1413
from ._models import ApifyRequestQueueMetadata, RequestQueueStats
1514
from ._request_queue_shared_client import ApifyRequestQueueSharedClient
1615
from ._request_queue_single_client import ApifyRequestQueueSingleClient
17-
from ._utils import AliasResolver
16+
from ._utils import AliasResolver, create_apify_client
1817

1918
if TYPE_CHECKING:
2019
from collections.abc import Sequence
@@ -228,29 +227,7 @@ async def open(
228227
if sum(1 for param in [id, name, alias] if param is not None) > 1:
229228
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
230229

231-
token = configuration.token
232-
if not token:
233-
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
234-
235-
api_url = configuration.api_base_url
236-
if not api_url:
237-
raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
238-
239-
api_public_base_url = configuration.api_public_base_url
240-
if not api_public_base_url:
241-
raise ValueError(
242-
'Apify storage client requires a valid API public base URL in Configuration '
243-
f'(api_public_base_url={api_public_base_url}).'
244-
)
245-
246-
# Create Apify client with the provided token and API URL.
247-
apify_client_async = ApifyClientAsync(
248-
token=token,
249-
api_url=api_url,
250-
max_retries=8,
251-
min_delay_between_retries_millis=500,
252-
timeout_secs=360,
253-
)
230+
apify_client_async = create_apify_client(configuration)
254231
apify_rqs_client = apify_client_async.request_queues()
255232

256233
# Normalize unnamed default storage in cases where not defined in `configuration.default_request_queue_id` to

src/apify/storage_clients/_apify/_utils.py

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -192,3 +192,30 @@ def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) ->
192192

193193
# Truncate the key to the desired length
194194
return url_safe_key[:request_id_length]
195+
196+
197+
def create_apify_client(configuration: Configuration) -> ApifyClientAsync:
198+
"""Create and return an ApifyClientAsync instance using the provided configuration."""
199+
if not configuration.token:
200+
raise ValueError(f'Apify storage client requires a valid token in Configuration (token={configuration.token}).')
201+
202+
api_url = configuration.api_base_url
203+
if not api_url:
204+
raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
205+
206+
api_public_base_url = configuration.api_public_base_url
207+
if not api_public_base_url:
208+
raise ValueError(
209+
'Apify storage client requires a valid API public base URL in Configuration '
210+
f'(api_public_base_url={api_public_base_url}).'
211+
)
212+
213+
# Create Apify client with the provided token and API URL.
214+
return ApifyClientAsync(
215+
token=configuration.token,
216+
api_url=api_url,
217+
api_public_url=api_public_base_url,
218+
max_retries=8,
219+
min_delay_between_retries_millis=500,
220+
timeout_secs=360,
221+
)

uv.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)