Skip to content

Commit 5c83d22

Browse files
committed
add provider tests for AnyResponse
1 parent b4a79b0 commit 5c83d22

File tree

2 files changed

+138
-13
lines changed

2 files changed

+138
-13
lines changed

scrapy_zyte_api/providers.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ async def __call__( # noqa: C901
118118
options["extractFrom"] = extract_from.value
119119
break
120120

121+
extract_from = None # type: ignore[assignment]
121122
for item_type, kw in item_keywords.items():
122123
options_name = f"{kw}Options"
123124
if item_type not in to_provide_stripped and options_name in zyte_api_meta:
@@ -130,9 +131,9 @@ async def __call__( # noqa: C901
130131
html_requested = True
131132
elif extract_from == "httpResponseBody":
132133
param_parser = _ParamParser(crawler)
133-
params = param_parser.parse(request)
134-
del params["url"]
135-
zyte_api_meta.update(params)
134+
http_request_params = param_parser.parse(request)
135+
del http_request_params["url"]
136+
zyte_api_meta.update(http_request_params)
136137

137138
if html_requested:
138139
zyte_api_meta["browserHtml"] = True
@@ -157,19 +158,24 @@ async def __call__( # noqa: C901
157158
if BrowserHtml in to_provide:
158159
results.append(html)
159160
self.update_cache(request, {BrowserHtml: html})
161+
162+
browser_response = None
160163
if BrowserResponse in to_provide:
161-
response = BrowserResponse(
164+
browser_response = BrowserResponse(
162165
url=api_response.url,
163166
status=api_response.status,
164167
html=html,
165168
)
166-
results.append(response)
167-
self.update_cache(request, {BrowserResponse: response})
169+
results.append(browser_response)
170+
self.update_cache(request, {BrowserResponse: browser_response})
168171

169172
if AnyResponse in to_provide:
173+
any_response = None
174+
170175
if "browserHtml" in api_response.raw_api_response:
171176
any_response = AnyResponse(
172-
response=BrowserResponse(
177+
response=browser_response
178+
or BrowserResponse(
173179
url=api_response.url,
174180
status=api_response.status,
175181
html=html,
@@ -190,8 +196,9 @@ async def __call__( # noqa: C901
190196
)
191197
)
192198

193-
results.append(any_response)
194-
self.update_cache(request, {AnyResponse: any_response})
199+
if any_response:
200+
results.append(any_response)
201+
self.update_cache(request, {AnyResponse: any_response})
195202

196203
for cls in to_provide:
197204
cls_stripped = strip_annotated(cls)
@@ -200,7 +207,7 @@ async def __call__( # noqa: C901
200207
if not kw:
201208
continue
202209
assert issubclass(cls_stripped, Item)
203-
item = cls_stripped.from_dict(api_response.raw_api_response[kw])
210+
item = cls_stripped.from_dict(api_response.raw_api_response[kw]) # type: ignore[attr-defined]
204211
if is_typing_annotated(cls):
205212
item = AnnotatedResult(item, cls.__metadata__) # type: ignore[attr-defined]
206213
results.append(item)

tests/test_providers.py

Lines changed: 121 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,32 @@
44

55
pytest.importorskip("scrapy_poet")
66

7+
import asyncio
8+
79
import attrs
810
from pytest_twisted import ensureDeferred
911
from scrapy import Request, Spider
1012
from scrapy_poet import DummyResponse
13+
from scrapy_poet.injection import Injector
1114
from scrapy_poet.utils.testing import HtmlResource, crawl_single_item
1215
from scrapy_poet.utils.testing import create_scrapy_settings as _create_scrapy_settings
13-
from twisted.internet import reactor
16+
from twisted.internet import defer, reactor
1417
from twisted.web.client import Agent, readBody
15-
from web_poet import BrowserHtml, BrowserResponse, ItemPage, field, handle_urls
18+
from web_poet import (
19+
AnyResponse,
20+
BrowserHtml,
21+
BrowserResponse,
22+
HttpResponse,
23+
ItemPage,
24+
field,
25+
handle_urls,
26+
)
1627
from zyte_common_items import BasePage, Product
1728

1829
from scrapy_zyte_api._annotations import ExtractFrom
1930
from scrapy_zyte_api.providers import ZyteApiProvider
2031

21-
from . import SETTINGS
32+
from . import SETTINGS, get_crawler
2233
from .mockserver import get_ephemeral_port
2334

2435

@@ -263,3 +274,110 @@ def parse_(self, response: DummyResponse, page: AnnotatedProductPage): # type:
263274
item, _, _ = await crawl_single_item(AnnotatedZyteAPISpider, HtmlResource, settings)
264275
assert item is None
265276
assert "Multiple different extractFrom specified for product" in caplog.text
277+
278+
279+
@defer.inlineCallbacks
def run_provider(server, to_provide, settings_dict=None, request_meta=None):
    """Call ``ZyteApiProvider`` directly and return whatever it provides.

    A request for ``/some-page`` on *server* is built with *request_meta* as
    its meta. The crawler settings come from ``create_scrapy_settings()``,
    with ``ZYTE_API_URL`` pointed at the root of *server* and then updated
    with *settings_dict* when it is non-empty. The spider is opened on the
    crawler engine before the provider is invoked.

    The provider call is a coroutine, so it is wrapped into a Deferred to be
    awaited from this ``inlineCallbacks`` generator.

    Returns (through the Deferred) the results of calling the provider with
    ``(to_provide, request, crawler)``.
    """

    class AnyResponseSpider(Spider):
        name = "any_response"

    request = Request(server.urljoin("/some-page"), meta=request_meta)

    crawler_settings = create_scrapy_settings()
    crawler_settings["ZYTE_API_URL"] = server.urljoin("/")
    if settings_dict:
        crawler_settings.update(settings_dict)

    crawler = get_crawler(crawler_settings, AnyResponseSpider)
    yield from crawler.engine.open_spider(crawler.spider)

    provider = ZyteApiProvider(Injector(crawler))

    # Bridge the asyncio coroutine into the Twisted world.
    future = asyncio.ensure_future(provider(to_provide, request, crawler))
    provided = yield defer.Deferred.fromFuture(future)
    return provided
298+
299+
300+
@defer.inlineCallbacks
def test_provider_any_response(mockserver):
    """Check what ``ZyteApiProvider`` returns when ``AnyResponse`` is requested.

    Each scenario calls the provider through ``run_provider`` with a
    different set of dependencies (and, for the later ones, different
    settings or request meta) and asserts the exact types, and order, of the
    returned results.
    """

    # Every scenario shares the same mockserver instance for faster tests.
    def provide(*args, **kwargs):
        return run_provider(mockserver, *args, **kwargs)

    def types_of(items):
        # Exact types, in order; comparing this list also pins the length.
        return [type(item) for item in items]

    results = yield provide(set())
    assert results == []

    # AnyResponse on its own, with no other response to re-use, yields
    # nothing.
    results = yield provide({AnyResponse})
    assert results == []

    # Same situation as above (no response available): only the item comes
    # back.
    results = yield provide({AnyResponse, Product})
    assert types_of(results) == [Product]

    # AnyResponse should re-use the very same BrowserResponse instance when
    # one is provided.
    results = yield provide({AnyResponse, BrowserResponse})
    assert types_of(results) == [BrowserResponse, AnyResponse]
    assert results[0] is results[1].response

    # AnyResponse should re-use BrowserHtml when it is provided.
    results = yield provide({AnyResponse, BrowserHtml})
    assert types_of(results) == [BrowserHtml, AnyResponse]
    # Equal but not the same instance, due to casting.
    assert results[0] == results[1].response.html

    results = yield provide({AnyResponse, BrowserResponse, BrowserHtml})
    assert types_of(results) == [BrowserHtml, BrowserResponse, AnyResponse]
    # Equal but not the same instance, due to casting.
    assert results[0] == results[1].html
    assert results[0] == results[2].response.html

    # NOTE: provide({AnyResponse, HttpResponse}) is hard to test in this
    # setup and would result in an empty list. It will be tested in a
    # spider setup instead, so that HttpResponseProvider can participate.

    # In the following scenarios the extraction source is not part of
    # `to_provide`; it is given through the `*.extractFrom` parameter.

    browser_html_settings = {
        "ZYTE_API_PROVIDER_PARAMS": {"productOptions": {"extractFrom": "browserHtml"}}
    }

    results = yield provide({AnyResponse, Product}, browser_html_settings)
    assert types_of(results) == [AnyResponse, Product]
    assert type(results[0].response) is BrowserResponse

    results = yield provide(
        {AnyResponse, BrowserHtml, Product}, browser_html_settings
    )
    assert types_of(results) == [BrowserHtml, AnyResponse, Product]
    assert type(results[1].response) is BrowserResponse
    # Equal but not the same instance, due to casting.
    assert results[0] == results[1].response.html

    http_body_settings = {
        "ZYTE_API_PROVIDER_PARAMS": {
            "productOptions": {"extractFrom": "httpResponseBody"}
        }
    }
    http_meta = {"zyte_api": {"httpResponseBody": True, "httpResponseHeaders": True}}

    results = yield provide({AnyResponse, Product}, http_body_settings, http_meta)
    assert types_of(results) == [AnyResponse, Product]
    assert type(results[0].response) is HttpResponse

0 commit comments

Comments
 (0)