Skip to content

Commit bcba999

Browse files
committed
add more test cases
1 parent 531cc98 commit bcba999

File tree

1 file changed

+340
-0
lines changed

1 file changed

+340
-0
lines changed

tests/test_providers.py

Lines changed: 340 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from zyte_common_items import BasePage, Product
2828

2929
from scrapy_zyte_api._annotations import ExtractFrom
30+
from scrapy_zyte_api.handler import ScrapyZyteAPIDownloadHandler
3031
from scrapy_zyte_api.providers import ZyteApiProvider
3132

3233
from . import SETTINGS, get_crawler
@@ -382,3 +383,342 @@ def provide(*args, **kwargs):
382383
assert type(results[0]) == AnyResponse
383384
assert type(results[0].response) == HttpResponse
384385
assert type(results[1]) == Product
386+
387+
388+
class RecordingHandler(ScrapyZyteAPIDownloadHandler):
    """Download handler that remembers what it was asked to log.

    Every ``params`` value passed to :meth:`_log_request` is appended to
    ``self.params`` in call order, so tests can inspect the parameters of
    each request and use ``len(handler.params)`` as the request count.
    """

    def __init__(self, *args, **kwargs):
        # Let the parent handler set itself up first, then attach the log.
        super().__init__(*args, **kwargs)
        self.params: list = []

    def _log_request(self, params):
        # Overrides the parent hook: record the parameters and do nothing else.
        self.params.append(params)
400+
401+
402+
def provider_settings(server):
    """Return Scrapy settings wired to *server* for the provider tests.

    Starts from ``create_scrapy_settings()`` and then:

    * points ``ZYTE_API_URL`` at the root URL of *server*,
    * enables ``ZYTE_API_TRANSPARENT_MODE``,
    * registers ``ZyteApiProvider`` as the only scrapy-poet provider
      (priority 1100),
    * installs ``RecordingHandler`` as the ``http`` download handler.
    """
    settings = create_scrapy_settings()
    overrides = {
        "ZYTE_API_URL": server.urljoin("/"),
        "ZYTE_API_TRANSPARENT_MODE": True,
        "SCRAPY_POET_PROVIDERS": {ZyteApiProvider: 1100},
    }
    for key, value in overrides.items():
        settings[key] = value
    # Only the "http" scheme is swapped; other handlers are left as they were.
    settings["DOWNLOAD_HANDLERS"]["http"] = RecordingHandler
    return settings
409+
410+
411+
@ensureDeferred
async def test_provider_any_response_only(mockserver):
    """Crawl a page whose only dependency is ``AnyResponse`` and check the
    request parameters recorded by the ``http`` download handler.
    """

    @attrs.define
    class SomePage(BasePage):
        response: AnyResponse

    class ZyteAPISpider(Spider):
        def start_requests(self):
            yield Request(self.url, callback=self.parse_)

        def parse_(self, response: DummyResponse, page: SomePage):
            yield {"page": page}

    item, _, crawler = await crawl_single_item(
        ZyteAPISpider, HtmlResource, provider_settings(mockserver)
    )
    http_handler = crawler.engine.downloader.handlers._handlers["http"]
    recorded = http_handler.params

    # Exactly one request was recorded, and it carried nothing but the URL.
    assert len(recorded) == 1
    assert recorded[0].keys() == {"url"}
    # NOTE(review): expecting no item looks like a placeholder — confirm.
    assert item is None
431+
432+
433+
@ensureDeferred
async def test_provider_any_response_product(mockserver):
    """Crawl a page depending on ``AnyResponse`` and ``Product`` and check
    the request parameters recorded by the ``http`` download handler.
    """

    @attrs.define
    class SomePage(BasePage):
        response: AnyResponse
        product: Product

    class ZyteAPISpider(Spider):
        def start_requests(self):
            yield Request(self.url, callback=self.parse_)

        def parse_(self, response: DummyResponse, page: SomePage):
            yield {"page": page}

    item, _, crawler = await crawl_single_item(
        ZyteAPISpider, HtmlResource, provider_settings(mockserver)
    )
    http_handler = crawler.engine.downloader.handlers._handlers["http"]
    recorded = http_handler.params

    # A single request is expected, asking for the product on top of the URL.
    assert len(recorded) == 1
    assert recorded[0].keys() == {"url", "product"}
    # NOTE(review): expecting no item looks like a placeholder — confirm.
    assert item is None
454+
455+
456+
@ensureDeferred
async def test_provider_any_response_product_extract_from_browser_html(mockserver):
    """``AnyResponse`` + ``Product`` with ``extractFrom: browserHtml``.

    Expects a single recorded request that includes ``browserHtml`` and the
    product options, and an ``AnyResponse`` wrapping a ``BrowserResponse``.
    """

    @attrs.define
    class SomePage(BasePage):
        response: AnyResponse
        product: Product

    class ZyteAPISpider(Spider):
        def start_requests(self):
            yield Request(self.url, callback=self.parse_)

        def parse_(self, response: DummyResponse, page: SomePage):
            yield {"page": page}

    settings = provider_settings(mockserver)
    settings["ZYTE_API_PROVIDER_PARAMS"] = {
        "productOptions": {"extractFrom": "browserHtml"}
    }
    item, _, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings)
    params = crawler.engine.downloader.handlers._handlers["http"].params

    assert len(params) == 1
    assert params[0].keys() == {"url", "product", "browserHtml", "productOptions"}

    # Exact-type checks on purpose (subclasses must not pass), hence
    # ``type(...) is`` rather than ``isinstance`` or ``==``.
    page = item["page"]
    assert type(page.response) is AnyResponse
    assert type(page.response.response) is BrowserResponse
    assert type(page.product) is Product
483+
484+
485+
@ensureDeferred
async def test_provider_any_response_product_extract_from_browser_html_2(mockserver):
    """``AnyResponse`` + ``BrowserResponse`` + ``Product`` with
    ``extractFrom: browserHtml``.

    Expects a single recorded request, and the ``BrowserResponse`` injected
    directly to be the very same object wrapped by ``AnyResponse``.
    """

    @attrs.define
    class SomePage(BasePage):
        response: AnyResponse
        browser_response: BrowserResponse
        product: Product

    class ZyteAPISpider(Spider):
        def start_requests(self):
            yield Request(self.url, callback=self.parse_)

        def parse_(self, response: DummyResponse, page: SomePage):
            yield {"page": page}

    settings = provider_settings(mockserver)
    settings["ZYTE_API_PROVIDER_PARAMS"] = {
        "productOptions": {"extractFrom": "browserHtml"}
    }
    item, _, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings)
    params = crawler.engine.downloader.handlers._handlers["http"].params

    assert len(params) == 1
    assert params[0].keys() == {"url", "product", "browserHtml", "productOptions"}

    # Exact-type checks on purpose, hence ``type(...) is``.
    page = item["page"]
    assert type(page.response) is AnyResponse
    assert type(page.response.response) is BrowserResponse
    assert type(page.browser_response) is BrowserResponse
    assert type(page.product) is Product

    # Identity, not equality: both dependencies must share one instance.
    assert page.browser_response is page.response.response
516+
517+
518+
@ensureDeferred
async def test_provider_any_response_product_extract_from_http_response(mockserver):
    """``AnyResponse`` + ``Product`` with ``extractFrom: httpResponseBody``.

    Expects a single recorded request that includes the HTTP response body
    and header parameters, and an ``AnyResponse`` wrapping an
    ``HttpResponse``.
    """

    @attrs.define
    class SomePage(BasePage):
        response: AnyResponse
        product: Product

    class ZyteAPISpider(Spider):
        def start_requests(self):
            yield Request(self.url, callback=self.parse_)

        def parse_(self, response: DummyResponse, page: SomePage):
            yield {"page": page}

    settings = provider_settings(mockserver)
    settings["ZYTE_API_PROVIDER_PARAMS"] = {
        "productOptions": {"extractFrom": "httpResponseBody"}
    }
    item, _, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings)
    params = crawler.engine.downloader.handlers._handlers["http"].params

    assert len(params) == 1
    assert params[0].keys() == {
        "url",
        "product",
        "httpResponseBody",
        "productOptions",
        "httpResponseHeaders",
        "customHttpRequestHeaders",
    }

    # Exact-type checks on purpose, hence ``type(...) is``.
    page = item["page"]
    assert type(page.response) is AnyResponse
    assert type(page.response.response) is HttpResponse
    assert type(page.product) is Product
552+
553+
554+
@ensureDeferred
async def test_provider_any_response_product_extract_from_http_response_2(mockserver):
    """``AnyResponse`` + ``HttpResponse`` + ``Product`` with
    ``extractFrom: httpResponseBody``.

    Expects a single recorded request covering all three dependencies.
    """

    @attrs.define
    class SomePage(BasePage):
        response: AnyResponse
        http_response: HttpResponse
        product: Product

    class ZyteAPISpider(Spider):
        def start_requests(self):
            yield Request(self.url, callback=self.parse_)

        def parse_(self, response: DummyResponse, page: SomePage):
            yield {"page": page}

    settings = provider_settings(mockserver)
    settings["ZYTE_API_PROVIDER_PARAMS"] = {
        "productOptions": {"extractFrom": "httpResponseBody"}
    }
    item, _, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings)
    params = crawler.engine.downloader.handlers._handlers["http"].params

    assert len(params) == 1
    assert params[0].keys() == {
        "url",
        "product",
        "httpResponseBody",
        "productOptions",
        "httpResponseHeaders",
        "customHttpRequestHeaders",
    }

    # Exact-type checks on purpose, hence ``type(...) is``.
    page = item["page"]
    assert type(page.response) is AnyResponse
    assert type(page.response.response) is HttpResponse
    assert type(page.product) is Product
    assert type(page.http_response) is HttpResponse
590+
591+
592+
@ensureDeferred
async def test_provider_any_response_browser_html(mockserver):
    """``AnyResponse`` + ``BrowserHtml``.

    Expects a single recorded request asking for ``browserHtml`` and an
    ``AnyResponse`` wrapping a ``BrowserResponse``.
    """

    @attrs.define
    class SomePage(BasePage):
        response: AnyResponse
        html: BrowserHtml

    class ZyteAPISpider(Spider):
        def start_requests(self):
            yield Request(self.url, callback=self.parse_)

        def parse_(self, response: DummyResponse, page: SomePage):
            yield {"page": page}

    settings = provider_settings(mockserver)
    item, _, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings)
    params = crawler.engine.downloader.handlers._handlers["http"].params

    assert len(params) == 1
    assert params[0].keys() == {"url", "browserHtml"}

    # Exact-type checks on purpose, hence ``type(...) is``.
    page = item["page"]
    assert type(page.response) is AnyResponse
    assert type(page.response.response) is BrowserResponse
    assert type(page.html) is BrowserHtml
616+
617+
618+
@ensureDeferred
async def test_provider_any_response_browser_response(mockserver):
    """``AnyResponse`` + ``BrowserResponse``.

    Expects a single recorded request asking for ``browserHtml`` that
    satisfies both dependencies.
    """

    @attrs.define
    class SomePage(BasePage):
        response: AnyResponse
        browser_response: BrowserResponse

    class ZyteAPISpider(Spider):
        def start_requests(self):
            yield Request(self.url, callback=self.parse_)

        def parse_(self, response: DummyResponse, page: SomePage):
            yield {"page": page}

    settings = provider_settings(mockserver)
    item, _, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings)
    params = crawler.engine.downloader.handlers._handlers["http"].params

    assert len(params) == 1
    assert params[0].keys() == {"url", "browserHtml"}

    # Exact-type checks on purpose, hence ``type(...) is``.
    page = item["page"]
    assert type(page.response) is AnyResponse
    assert type(page.response.response) is BrowserResponse
    assert type(page.browser_response) is BrowserResponse
642+
643+
644+
@ensureDeferred
async def test_provider_any_response_browser_html_response(mockserver):
    """``AnyResponse`` + ``BrowserResponse`` + ``BrowserHtml``.

    Expects a single recorded request asking for ``browserHtml`` that
    satisfies all three dependencies.
    """

    @attrs.define
    class SomePage(BasePage):
        response: AnyResponse
        browser_response: BrowserResponse
        html: BrowserHtml

    class ZyteAPISpider(Spider):
        def start_requests(self):
            yield Request(self.url, callback=self.parse_)

        def parse_(self, response: DummyResponse, page: SomePage):
            yield {"page": page}

    settings = provider_settings(mockserver)
    item, _, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings)
    params = crawler.engine.downloader.handlers._handlers["http"].params

    assert len(params) == 1
    assert params[0].keys() == {"url", "browserHtml"}

    # Exact-type checks on purpose, hence ``type(...) is``.
    page = item["page"]
    assert type(page.response) is AnyResponse
    assert type(page.response.response) is BrowserResponse
    assert type(page.browser_response) is BrowserResponse
    assert type(page.html) is BrowserHtml
670+
671+
672+
@ensureDeferred
async def test_provider_any_response_http_response(mockserver):
    """``AnyResponse`` + ``HttpResponse``.

    Expects a single recorded request asking for the HTTP response body,
    with ``AnyResponse`` wrapping an ``HttpResponse``.
    """

    @attrs.define
    class SomePage(BasePage):
        response: AnyResponse
        http_response: HttpResponse

    class ZyteAPISpider(Spider):
        def start_requests(self):
            yield Request(self.url, callback=self.parse_)

        def parse_(self, response: DummyResponse, page: SomePage):
            yield {"page": page}

    settings = provider_settings(mockserver)
    item, _, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings)
    params = crawler.engine.downloader.handlers._handlers["http"].params

    assert len(params) == 1
    # Parameter names are camelCase with a lowercase first letter, matching
    # the other tests in this file ("HttpResponseBody" was a typo).
    # NOTE(review): the extractFrom=httpResponseBody tests also see
    # "httpResponseHeaders" in the request — confirm whether it belongs here.
    assert params[0].keys() == {"url", "httpResponseBody"}

    # Exact-type checks on purpose, hence ``type(...) is``.
    page = item["page"]
    assert type(page.response) is AnyResponse
    # The expected request has no browserHtml, so the wrapped response is
    # expected to be the HTTP one, not a BrowserResponse.
    assert type(page.response.response) is HttpResponse
    assert type(page.http_response) is HttpResponse
696+
697+
698+
@ensureDeferred
async def test_provider_any_response_browser_http_response(mockserver):
    """``AnyResponse`` + ``BrowserResponse`` + ``HttpResponse``.

    Expects two recorded requests (one for the HTTP response body, one for
    the browser HTML), with ``AnyResponse`` wrapping the ``BrowserResponse``.
    """

    @attrs.define
    class SomePage(BasePage):
        response: AnyResponse
        browser_response: BrowserResponse
        http_response: HttpResponse

    class ZyteAPISpider(Spider):
        def start_requests(self):
            yield Request(self.url, callback=self.parse_)

        def parse_(self, response: DummyResponse, page: SomePage):
            yield {"page": page}

    settings = provider_settings(mockserver)
    item, _, crawler = await crawl_single_item(ZyteAPISpider, HtmlResource, settings)
    params = crawler.engine.downloader.handlers._handlers["http"].params

    assert len(params) == 2
    # Parameter names are camelCase with a lowercase first letter, matching
    # the other tests in this file ("HttpResponseBody"/"BrowserHtml" were typos).
    # NOTE(review): the extractFrom=httpResponseBody tests also see
    # "httpResponseHeaders" in the request — confirm whether it belongs here.
    assert params[0].keys() == {"url", "httpResponseBody"}
    assert params[1].keys() == {"url", "browserHtml"}

    # Exact-type checks on purpose, hence ``type(...) is``.
    page = item["page"]
    assert type(page.response) is AnyResponse
    assert type(page.response.response) is BrowserResponse
    assert type(page.browser_response) is BrowserResponse
    assert type(page.http_response) is HttpResponse

0 commit comments

Comments
 (0)