diff --git a/scrapy_poet/page_input_providers.py b/scrapy_poet/page_input_providers.py
index 672909f0..28960f12 100644
--- a/scrapy_poet/page_input_providers.py
+++ b/scrapy_poet/page_input_providers.py
@@ -22,6 +22,8 @@
 from scrapy.utils.defer import maybe_deferred_to_future
 from web_poet import (
     HttpClient,
+    HttpRequest,
+    HttpRequestHeaders,
     HttpResponse,
     HttpResponseHeaders,
     PageParams,
@@ -144,6 +146,29 @@ def __init__(self, injector):
     # injection breaks the method overriding rules and mypy then complains.
 
 
+class HttpRequestProvider(PageObjectInputProvider):
+    """This class provides :class:`web_poet.HttpRequest
+    <web_poet.page_inputs.http.HttpRequest>` instances.
+    """
+
+    provided_classes = {HttpRequest}
+    name = "request_data"
+
+    def __call__(self, to_provide: Set[Callable], request: Request):
+        """Builds a :class:`web_poet.HttpRequest
+        <web_poet.page_inputs.http.HttpRequest>` instance using a
+        :class:`scrapy.http.Request` instance.
+        """
+        return [  # single-item list holding the built HttpRequest
+            HttpRequest(
+                url=RequestUrl(request.url),
+                method=request.method,
+                headers=HttpRequestHeaders.from_bytes_dict(request.headers),
+                body=request.body,
+            )
+        ]
+
+
 class HttpResponseProvider(PageObjectInputProvider):
     """This class provides :class:`web_poet.HttpResponse
     <web_poet.page_inputs.http.HttpResponse>` instances.
diff --git a/setup.py b/setup.py
index 640d3a3e..52fc431b 100755
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
         "time_machine >= 2.2.0",
         "twisted >= 18.9.0",
         "url-matcher >= 0.2.0",
-        "web-poet >= 0.15",
+        "web-poet >= 0.15.1",
     ],
     classifiers=[
         "Development Status :: 3 - Alpha",
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 892d9583..0ce9acdf 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -8,13 +8,21 @@
 from scrapy.settings import Settings
 from scrapy.utils.test import get_crawler
 from twisted.python.failure import Failure
-from web_poet import HttpClient, HttpResponse
+from web_poet import (
+    HttpClient,
+    HttpRequest,
+    HttpRequestBody,
+    HttpRequestHeaders,
+    HttpResponse,
+    RequestUrl,
+)
 from web_poet.serialization import SerializedLeafData, register_serialization
 
 from scrapy_poet import HttpResponseProvider
 from scrapy_poet.injection import Injector
 from scrapy_poet.page_input_providers import (
     HttpClientProvider,
+    HttpRequestProvider,
     ItemProvider,
     PageObjectInputProvider,
     PageParamsProvider,
@@ -204,6 +212,37 @@ async def test_http_client_provider(settings):
     assert results[0]._request_downloader == mock_factory.return_value
 
 
+@ensureDeferred
+async def test_http_request_provider(settings):
+    crawler = get_crawler(Spider, settings)
+    injector = Injector(crawler)
+    provider = HttpRequestProvider(injector)
+
+    empty_scrapy_request = scrapy.http.Request("https://example.com")  # URL only
+    (empty_request,) = provider(set(), empty_scrapy_request)  # exactly one item
+    assert isinstance(empty_request, HttpRequest)
+    assert isinstance(empty_request.url, RequestUrl)
+    assert str(empty_request.url) == "https://example.com"
+    assert empty_request.method == "GET"  # no method was passed to Request
+    assert isinstance(empty_request.headers, HttpRequestHeaders)
+    assert empty_request.headers == HttpRequestHeaders()  # no headers were set
+    assert isinstance(empty_request.body, HttpRequestBody)
+    assert empty_request.body == HttpRequestBody()  # no body was set
+
+    full_scrapy_request = scrapy.http.Request(  # method, body and headers set
+        "https://example.com", method="POST", body=b"a", headers={"a": "b"}
+    )
+    (full_request,) = provider(set(), full_scrapy_request)  # exactly one item
+    assert isinstance(full_request, HttpRequest)
+    assert isinstance(full_request.url, RequestUrl)
+    assert str(full_request.url) == "https://example.com"
+    assert full_request.method == "POST"
+    assert isinstance(full_request.headers, HttpRequestHeaders)
+    assert full_request.headers == HttpRequestHeaders([("a", "b")])
+    assert isinstance(full_request.body, HttpRequestBody)
+    assert full_request.body == HttpRequestBody(b"a")
+
+
 def test_page_params_provider(settings):
     crawler = get_crawler(Spider, settings)
     injector = Injector(crawler)
diff --git a/tox.ini b/tox.ini
index 1ca5cefe..9e539733 100644
--- a/tox.ini
+++ b/tox.ini
@@ -23,7 +23,7 @@ deps =
     sqlitedict==1.5.0
     time_machine==2.2.0
     url-matcher==0.2.0
-    web-poet==0.15.0
+    web-poet==0.15.1
 
     # https://github.com/john-kurkowski/tldextract/issues/305
     tldextract<3.6