Skip to content

Commit

Permalink
Add HttpRequestProvider
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio committed Nov 16, 2023
1 parent aba2b74 commit 5a5d89e
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 2 deletions.
25 changes: 25 additions & 0 deletions scrapy_poet/page_input_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
from scrapy.utils.defer import maybe_deferred_to_future
from web_poet import (
HttpClient,
HttpRequest,
HttpRequestHeaders,
HttpResponse,
HttpResponseHeaders,
PageParams,
Expand Down Expand Up @@ -144,6 +146,29 @@ def __init__(self, injector):
# injection breaks the method overriding rules and mypy then complains.


class HttpRequestProvider(PageObjectInputProvider):
    """This class provides :class:`web_poet.HttpRequest
    <web_poet.page_inputs.http.HttpRequest>` instances.
    """

    provided_classes = {HttpRequest}
    name = "request_data"

    def __call__(self, to_provide: Set[Callable], request: Request):
        """Builds a :class:`web_poet.HttpRequest
        <web_poet.page_inputs.http.HttpRequest>` instance using a
        :class:`scrapy.http.Request` instance.

        :param to_provide: the classes requested from this provider; it is
            not read here, a single ``HttpRequest`` is always built.
        :param request: the Scrapy request whose URL, method, headers and
            body are copied into the new ``HttpRequest``.
        :return: a one-element list holding the built ``HttpRequest``.
        """
        return [
            HttpRequest(
                url=RequestUrl(request.url),
                method=request.method,
                # The Scrapy headers object is converted through the
                # bytes-aware constructor of HttpRequestHeaders.
                headers=HttpRequestHeaders.from_bytes_dict(request.headers),
                body=request.body,
            )
        ]


class HttpResponseProvider(PageObjectInputProvider):
"""This class provides :class:`web_poet.HttpResponse
<web_poet.page_inputs.http.HttpResponse>` instances.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"time_machine >= 2.2.0",
"twisted >= 18.9.0",
"url-matcher >= 0.2.0",
"web-poet >= 0.15",
"web-poet @ git+https://github.com/Gallaecio/web-poet.git@request-headers-from-bytes", # https://github.com/scrapinghub/web-poet/pull/191
],
classifiers=[
"Development Status :: 3 - Alpha",
Expand Down
41 changes: 40 additions & 1 deletion tests/test_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,21 @@
from scrapy.settings import Settings
from scrapy.utils.test import get_crawler
from twisted.python.failure import Failure
from web_poet import HttpClient, HttpResponse
from web_poet import (
HttpClient,
HttpRequest,
HttpRequestBody,
HttpRequestHeaders,
HttpResponse,
RequestUrl,
)
from web_poet.serialization import SerializedLeafData, register_serialization

from scrapy_poet import HttpResponseProvider
from scrapy_poet.injection import Injector
from scrapy_poet.page_input_providers import (
HttpClientProvider,
HttpRequestProvider,
ItemProvider,
PageObjectInputProvider,
PageParamsProvider,
Expand Down Expand Up @@ -204,6 +212,37 @@ async def test_http_client_provider(settings):
assert results[0]._request_downloader == mock_factory.return_value


@ensureDeferred
async def test_http_request_provider(settings):
    """Check that ``HttpRequestProvider`` turns Scrapy requests into
    ``HttpRequest`` objects, for both a URL-only request and a request with
    an explicit method, body and headers.
    """
    provider = HttpRequestProvider(Injector(get_crawler(Spider, settings)))

    # Request created from a URL alone: GET, empty headers, empty body.
    bare_scrapy_request = scrapy.http.Request("https://example.com")
    [bare] = provider(set(), bare_scrapy_request)
    assert isinstance(bare, HttpRequest)
    assert isinstance(bare.url, RequestUrl)
    assert str(bare.url) == "https://example.com"
    assert bare.method == "GET"
    assert isinstance(bare.headers, HttpRequestHeaders)
    assert bare.headers == HttpRequestHeaders()
    assert isinstance(bare.body, HttpRequestBody)
    assert bare.body == HttpRequestBody()

    # Request with every supported field set explicitly.
    populated_scrapy_request = scrapy.http.Request(
        "https://example.com",
        method="POST",
        body=b"a",
        headers={"a": "b"},
    )
    [populated] = provider(set(), populated_scrapy_request)
    assert isinstance(populated, HttpRequest)
    assert isinstance(populated.url, RequestUrl)
    assert str(populated.url) == "https://example.com"
    assert populated.method == "POST"
    assert isinstance(populated.headers, HttpRequestHeaders)
    assert populated.headers == HttpRequestHeaders([("a", "b")])
    assert isinstance(populated.body, HttpRequestBody)
    assert populated.body == HttpRequestBody(b"a")


def test_page_params_provider(settings):
crawler = get_crawler(Spider, settings)
injector = Injector(crawler)
Expand Down

0 comments on commit 5a5d89e

Please sign in to comment.