Skip to content

Commit

Permalink
Merge pull request #173 from Gallaecio/request-providers
Browse files Browse the repository at this point in the history
Add HttpRequestProvider
  • Loading branch information
kmike authored Nov 21, 2023
2 parents aba2b74 + 644e5be commit 74ab3a8
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 3 deletions.
25 changes: 25 additions & 0 deletions scrapy_poet/page_input_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
from scrapy.utils.defer import maybe_deferred_to_future
from web_poet import (
HttpClient,
HttpRequest,
HttpRequestHeaders,
HttpResponse,
HttpResponseHeaders,
PageParams,
Expand Down Expand Up @@ -144,6 +146,29 @@ def __init__(self, injector):
# injection breaks the method overriding rules and mypy then complains.


class HttpRequestProvider(PageObjectInputProvider):
    """Provider for :class:`web_poet.HttpRequest
    <web_poet.page_inputs.http.HttpRequest>` inputs."""

    provided_classes = {HttpRequest}
    name = "request_data"

    def __call__(self, to_provide: Set[Callable], request: Request):
        """Convert the given :class:`scrapy.http.Request` into a
        :class:`web_poet.HttpRequest
        <web_poet.page_inputs.http.HttpRequest>` and return it wrapped
        in a single-element list, as the provider protocol expects.
        """
        converted = HttpRequest(
            url=RequestUrl(request.url),
            method=request.method,
            headers=HttpRequestHeaders.from_bytes_dict(request.headers),
            body=request.body,
        )
        return [converted]


class HttpResponseProvider(PageObjectInputProvider):
"""This class provides :class:`web_poet.HttpResponse
<web_poet.page_inputs.http.HttpResponse>` instances.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"time_machine >= 2.2.0",
"twisted >= 18.9.0",
"url-matcher >= 0.2.0",
"web-poet >= 0.15",
"web-poet >= 0.15.1",
],
classifiers=[
"Development Status :: 3 - Alpha",
Expand Down
41 changes: 40 additions & 1 deletion tests/test_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,21 @@
from scrapy.settings import Settings
from scrapy.utils.test import get_crawler
from twisted.python.failure import Failure
from web_poet import HttpClient, HttpResponse
from web_poet import (
HttpClient,
HttpRequest,
HttpRequestBody,
HttpRequestHeaders,
HttpResponse,
RequestUrl,
)
from web_poet.serialization import SerializedLeafData, register_serialization

from scrapy_poet import HttpResponseProvider
from scrapy_poet.injection import Injector
from scrapy_poet.page_input_providers import (
HttpClientProvider,
HttpRequestProvider,
ItemProvider,
PageObjectInputProvider,
PageParamsProvider,
Expand Down Expand Up @@ -204,6 +212,37 @@ async def test_http_client_provider(settings):
assert results[0]._request_downloader == mock_factory.return_value


@ensureDeferred
async def test_http_request_provider(settings):
    # The provider should map a scrapy Request onto a web-poet HttpRequest,
    # preserving URL, method, headers, and body.
    crawler = get_crawler(Spider, settings)
    provider = HttpRequestProvider(Injector(crawler))

    # Request built with defaults only.
    minimal = scrapy.http.Request("https://example.com")
    (converted,) = provider(set(), minimal)
    assert isinstance(converted, HttpRequest)
    assert isinstance(converted.url, RequestUrl)
    assert str(converted.url) == "https://example.com"
    assert converted.method == "GET"
    assert isinstance(converted.headers, HttpRequestHeaders)
    assert converted.headers == HttpRequestHeaders()
    assert isinstance(converted.body, HttpRequestBody)
    assert converted.body == HttpRequestBody()

    # Request with every supported field populated.
    populated = scrapy.http.Request(
        "https://example.com", method="POST", body=b"a", headers={"a": "b"}
    )
    (converted,) = provider(set(), populated)
    assert isinstance(converted, HttpRequest)
    assert isinstance(converted.url, RequestUrl)
    assert str(converted.url) == "https://example.com"
    assert converted.method == "POST"
    assert isinstance(converted.headers, HttpRequestHeaders)
    assert converted.headers == HttpRequestHeaders([("a", "b")])
    assert isinstance(converted.body, HttpRequestBody)
    assert converted.body == HttpRequestBody(b"a")


def test_page_params_provider(settings):
crawler = get_crawler(Spider, settings)
injector = Injector(crawler)
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ deps =
sqlitedict==1.5.0
time_machine==2.2.0
url-matcher==0.2.0
web-poet==0.15.0
web-poet==0.15.1

# https://github.com/john-kurkowski/tldextract/issues/305
tldextract<3.6
Expand Down

0 comments on commit 74ab3a8

Please sign in to comment.