Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add HttpRequestProvider #173

Merged
merged 4 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions scrapy_poet/page_input_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
from scrapy.utils.defer import maybe_deferred_to_future
from web_poet import (
HttpClient,
HttpRequest,
HttpRequestHeaders,
HttpResponse,
HttpResponseHeaders,
PageParams,
Expand Down Expand Up @@ -144,6 +146,29 @@ def __init__(self, injector):
# injection breaks the method overriding rules and mypy then complains.


class HttpRequestProvider(PageObjectInputProvider):
    """Provider of :class:`web_poet.HttpRequest
    <web_poet.page_inputs.http.HttpRequest>` page inputs.
    """

    provided_classes = {HttpRequest}
    name = "request_data"

    def __call__(self, to_provide: Set[Callable], request: Request):
        """Return a one-element list holding a :class:`web_poet.HttpRequest
        <web_poet.page_inputs.http.HttpRequest>` built from the URL, method,
        headers and body of the given :class:`scrapy.http.Request`.
        """
        web_poet_request = HttpRequest(
            url=RequestUrl(request.url),
            method=request.method,
            headers=HttpRequestHeaders.from_bytes_dict(request.headers),
            body=request.body,
        )
        return [web_poet_request]


class HttpResponseProvider(PageObjectInputProvider):
"""This class provides :class:`web_poet.HttpResponse
<web_poet.page_inputs.http.HttpResponse>` instances.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"time_machine >= 2.2.0",
"twisted >= 18.9.0",
"url-matcher >= 0.2.0",
"web-poet >= 0.15",
"web-poet >= 0.15.1",
],
classifiers=[
"Development Status :: 3 - Alpha",
Expand Down
41 changes: 40 additions & 1 deletion tests/test_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,21 @@
from scrapy.settings import Settings
from scrapy.utils.test import get_crawler
from twisted.python.failure import Failure
from web_poet import HttpClient, HttpResponse
from web_poet import (
HttpClient,
HttpRequest,
HttpRequestBody,
HttpRequestHeaders,
HttpResponse,
RequestUrl,
)
from web_poet.serialization import SerializedLeafData, register_serialization

from scrapy_poet import HttpResponseProvider
from scrapy_poet.injection import Injector
from scrapy_poet.page_input_providers import (
HttpClientProvider,
HttpRequestProvider,
ItemProvider,
PageObjectInputProvider,
PageParamsProvider,
Expand Down Expand Up @@ -204,6 +212,37 @@ async def test_http_client_provider(settings):
assert results[0]._request_downloader == mock_factory.return_value


@ensureDeferred
async def test_http_request_provider(settings):
    """Exercise HttpRequestProvider with a bare Scrapy request and with one
    that sets the method, a header and a body.
    """
    provider = HttpRequestProvider(Injector(get_crawler(Spider, settings)))

    # Bare request: expect GET with empty headers and an empty body.
    bare_scrapy_request = scrapy.http.Request("https://example.com")
    # Single-target unpacking also checks that exactly one object is provided.
    [bare] = provider(set(), bare_scrapy_request)
    assert isinstance(bare, HttpRequest)
    assert isinstance(bare.url, RequestUrl)
    assert str(bare.url) == "https://example.com"
    assert bare.method == "GET"
    assert isinstance(bare.headers, HttpRequestHeaders)
    assert bare.headers == HttpRequestHeaders()
    assert isinstance(bare.body, HttpRequestBody)
    assert bare.body == HttpRequestBody()

    # Populated request: method, header and body must carry over.
    populated_scrapy_request = scrapy.http.Request(
        "https://example.com",
        method="POST",
        body=b"a",
        headers={"a": "b"},
    )
    [populated] = provider(set(), populated_scrapy_request)
    assert isinstance(populated, HttpRequest)
    assert isinstance(populated.url, RequestUrl)
    assert str(populated.url) == "https://example.com"
    assert populated.method == "POST"
    assert isinstance(populated.headers, HttpRequestHeaders)
    assert populated.headers == HttpRequestHeaders([("a", "b")])
    assert isinstance(populated.body, HttpRequestBody)
    assert populated.body == HttpRequestBody(b"a")


def test_page_params_provider(settings):
crawler = get_crawler(Spider, settings)
injector = Injector(crawler)
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ deps =
sqlitedict==1.5.0
time_machine==2.2.0
url-matcher==0.2.0
web-poet==0.15.0
web-poet==0.15.1

# https://github.com/john-kurkowski/tldextract/issues/305
tldextract<3.6
Expand Down
Loading