
Fix typing issues for typed Scrapy. #166

Merged: 10 commits, Nov 4, 2024

scrapy_poet/downloadermiddlewares.py (2 additions & 1 deletion)

@@ -99,6 +99,7 @@ def process_request(
return None

logger.debug(f"Using DummyResponse instead of downloading {request}")
assert self.crawler.stats
self.crawler.stats.inc_value("scrapy_poet/dummy_response_count")
return DummyResponse(url=request.url, request=request)

@@ -132,7 +133,7 @@ def _skip_dependency_creation(self, request: Request, spider: Spider) -> bool:
@inlineCallbacks
def process_response(
self, request: Request, response: Response, spider: Spider
) -> Generator[Deferred, object, Response]:
) -> Generator[Deferred, object, Union[Response, Request]]:
"""This method fills :attr:`scrapy.Request.cb_kwargs
<scrapy.http.Request.cb_kwargs>` with instances for the required Page
Objects found in the callback signature.
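
The added assert above illustrates the narrowing pattern this PR applies across the codebase: in typed Scrapy, several Crawler attributes (such as stats and engine) are presumably declared Optional, so an assert is needed before calling methods on them for mypy to accept the code. A minimal sketch of the pattern, assuming the attribute is always set by the time the hook runs:

from scrapy.crawler import Crawler


def count_dummy_response(crawler: Crawler) -> None:
    # Narrow Optional[StatsCollector] to StatsCollector for mypy; at runtime
    # the stats collector is expected to exist once the crawl has started.
    assert crawler.stats
    crawler.stats.inc_value("scrapy_poet/dummy_response_count")
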
scrapy_poet/injection.py (3 additions & 1 deletion)

@@ -272,6 +272,7 @@ def build_instances(
)
# All the remaining dependencies are internal so they can be built just
# following the andi plan.
assert self.crawler.stats
for cls, kwargs_spec in plan.dependencies:
if cls not in instances.keys():
result_cls: type = cast(type, cls)
@@ -295,6 +296,7 @@ def build_instances_from_providers(
plan: andi.Plan,
):
"""Build dependencies handled by registered providers"""
assert self.crawler.stats
instances: Dict[Callable, Any] = {}
scrapy_provided_dependencies = self.available_dependencies_for_providers(
request, response
@@ -320,7 +322,7 @@
)
# This one should take `web_poet.HttpRequest` but `scrapy.Request` will work as well
# TODO: add `scrapy.Request` type in request_fingerprint() annotations
fingerprint = f"{provider.name}_{request_fingerprint(request)}"
fingerprint = f"{provider.name}_{request_fingerprint(request)}" # type: ignore[arg-type]
# Return the data if it is already in the cache
try:
data = self.cache[fingerprint].items()
scrapy_poet/page_input_providers.py (1 addition & 0 deletions)

@@ -189,6 +189,7 @@ def __call__(self, to_provide: Set[Callable], crawler: Crawler):
<web_poet.page_inputs.client.HttpClient>` instance using Scrapy's
downloader.
"""
assert crawler.engine
downloader = create_scrapy_downloader(crawler.engine.download)
save_responses = crawler.settings.getbool("_SCRAPY_POET_SAVEFIXTURE")
return [
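
The docstring above describes the mechanism: the provider wraps crawler.engine.download with create_scrapy_downloader() and hands the result to web_poet.HttpClient, so page objects can make additional requests through Scrapy's downloader. A hedged sketch of a page object consuming such a client (hypothetical page object and URL, not part of this PR):

import attrs
import web_poet


@attrs.define
class ApiProductPage(web_poet.WebPage):
    http: web_poet.HttpClient

    async def to_item(self) -> dict:
        # This extra request is routed through Scrapy's downloader by the provider.
        api_response = await self.http.get("https://example.com/api/product")
        return {"status": api_response.status}
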
scrapy_poet/spidermiddlewares.py (1 addition & 0 deletions)

@@ -21,6 +21,7 @@ def process_spider_exception(

if not isinstance(exception, Retry):
return None
assert response.request
new_request_or_none = get_retry_request(
response.request,
spider=spider,
scrapy_poet/utils/__init__.py (1 addition & 2 deletions)

@@ -20,8 +20,7 @@
try:
from scrapy.http.request import NO_CALLBACK # available on Scrapy >= 2.8
except ImportError:
# NO_CALLBACK = lambda: None # noqa: E731
NO_CALLBACK = None
NO_CALLBACK = None # type: ignore[assignment]


def get_scrapy_data_path(createdir: bool = True, default_dir: str = ".scrapy") -> str:
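
With the shim above, NO_CALLBACK can be imported from scrapy_poet.utils regardless of the Scrapy version, which is what the test change further down does instead of repeating the try/except. A minimal usage sketch:

from scrapy import Request
from scrapy_poet.utils import NO_CALLBACK

# On Scrapy >= 2.8 NO_CALLBACK marks the request as not meant for a spider
# callback; on older versions the shim falls back to None.
request = Request("https://example.com", callback=NO_CALLBACK)
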
scrapy_poet/utils/testing.py (0 additions & 8 deletions)

@@ -1,7 +1,6 @@
import json
from inspect import isasyncgenfunction
from typing import Dict
from unittest import mock

from scrapy import Spider, signals
from scrapy.crawler import Crawler
@@ -272,10 +271,3 @@ async def parse(*args, **kwargs):
# Mimic type annotations
parse.__annotations__ = callback.__annotations__
return parse


class AsyncMock(mock.MagicMock):
"""workaround since python 3.7 doesn't ship with asyncmock."""

async def __call__(self, *args, **kwargs):
return super().__call__(*args, **kwargs)
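
The removed AsyncMock helper was only a workaround for Python 3.7; unittest.mock.AsyncMock has shipped with the standard library since Python 3.8, so the tests below switch to it directly. For example:

from unittest import mock

# Standard-library replacement for the deleted helper class.
mock_downloader = mock.AsyncMock()
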
tests/test_downloader.py (17 additions & 17 deletions)

@@ -29,7 +29,6 @@
)
from scrapy_poet.utils.mockserver import MockServer
from scrapy_poet.utils.testing import (
AsyncMock,
DelayedResource,
EchoResource,
StatusResource,
@@ -40,7 +39,7 @@

@pytest.fixture
def scrapy_downloader() -> Callable:
mock_downloader = AsyncMock()
mock_downloader = mock.AsyncMock()
return create_scrapy_downloader(mock_downloader)


@@ -69,12 +68,12 @@ async def test_scrapy_poet_downloader(fake_http_response) -> None:
req = web_poet.HttpRequest("https://example.com")

with mock.patch(
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=AsyncMock
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=mock.AsyncMock
) as mock_dtf:

mock_dtf.return_value = fake_http_response

mock_downloader = mock.MagicMock(return_value=AsyncMock)
mock_downloader = mock.MagicMock(return_value=mock.AsyncMock)
scrapy_downloader = create_scrapy_downloader(mock_downloader)

response = await scrapy_downloader(req)
@@ -96,10 +95,10 @@ async def test_scrapy_poet_downloader_ignored_request() -> None:
req = web_poet.HttpRequest("https://example.com")

with mock.patch(
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=AsyncMock
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=mock.AsyncMock
) as mock_dtf:
mock_dtf.side_effect = scrapy.exceptions.IgnoreRequest
mock_downloader = mock.MagicMock(return_value=AsyncMock)
mock_downloader = mock.MagicMock(return_value=mock.AsyncMock)
scrapy_downloader = create_scrapy_downloader(mock_downloader)

with pytest.raises(web_poet.exceptions.HttpError):
@@ -111,10 +110,10 @@ async def test_scrapy_poet_downloader_twisted_error() -> None:
req = web_poet.HttpRequest("https://example.com")

with mock.patch(
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=AsyncMock
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=mock.AsyncMock
) as mock_dtf:
mock_dtf.side_effect = twisted.internet.error.TimeoutError
mock_downloader = mock.MagicMock(return_value=AsyncMock)
mock_downloader = mock.MagicMock(return_value=mock.AsyncMock)
scrapy_downloader = create_scrapy_downloader(mock_downloader)

with pytest.raises(web_poet.exceptions.HttpRequestError):
@@ -126,10 +125,10 @@ async def test_scrapy_poet_downloader_head_redirect(fake_http_response) -> None:
req = web_poet.HttpRequest("https://example.com", method="HEAD")

with mock.patch(
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=AsyncMock
"scrapy_poet.downloader.maybe_deferred_to_future", new_callable=mock.AsyncMock
) as mock_dtf:
mock_dtf.return_value = fake_http_response
mock_downloader = mock.MagicMock(return_value=AsyncMock)
mock_downloader = mock.MagicMock(return_value=mock.AsyncMock)
scrapy_downloader = create_scrapy_downloader(mock_downloader)

await scrapy_downloader(req)
@@ -454,6 +453,7 @@ async def __call__(
custom_request = Request(
request.url, body=request.body, callback=NO_CALLBACK
)
assert crawler.engine
scrapy_response: Response = await maybe_deferred_to_future(
crawler.engine.download(custom_request)
)
@@ -493,7 +493,7 @@ class TestSpider(Spider):
def start_requests(self):
yield Request(server.root_url, callback=self.parse)

async def parse(self, response: DummyResponse, page: ItemPage):
async def parse(self, response: DummyResponse, page: ItemPage): # type: ignore[override]
item = await page.to_item()
items.append(item)

@@ -570,7 +570,7 @@ def test_parse_callback_none_dummy_response() -> None:
class TestSpider(BaseSpider):
start_urls = [server.root_url]

def parse(self, response: DummyResponse):
def parse(self, response: DummyResponse): # type: ignore[override]
collected["response"] = response

crawler = make_crawler(TestSpider)
@@ -662,7 +662,7 @@ def test_parse_callback_none_with_deps(caplog) -> None:
class TestSpider(BaseSpider):
start_urls = [server.root_url]

def parse(self, response: DummyResponse, page: BasicPage):
def parse(self, response: DummyResponse, page: BasicPage): # type: ignore[override]
pass

crawler = make_crawler(TestSpider)
@@ -705,7 +705,7 @@ def start_requests(self):
page = BasicPage(web_poet.HttpResponse("https://example.com", b""))
yield Request(server.root_url, cb_kwargs={"page": page})

def parse(self, response: DummyResponse, page: BasicPage):
def parse(self, response: DummyResponse, page: BasicPage): # type: ignore[override]
collected["response"] = response

crawler = make_crawler(TestSpider)
@@ -740,7 +740,7 @@ def start_requests(self):
page = BasicPage(web_poet.HttpResponse("https://example.com", b""))
yield Request(server.root_url, cb_kwargs={"page": page})

def parse(
def parse( # type: ignore[override]
self,
response: DummyResponse,
page: BasicPage,
@@ -782,7 +782,7 @@ def test_parse_callback_NO_CALLBACK(caplog) -> None:
class TestSpider(BaseSpider):
start_urls = [server.root_url]

def parse(self, response: DummyResponse):
def parse(self, response: DummyResponse): # type: ignore[override]
collected["response"] = response

crawler = make_crawler(TestSpider)
@@ -814,7 +814,7 @@ def test_parse_callback_NO_CALLBACK_with_page_dep(caplog) -> None:
class TestSpider(BaseSpider):
start_urls = [server.root_url]

def parse(self, response: DummyResponse, page: BasicPage):
def parse(self, response: DummyResponse, page: BasicPage): # type: ignore[override]
collected["response"] = response

crawler = make_crawler(TestSpider)
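
Several callbacks above gain # type: ignore[override] because they narrow the response parameter to DummyResponse; typed Scrapy presumably annotates Spider.parse as accepting any Response, and narrowing a parameter in an override is not type-safe, so mypy reports an override error. A minimal sketch of the pattern, assuming DummyResponse remains a Response subclass:

from scrapy import Spider
from scrapy_poet import DummyResponse


class QuotesSpider(Spider):
    name = "quotes"

    # Narrowing the parameter type is flagged by mypy ([override]) even though
    # scrapy-poet passes a DummyResponse here when the download is skipped.
    def parse(self, response: DummyResponse):  # type: ignore[override]
        return {"url": response.url}
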
tests/test_injection.py (1 addition & 0 deletions)

@@ -302,6 +302,7 @@ def _assert_instances(
reqmeta: Optional[Dict[str, Any]] = None,
) -> Generator[Any, Any, None]:
response = get_response_for_testing(callback, meta=reqmeta)
assert response.request
request = response.request

plan = injector.build_plan(response.request)
tests/test_middleware.py (5 additions & 5 deletions)

@@ -301,7 +301,7 @@ class SkipDownloadSpider(scrapy.Spider):
def start_requests(self):
yield Request(url=self.url, callback=self.parse)

def parse(self, response: DummyResponse):
def parse(self, response: DummyResponse): # type: ignore[override]
return {
"response": response,
}
@@ -332,7 +332,7 @@ class RequestUrlSpider(scrapy.Spider):
def start_requests(self):
yield Request(url=self.url, callback=self.parse)

def parse(self, response: DummyResponse, url: RequestUrl):
def parse(self, response: DummyResponse, url: RequestUrl): # type: ignore[override]
return {
"response": response,
"url": url,
@@ -359,7 +359,7 @@ class ResponseUrlSpider(scrapy.Spider):
def start_requests(self):
yield Request(url=self.url, callback=self.parse)

def parse(self, response: DummyResponse, url: ResponseUrl):
def parse(self, response: DummyResponse, url: ResponseUrl): # type: ignore[override]
return {
"response": response,
"url": url,
@@ -396,7 +396,7 @@ class ResponseUrlPageSpider(scrapy.Spider):
def start_requests(self):
yield Request(url=self.url, callback=self.parse)

def parse(self, response: DummyResponse, page: ResponseUrlPage):
def parse(self, response: DummyResponse, page: ResponseUrlPage): # type: ignore[override]
return page.to_item()


@@ -427,7 +427,7 @@ class RequestUrlPageSpider(scrapy.Spider):
def start_requests(self):
yield Request(url=self.url, callback=self.parse)

def parse(self, response: DummyResponse, page: RequestUrlPage):
def parse(self, response: DummyResponse, page: RequestUrlPage): # type: ignore[override]
return page.to_item()


tests/test_providers.py (8 additions & 9 deletions)

@@ -30,12 +30,7 @@
StatsProvider,
)
from scrapy_poet.utils.mockserver import get_ephemeral_port
from scrapy_poet.utils.testing import (
AsyncMock,
HtmlResource,
ProductHtml,
crawl_single_item,
)
from scrapy_poet.utils.testing import HtmlResource, ProductHtml, crawl_single_item


class NonProductHtml(HtmlResource):
@@ -75,7 +70,9 @@ def __call__(
assert isinstance(spider, scrapy.Spider)
ret: List[Any] = []
if Price in to_provide:
ret.append(Price(response.css(".price::text").get()))
price = response.css(".price::text").get()
assert price is not None
ret.append(Price(price))
if Html in to_provide:
ret.append(Html("Price Html!"))
return ret
@@ -89,7 +86,9 @@ def __call__(self, to_provide, response: scrapy.http.Response, settings: Setting
assert isinstance(settings, Settings)
ret: List[Any] = []
if Name in to_provide:
ret.append(Name(response.css(".name::text").get()))
name = response.css(".name::text").get()
assert name is not None
ret.append(Name(name))
if Html in to_provide:
ret.append(Html("Name Html!"))
return ret
@@ -200,7 +199,7 @@ def test_price_first_spider(settings):
@ensureDeferred
async def test_http_client_provider(settings):
crawler = get_crawler(Spider, settings)
crawler.engine = AsyncMock()
crawler.engine = mock.AsyncMock()
injector = Injector(crawler)

with mock.patch(
tests/test_response_required_logic.py (12 additions & 12 deletions)

@@ -22,13 +22,7 @@
HttpResponseProvider,
PageObjectInputProvider,
)
from scrapy_poet.utils import is_min_scrapy_version

# See: https://github.com/scrapinghub/scrapy-poet/issues/118
try:
from scrapy.http.request import NO_CALLBACK # available on Scrapy >= 2.8
except ImportError:
NO_CALLBACK = lambda: None # noqa: E731
from scrapy_poet.utils import NO_CALLBACK, is_min_scrapy_version


@attr.s(auto_attribs=True)
@@ -70,13 +64,13 @@ def __call__(self, to_provide):
class TextProductProvider(HttpResponseProvider):
# This is wrong. You should not annotate provider dependencies with classes
# like TextResponse or HtmlResponse, you should use Response instead.
def __call__(self, to_provide, response: TextResponse):
def __call__(self, to_provide, response: TextResponse): # type: ignore[override]
return super().__call__(to_provide, response)


class StringProductProvider(HttpResponseProvider):
def __call__(self, to_provide, response: str):
return super().__call__(to_provide, response)
def __call__(self, to_provide, response: str): # type: ignore[override]
return super().__call__(to_provide, response) # type: ignore[arg-type]


@attr.s(auto_attribs=True)
@@ -224,8 +218,11 @@ def test_is_provider_using_response():
reason="tests Scrapy < 2.8 before NO_CALLBACK was introduced",
)
def test_is_callback_using_response_for_scrapy28_below() -> None:
def cb(_: Any) -> Any:
return _

spider = MySpider()
request = Request("https://example.com", callback=lambda _: _)
request = Request("https://example.com", callback=cb)
assert is_callback_requiring_scrapy_response(spider.parse, request.callback) is True
assert (
is_callback_requiring_scrapy_response(spider.parse2, request.callback) is True
@@ -362,8 +359,11 @@ def test_is_callback_using_response_for_scrapy28_below() -> None:
reason="NO_CALLBACK not available in Scrapy < 2.8",
)
def test_is_callback_using_response_for_scrapy28_and_above() -> None:
def cb(_: Any) -> Any:
return _

spider = MySpider()
request_with_callback = Request("https://example.com", callback=lambda _: _)
request_with_callback = Request("https://example.com", callback=cb)
request_no_callback = Request("https://example.com", callback=NO_CALLBACK)

with warnings.catch_warnings(record=True) as caught_warnings:
tox.ini (1 addition & 0 deletions)

@@ -79,6 +79,7 @@ deps =
deps =
mypy==1.11.2
pytest
Scrapy @ git+https://github.com/scrapy/scrapy@master

commands = mypy --ignore-missing-imports scrapy_poet tests
