diff --git a/scrapy_poet/downloadermiddlewares.py b/scrapy_poet/downloadermiddlewares.py
index 86cdceef..25488de0 100644
--- a/scrapy_poet/downloadermiddlewares.py
+++ b/scrapy_poet/downloadermiddlewares.py
@@ -99,6 +99,7 @@ def process_request(
             return None
 
         logger.debug(f"Using DummyResponse instead of downloading {request}")
+        assert self.crawler.stats
         self.crawler.stats.inc_value("scrapy_poet/dummy_response_count")
         return DummyResponse(url=request.url, request=request)
 
@@ -132,7 +133,7 @@ def _skip_dependency_creation(self, request: Request, spider: Spider) -> bool:
     @inlineCallbacks
     def process_response(
         self, request: Request, response: Response, spider: Spider
-    ) -> Generator[Deferred, object, Response]:
+    ) -> Generator[Deferred, object, Union[Response, Request]]:
         """This method fills :attr:`scrapy.Request.cb_kwargs
         <scrapy.http.Request.cb_kwargs>` with instances for
         the required Page Objects found in the callback signature.
diff --git a/scrapy_poet/injection.py b/scrapy_poet/injection.py
index 2e8189f0..769f5ac3 100644
--- a/scrapy_poet/injection.py
+++ b/scrapy_poet/injection.py
@@ -272,6 +272,7 @@ def build_instances(
         )
         # All the remaining dependencies are internal so they can be built just
         # following the andi plan.
+        assert self.crawler.stats
         for cls, kwargs_spec in plan.dependencies:
             if cls not in instances.keys():
                 result_cls: type = cast(type, cls)
@@ -295,6 +296,7 @@ def build_instances_from_providers(
         plan: andi.Plan,
     ):
         """Build dependencies handled by registered providers"""
+        assert self.crawler.stats
         instances: Dict[Callable, Any] = {}
         scrapy_provided_dependencies = self.available_dependencies_for_providers(
             request, response
@@ -320,7 +322,7 @@ def build_instances_from_providers(
                 )
             # This one should take `web_poet.HttpRequest` but `scrapy.Request` will work as well
             # TODO: add `scrapy.Request` type in request_fingerprint() annotations
-            fingerprint = f"{provider.name}_{request_fingerprint(request)}"
+            fingerprint = f"{provider.name}_{request_fingerprint(request)}"  # type: ignore[arg-type]
             # Return the data if it is already in the cache
             try:
                 data = self.cache[fingerprint].items()
diff --git a/scrapy_poet/page_input_providers.py b/scrapy_poet/page_input_providers.py
index 3746fddf..45f4ee03 100644
--- a/scrapy_poet/page_input_providers.py
+++ b/scrapy_poet/page_input_providers.py
@@ -189,6 +189,7 @@ def __call__(self, to_provide: Set[Callable], crawler: Crawler):
         <web_poet.page_inputs.client.HttpClient>` instance using Scrapy's
         downloader.
         """
+        assert crawler.engine
         downloader = create_scrapy_downloader(crawler.engine.download)
         save_responses = crawler.settings.getbool("_SCRAPY_POET_SAVEFIXTURE")
         return [
diff --git a/scrapy_poet/spidermiddlewares.py b/scrapy_poet/spidermiddlewares.py
index f4900154..8f7a1fda 100644
--- a/scrapy_poet/spidermiddlewares.py
+++ b/scrapy_poet/spidermiddlewares.py
@@ -21,6 +21,7 @@ def process_spider_exception(
         if not isinstance(exception, Retry):
             return None
 
+        assert response.request
        new_request_or_none = get_retry_request(
            response.request,
            spider=spider,
diff --git a/scrapy_poet/utils/__init__.py b/scrapy_poet/utils/__init__.py
index bc08b73e..6fd90a08 100644
--- a/scrapy_poet/utils/__init__.py
+++ b/scrapy_poet/utils/__init__.py
@@ -20,8 +20,7 @@
 try:
     from scrapy.http.request import NO_CALLBACK  # available on Scrapy >= 2.8
 except ImportError:
-    # NO_CALLBACK = lambda: None  # noqa: E731
-    NO_CALLBACK = None
+    NO_CALLBACK = None  # type: ignore[assignment]
 
 
 def get_scrapy_data_path(createdir: bool = True, default_dir: str = ".scrapy") -> str:
diff --git a/scrapy_poet/utils/testing.py b/scrapy_poet/utils/testing.py
index b20224b4..788128a9 100644
--- a/scrapy_poet/utils/testing.py
+++ b/scrapy_poet/utils/testing.py
@@ -1,7 +1,6 @@
 import json
 from inspect import isasyncgenfunction
 from typing import Dict
-from unittest import mock
 
 from scrapy import Spider, signals
 from scrapy.crawler import Crawler
@@ -272,10 +271,3 @@ async def parse(*args, **kwargs):
     # Mimic type annotations
     parse.__annotations__ = callback.__annotations__
     return parse
-
-
-class AsyncMock(mock.MagicMock):
-    """workaround since python 3.7 doesn't ship with asyncmock."""
-
-    async def __call__(self, *args, **kwargs):
-        return super().__call__(*args, **kwargs)
diff --git a/tests/test_downloader.py b/tests/test_downloader.py
index ede5a736..45e86b66 100644
--- a/tests/test_downloader.py
+++ b/tests/test_downloader.py
@@ -29,7 +29,6 @@
 )
 from scrapy_poet.utils.mockserver import MockServer
 from scrapy_poet.utils.testing import (
-    AsyncMock,
     DelayedResource,
     EchoResource,
     StatusResource,
@@ -40,7 +39,7 @@
 
 @pytest.fixture
 def scrapy_downloader() -> Callable:
-    mock_downloader = AsyncMock()
+    mock_downloader = mock.AsyncMock()
     return create_scrapy_downloader(mock_downloader)
 
 
@@ -69,12 +68,12 @@ async def test_scrapy_poet_downloader(fake_http_response) -> None:
     req = web_poet.HttpRequest("https://example.com")
 
     with mock.patch(
-        "scrapy_poet.downloader.maybe_deferred_to_future", new_callable=AsyncMock
+        "scrapy_poet.downloader.maybe_deferred_to_future", new_callable=mock.AsyncMock
     ) as mock_dtf:
         mock_dtf.return_value = fake_http_response
 
-        mock_downloader = mock.MagicMock(return_value=AsyncMock)
+        mock_downloader = mock.MagicMock(return_value=mock.AsyncMock)
         scrapy_downloader = create_scrapy_downloader(mock_downloader)
 
         response = await scrapy_downloader(req)
@@ -96,10 +95,10 @@ async def test_scrapy_poet_downloader_ignored_request() -> None:
     req = web_poet.HttpRequest("https://example.com")
 
     with mock.patch(
-        "scrapy_poet.downloader.maybe_deferred_to_future", new_callable=AsyncMock
+        "scrapy_poet.downloader.maybe_deferred_to_future", new_callable=mock.AsyncMock
     ) as mock_dtf:
         mock_dtf.side_effect = scrapy.exceptions.IgnoreRequest
-        mock_downloader = mock.MagicMock(return_value=AsyncMock)
+        mock_downloader = mock.MagicMock(return_value=mock.AsyncMock)
         scrapy_downloader = create_scrapy_downloader(mock_downloader)
 
         with pytest.raises(web_poet.exceptions.HttpError):
@@ -111,10 +110,10 @@ async def test_scrapy_poet_downloader_twisted_error() -> None:
     req = web_poet.HttpRequest("https://example.com")
 
     with mock.patch(
-        "scrapy_poet.downloader.maybe_deferred_to_future", new_callable=AsyncMock
+        "scrapy_poet.downloader.maybe_deferred_to_future", new_callable=mock.AsyncMock
     ) as mock_dtf:
         mock_dtf.side_effect = twisted.internet.error.TimeoutError
-        mock_downloader = mock.MagicMock(return_value=AsyncMock)
+        mock_downloader = mock.MagicMock(return_value=mock.AsyncMock)
         scrapy_downloader = create_scrapy_downloader(mock_downloader)
 
         with pytest.raises(web_poet.exceptions.HttpRequestError):
@@ -126,10 +125,10 @@ async def test_scrapy_poet_downloader_head_redirect(fake_http_response) -> None:
     req = web_poet.HttpRequest("https://example.com", method="HEAD")
 
     with mock.patch(
-        "scrapy_poet.downloader.maybe_deferred_to_future", new_callable=AsyncMock
+        "scrapy_poet.downloader.maybe_deferred_to_future", new_callable=mock.AsyncMock
     ) as mock_dtf:
         mock_dtf.return_value = fake_http_response
-        mock_downloader = mock.MagicMock(return_value=AsyncMock)
+        mock_downloader = mock.MagicMock(return_value=mock.AsyncMock)
         scrapy_downloader = create_scrapy_downloader(mock_downloader)
 
         await scrapy_downloader(req)
@@ -454,6 +453,7 @@ async def __call__(
             custom_request = Request(
                 request.url, body=request.body, callback=NO_CALLBACK
             )
+            assert crawler.engine
             scrapy_response: Response = await maybe_deferred_to_future(
                 crawler.engine.download(custom_request)
             )
@@ -493,7 +493,7 @@ class TestSpider(Spider):
         def start_requests(self):
             yield Request(server.root_url, callback=self.parse)
 
-        async def parse(self, response: DummyResponse, page: ItemPage):
+        async def parse(self, response: DummyResponse, page: ItemPage):  # type: ignore[override]
             item = await page.to_item()
             items.append(item)
 
@@ -570,7 +570,7 @@ def test_parse_callback_none_dummy_response() -> None:
     class TestSpider(BaseSpider):
         start_urls = [server.root_url]
 
-        def parse(self, response: DummyResponse):
+        def parse(self, response: DummyResponse):  # type: ignore[override]
            collected["response"] = response
 
     crawler = make_crawler(TestSpider)
@@ -662,7 +662,7 @@ def test_parse_callback_none_with_deps(caplog) -> None:
     class TestSpider(BaseSpider):
         start_urls = [server.root_url]
 
-        def parse(self, response: DummyResponse, page: BasicPage):
+        def parse(self, response: DummyResponse, page: BasicPage):  # type: ignore[override]
            pass
 
     crawler = make_crawler(TestSpider)
@@ -705,7 +705,7 @@ def start_requests(self):
             page = BasicPage(web_poet.HttpResponse("https://example.com", b""))
             yield Request(server.root_url, cb_kwargs={"page": page})
 
-        def parse(self, response: DummyResponse, page: BasicPage):
+        def parse(self, response: DummyResponse, page: BasicPage):  # type: ignore[override]
            collected["response"] = response
 
     crawler = make_crawler(TestSpider)
@@ -740,7 +740,7 @@ def start_requests(self):
             page = BasicPage(web_poet.HttpResponse("https://example.com", b""))
             yield Request(server.root_url, cb_kwargs={"page": page})
 
-        def parse(
+        def parse(  # type: ignore[override]
             self,
             response: DummyResponse,
             page: BasicPage,
@@ -782,7 +782,7 @@ def test_parse_callback_NO_CALLBACK(caplog) -> None:
     class TestSpider(BaseSpider):
         start_urls = [server.root_url]
 
-        def parse(self, response: DummyResponse):
+        def parse(self, response: DummyResponse):  # type: ignore[override]
            collected["response"] = response
 
     crawler = make_crawler(TestSpider)
@@ -814,7 +814,7 @@ def test_parse_callback_NO_CALLBACK_with_page_dep(caplog) -> None:
     class TestSpider(BaseSpider):
         start_urls = [server.root_url]
 
-        def parse(self, response: DummyResponse, page: BasicPage):
+        def parse(self, response: DummyResponse, page: BasicPage):  # type: ignore[override]
            collected["response"] = response
 
     crawler = make_crawler(TestSpider)
diff --git a/tests/test_injection.py b/tests/test_injection.py
index 44ca6eb0..696dadd5 100644
--- a/tests/test_injection.py
+++ b/tests/test_injection.py
@@ -302,6 +302,7 @@ def _assert_instances(
     reqmeta: Optional[Dict[str, Any]] = None,
 ) -> Generator[Any, Any, None]:
     response = get_response_for_testing(callback, meta=reqmeta)
+    assert response.request
     request = response.request
     plan = injector.build_plan(response.request)
diff --git a/tests/test_middleware.py b/tests/test_middleware.py
index 821ebb04..aa85536b 100644
--- a/tests/test_middleware.py
+++ b/tests/test_middleware.py
@@ -301,7 +301,7 @@ class SkipDownloadSpider(scrapy.Spider):
     def start_requests(self):
         yield Request(url=self.url, callback=self.parse)
 
-    def parse(self, response: DummyResponse):
+    def parse(self, response: DummyResponse):  # type: ignore[override]
         return {
             "response": response,
         }
@@ -332,7 +332,7 @@ class RequestUrlSpider(scrapy.Spider):
     def start_requests(self):
         yield Request(url=self.url, callback=self.parse)
 
-    def parse(self, response: DummyResponse, url: RequestUrl):
+    def parse(self, response: DummyResponse, url: RequestUrl):  # type: ignore[override]
         return {
             "response": response,
             "url": url,
@@ -359,7 +359,7 @@ class ResponseUrlSpider(scrapy.Spider):
     def start_requests(self):
         yield Request(url=self.url, callback=self.parse)
 
-    def parse(self, response: DummyResponse, url: ResponseUrl):
+    def parse(self, response: DummyResponse, url: ResponseUrl):  # type: ignore[override]
         return {
             "response": response,
             "url": url,
@@ -396,7 +396,7 @@ class ResponseUrlPageSpider(scrapy.Spider):
     def start_requests(self):
         yield Request(url=self.url, callback=self.parse)
 
-    def parse(self, response: DummyResponse, page: ResponseUrlPage):
+    def parse(self, response: DummyResponse, page: ResponseUrlPage):  # type: ignore[override]
         return page.to_item()
 
 
@@ -427,7 +427,7 @@ class RequestUrlPageSpider(scrapy.Spider):
     def start_requests(self):
         yield Request(url=self.url, callback=self.parse)
 
-    def parse(self, response: DummyResponse, page: RequestUrlPage):
+    def parse(self, response: DummyResponse, page: RequestUrlPage):  # type: ignore[override]
         return page.to_item()
 
 
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 2b6cd0ed..d60e21d0 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -30,12 +30,7 @@
     StatsProvider,
 )
 from scrapy_poet.utils.mockserver import get_ephemeral_port
-from scrapy_poet.utils.testing import (
-    AsyncMock,
-    HtmlResource,
-    ProductHtml,
-    crawl_single_item,
-)
+from scrapy_poet.utils.testing import HtmlResource, ProductHtml, crawl_single_item
 
 
 class NonProductHtml(HtmlResource):
@@ -75,7 +70,9 @@ def __call__(
         assert isinstance(spider, scrapy.Spider)
         ret: List[Any] = []
         if Price in to_provide:
-            ret.append(Price(response.css(".price::text").get()))
+            price = response.css(".price::text").get()
+            assert price is not None
+            ret.append(Price(price))
         if Html in to_provide:
             ret.append(Html("Price Html!"))
         return ret
@@ -89,7 +86,9 @@ def __call__(self, to_provide, response: scrapy.http.Response, settings: Setting
         assert isinstance(settings, Settings)
         ret: List[Any] = []
         if Name in to_provide:
-            ret.append(Name(response.css(".name::text").get()))
+            name = response.css(".name::text").get()
+            assert name is not None
+            ret.append(Name(name))
         if Html in to_provide:
             ret.append(Html("Name Html!"))
         return ret
@@ -200,7 +199,7 @@ def test_price_first_spider(settings):
 @ensureDeferred
 async def test_http_client_provider(settings):
     crawler = get_crawler(Spider, settings)
-    crawler.engine = AsyncMock()
+    crawler.engine = mock.AsyncMock()
     injector = Injector(crawler)
 
     with mock.patch(
diff --git a/tests/test_response_required_logic.py b/tests/test_response_required_logic.py
index b4721002..b774cc3d 100644
--- a/tests/test_response_required_logic.py
+++ b/tests/test_response_required_logic.py
@@ -22,13 +22,7 @@
     HttpResponseProvider,
     PageObjectInputProvider,
 )
-from scrapy_poet.utils import is_min_scrapy_version
-
-# See: https://github.com/scrapinghub/scrapy-poet/issues/118
-try:
-    from scrapy.http.request import NO_CALLBACK  # available on Scrapy >= 2.8
-except ImportError:
-    NO_CALLBACK = lambda: None  # noqa: E731
+from scrapy_poet.utils import NO_CALLBACK, is_min_scrapy_version
 
 
 @attr.s(auto_attribs=True)
@@ -70,13 +64,13 @@ def __call__(self, to_provide):
 class TextProductProvider(HttpResponseProvider):
     # This is wrong. You should not annotate provider dependencies with classes
     # like TextResponse or HtmlResponse, you should use Response instead.
-    def __call__(self, to_provide, response: TextResponse):
+    def __call__(self, to_provide, response: TextResponse):  # type: ignore[override]
         return super().__call__(to_provide, response)
 
 
 class StringProductProvider(HttpResponseProvider):
-    def __call__(self, to_provide, response: str):
-        return super().__call__(to_provide, response)
+    def __call__(self, to_provide, response: str):  # type: ignore[override]
+        return super().__call__(to_provide, response)  # type: ignore[arg-type]
 
 
 @attr.s(auto_attribs=True)
@@ -224,8 +218,11 @@ def test_is_provider_using_response():
     reason="tests Scrapy < 2.8 before NO_CALLBACK was introduced",
 )
 def test_is_callback_using_response_for_scrapy28_below() -> None:
+    def cb(_: Any) -> Any:
+        return _
+
     spider = MySpider()
-    request = Request("https://example.com", callback=lambda _: _)
+    request = Request("https://example.com", callback=cb)
     assert is_callback_requiring_scrapy_response(spider.parse, request.callback) is True
     assert (
         is_callback_requiring_scrapy_response(spider.parse2, request.callback) is True
@@ -362,8 +359,11 @@ def test_is_callback_using_response_for_scrapy28_below() -> None:
     reason="NO_CALLBACK not available in Scrapy < 2.8",
 )
 def test_is_callback_using_response_for_scrapy28_and_above() -> None:
+    def cb(_: Any) -> Any:
+        return _
+
     spider = MySpider()
-    request_with_callback = Request("https://example.com", callback=lambda _: _)
+    request_with_callback = Request("https://example.com", callback=cb)
     request_no_callback = Request("https://example.com", callback=NO_CALLBACK)
 
     with warnings.catch_warnings(record=True) as caught_warnings:
diff --git a/tox.ini b/tox.ini
index 8487e181..bf462ba6 100644
--- a/tox.ini
+++ b/tox.ini
@@ -79,6 +79,7 @@ deps =
 deps =
     mypy==1.11.2
     pytest
+    Scrapy @ git+https://github.com/scrapy/scrapy@master
 commands = mypy --ignore-missing-imports scrapy_poet tests