|
11 | 11 | from importlib.metadata import version as package_version
|
12 | 12 |
|
13 | 13 | from scrapy import Request, Spider
|
14 |
| -from web_poet import ItemPage, WebPage |
| 14 | +from scrapy.http import Response |
| 15 | +from web_poet import HttpResponse, ItemPage, PageParams, RequestUrl, WebPage |
15 | 16 |
|
16 |
| -from scrapy_poet import ScrapyPoetRequestFingerprinter |
| 17 | +from scrapy_poet import DummyResponse, ScrapyPoetRequestFingerprinter |
17 | 18 | from scrapy_poet.utils.testing import get_crawler as _get_crawler
|
18 | 19 |
|
19 | 20 | ANDI_VERSION = Version(package_version("andi"))
|
@@ -113,6 +114,97 @@ async def parse_web(self, response, web: WebPage):
|
113 | 114 | assert fingerprint1 != fingerprint2
|
114 | 115 |
|
115 | 116 |
|
def test_response_typing():
    """The type of the response parameter is ignored, even when it is
    DummyResponse."""

    class AnnotationSpider(Spider):
        name = "test_spider"

        async def parse_untyped(self, response, web: WebPage):
            pass

        async def parse_typed(self, response: Response, web: WebPage):
            pass

        async def parse_dummy(self, response: DummyResponse, web: WebPage):
            pass

    crawler = get_crawler(spider_cls=AnnotationSpider)
    fingerprinter = crawler.request_fingerprinter
    spider = crawler.spider
    # All three callbacks declare the same page-object dependency (WebPage);
    # only the response annotation differs, which must not affect the result.
    untyped_fp, typed_fp, dummy_fp = (
        fingerprinter.fingerprint(Request("https://toscrape.com", callback=cb))
        for cb in (spider.parse_untyped, spider.parse_typed, spider.parse_dummy)
    )
    assert untyped_fp == typed_fp
    assert untyped_fp == dummy_fp
| 143 | + |
| 144 | + |
def test_responseless_inputs():
    """Inputs that have no impact on the actual requests sent because they do
    not require sending a request at all are considered valid, different
    dependencies for fingerprinting purposes nonetheless."""

    class ResponselessSpider(Spider):
        name = "test_spider"

        async def parse_nothing(self, response: DummyResponse):
            pass

        async def parse_page_params(
            self, response: DummyResponse, page_params: PageParams
        ):
            # NOTE: declaring PageParams as a dependency should not change the
            # request fingerprint; only actually setting page_params on the
            # request should.
            pass

        async def parse_request_url(
            self, response: DummyResponse, request_url: RequestUrl
        ):
            pass

    crawler = get_crawler(spider_cls=ResponselessSpider)
    fingerprinter = crawler.request_fingerprinter
    spider = crawler.spider
    fingerprints = [
        fingerprinter.fingerprint(Request("https://toscrape.com", callback=cb))
        for cb in (
            spider.parse_nothing,
            spider.parse_page_params,
            spider.parse_request_url,
        )
    ]
    # Every callback must produce a distinct fingerprint.
    assert len(set(fingerprints)) == len(fingerprints)
| 183 | + |
| 184 | + |
def test_dep_resolution():
    """We do not resolve dependencies, so it is possible for 2 callbacks that
    when resolved have identical dependencies to get a different
    fingerprint."""

    class ResolutionSpider(Spider):
        name = "test_spider"

        async def parse_a(self, response, web: WebPage):
            pass

        async def parse_b(self, response, web: WebPage, http_response: HttpResponse):
            pass

    crawler = get_crawler(spider_cls=ResolutionSpider)
    fingerprinter = crawler.request_fingerprinter
    spider = crawler.spider
    # WebPage already depends on HttpResponse, so both callbacks resolve to
    # the same set of inputs; the declared annotations still differ, though.
    fingerprint_a = fingerprinter.fingerprint(
        Request("https://toscrape.com", callback=spider.parse_a)
    )
    fingerprint_b = fingerprinter.fingerprint(
        Request("https://toscrape.com", callback=spider.parse_b)
    )
    assert fingerprint_a != fingerprint_b
| 206 | + |
| 207 | + |
116 | 208 | @pytest.mark.skipif(
|
117 | 209 | sys.version_info < (3, 9), reason="No Annotated support in Python < 3.9"
|
118 | 210 | )
|
|
0 commit comments