def _serialize_dep(cls):
    """Return a string that identifies the dependency *cls*.

    A plain class is rendered as ``<module>.<qualname>``.  An
    ``Annotated[X, ...]`` dependency is rendered as the serialization of
    ``X`` immediately followed by the ``repr()`` of the list of its
    metadata, so two dependencies that differ only in their annotations
    produce different strings.

    Objects that expose no ``__module__`` or no ``__qualname__`` are
    rendered with ``repr(cls)`` instead of raising ``AttributeError``.
    """
    try:
        from typing import Annotated
    except ImportError:
        # typing.Annotated is unavailable on this interpreter, so there is
        # no Annotated wrapper to unwrap; fall through to the plain case.
        pass
    else:
        if get_origin(cls) is Annotated:
            # get_args() yields the wrapped type first, then the metadata.
            annotated, *annotations = get_args(cls)
            return f"{_serialize_dep(annotated)}{annotations!r}"
    module = getattr(cls, "__module__", None)
    qualname = getattr(cls, "__qualname__", None)
    if module is None or qualname is None:
        # Not a regular class-like object: keep a best-effort textual form
        # rather than failing the whole fingerprint computation.
        return repr(cls)
    return f"{module}.{qualname}"
def test_serialize_dep():
    """A plain class is serialized as its module path plus qualified name."""
    expected = "web_poet.page_inputs.http.HttpResponse"
    assert _serialize_dep(HttpResponse) == expected


@pytest.mark.skipif(
    sys.version_info < (3, 9), reason="No Annotated support in Python < 3.9"
)
def test_serialize_dep_annotated():
    """An Annotated dependency keeps its metadata in the serialized form."""
    from typing import Annotated

    dep = Annotated[HttpResponse, "foo"]
    expected = "web_poet.page_inputs.http.HttpResponse['foo']"
    assert _serialize_dep(dep) == expected