Skip to content

Commit

Permalink
Use fully-qualified names for dependency serialization
Browse files: browse the repository at this point in the history
  • Loading branch information
Gallaecio committed Nov 13, 2023
1 parent dc40040 commit 3af6693
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 4 deletions.
19 changes: 15 additions & 4 deletions scrapy_poet/_request_fingerprinter.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from typing import TYPE_CHECKING

try:
from scrapy.utils.request import RequestFingerprinter # NOQA
except ImportError:
from typing import TYPE_CHECKING

if not TYPE_CHECKING:
ScrapyPoetRequestFingerprinter = None
else:
import hashlib
import json
from functools import cached_property
from typing import Callable, Dict, List, Optional
from typing import Callable, Dict, List, Optional, get_args, get_origin
from weakref import WeakKeyDictionary

from scrapy import Request
Expand All @@ -20,6 +20,17 @@
from scrapy_poet import InjectionMiddleware
from scrapy_poet.injection import get_callback

def _serialize_dep(cls):
try:
from typing import Annotated
except ImportError:
pass
else:
if get_origin(cls) is Annotated:
annotated, *annotations = get_args(cls)
return f"{_serialize_dep(annotated)}{repr(annotations)}"
return f"{cls.__module__}.{cls.__qualname__}"

class ScrapyPoetRequestFingerprinter:
@classmethod
def from_crawler(cls, crawler):
Expand Down Expand Up @@ -62,7 +73,7 @@ def _get_deps(self, request: Request) -> Optional[List[str]]:
root_deps = plan[-1][1]
if not root_deps:
return None
return sorted([repr(cls) for cls in root_deps.values()])
return sorted([_serialize_dep(cls) for cls in root_deps.values()])

def fingerprint_deps(self, request: Request) -> Optional[bytes]:
"""Return a fingerprint based on dependencies requested through
Expand Down
17 changes: 17 additions & 0 deletions tests/test_request_fingerprinter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from web_poet import HttpResponse, ItemPage, PageParams, RequestUrl, WebPage

from scrapy_poet import DummyResponse, ScrapyPoetRequestFingerprinter
from scrapy_poet._request_fingerprinter import _serialize_dep
from scrapy_poet.utils.testing import get_crawler as _get_crawler

ANDI_VERSION = Version(package_version("andi"))
Expand Down Expand Up @@ -233,6 +234,22 @@ async def parse_b(self, response, b: Annotated[WebPage, "b"]):
assert fingerprint1 != fingerprint2


def test_serialize_dep():
    """A plain class serializes to its fully-qualified dotted name."""
    expected = "web_poet.page_inputs.http.HttpResponse"
    serialized = _serialize_dep(HttpResponse)
    assert serialized == expected


@pytest.mark.skipif(
    sys.version_info < (3, 9), reason="No Annotated support in Python < 3.9"
)
def test_serialize_dep_annotated():
    """An ``Annotated`` dependency serializes to the fully-qualified name of
    the wrapped type followed by the repr of its metadata list."""
    # Imported locally because the name is unavailable on the skipped versions.
    from typing import Annotated

    dependency = Annotated[HttpResponse, "foo"]
    expected = "web_poet.page_inputs.http.HttpResponse['foo']"
    assert _serialize_dep(dependency) == expected


def test_base_default():
class TestSpider(Spider):
name = "test_spider"
Expand Down

0 comments on commit 3af6693

Please sign in to comment.