diff --git a/docs/dynamic-deps.rst b/docs/dynamic-deps.rst new file mode 100644 index 00000000..529cf9f1 --- /dev/null +++ b/docs/dynamic-deps.rst @@ -0,0 +1,60 @@ +.. _dynamic-deps: + +==================== +Dynamic dependencies +==================== + +Normally the dependencies for a callback are specified statically, as type +hints for its arguments: + +.. code-block:: python + + import scrapy + + + class BooksSpider(scrapy.Spider): + ... + + def start_requests(self): + yield scrapy.Request("http://books.toscrape.com/", self.parse_book) + + + def parse_book(self, response, book_page: BookPage, other_dep: OtherDep): + ... + +In some cases some or all of the dependencies need to be specified dynamically +instead, e.g. because they need to be different for different requests using +the same callback. You can use :class:`scrapy_poet.DynamicDeps +<scrapy_poet.injection.DynamicDeps>` for this. If you add a callback argument +with this type, you can pass a list of additional dependency types in the +request meta dictionary using the ``"inject"`` key: + +.. code-block:: python + + import scrapy + + + class BooksSpider(scrapy.Spider): + ... + + def start_requests(self): + yield scrapy.Request( + "http://books.toscrape.com/", + self.parse_book, + meta={"inject": [OtherDep]}, + ) + + + def parse_book(self, response, book_page: BookPage, dynamic: DynamicDeps): + # access the dynamic dependency values by their type: + other_dep = dynamic[OtherDep] + ... + # or get them and their types at run time: + for dep_type, dep in dynamic.items(): + if dep_type is OtherDep: + ... + +The types passed this way are used in dependency resolution as usual, with +the created instances available in the :class:`scrapy_poet.DynamicDeps +<scrapy_poet.injection.DynamicDeps>` instance, which is a dictionary with +dependency types as keys and their instances as values. 
diff --git a/docs/index.rst b/docs/index.rst index f21cb4ca..1b022b83 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -44,6 +44,7 @@ To get started, see :ref:`intro-install` and :ref:`intro-tutorial`. :maxdepth: 1 rules-from-web-poet + dynamic-deps stats providers testing diff --git a/scrapy_poet/__init__.py b/scrapy_poet/__init__.py index 47756b50..393fd303 100644 --- a/scrapy_poet/__init__.py +++ b/scrapy_poet/__init__.py @@ -1,5 +1,6 @@ from .api import DummyResponse, callback_for from .downloadermiddlewares import DownloaderStatsMiddleware, InjectionMiddleware +from .injection import DynamicDeps from .page_input_providers import HttpResponseProvider, PageObjectInputProvider from .spidermiddlewares import RetryMiddleware from ._request_fingerprinter import ScrapyPoetRequestFingerprinter diff --git a/scrapy_poet/injection.py b/scrapy_poet/injection.py index 02f5f7ab..3414741a 100644 --- a/scrapy_poet/injection.py +++ b/scrapy_poet/injection.py @@ -8,6 +8,7 @@ Any, Callable, Dict, + Iterable, List, Mapping, Optional, @@ -54,6 +55,16 @@ class _UNDEFINED: pass +class DynamicDeps(dict): + """A container for dynamic dependencies provided via the ``"inject"`` request meta key. + + The dynamic dependency instances are available at the run time as dict + values with keys being dependency types. + """ + + pass + + class Injector: """ Keep all the logic required to do dependency injection in Scrapy callbacks. @@ -170,33 +181,75 @@ def build_plan(self, request: Request) -> andi.Plan: # Callable[[Callable], Optional[Callable]] but the registry # returns the typing for ``dict.get()`` method. 
overrides=self.registry.overrides_for(request.url).get, # type: ignore[arg-type] - custom_builder_fn=self._get_item_builder(request), + custom_builder_fn=self._get_custom_builder(request), ) - def _get_item_builder( + def _get_custom_builder( self, request: Request ) -> Callable[[Callable], Optional[Callable]]: """Return a function suitable for passing as ``custom_builder_fn`` to ``andi.plan``. The returned function can map an item to a factory for that item based - on the registry. + on the registry and also supports filling :class:`.DynamicDeps`. """ @functools.lru_cache(maxsize=None) # to minimize the registry queries - def mapping_fn(item_cls: Callable) -> Optional[Callable]: + def mapping_fn(dep_cls: Callable) -> Optional[Callable]: + # building DynamicDeps + if dep_cls is DynamicDeps: + dynamic_types = request.meta.get("inject", []) + if not dynamic_types: + return lambda: {} + return self._get_dynamic_deps_factory(dynamic_types) + + # building items from pages page_object_cls: Optional[Type[ItemPage]] = self.registry.page_cls_for_item( - request.url, cast(type, item_cls) + request.url, cast(type, dep_cls) ) if not page_object_cls: return None - async def item_factory(page: page_object_cls) -> item_cls: # type: ignore[valid-type] + async def item_factory(page: page_object_cls) -> dep_cls: # type: ignore[valid-type] return await page.to_item() # type: ignore[attr-defined] return item_factory return mapping_fn + @staticmethod + def _get_dynamic_deps_factory_text( + type_names: Iterable[str], + ) -> str: + # inspired by Python 3.11 dataclasses._create_fn() + # https://github.com/python/cpython/blob/v3.11.9/Lib/dataclasses.py#L413 + args = [f"{name}_arg: {name}" for name in type_names] + args_str = ", ".join(args) + result_args = [f"{name}: {name}_arg" for name in type_names] + result_args_str = ", ".join(result_args) + create_args_str = ", ".join(type_names) + return ( + f"def __create_fn__({create_args_str}):\n" + f" def dynamic_deps_factory({args_str}) -> 
DynamicDeps:\n" + f" return DynamicDeps({{{result_args_str}}})\n" + f" return dynamic_deps_factory" + ) + + @staticmethod + def _get_dynamic_deps_factory( + dynamic_types: List[type], + ) -> Callable[..., DynamicDeps]: + """Return a function that creates a :class:`.DynamicDeps` instance from its args. + + It takes instances of types from ``dynamic_types`` as args and returns + a :class:`.DynamicDeps` instance where keys are types and values are + corresponding args. It has correct type hints so that it can be used as + an ``andi`` custom builder. + """ + ns = {type_.__name__: type_ for type_ in dynamic_types} + txt = Injector._get_dynamic_deps_factory_text(ns.keys()) + exec(txt, globals(), ns) + return ns["__create_fn__"](*dynamic_types) + @inlineCallbacks def build_instances( self, @@ -480,7 +533,9 @@ class MySpider(Spider): return Injector(crawler, registry=registry) -def get_response_for_testing(callback: Callable) -> Response: +def get_response_for_testing( + callback: Callable, meta: Optional[Dict[str, Any]] = None +) -> Response: """ Return a :class:`scrapy.http.Response` with fake content with the configured callback. It is useful for testing providers. 
@@ -501,6 +556,6 @@ def get_response_for_testing(callback: Callable) -> Response: """.encode( "utf-8" ) - request = Request(url, callback=callback) + request = Request(url, callback=callback, meta=meta) response = Response(url, 200, None, html, request=request) return response diff --git a/tests/test_injection.py b/tests/test_injection.py index 8b6e27d4..fe68e5a9 100644 --- a/tests/test_injection.py +++ b/tests/test_injection.py @@ -1,7 +1,8 @@ import shutil import sys -from typing import Any, Callable, Dict, Generator +from typing import Any, Callable, Dict, Generator, Optional +import andi import attr import parsel import pytest @@ -16,7 +17,12 @@ from web_poet.mixins import ResponseShortcutsMixin from web_poet.rules import ApplyRule -from scrapy_poet import DummyResponse, HttpResponseProvider, PageObjectInputProvider +from scrapy_poet import ( + DummyResponse, + DynamicDeps, + HttpResponseProvider, + PageObjectInputProvider, +) from scrapy_poet.injection import ( Injector, check_all_providers_are_callable, @@ -293,8 +299,9 @@ def _assert_instances( callback: Callable, expected_instances: Dict[type, Any], expected_kwargs: Dict[str, Any], + reqmeta: Optional[Dict[str, Any]] = None, ) -> Generator[Any, Any, None]: - response = get_response_for_testing(callback) + response = get_response_for_testing(callback, meta=reqmeta) request = response.request plan = injector.build_plan(response.request) @@ -535,6 +542,129 @@ def callback( # not injected at all. 
assert set(kwargs.keys()) == {"expensive", "item"} + @inlineCallbacks + def test_dynamic_deps(self): + def callback(dd: DynamicDeps): + pass + + provider = get_provider({Cls1, Cls2}) + injector = get_injector_for_testing({provider: 1}) + + expected_instances = { + DynamicDeps: DynamicDeps({Cls1: Cls1(), Cls2: Cls2()}), + Cls1: Cls1(), + Cls2: Cls2(), + } + expected_kwargs = { + "dd": DynamicDeps({Cls1: Cls1(), Cls2: Cls2()}), + } + yield self._assert_instances( + injector, + callback, + expected_instances, + expected_kwargs, + reqmeta={"inject": [Cls1, Cls2]}, + ) + + @inlineCallbacks + def test_dynamic_deps_mix(self): + def callback(c1: Cls1, dd: DynamicDeps): + pass + + provider = get_provider({Cls1, Cls2}) + injector = get_injector_for_testing({provider: 1}) + + response = get_response_for_testing(callback, meta={"inject": [Cls1, Cls2]}) + request = response.request + + plan = injector.build_plan(response.request) + instances = yield from injector.build_instances(request, response, plan) + assert instances == { + DynamicDeps: DynamicDeps({Cls1: Cls1(), Cls2: Cls2()}), + Cls1: Cls1(), + Cls2: Cls2(), + } + assert instances[Cls1] is instances[DynamicDeps][Cls1] + assert instances[Cls2] is instances[DynamicDeps][Cls2] + + kwargs = yield from injector.build_callback_dependencies(request, response) + assert kwargs == { + "c1": Cls1(), + "dd": DynamicDeps({Cls1: Cls1(), Cls2: Cls2()}), + } + assert kwargs["c1"] is kwargs["dd"][Cls1] + + @inlineCallbacks + def test_dynamic_deps_no_meta(self): + def callback(dd: DynamicDeps): + pass + + provider = get_provider({Cls1, Cls2}) + injector = get_injector_for_testing({provider: 1}) + + expected_instances = { + DynamicDeps: DynamicDeps(), + } + expected_kwargs = { + "dd": DynamicDeps(), + } + yield self._assert_instances( + injector, + callback, + expected_instances, + expected_kwargs, + ) + + @inlineCallbacks + def test_dynamic_deps_page(self): + def callback(dd: DynamicDeps): + pass + + injector = 
get_injector_for_testing({}) + + response = get_response_for_testing(callback, meta={"inject": [PricePO]}) + request = response.request + + plan = injector.build_plan(response.request) + kwargs = yield from injector.build_callback_dependencies(request, response) + kwargs_types = {key: type(value) for key, value in kwargs.items()} + assert kwargs_types == { + "dd": DynamicDeps, + } + dd_types = {key: type(value) for key, value in kwargs["dd"].items()} + assert dd_types == { + PricePO: PricePO, + } + + instances = yield from injector.build_instances(request, response, plan) + assert set(instances) == {Html, PricePO, DynamicDeps} + + @inlineCallbacks + def test_dynamic_deps_item(self): + def callback(dd: DynamicDeps): + pass + + rules = [ApplyRule(Patterns(include=()), use=TestItemPage, to_return=TestItem)] + registry = RulesRegistry(rules=rules) + injector = get_injector_for_testing({}, registry=registry) + + response = get_response_for_testing(callback, meta={"inject": [TestItem]}) + request = response.request + + plan = injector.build_plan(response.request) + kwargs = yield from injector.build_callback_dependencies(request, response) + kwargs_types = {key: type(value) for key, value in kwargs.items()} + assert kwargs_types == { + "dd": DynamicDeps, + } + dd_types = {key: type(value) for key, value in kwargs["dd"].items()} + assert dd_types == { + TestItem: TestItem, + } + + instances = yield from injector.build_instances(request, response, plan) + assert set(instances) == {TestItemPage, TestItem, DynamicDeps} + class Html(Injectable): url = "http://example.com" @@ -833,3 +963,26 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name): response.request, response, plan ) assert injector.weak_cache.get(response.request) is None + + +def test_dynamic_deps_factory_text(): + txt = Injector._get_dynamic_deps_factory_text(["int", "Cls1"]) + assert ( + txt + == """def __create_fn__(int, Cls1): + def dynamic_deps_factory(int_arg: int, Cls1_arg: Cls1) -> 
DynamicDeps: + return DynamicDeps({int: int_arg, Cls1: Cls1_arg}) + return dynamic_deps_factory""" + ) + + +def test_dynamic_deps_factory(): + fn = Injector._get_dynamic_deps_factory([int, Cls1]) + args = andi.inspect(fn) + assert args == { + "Cls1_arg": [Cls1], + "int_arg": [int], + } + c = Cls1() + dd = fn(int_arg=42, Cls1_arg=c) + assert dd == {int: 42, Cls1: c}