Skip to content

Commit

Permalink
Merge pull request #201 from scrapinghub/dynamic-deps
Browse files Browse the repository at this point in the history
Initial DynamicDeps support.
  • Loading branch information
wRAR authored Jul 17, 2024
2 parents db23dd3 + 9016a7b commit ba7ac2f
Show file tree
Hide file tree
Showing 5 changed files with 281 additions and 11 deletions.
60 changes: 60 additions & 0 deletions docs/dynamic-deps.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
.. _dynamic-deps:

====================
Dynamic dependencies
====================

Normally the dependencies for a callback are specified statically, as type
hints for its arguments:

.. code-block:: python

    import scrapy


    class BooksSpider(scrapy.Spider):
        ...

        def start_requests(self):
            yield scrapy.Request("http://books.toscrape.com/", self.parse_book)

        def parse_book(self, response, book_page: BookPage, other_dep: OtherDep):
            ...

In some cases some or all of the dependencies need to be specified dynamically
instead, e.g. because they need to be different for different requests using
the same callback. You can use :class:`scrapy_poet.DynamicDeps
<scrapy_poet.injection.DynamicDeps>` for this. If you add a callback argument
with this type, you can pass a list of additional dependency types in the
request meta dictionary using the ``"inject"`` key:

.. code-block:: python

    import scrapy
    from scrapy_poet import DynamicDeps


    class BooksSpider(scrapy.Spider):
        ...

        def start_requests(self):
            yield scrapy.Request(
                "http://books.toscrape.com/",
                self.parse_book,
                meta={"inject": [OtherDep]},
            )

        def parse_book(self, response, book_page: BookPage, dynamic: DynamicDeps):
            # access the dynamic dependency values by their type:
            other_dep = dynamic[OtherDep]
            ...
            # or get them and their types at run time:
            for dep_type, dep in dynamic.items():
                if dep_type is OtherDep:
                    ...

The types passed this way are used in the dependency resolution as usual, with
the created instances available in the :class:`scrapy_poet.DynamicDeps
<scrapy_poet.injection.DynamicDeps>` instance, which is a dictionary with
dependency types as keys and their instances as values.
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ To get started, see :ref:`intro-install` and :ref:`intro-tutorial`.
:maxdepth: 1

rules-from-web-poet
dynamic-deps
stats
providers
testing
Expand Down
1 change: 1 addition & 0 deletions scrapy_poet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .api import DummyResponse, callback_for
from .downloadermiddlewares import DownloaderStatsMiddleware, InjectionMiddleware
from .injection import DynamicDeps
from .page_input_providers import HttpResponseProvider, PageObjectInputProvider
from .spidermiddlewares import RetryMiddleware
from ._request_fingerprinter import ScrapyPoetRequestFingerprinter
71 changes: 63 additions & 8 deletions scrapy_poet/injection.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Any,
Callable,
Dict,
Iterable,
List,
Mapping,
Optional,
Expand Down Expand Up @@ -54,6 +55,16 @@ class _UNDEFINED:
pass


class DynamicDeps(dict):
    """Mapping of dynamically requested dependency types to their instances.

    The dependency types are provided via the ``"inject"`` request meta key.
    At run time every such type is a key of this dict and the instance built
    for it is the corresponding value.
    """


class Injector:
"""
Keep all the logic required to do dependency injection in Scrapy callbacks.
Expand Down Expand Up @@ -170,33 +181,75 @@ def build_plan(self, request: Request) -> andi.Plan:
# Callable[[Callable], Optional[Callable]] but the registry
# returns the typing for ``dict.get()`` method.
overrides=self.registry.overrides_for(request.url).get, # type: ignore[arg-type]
custom_builder_fn=self._get_item_builder(request),
custom_builder_fn=self._get_custom_builder(request),
)

def _get_custom_builder(
    self, request: Request
) -> Callable[[Callable], Optional[Callable]]:
    """Return a function suitable for passing as ``custom_builder_fn`` to ``andi.plan``.

    The returned function maps a dependency class to a factory for it:

    * for :class:`.DynamicDeps` the factory collects instances of the types
      listed under the ``"inject"`` key of ``request.meta``;
    * for a class that the registry maps to a page object for
      ``request.url``, the factory builds the item by awaiting
      ``page.to_item()``;
    * for any other class ``None`` is returned.
    """

    @functools.lru_cache(maxsize=None)  # to minimize the registry queries
    def mapping_fn(dep_cls: Callable) -> Optional[Callable]:
        # building DynamicDeps
        if dep_cls is DynamicDeps:
            dynamic_types = request.meta.get("inject", [])
            if not dynamic_types:
                # Nothing to inject: still build a real DynamicDeps instance
                # (not a plain dict) so that the value matches the type the
                # callback asked for.
                def empty_dynamic_deps_factory() -> DynamicDeps:
                    return DynamicDeps()

                return empty_dynamic_deps_factory
            return self._get_dynamic_deps_factory(dynamic_types)

        # building items from pages
        page_object_cls: Optional[Type[ItemPage]] = self.registry.page_cls_for_item(
            request.url, cast(type, dep_cls)
        )
        if not page_object_cls:
            return None

        async def item_factory(page: page_object_cls) -> dep_cls:  # type: ignore[valid-type]
            return await page.to_item()  # type: ignore[attr-defined]

        return item_factory

    return mapping_fn

@staticmethod
def _get_dynamic_deps_factory_text(
type_names: Iterable[str],
) -> str:
# inspired by Python 3.11 dataclasses._create_fn()
# https://github.com/python/cpython/blob/v3.11.9/Lib/dataclasses.py#L413
args = [f"{name}_arg: {name}" for name in type_names]
args_str = ", ".join(args)
result_args = [f"{name}: {name}_arg" for name in type_names]
result_args_str = ", ".join(result_args)
create_args_str = ", ".join(type_names)
return (
f"def __create_fn__({create_args_str}):\n"
f" def dynamic_deps_factory({args_str}) -> DynamicDeps:\n"
f" return DynamicDeps({{{result_args_str}}})\n"
f" return dynamic_deps_factory"
)

@staticmethod
def _get_dynamic_deps_factory(
    dynamic_types: List[type],
) -> Callable[..., DynamicDeps]:
    """Return a function that creates a :class:`.DynamicDeps` instance from its args.

    It takes instances of types from ``dynamic_types`` as args and returns
    a :class:`.DynamicDeps` instance where keys are types and values are
    corresponding args. It has correct type hints so that it can be used as
    an ``andi`` custom builder.

    A type listed more than once is requested only once. Distinct types that
    share a ``__name__`` get a numeric suffix in the generated parameter
    names so that each of them keeps its own parameter.
    """
    # Drop exact duplicates while keeping the order: every type needs exactly
    # one parameter in the generated factory.
    unique_types = list(dict.fromkeys(dynamic_types))

    names: List[str] = []
    for type_ in unique_types:
        name = type_.__name__
        suffix = 1
        # Different types may have the same ``__name__`` (e.g. classes from
        # different modules); the generated code needs a distinct identifier
        # for each of them.
        while name in names:
            suffix += 1
            name = f"{type_.__name__}_{suffix}"
        names.append(name)

    txt = Injector._get_dynamic_deps_factory_text(names)
    # The generated code receives the types as ``__create_fn__`` arguments and
    # resolves ``DynamicDeps`` from the module globals, so the local namespace
    # is only needed to fetch ``__create_fn__`` back.
    ns: Dict[str, Any] = {}
    exec(txt, globals(), ns)
    return ns["__create_fn__"](*unique_types)

@inlineCallbacks
def build_instances(
self,
Expand Down Expand Up @@ -480,7 +533,9 @@ class MySpider(Spider):
return Injector(crawler, registry=registry)


def get_response_for_testing(callback: Callable) -> Response:
def get_response_for_testing(
callback: Callable, meta: Optional[Dict[str, Any]] = None
) -> Response:
"""
Return a :class:`scrapy.http.Response` with fake content with the configured
callback. It is useful for testing providers.
Expand All @@ -501,6 +556,6 @@ def get_response_for_testing(callback: Callable) -> Response:
""".encode(
"utf-8"
)
request = Request(url, callback=callback)
request = Request(url, callback=callback, meta=meta)
response = Response(url, 200, None, html, request=request)
return response
159 changes: 156 additions & 3 deletions tests/test_injection.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import shutil
import sys
from typing import Any, Callable, Dict, Generator
from typing import Any, Callable, Dict, Generator, Optional

import andi
import attr
import parsel
import pytest
Expand All @@ -16,7 +17,12 @@
from web_poet.mixins import ResponseShortcutsMixin
from web_poet.rules import ApplyRule

from scrapy_poet import DummyResponse, HttpResponseProvider, PageObjectInputProvider
from scrapy_poet import (
DummyResponse,
DynamicDeps,
HttpResponseProvider,
PageObjectInputProvider,
)
from scrapy_poet.injection import (
Injector,
check_all_providers_are_callable,
Expand Down Expand Up @@ -293,8 +299,9 @@ def _assert_instances(
callback: Callable,
expected_instances: Dict[type, Any],
expected_kwargs: Dict[str, Any],
reqmeta: Optional[Dict[str, Any]] = None,
) -> Generator[Any, Any, None]:
response = get_response_for_testing(callback)
response = get_response_for_testing(callback, meta=reqmeta)
request = response.request

plan = injector.build_plan(response.request)
Expand Down Expand Up @@ -535,6 +542,129 @@ def callback(
# not injected at all.
assert set(kwargs.keys()) == {"expensive", "item"}

@inlineCallbacks
def test_dynamic_deps(self):
    """Types listed under ``"inject"`` are expected both on their own and inside ``DynamicDeps``."""

    def callback(dd: DynamicDeps):
        pass

    injector = get_injector_for_testing({get_provider({Cls1, Cls2}): 1})

    deps = {Cls1: Cls1(), Cls2: Cls2()}
    yield self._assert_instances(
        injector,
        callback,
        {DynamicDeps: DynamicDeps(deps), **deps},
        {"dd": DynamicDeps(deps)},
        reqmeta={"inject": [Cls1, Cls2]},
    )

@inlineCallbacks
def test_dynamic_deps_mix(self):
    """A type requested both statically and dynamically is the same instance in both places."""

    def callback(c1: Cls1, dd: DynamicDeps):
        pass

    injector = get_injector_for_testing({get_provider({Cls1, Cls2}): 1})
    response = get_response_for_testing(callback, meta={"inject": [Cls1, Cls2]})
    request = response.request

    plan = injector.build_plan(request)
    instances = yield from injector.build_instances(request, response, plan)
    expected_instances = {
        DynamicDeps: DynamicDeps({Cls1: Cls1(), Cls2: Cls2()}),
        Cls1: Cls1(),
        Cls2: Cls2(),
    }
    assert instances == expected_instances
    # the dynamic container must reuse the instances built for the plan
    for dep_type in (Cls1, Cls2):
        assert instances[dep_type] is instances[DynamicDeps][dep_type]

    kwargs = yield from injector.build_callback_dependencies(request, response)
    expected_kwargs = {
        "c1": Cls1(),
        "dd": DynamicDeps({Cls1: Cls1(), Cls2: Cls2()}),
    }
    assert kwargs == expected_kwargs
    assert kwargs["c1"] is kwargs["dd"][Cls1]

@inlineCallbacks
def test_dynamic_deps_no_meta(self):
    """Without request meta an empty ``DynamicDeps`` is expected."""

    def callback(dd: DynamicDeps):
        pass

    injector = get_injector_for_testing({get_provider({Cls1, Cls2}): 1})

    yield self._assert_instances(
        injector,
        callback,
        {DynamicDeps: DynamicDeps()},
        {"dd": DynamicDeps()},
    )

@inlineCallbacks
def test_dynamic_deps_page(self):
    """A page object type passed via ``"inject"`` ends up in ``DynamicDeps``."""

    def callback(dd: DynamicDeps):
        pass

    injector = get_injector_for_testing({})
    response = get_response_for_testing(callback, meta={"inject": [PricePO]})
    request = response.request
    plan = injector.build_plan(request)

    kwargs = yield from injector.build_callback_dependencies(request, response)
    types_by_kwarg = {name: type(value) for name, value in kwargs.items()}
    assert types_by_kwarg == {"dd": DynamicDeps}
    types_by_dep = {
        dep_type: type(dep) for dep_type, dep in kwargs["dd"].items()
    }
    assert types_by_dep == {PricePO: PricePO}

    instances = yield from injector.build_instances(request, response, plan)
    assert set(instances) == {Html, PricePO, DynamicDeps}

@inlineCallbacks
def test_dynamic_deps_item(self):
    """An item type passed via ``"inject"`` is built through its registered page object."""

    def callback(dd: DynamicDeps):
        pass

    registry = RulesRegistry(
        rules=[
            ApplyRule(Patterns(include=()), use=TestItemPage, to_return=TestItem)
        ]
    )
    injector = get_injector_for_testing({}, registry=registry)
    response = get_response_for_testing(callback, meta={"inject": [TestItem]})
    request = response.request
    plan = injector.build_plan(request)

    kwargs = yield from injector.build_callback_dependencies(request, response)
    types_by_kwarg = {name: type(value) for name, value in kwargs.items()}
    assert types_by_kwarg == {"dd": DynamicDeps}
    types_by_dep = {
        dep_type: type(dep) for dep_type, dep in kwargs["dd"].items()
    }
    assert types_by_dep == {TestItem: TestItem}

    instances = yield from injector.build_instances(request, response, plan)
    assert set(instances) == {TestItemPage, TestItem, DynamicDeps}


class Html(Injectable):
url = "http://example.com"
Expand Down Expand Up @@ -833,3 +963,26 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name):
response.request, response, plan
)
assert injector.weak_cache.get(response.request) is None


def test_dynamic_deps_factory_text():
    """Check the source text generated for the dynamic deps factory.

    Only the content of every line is compared, so the check does not depend
    on how many spaces the generator uses for each nesting level.
    """
    txt = Injector._get_dynamic_deps_factory_text(["int", "Cls1"])
    assert [line.strip() for line in txt.splitlines()] == [
        "def __create_fn__(int, Cls1):",
        "def dynamic_deps_factory(int_arg: int, Cls1_arg: Cls1) -> DynamicDeps:",
        "return DynamicDeps({int: int_arg, Cls1: Cls1_arg})",
        "return dynamic_deps_factory",
    ]


def test_dynamic_deps_factory():
    """The generated factory exposes type hints to ``andi`` and returns a type-keyed mapping."""
    factory = Injector._get_dynamic_deps_factory([int, Cls1])
    assert andi.inspect(factory) == {"Cls1_arg": [Cls1], "int_arg": [int]}

    cls1_instance = Cls1()
    result = factory(int_arg=42, Cls1_arg=cls1_instance)
    assert result == {int: 42, Cls1: cls1_instance}

0 comments on commit ba7ac2f

Please sign in to comment.