Skip to content

Commit ba7ac2f

Browse files
authored
Merge pull request #201 from scrapinghub/dynamic-deps
Initial DynamicDeps support.
2 parents db23dd3 + 9016a7b commit ba7ac2f

File tree

5 files changed

+281
-11
lines changed

5 files changed

+281
-11
lines changed

docs/dynamic-deps.rst

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
.. _dynamic-deps:
2+
3+
====================
4+
Dynamic dependencies
5+
====================
6+
7+
Normally the dependencies for a callback are specified statically, as type
8+
hints for its arguments:
9+
10+
.. code-block:: python
11+
12+
import scrapy
13+
14+
15+
class BooksSpider(scrapy.Spider):
16+
...
17+
18+
def start_requests(self):
19+
yield scrapy.Request("http://books.toscrape.com/", self.parse_book)
20+
21+
22+
def parse_book(self, response, book_page: BookPage, other_dep: OtherDep):
23+
...
24+
25+
In some cases some or all of the dependencies need to be specified dynamically
26+
instead, e.g. because they need to be different for different requests using
27+
the same callback. You can use :class:`scrapy_poet.DynamicDeps
28+
<scrapy_poet.injection.DynamicDeps>` for this. If you add a callback argument
29+
with this type, you can pass a list of additional dependency types in the
30+
request meta dictionary using the ``"inject"`` key:
31+
32+
.. code-block:: python
33+
34+
import scrapy
35+
36+
37+
class BooksSpider(scrapy.Spider):
38+
...
39+
40+
def start_requests(self):
41+
yield scrapy.Request(
42+
"http://books.toscrape.com/",
43+
self.parse_book,
44+
meta={"inject": [OtherDep]},
45+
)
46+
47+
48+
def parse_book(self, response, book_page: BookPage, dynamic: DynamicDeps):
49+
# access the dynamic dependency values by their type:
50+
other_dep = dynamic[OtherDep]
51+
...
52+
# or get them and their types at run time:
53+
for dep_type, dep in dynamic.items():
54+
if dep_type is OtherDep:
55+
...
56+
57+
The types passed this way are used in the dependency resolution as usual, with
58+
the created instances available in the :class:`scrapy_poet.DynamicDeps
59+
<scrapy_poet.injection.DynamicDeps>` instance, which is a dictionary with
60+
dependency types as keys and their instances as values.

docs/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ To get started, see :ref:`intro-install` and :ref:`intro-tutorial`.
4444
:maxdepth: 1
4545

4646
rules-from-web-poet
47+
dynamic-deps
4748
stats
4849
providers
4950
testing

scrapy_poet/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from .api import DummyResponse, callback_for
22
from .downloadermiddlewares import DownloaderStatsMiddleware, InjectionMiddleware
3+
from .injection import DynamicDeps
34
from .page_input_providers import HttpResponseProvider, PageObjectInputProvider
45
from .spidermiddlewares import RetryMiddleware
56
from ._request_fingerprinter import ScrapyPoetRequestFingerprinter

scrapy_poet/injection.py

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
Any,
99
Callable,
1010
Dict,
11+
Iterable,
1112
List,
1213
Mapping,
1314
Optional,
@@ -54,6 +55,16 @@ class _UNDEFINED:
5455
pass
5556

5657

58+
class DynamicDeps(dict):
    """Mapping of dynamically requested dependency types to their instances.

    The dependency types are requested per request through the ``"inject"``
    request meta key. At run time each requested type is a key of this dict
    and the instance created for it is the corresponding value.
    """
66+
67+
5768
class Injector:
5869
"""
5970
Keep all the logic required to do dependency injection in Scrapy callbacks.
@@ -170,33 +181,75 @@ def build_plan(self, request: Request) -> andi.Plan:
170181
# Callable[[Callable], Optional[Callable]] but the registry
171182
# returns the typing for ``dict.get()`` method.
172183
overrides=self.registry.overrides_for(request.url).get, # type: ignore[arg-type]
173-
custom_builder_fn=self._get_item_builder(request),
184+
custom_builder_fn=self._get_custom_builder(request),
174185
)
175186

176-
def _get_item_builder(
187+
def _get_custom_builder(
    self, request: Request
) -> Callable[[Callable], Optional[Callable]]:
    """Return a function suitable for passing as ``custom_builder_fn`` to ``andi.plan``.

    The returned function maps a dependency class to a factory for it, or
    returns ``None`` when no custom factory applies:

    * for :class:`.DynamicDeps` the factory builds a :class:`.DynamicDeps`
      instance for the types listed under the ``"inject"`` key of
      ``request.meta`` (an empty instance when the key is missing or empty);
    * for an item class that the registry maps to a page object class for
      ``request.url``, the factory takes that page object and returns the
      result of awaiting its ``to_item()``.
    """

    @functools.lru_cache(maxsize=None)  # to minimize the registry queries
    def mapping_fn(dep_cls: Callable) -> Optional[Callable]:
        # building DynamicDeps
        if dep_cls is DynamicDeps:
            dynamic_types = request.meta.get("inject", [])
            if not dynamic_types:
                # Build a real DynamicDeps rather than a plain dict so the
                # callback receives the annotated type on both code paths.
                # A lambda is used instead of the class itself so that the
                # factory has a plain no-argument signature.
                return lambda: DynamicDeps()
            return self._get_dynamic_deps_factory(dynamic_types)

        # building items from pages
        page_object_cls: Optional[Type[ItemPage]] = self.registry.page_cls_for_item(
            request.url, cast(type, dep_cls)
        )
        if not page_object_cls:
            return None

        # The annotations are what tells the planner that the item is built
        # from an instance of the page object class.
        async def item_factory(page: page_object_cls) -> dep_cls:  # type: ignore[valid-type]
            return await page.to_item()  # type: ignore[attr-defined]

        return item_factory

    return mapping_fn
199218

219+
@staticmethod
def _get_dynamic_deps_factory_text(
    type_names: Iterable[str],
) -> str:
    """Return the source code of a function that creates a dynamic deps factory.

    The generated ``__create_fn__`` takes one argument per name in
    ``type_names`` and returns ``dynamic_deps_factory``, a function with one
    ``<name>_arg`` parameter per name, annotated with that name, which
    returns ``DynamicDeps({<name>: <name>_arg, ...})``.

    :param type_names: names to use in the generated source; any iterable is
        accepted, including one-shot iterators.
    :return: the generated source code as a string.
    """
    # inspired by Python 3.11 dataclasses._create_fn()
    # https://github.com/python/cpython/blob/v3.11.9/Lib/dataclasses.py#L413

    # Materialize once: the names are needed three times below, and a
    # one-shot iterator would be exhausted after the first pass.
    names = list(type_names)
    args_str = ", ".join(f"{name}_arg: {name}" for name in names)
    result_args_str = ", ".join(f"{name}: {name}_arg" for name in names)
    create_args_str = ", ".join(names)
    return (
        f"def __create_fn__({create_args_str}):\n"
        f"    def dynamic_deps_factory({args_str}) -> DynamicDeps:\n"
        f"        return DynamicDeps({{{result_args_str}}})\n"
        f"    return dynamic_deps_factory"
    )
236+
237+
@staticmethod
def _get_dynamic_deps_factory(
    dynamic_types: List[type],
) -> Callable[..., DynamicDeps]:
    """Return a function that creates a :class:`.DynamicDeps` instance from its args.

    It takes instances of types from ``dynamic_types`` as args and returns
    a :class:`.DynamicDeps` instance where keys are types and values are
    corresponding args. It has correct type hints so that it can be used as
    an ``andi`` custom builder.

    A type listed more than once in ``dynamic_types`` gets a single argument.

    :raises TypeError: if two different types in ``dynamic_types`` have the
        same ``__name__``, as the generated argument names would clash.
    """
    types_by_name: Dict[str, type] = {}
    for type_ in dynamic_types:
        known = types_by_name.setdefault(type_.__name__, type_)
        if known is not type_:
            raise TypeError(
                f"Dynamic dependencies {known!r} and {type_!r} have the same "
                f"name {type_.__name__!r}, which is not supported"
            )

    txt = Injector._get_dynamic_deps_factory_text(types_by_name.keys())
    # The generated code resolves DynamicDeps through the module globals;
    # __create_fn__ is stored in the local namespace by exec().
    ns: Dict[str, Any] = dict(types_by_name)
    exec(txt, globals(), ns)
    # Pass exactly one type per generated parameter, in the same order as
    # the names used to generate the source.
    return ns["__create_fn__"](*types_by_name.values())
252+
200253
@inlineCallbacks
201254
def build_instances(
202255
self,
@@ -480,7 +533,9 @@ class MySpider(Spider):
480533
return Injector(crawler, registry=registry)
481534

482535

483-
def get_response_for_testing(callback: Callable) -> Response:
536+
def get_response_for_testing(
537+
callback: Callable, meta: Optional[Dict[str, Any]] = None
538+
) -> Response:
484539
"""
485540
Return a :class:`scrapy.http.Response` with fake content with the configured
486541
callback. It is useful for testing providers.
@@ -501,6 +556,6 @@ def get_response_for_testing(callback: Callable) -> Response:
501556
""".encode(
502557
"utf-8"
503558
)
504-
request = Request(url, callback=callback)
559+
request = Request(url, callback=callback, meta=meta)
505560
response = Response(url, 200, None, html, request=request)
506561
return response

tests/test_injection.py

Lines changed: 156 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import shutil
22
import sys
3-
from typing import Any, Callable, Dict, Generator
3+
from typing import Any, Callable, Dict, Generator, Optional
44

5+
import andi
56
import attr
67
import parsel
78
import pytest
@@ -16,7 +17,12 @@
1617
from web_poet.mixins import ResponseShortcutsMixin
1718
from web_poet.rules import ApplyRule
1819

19-
from scrapy_poet import DummyResponse, HttpResponseProvider, PageObjectInputProvider
20+
from scrapy_poet import (
21+
DummyResponse,
22+
DynamicDeps,
23+
HttpResponseProvider,
24+
PageObjectInputProvider,
25+
)
2026
from scrapy_poet.injection import (
2127
Injector,
2228
check_all_providers_are_callable,
@@ -293,8 +299,9 @@ def _assert_instances(
293299
callback: Callable,
294300
expected_instances: Dict[type, Any],
295301
expected_kwargs: Dict[str, Any],
302+
reqmeta: Optional[Dict[str, Any]] = None,
296303
) -> Generator[Any, Any, None]:
297-
response = get_response_for_testing(callback)
304+
response = get_response_for_testing(callback, meta=reqmeta)
298305
request = response.request
299306

300307
plan = injector.build_plan(response.request)
@@ -535,6 +542,129 @@ def callback(
535542
# not injected at all.
536543
assert set(kwargs.keys()) == {"expensive", "item"}
537544

545+
@inlineCallbacks
def test_dynamic_deps(self):
    """Types listed under the ``"inject"`` meta key are provided via ``DynamicDeps``."""

    def callback(dd: DynamicDeps):
        pass

    def expected_dd() -> DynamicDeps:
        return DynamicDeps({Cls1: Cls1(), Cls2: Cls2()})

    injector = get_injector_for_testing({get_provider({Cls1, Cls2}): 1})

    yield self._assert_instances(
        injector,
        callback,
        {DynamicDeps: expected_dd(), Cls1: Cls1(), Cls2: Cls2()},
        {"dd": expected_dd()},
        reqmeta={"inject": [Cls1, Cls2]},
    )
568+
569+
@inlineCallbacks
def test_dynamic_deps_mix(self):
    """A type requested both statically and dynamically shares one instance."""

    def callback(c1: Cls1, dd: DynamicDeps):
        pass

    injector = get_injector_for_testing({get_provider({Cls1, Cls2}): 1})
    response = get_response_for_testing(callback, meta={"inject": [Cls1, Cls2]})
    request = response.request

    plan = injector.build_plan(request)
    instances = yield from injector.build_instances(request, response, plan)
    expected_instances = {
        DynamicDeps: DynamicDeps({Cls1: Cls1(), Cls2: Cls2()}),
        Cls1: Cls1(),
        Cls2: Cls2(),
    }
    assert instances == expected_instances
    # The values inside DynamicDeps are the very same objects as the
    # top-level instances, not equal copies.
    dynamic = instances[DynamicDeps]
    assert instances[Cls1] is dynamic[Cls1]
    assert instances[Cls2] is dynamic[Cls2]

    kwargs = yield from injector.build_callback_dependencies(request, response)
    expected_kwargs = {
        "c1": Cls1(),
        "dd": DynamicDeps({Cls1: Cls1(), Cls2: Cls2()}),
    }
    assert kwargs == expected_kwargs
    assert kwargs["c1"] is kwargs["dd"][Cls1]
596+
597+
@inlineCallbacks
def test_dynamic_deps_no_meta(self):
    """Without the ``"inject"`` meta key the callback gets an empty ``DynamicDeps``."""

    def callback(dd: DynamicDeps):
        pass

    injector = get_injector_for_testing({get_provider({Cls1, Cls2}): 1})

    yield self._assert_instances(
        injector,
        callback,
        {DynamicDeps: DynamicDeps()},
        {"dd": DynamicDeps()},
    )
617+
618+
@inlineCallbacks
def test_dynamic_deps_page(self):
    """A page object type can be requested as a dynamic dependency."""

    def callback(dd: DynamicDeps):
        pass

    injector = get_injector_for_testing({})
    response = get_response_for_testing(callback, meta={"inject": [PricePO]})
    request = response.request

    plan = injector.build_plan(request)

    kwargs = yield from injector.build_callback_dependencies(request, response)
    assert {name: type(dep) for name, dep in kwargs.items()} == {
        "dd": DynamicDeps,
    }
    assert {dep_type: type(dep) for dep_type, dep in kwargs["dd"].items()} == {
        PricePO: PricePO,
    }

    instances = yield from injector.build_instances(request, response, plan)
    assert set(instances) == {Html, PricePO, DynamicDeps}
641+
642+
@inlineCallbacks
def test_dynamic_deps_item(self):
    """An item type with a registered page object can be requested dynamically."""

    def callback(dd: DynamicDeps):
        pass

    registry = RulesRegistry(
        rules=[ApplyRule(Patterns(include=()), use=TestItemPage, to_return=TestItem)]
    )
    injector = get_injector_for_testing({}, registry=registry)
    response = get_response_for_testing(callback, meta={"inject": [TestItem]})
    request = response.request

    plan = injector.build_plan(request)

    kwargs = yield from injector.build_callback_dependencies(request, response)
    assert {name: type(dep) for name, dep in kwargs.items()} == {
        "dd": DynamicDeps,
    }
    assert {dep_type: type(dep) for dep_type, dep in kwargs["dd"].items()} == {
        TestItem: TestItem,
    }

    instances = yield from injector.build_instances(request, response, plan)
    assert set(instances) == {TestItemPage, TestItem, DynamicDeps}
667+
538668

539669
class Html(Injectable):
540670
url = "http://example.com"
@@ -833,3 +963,26 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name):
833963
response.request, response, plan
834964
)
835965
assert injector.weak_cache.get(response.request) is None
966+
967+
968+
def test_dynamic_deps_factory_text():
    """Check the exact source text generated for two type names.

    The expected text is spelled out line by line so that the indentation
    of the nested function, which must be valid Python, is explicit.
    """
    expected = "\n".join(
        [
            "def __create_fn__(int, Cls1):",
            "    def dynamic_deps_factory(int_arg: int, Cls1_arg: Cls1) -> DynamicDeps:",
            "        return DynamicDeps({int: int_arg, Cls1: Cls1_arg})",
            "    return dynamic_deps_factory",
        ]
    )
    txt = Injector._get_dynamic_deps_factory_text(["int", "Cls1"])
    assert txt == expected
977+
978+
979+
def test_dynamic_deps_factory():
    """The generated factory has inspectable type hints and maps types to its args."""
    factory = Injector._get_dynamic_deps_factory([int, Cls1])

    # One "<name>_arg" parameter per type, annotated with that type.
    assert andi.inspect(factory) == {
        "Cls1_arg": [Cls1],
        "int_arg": [int],
    }

    cls1_instance = Cls1()
    result = factory(int_arg=42, Cls1_arg=cls1_instance)
    assert result == {int: 42, Cls1: cls1_instance}

0 commit comments

Comments
 (0)