Skip to content

Commit

Permalink
Merge pull request #169 from scrapinghub/annotated-support
Browse files Browse the repository at this point in the history
Initial support for typing.Annotated.
  • Loading branch information
wRAR authored Dec 12, 2023
2 parents e7475e3 + 88c58bc commit 6fe5dc4
Show file tree
Hide file tree
Showing 8 changed files with 307 additions and 39 deletions.
44 changes: 44 additions & 0 deletions docs/providers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -312,3 +312,47 @@ but not the others.
To have other settings respected, in addition to ``CONCURRENT_REQUESTS``, you'd
need to use ``crawler.engine.download`` or something like that. Alternatively,
you could implement those limits in the library itself.

Attaching metadata to dependencies
==================================

.. note:: This feature requires Python 3.9+.

Providers can support dependencies with arbitrary metadata attached and use
that metadata when creating them. Attaching the metadata is done by wrapping
the dependency class in :data:`typing.Annotated`:

.. code-block:: python

    @attr.define
    class MyPageObject(ItemPage):
        response: Annotated[HtmlResponse, "foo", "bar"]

To handle this you need the following changes in your provider:

.. code-block:: python

    from andi.typeutils import strip_annotated
    from scrapy_poet import AnnotatedResult, PageObjectInputProvider


    class Provider(PageObjectInputProvider):
        ...

        def is_provided(self, type_: Callable) -> bool:
            # needed so that you can list just the base type in provided_classes
            return super().is_provided(strip_annotated(type_))

        def __call__(self, to_provide):
            result = []
            for cls in to_provide:
                metadata = getattr(cls, "__metadata__", None)
                obj = ...  # create the instance using cls and metadata
                if metadata:
                    # wrap the instance into a scrapy_poet.AnnotatedResult object
                    obj = AnnotatedResult(obj, metadata)
                result.append(obj)
            return result

.. autoclass:: scrapy_poet.AnnotatedResult
    :members:
2 changes: 1 addition & 1 deletion scrapy_poet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .api import DummyResponse, callback_for
from .api import AnnotatedResult, DummyResponse, callback_for
from .downloadermiddlewares import InjectionMiddleware
from .page_input_providers import HttpResponseProvider, PageObjectInputProvider
from .spidermiddlewares import RetryMiddleware
28 changes: 27 additions & 1 deletion scrapy_poet/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dataclasses import dataclass
from inspect import iscoroutinefunction
from typing import Callable, Optional, Type
from typing import Any, Callable, Optional, Tuple, Type

from scrapy.http import Request, Response
from web_poet.pages import ItemPage
Expand Down Expand Up @@ -133,3 +134,28 @@ def parse(*args, item: page_or_item_cls, **kwargs): # type:ignore

setattr(parse, _CALLBACK_FOR_MARKER, True)
return parse


@dataclass
class AnnotatedResult:
    """Wrapper for annotated dependencies.

    When a provider gets a :data:`typing.Annotated` type as a dependency type,
    it will return an ``AnnotatedResult`` instance for it so that the caller
    can match the dependency to its annotation.

    :param result: The wrapped dependency instance.
    :type result: Any
    :param metadata: The copy of the annotation.
    :type metadata: Tuple[Any, ...]
    """

    result: Any
    metadata: Tuple[Any, ...]

    def get_annotated_cls(self):
        """Return the type that the wrapped result should be matched against.

        With non-empty ``metadata`` this is a re-created
        :data:`typing.Annotated` type built from the runtime type of
        ``result`` and the stored metadata. With empty ``metadata`` there is
        nothing to annotate, so the plain type of ``result`` is returned
        instead of letting ``Annotated[...]`` fail with a ``TypeError``
        (it requires a type and at least one annotation).

        :return: ``Annotated[type(result), *metadata]``, or ``type(result)``
            when ``metadata`` is empty.
        """
        result_cls = type(self.result)
        if not self.metadata:
            return result_cls

        # Imported lazily: typing.Annotated is only available on Python 3.9+,
        # so a module-level import would break importing this module on
        # older interpreters.
        from typing import Annotated

        # Subscripting with a tuple is equivalent to Annotated[cls, m1, m2, ...].
        return Annotated[(result_cls, *self.metadata)]
42 changes: 16 additions & 26 deletions scrapy_poet/injection.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,9 @@
from web_poet.serialization.api import deserialize_leaf, load_class, serialize
from web_poet.utils import get_fq_class_name

from scrapy_poet.api import _CALLBACK_FOR_MARKER, DummyResponse
from scrapy_poet.api import _CALLBACK_FOR_MARKER, AnnotatedResult, DummyResponse
from scrapy_poet.cache import SerializedDataCache
from scrapy_poet.injection_errors import (
InjectionError,
NonCallableProviderError,
UndeclaredProvidedTypeError,
)
Expand Down Expand Up @@ -261,13 +260,20 @@ def build_instances_from_providers(
self.crawler.stats.inc_value("poet/cache/firsthand")
raise

objs_by_type: Dict[Callable, Any] = {type(obj): obj for obj in objs}
objs_by_type: Dict[Callable, Any] = {}
for obj in objs:
if isinstance(obj, AnnotatedResult):
cls = obj.get_annotated_cls()
obj = obj.result
else:
cls = type(obj)
objs_by_type[cls] = obj
extra_classes = objs_by_type.keys() - provided_classes
if extra_classes:
raise UndeclaredProvidedTypeError(
f"{provider} has returned instances of types {extra_classes} "
"that are not among the declared supported classes in the "
f"provider: {provider.provided_classes}"
f"provider: {provided_classes}"
)
instances.update(objs_by_type)

Expand Down Expand Up @@ -306,31 +312,15 @@ def is_class_provided_by_any_provider_fn(
Return a function of type ``Callable[[Type], bool]`` that return
True if the given type is provided by any of the registered providers.
The attribute ``provided_classes`` from each provided is used.
This attribute can be a :class:`set` or a ``Callable``. All sets are
joined together for efficiency.
The ``is_provided`` method from each provider is used.
"""
sets_of_types: Set[Callable] = set() # caching all sets found
individual_is_callable: List[Callable[[Callable], bool]] = [
sets_of_types.__contains__
]
callables: List[Callable[[Callable], bool]] = []
for provider in providers:
provided_classes = provider.provided_classes

if isinstance(provided_classes, (Set, frozenset)):
sets_of_types.update(provided_classes)
elif callable(provider.provided_classes):
individual_is_callable.append(provided_classes)
else:
raise InjectionError(
f"Unexpected type '{type(provided_classes)}' for "
f"'{type(provider)}.provided_classes'. Expected either 'set' "
f"or 'callable'"
)
callables.append(provider.is_provided)

def is_provided_fn(type: Callable) -> bool:
for is_provided in individual_is_callable:
if is_provided(type):
def is_provided_fn(type_: Callable) -> bool:
for is_provided in callables:
if is_provided(type_):
return True
return False

Expand Down
15 changes: 13 additions & 2 deletions scrapy_poet/page_input_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,18 @@
import asyncio
from dataclasses import make_dataclass
from inspect import isclass
from typing import Any, Callable, ClassVar, Dict, List, Optional, Set, Type, Union
from typing import (
Any,
Callable,
ClassVar,
Dict,
FrozenSet,
List,
Optional,
Set,
Type,
Union,
)
from warnings import warn
from weakref import WeakKeyDictionary

Expand Down Expand Up @@ -119,7 +130,7 @@ def is_provided(self, type_: Callable) -> bool:
Return ``True`` if the given type is provided by this provider based
on the value of the attribute ``provided_classes``
"""
if isinstance(self.provided_classes, Set):
if isinstance(self.provided_classes, (Set, FrozenSet)):
return type_ in self.provided_classes
elif callable(self.provided_classes):
return self.provided_classes(type_)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
package_data={"scrapy_poet": ["VERSION"]},
python_requires=">=3.8",
install_requires=[
"andi >= 0.4.1",
"andi >= 0.5.0",
"attrs >= 21.3.0",
"parsel >= 1.5.0",
"scrapy >= 2.6.0",
Expand Down
Loading

0 comments on commit 6fe5dc4

Please sign in to comment.