Skip to content

Commit

Permalink
Merge remote-tracking branch 'scrapinghub/master' into request-finger…
Browse files Browse the repository at this point in the history
…printing
  • Loading branch information
Gallaecio committed Dec 26, 2023
2 parents a72f7c2 + 0eef7ae commit bf93c2b
Show file tree
Hide file tree
Showing 21 changed files with 644 additions and 355 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.16.1
current_version = 0.19.0
commit = True
tag = True
tag_name = {new_version}
Expand Down
42 changes: 28 additions & 14 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,34 @@ ignore =
C408,

# To be addressed:
D100, # Missing docstring in public module
D101, # Missing docstring in public class
D102, # Missing docstring in public method
D103, # Missing docstring in public function
D104, # Missing docstring in public package
D105, # Missing docstring in magic method
D107, # Missing docstring in __init__
D200, # One-line docstring should fit on one line with quotes
D202, # No blank lines allowed after function docstring
D205, # 1 blank line required between summary line and description
D209, # Multi-line docstring closing quotes should be on a separate line
D400, # First line should end with a period
D401, # First line should be in imperative mood
D402 # First line should not be the function's "signature"
# Missing docstring in public module
D100,
# Missing docstring in public class
D101,
# Missing docstring in public method
D102,
# Missing docstring in public function
D103,
# Missing docstring in public package
D104,
# Missing docstring in magic method
D105,
# Missing docstring in __init__
D107,
# One-line docstring should fit on one line with quotes
D200,
# No blank lines allowed after function docstring
D202,
# 1 blank line required between summary line and description
D205,
# Multi-line docstring closing quotes should be on a separate line
D209,
# First line should end with a period
D400,
# First line should be in imperative mood
D401,
# First line should not be the function's "signature"
D402

per-file-ignores =
# F401: Ignore "imported but unused" errors in __init__ files, as those
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: '3.x'
python-version: '3.12'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
9 changes: 5 additions & 4 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@ jobs:
- python-version: "3.9"
- python-version: "3.10"
- python-version: "3.11"
- python-version: "3.11"
- python-version: "3.12"
- python-version: "3.12"
toxenv: "asyncio"

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand All @@ -53,7 +54,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ['3.11']
python-version: ['3.12']
tox-job: ["mypy", "docs", "linters", "twinecheck"]

steps:
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ repos:
- flake8-docstrings
- flake8-string-format
repo: https://github.com/pycqa/flake8
rev: 4.0.1
rev: 6.1.0
2 changes: 1 addition & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ sphinx:
build:
os: ubuntu-22.04
tools:
python: "3.11" # Keep in sync with .github/workflows/tests.yml
python: "3.12" # Keep in sync with .github/workflows/test.yml

python:
install:
Expand Down
43 changes: 43 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,49 @@
Changelog
=========

0.19.0 (2023-12-26)
-------------------

* Now requires ``andi >= 0.6.0``.

* Changed the implementation of resolving and building item dependencies from
page objects. Now ``andi`` custom builders are used to create a single plan
that includes building page objects and items. This fixes problems such as
providers being called multiple times.

* :class:`~scrapy_poet.page_input_providers.ItemProvider` is now a no-op. It's
no longer enabled by default and users should also stop enabling it.
* ``PageObjectInputProvider.allow_prev_instances`` and code related to it
were removed so custom providers may need updating.

* Fixed some tests.

0.18.0 (2023-12-12)
-------------------

* Now requires ``andi >= 0.5.0``.

* Add support for dependency metadata via ``typing.Annotated`` (requires
Python 3.9+).

0.17.0 (2023-12-11)
-------------------

* Now requires ``web-poet >= 0.15.1``.

* :class:`~web_poet.page_inputs.http.HttpRequest` dependencies are now
supported, via :class:`~scrapy_poet.page_input_providers.HttpRequestProvider`
(enabled by default).

* Enable :class:`~scrapy_poet.page_input_providers.StatsProvider`, which
provides :class:`~web_poet.page_inputs.stats.Stats` dependencies, by default.

* More robust disabling of
:class:`~scrapy_poet.downloadermiddlewares.InjectionMiddleware` in the
``scrapy savefixture`` command.

* Official support for Python 3.12.

0.16.1 (2023-11-02)
-------------------

Expand Down
44 changes: 44 additions & 0 deletions docs/providers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -312,3 +312,47 @@ but not the others.
To have other settings respected, in addition to ``CONCURRENT_REQUESTS``, you'd
need to use ``crawler.engine.download`` or something like that. Alternatively,
you could implement those limits in the library itself.

Attaching metadata to dependencies
==================================

.. note:: This feature requires Python 3.9+.

Providers can support dependencies with arbitrary metadata attached and use
that metadata when creating them. Attaching the metadata is done by wrapping
the dependency class in :data:`typing.Annotated`:

.. code-block:: python

    @attr.define
    class MyPageObject(ItemPage):
        response: Annotated[HtmlResponse, "foo", "bar"]

To handle this you need the following changes in your provider:

.. code-block:: python

    from andi.typeutils import strip_annotated
    from scrapy_poet import AnnotatedResult, PageObjectInputProvider


    class Provider(PageObjectInputProvider):
        ...

        def is_provided(self, type_: Callable) -> bool:
            # needed so that you can list just the base type in provided_classes
            return super().is_provided(strip_annotated(type_))

        def __call__(self, to_provide):
            result = []
            for cls in to_provide:
                metadata = getattr(cls, "__metadata__", None)
                obj = ...  # create the instance using cls and metadata
                if metadata:
                    # wrap the instance into a scrapy_poet.AnnotatedResult object
                    obj = AnnotatedResult(obj, metadata)
                result.append(obj)
            return result

.. autoclass:: scrapy_poet.AnnotatedResult
   :members:
2 changes: 1 addition & 1 deletion scrapy_poet/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.16.1
0.19.0
2 changes: 1 addition & 1 deletion scrapy_poet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .api import DummyResponse, callback_for
from .api import AnnotatedResult, DummyResponse, callback_for
from .downloadermiddlewares import InjectionMiddleware
from .page_input_providers import HttpResponseProvider, PageObjectInputProvider
from .spidermiddlewares import RetryMiddleware
Expand Down
28 changes: 27 additions & 1 deletion scrapy_poet/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dataclasses import dataclass
from inspect import iscoroutinefunction
from typing import Callable, Optional, Type
from typing import Any, Callable, Optional, Tuple, Type

from scrapy.http import Request, Response
from web_poet.pages import ItemPage
Expand Down Expand Up @@ -133,3 +134,28 @@ def parse(*args, item: page_or_item_cls, **kwargs): # type:ignore

setattr(parse, _CALLBACK_FOR_MARKER, True)
return parse


@dataclass
class AnnotatedResult:
    """Container pairing a dependency instance with its annotation metadata.

    A provider that is asked for a :data:`typing.Annotated` dependency type
    returns an ``AnnotatedResult`` instance for it, which lets the caller
    match the built dependency back to its annotation.

    :param result: The wrapped dependency instance.
    :type result: Any
    :param metadata: The copy of the annotation.
    :type metadata: Tuple[Any, ...]
    """

    result: Any
    metadata: Tuple[Any, ...]

    def get_annotated_cls(self):
        """Returns a re-created :class:`typing.Annotated` type."""
        # NOTE(review): the import is kept local to this method, presumably so
        # the module still imports on interpreters without typing.Annotated
        # (the feature is documented as Python 3.9+) -- confirm before hoisting.
        from typing import Annotated

        # The concrete class of the wrapped instance comes first, followed by
        # every metadata entry, mirroring Annotated[cls, *metadata].
        annotated_args = (type(self.result), *self.metadata)
        return Annotated[annotated_args]
8 changes: 5 additions & 3 deletions scrapy_poet/commands.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
import logging
from pathlib import Path
from typing import Dict, Optional, Type
from typing import Optional, Type

import andi
import scrapy
Expand Down Expand Up @@ -38,10 +38,9 @@ def build_instances_from_providers(
request: Request,
response: Response,
plan: andi.Plan,
prev_instances: Optional[Dict] = None,
):
instances = yield super().build_instances_from_providers(
request, response, plan, prev_instances
request, response, plan
)
if request.meta.get("savefixture", False):
saved_dependencies.extend(instances.values())
Expand Down Expand Up @@ -109,6 +108,9 @@ def run(self, args, opts):
if not issubclass(cls, ItemPage):
raise UsageError(f"Error: {type_name} is not a descendant of ItemPage")

self.settings["DOWNLOADER_MIDDLEWARES"][
"scrapy_poet.InjectionMiddleware"
] = None
self.settings["DOWNLOADER_MIDDLEWARES"][
"scrapy_poet.downloadermiddlewares.InjectionMiddleware"
] = None
Expand Down
4 changes: 2 additions & 2 deletions scrapy_poet/downloadermiddlewares.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
from .page_input_providers import (
HttpClientProvider,
HttpResponseProvider,
ItemProvider,
PageParamsProvider,
RequestUrlProvider,
ResponseUrlProvider,
StatsProvider,
)
from .utils import create_registry_instance, is_min_scrapy_version

Expand All @@ -34,7 +34,7 @@
PageParamsProvider: 700,
RequestUrlProvider: 800,
ResponseUrlProvider: 900,
ItemProvider: 2000,
StatsProvider: 1000,
}

InjectionMiddlewareTV = TypeVar("InjectionMiddlewareTV", bound="InjectionMiddleware")
Expand Down
Loading

0 comments on commit bf93c2b

Please sign in to comment.