Skip to content

Commit da73a2c

Browse files
committed
Add dependency syncing
fixes: #340
1 parent a6ebd0f commit da73a2c

File tree

9 files changed

+196
-4
lines changed

9 files changed

+196
-4
lines changed

CHANGES/340.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added ability (tech-preview) to recursively sync dependencies from a remote with `sync_dependencies=True`.

docs/workflows/sync.rst

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ Remote Create Response::
6868
],
6969
"excludes": [],
7070
"prereleases": true,
71+
"package_types": [],
72+
"keep_latest_packages": 0,
73+
"exclude_platforms": [],
74+
"sync_dependencies": false
75+
7176
}
7277

7378

@@ -119,6 +124,30 @@ Reference: `Python Remote Usage <../restapi.html#tag/Remotes:-Python>`_
119124

120125
.. _mirror-workflow:
121126

127+
Syncing Dependencies
128+
--------------------
129+
130+
When specifying included packages to sync, Pulp can also sync the dependencies of those packages. The example below enables dependency syncing for the included packages; it syncs the dependencies for each django version >=4.0, and the dependencies for all pulpcore versions plus the extra dependencies for s3::
131+
132+
$ pulp python remote create \
133+
--name 'packages-with-dependencies' \
134+
--url 'https://pypi.org/' \
135+
--sync-dependencies \
136+
--includes '[
137+
"django>=4.0",
138+
"pulpcore[s3]"
139+
]'
140+
141+
Turning on dependency syncing will only sync the necessary dependencies to install the package for the
142+
given versions declared in the includes list. You can sync the extra dependencies of a package by
143+
using the `extras notation <https://peps.python.org/pep-0508/#extras>`_. Synced dependencies are also
144+
filtered through the other filters defined on the remote. Note that many packages have unrestricted
145+
dependencies which can cause syncs to become significantly larger than normal. It is recommended
146+
to use extra filters to trim any unwanted packages.
147+
148+
.. warning:: This feature is provided as a tech preview and could change in backwards incompatible
149+
ways in the future.
150+
122151
Creating a remote to sync all of PyPI
123152
_____________________________________
124153

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Generated by Django 4.2.5 on 2023-10-19 03:05
2+
3+
from django.db import migrations, models
4+
5+
6+
# Schema migration for the "python" app: adds a boolean "sync_dependencies" field
# (default False) to the "pythonremote" model. It must run after migration
# 0011_alter_pythondistribution_distribution_ptr_and_more.
class Migration(migrations.Migration):
7+
dependencies = [
8+
("python", "0011_alter_pythondistribution_distribution_ptr_and_more"),
9+
]
10+
11+
operations = [
12+
migrations.AddField(
13+
model_name="pythonremote",
14+
name="sync_dependencies",
15+
field=models.BooleanField(default=False),
16+
),
17+
]

pulp_python/app/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ class PythonRemote(Remote):
235235
exclude_platforms = ArrayField(
236236
models.CharField(max_length=10, blank=True), choices=PLATFORMS, default=list
237237
)
238+
sync_dependencies = models.BooleanField(default=False)
238239

239240
def get_remote_artifact_url(self, relative_path=None, request=None):
240241
"""Get url for remote_artifact"""

pulp_python/app/serializers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,11 @@ class PythonRemoteSerializer(core_serializers.RemoteSerializer):
325325
choices=python_models.PLATFORMS,
326326
default=list
327327
)
328+
sync_dependencies = serializers.BooleanField(
329+
required=False,
330+
help_text=_("Whether to sync dependencies specified by package metadata. (Tech Preview)"),
331+
default=False,
332+
)
328333

329334
def validate_includes(self, value):
330335
"""Validates the includes"""
@@ -351,7 +356,7 @@ def validate_excludes(self, value):
351356
class Meta:
352357
fields = core_serializers.RemoteSerializer.Meta.fields + (
353358
"includes", "excludes", "prereleases", "package_types", "keep_latest_packages",
354-
"exclude_platforms",
359+
"exclude_platforms", "sync_dependencies"
355360
)
356361
model = python_models.PythonRemote
357362

pulp_python/app/tasks/sync.py

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
import logging
2+
import tempfile
3+
from typing import Dict, Set
24

35
from aiohttp import ClientResponseError, ClientError
6+
from collections import defaultdict
7+
from itertools import chain
48
from lxml.etree import LxmlError
59
from gettext import gettext as _
6-
from os import environ, path
10+
from os import environ
711

812
from rest_framework import serializers
913

@@ -19,13 +23,14 @@
1923
PythonPackageContent,
2024
PythonRemote,
2125
)
22-
from pulp_python.app.utils import parse_metadata, PYPI_LAST_SERIAL
26+
from pulp_python.app.utils import parse_json, parse_metadata, PYPI_LAST_SERIAL
2327
from pypi_simple import parse_repo_index_page
2428

2529
from bandersnatch.mirror import Mirror
2630
from bandersnatch.master import Master
2731
from bandersnatch.configuration import BandersnatchConfig
2832
from packaging.requirements import Requirement
33+
from packaging.utils import canonicalize_name
2934
from urllib.parse import urljoin, urlsplit, urlunsplit
3035

3136
logger = logging.getLogger(__name__)
@@ -113,7 +118,8 @@ async def run(self):
113118
"""
114119
# Prevent bandersnatch from reading actual .netrc file, set to nonexistent file
115120
# See discussion on https://github.com/pulp/pulp_python/issues/581
116-
environ["NETRC"] = f"{path.curdir}/.fake-netrc"
121+
fake_netrc = tempfile.NamedTemporaryFile(dir=".", delete=False)
122+
environ["NETRC"] = fake_netrc.name
117123
# TODO Change Bandersnatch internal API to take proxy settings in from config parameters
118124
if proxy_url := self.remote.proxy_url:
119125
if self.remote.proxy_username or self.remote.proxy_password:
@@ -146,6 +152,23 @@ async def run(self):
146152
Requirement(pkg).name for pkg in self.remote.includes
147153
]
148154
await pmirror.synchronize(packages_to_sync)
155+
if pmirror.sync_dependencies:
156+
depth = 1
157+
while pmirror.dependencies_to_sync and depth <= 25: # ensure no circular loops
158+
logger.info(_("Syncing dependencies: depth {}").format(depth))
159+
depth += 1
160+
packages_to_sync = list(pmirror.dependencies_to_sync.keys())
161+
pmirror.allow_filter.allowlist_release_requirements = list(
162+
chain(*pmirror.dependencies_to_sync.values())
163+
)
164+
logger.info(
165+
f"Re-initialized release plugin {pmirror.allow_filter.name}, filtering "
166+
+ f"{pmirror.allow_filter.allowlist_release_requirements}"
167+
)
168+
pmirror.dependencies_to_sync.clear()
169+
await pmirror.synchronize(packages_to_sync)
170+
if pmirror.dependencies_to_sync:
171+
logger.warning(_("Reached dependency sync depth limit! Breaking out"))
149172

150173

151174
class PulpMirror(Mirror):
@@ -160,8 +183,18 @@ def __init__(
160183
super().__init__(master=master, workers=workers)
161184
self.synced_serial = serial
162185
self.python_stage = python_stage
186+
self.remote = self.python_stage.remote
163187
self.progress_report = progress_report
164188
self.deferred_download = deferred_download
189+
self.sync_dependencies = self.remote.includes and self.remote.sync_dependencies
190+
if self.sync_dependencies:
191+
# Find the allowlist_filter, so we can update it when syncing dependencies
192+
for fil in self.filters.filter_release_plugins():
193+
if fil.name == "allowlist_release":
194+
self.allow_filter = fil
195+
break
196+
self.already_synced: Dict[str, Set[str]] = defaultdict(set)
197+
self.dependencies_to_sync: Dict[str, Set[Requirement]] = defaultdict(set)
165198

166199
async def determine_packages_to_sync(self):
167200
"""
@@ -230,6 +263,28 @@ async def create_content(self, pkg):
230263
create a Content Unit to put into the pipeline
231264
"""
232265
for version, dists in pkg.releases.items():
266+
if self.sync_dependencies:
267+
if version in self.already_synced[pkg.name]:
268+
continue
269+
self.already_synced[pkg.name].add(version)
270+
271+
for req_spec in await self.get_required_dists(pkg, version):
272+
req = Requirement(req_spec)
273+
req.name = canonicalize_name(req.name)
274+
req.specifier.prereleases = True
275+
if req.marker:
276+
if "extra == " in str(req.marker):
277+
# Only sync the required dependency if we specified the correct 'extra'
278+
extras = set()
279+
for cur_allow_pkg in self.allow_filter.allowlist_release_requirements:
280+
if cur_allow_pkg.name == pkg.name:
281+
extras |= cur_allow_pkg.extras
282+
extra = str(req.marker).rpartition("extra == ")[2].strip("'\"")
283+
if extra not in extras:
284+
continue
285+
286+
self.dependencies_to_sync[req.name].add(req)
287+
233288
for package in dists:
234289
entry = parse_metadata(pkg.info, version, package)
235290
url = entry.pop("url")
@@ -258,3 +313,15 @@ def on_error(self, exception, **kwargs):
258313
This should have some error checking
259314
"""
260315
pass
316+
317+
async def get_required_dists(self, pkg, version):
    """
    Return the ``requires_dist`` entries declared by one release of a package.

    The release metadata is downloaded from ``pypi/<name>/<version>/json`` (joined onto the
    remote's URL) using the remote's downloader, and ``info.requires_dist`` is read from it.

    Args:
        pkg: Package object; only its ``name`` attribute is used here.
        version: Version string of the release to look up.

    Returns:
        list: The requirement strings of the release. Empty when the metadata request fails
        with an HTTP error response, or when the metadata declares no requirements.
    """
    # TODO: Can this logic live in Bandersnatch?
    url = urljoin(self.remote.url, f"pypi/{pkg.name}/{version}/json")
    downloader = self.remote.get_downloader(url=url)
    try:
        result = await downloader.run()
    except ClientResponseError as exc:
        # Best effort: a release whose metadata can't be fetched contributes no dependencies,
        # but leave a trace so the skipped lookup can be diagnosed.
        logger.debug("Could not fetch dependency metadata from %s: %s", url, exc)
        return []

    # Both "info" and "requires_dist" may be absent *or* explicitly null in the JSON;
    # dict.get's default only covers the absent case, so normalise with `or`.
    info = parse_json(result).get("info") or {}
    return info.get("requires_dist") or []

pulp_python/app/utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,3 +312,9 @@ def write_simple_detail(project_name, project_packages, streamed=False):
312312
detail = Template(simple_detail_template)
313313
context = {"project_name": project_name, "project_packages": project_packages}
314314
return detail.stream(**context) if streamed else detail.render(**context)
315+
316+
317+
def parse_json(download_result):
    """
    Load and return the JSON document stored in a download result's file.

    Args:
        download_result: Object exposing a ``path`` attribute that points at the file to read.

    Returns:
        The decoded JSON value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read bytes rather than text: json.load() then detects UTF-8/16/32 itself, so the
    # result no longer depends on the process locale's default text encoding.
    with open(download_result.path, "rb") as fd:
        return json.load(fd)

pulp_python/tests/functional/api/test_sync.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
PYTHON_LG_FIXTURE_SUMMARY,
3535
PYTHON_LG_FIXTURE_COUNTS,
3636
DJANGO_LATEST_3,
37+
DJANGO_PLUS_PYTZ,
38+
DJANGO_PLUS_PYTZ_BCRYPT,
3739
SCIPY_COUNTS,
3840
)
3941
from pulp_python.tests.functional.utils import gen_python_client, gen_python_remote
@@ -687,6 +689,68 @@ def test_proxy_auth_sync(
687689
assert content_resp.count == 2
688690

689691

692+
@pytest.mark.parallel
def test_sync_dependency(
    python_repo, python_repo_api_client, python_content_api_client, python_remote_factory
):
    """Sync Django with dependency syncing enabled and check that pytz comes along."""
    # In our fixtures pytz is the only required dependency of Django
    remote_kwargs = gen_python_remote(
        includes=["Django"], sync_dependencies=True, prereleases=True
    )
    remote = python_remote_factory(**remote_kwargs)
    sync_task = python_repo_api_client.sync(
        python_repo.pulp_href, {"remote": remote.pulp_href}
    ).task
    monitor_task(sync_task)

    # The sync must have produced repository version 1
    version_href = python_repo_api_client.read(python_repo.pulp_href).latest_version_href
    assert version_href[-2] == "1"

    all_content = python_content_api_client.list(repository_version=version_href)
    assert all_content.count == DJANGO_PLUS_PYTZ

    pytz_content = python_content_api_client.list(repository_version=version_href, name="pytz")
    assert pytz_content.count > 0
713+
714+
715+
@pytest.mark.parallel
def test_sync_dependency_extras(
    python_repo, python_repo_api_client, python_content_api_client, python_remote_factory
):
    """Sync Django[bcrypt] with dependency syncing and check the extra's dependency is present."""
    remote_kwargs = gen_python_remote(
        includes=["Django[bcrypt]"], sync_dependencies=True, prereleases=True
    )
    remote = python_remote_factory(**remote_kwargs)
    sync_task = python_repo_api_client.sync(
        python_repo.pulp_href, {"remote": remote.pulp_href}
    ).task
    monitor_task(sync_task)

    # The sync must have produced repository version 1
    version_href = python_repo_api_client.read(python_repo.pulp_href).latest_version_href
    assert version_href[-2] == "1"

    all_content = python_content_api_client.list(repository_version=version_href)
    assert all_content.count == DJANGO_PLUS_PYTZ_BCRYPT

    bcrypt_content = python_content_api_client.list(
        repository_version=version_href, name="bcrypt"
    )
    assert bcrypt_content.count > 0
735+
736+
737+
@pytest.mark.parallel
def test_sync_dependency_not_present(
    python_repo, python_repo_api_client, python_content_api_client, python_remote_factory
):
    """A dependency that is missing upstream must not make the sync fail."""
    remote_kwargs = gen_python_remote(includes=["scipy"], sync_dependencies=True)
    remote = python_remote_factory(**remote_kwargs)
    sync_task = python_repo_api_client.sync(
        python_repo.pulp_href, {"remote": remote.pulp_href}
    ).task
    monitor_task(sync_task)

    # The sync must have produced repository version 1
    version_href = python_repo_api_client.read(python_repo.pulp_href).latest_version_href
    assert version_href[-2] == "1"

    all_content = python_content_api_client.list(repository_version=version_href)
    assert all_content.count == SCIPY_COUNTS["total"]
752+
753+
690754
def sync_to_remote(self, body, create=False, mirror=False):
691755
"""Takes a body and creates/updates a remote object, then it performs a sync"""
692756
if create:

pulp_python/tests/functional/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@
158158
}
159159

160160
DJANGO_LATEST_3 = 4 # latest version has 2 dists, each other has 1
161+
DJANGO_PLUS_PYTZ = 37
162+
DJANGO_PLUS_PYTZ_BCRYPT = 45
161163
SCIPY_COUNTS = {
162164
"total": 23, # scipy has 23 different release files for the same release
163165
"windows": 8,

0 commit comments

Comments
 (0)