Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DRAFT pulp_labels POC #5787

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pulp_file/app/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class FileContentViewSet(SingleArtifactContentUploadViewSet):
"effect": "allow",
},
{
"action": ["create"],
"action": ["create", "set_label", "unset_label"],
"principal": "authenticated",
"effect": "allow",
"condition": [
Expand Down
19 changes: 19 additions & 0 deletions pulpcore/app/migrations/0123_content_pulp_labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 4.2.15 on 2024-09-08 23:15

import django.contrib.postgres.fields.hstore
from django.db import migrations


class Migration(migrations.Migration):

dependencies = [
('core', '0122_record_last_replication_timestamp'),
]

operations = [
migrations.AddField(
model_name='content',
name='pulp_labels',
field=django.contrib.postgres.fields.hstore.HStoreField(default=dict),
),
]
3 changes: 3 additions & 0 deletions pulpcore/app/models/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from itertools import chain

from django.conf import settings
from django.contrib.postgres.fields import HStoreField
from django.core import validators
from django.db import IntegrityError, models, transaction
from django.forms.models import model_to_dict
Expand Down Expand Up @@ -515,6 +516,7 @@ class Content(MasterModel, QueryMixin):
Fields:
upstream_id (models.UUIDField) : identifier of content imported from an 'upstream' Pulp
timestamp_of_interest (models.DateTimeField): timestamp that prevents orphan cleanup
pulp_labels (HStoreField): Dictionary of string values.

Relations:

Expand All @@ -528,6 +530,7 @@ class Content(MasterModel, QueryMixin):
TYPE = "content"
repo_key_fields = () # Used by pulpcore.plugin.repo_version_utils.remove_duplicates
upstream_id = models.UUIDField(null=True) # Used by PulpImport/Export processing
pulp_labels = HStoreField(default=dict)

_artifacts = models.ManyToManyField(Artifact, through="ContentArtifact")
timestamp_of_interest = models.DateTimeField(auto_now=True)
Expand Down
9 changes: 7 additions & 2 deletions pulpcore/app/serializers/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
from rest_framework.validators import UniqueValidator

from pulpcore.app import models
from pulpcore.app.serializers import base, fields, DetailRelatedField
from pulpcore.app.serializers import base, fields, pulp_labels_validator, DetailRelatedField
from pulpcore.app.util import get_domain


class NoArtifactContentSerializer(base.ModelSerializer):
pulp_href = base.DetailIdentityField(view_name_pattern=r"contents(-.*/.*)-detail")
pulp_labels = serializers.HStoreField(required=False, validators=[pulp_labels_validator])

repository = DetailRelatedField(
help_text=_("A URI of a repository the new content unit should be associated with."),
required=False,
Expand Down Expand Up @@ -104,7 +106,10 @@ def create(self, validated_data):

class Meta:
model = models.Content
fields = base.ModelSerializer.Meta.fields + ("repository",)
fields = base.ModelSerializer.Meta.fields + (
"repository",
"pulp_labels",
)


class SingleArtifactContentSerializer(NoArtifactContentSerializer):
Expand Down
21 changes: 16 additions & 5 deletions pulpcore/app/serializers/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,8 +444,9 @@ class RepositoryAddRemoveContentSerializer(ModelSerializer, NestedHyperlinkedMod
remove_content_units = serializers.ListField(
help_text=_(
"A list of content units to remove from the latest repository version. "
"You may also specify '*' as an entry to remove all content. This content is "
"removed before add_content_units are added."
"You may specify '*' as an entry to remove all content, or 'key=value' pairs "
"to remove content by-label. This content is removed before add_content_units "
"are added."
Comment on lines -447 to +449
Copy link
Member

@mdellweg mdellweg Sep 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I can see, that this is still unambiguous. But should we maybe go for a new remove_q field?
You may for example remove all files from a repository by their relative_path__startswith...
So the logic could start with remove_q="pulp_label_select='foo'" and later we add support for remove_q="pulp_label_select='foo' OR relative_path__startswith='/bar/'.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having said that, should we factor this part (advanced modify) into it's own PR? It would give us time to think this high level api design over.

Given that a user can filter content by labels, they can already compile the set of content hrefs locally to call modify with them.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that a user can filter content by labels, they can already compile the set of content hrefs locally to call modify with them.

Yes - but the user-experience of calling an API with a list of possibly thousands of HREFs, for one label, is...suboptimal. And "remove all the units with a given build id" is the actual requirement we're being asked to respond to, it's the point for doing this at all.

I can see benefits to a more-general Q filter - but I'm not sure the extra flexibility is worth the extra complication?

),
child=serializers.CharField(error_messages={"invalid": "Not a valid URI of a resource."}),
required=False,
Expand Down Expand Up @@ -474,19 +475,29 @@ def validate_add_content_units(self, value):

def validate_remove_content_units(self, value):
remove_content_units = {}

# "* must be alone, and means "all-content"
if "*" in value:
if len(value) > 1:
raise serializers.ValidationError("Cannot supply content units and '*'.")
else:
return ["*"]
else:

# Q: just one label? can label/hrefs be mixed? accept full label-operand-set?
# Current: multiple labels, mixed with hrefs, only '='
# "x=y" can happen multiple times, and means "all content with label x=y, pass k=v pair"
labels_specified = [s for s in value if "=" in s]
Comment on lines +486 to +488
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically you can have a label that is just a key, with no value. So we probably should assume anything that doesn't begin with a slash to be a proper label.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ooof, good point. Pushes us towards @mdellweg 's comment above, to have a remove_q=... construct.

if labels_specified:
value = set(value) - set(labels_specified)

# Anything remaining means "content HREFs, pass UUIDs"
if value:
for url in value:
remove_content_units[extract_pk(url)] = url
content_units_pks = set(remove_content_units.keys())
existing_content_units = models.Content.objects.filter(pk__in=content_units_pks)
raise_for_unknown_content_units(existing_content_units, remove_content_units)
return list(remove_content_units.keys())

return list(remove_content_units.keys()) + labels_specified

class Meta:
model = models.RepositoryVersion
Expand Down
18 changes: 18 additions & 0 deletions pulpcore/app/tasks/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,24 @@ def add_and_remove(repository_pk, add_content_units, remove_content_units, base_
remove_content_units = latest.content.values_list("pk", flat=True)
else:
remove_content_units = []
else:
# Deal with labels in remove_content_units
# If there are none - then we can "just use" remove_content_units as-is
labels_specified = [s for s in remove_content_units if "=" in s] # Ew.
if labels_specified:
# "x=y" can happen multiple times, and means "all content with label x=y, pass UUIDS"
latest = repository.latest_version()
if latest:
# First, remove labels from set-to-be-removed, r_c_u is now a set
remove_content_units = set(remove_content_units) - set(labels_specified)
# For each label, find all content w/ that label and add to be-removed
for label in labels_specified:
kv = label.split("=")
labeled_content_pks = latest.content.filter(
**{f"pulp_labels__{kv[0]}": kv[1]}
).values_list("pk", flat=True)
remove_content_units.update(labeled_content_pks)
remove_content_units = list(remove_content_units)

with repository.new_version(base_version=base_version) as new_version:
new_version.remove_content(models.Content.objects.filter(pk__in=remove_content_units))
Expand Down
12 changes: 10 additions & 2 deletions pulpcore/app/viewsets/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
SigningServiceSerializer,
)
from pulpcore.app.util import get_viewset_for_model
from pulpcore.app.viewsets.base import NamedModelViewSet
from pulpcore.app.viewsets.base import NamedModelViewSet, LabelsMixin
from pulpcore.app.viewsets.custom_filters import LabelFilter

from .custom_filters import (
ArtifactRepositoryVersionFilter,
Expand Down Expand Up @@ -127,6 +128,8 @@ class ContentFilter(BaseFilterSet):
orphaned_for:
Return Content which has been orphaned for a given number of minutes;
-1 uses ORPHAN_PROTECTION_TIME value.
pulp_label_select:
Return Content which has has the specified label
"""

repository_version = ContentRepositoryVersionFilter()
Expand All @@ -135,6 +138,7 @@ class ContentFilter(BaseFilterSet):
orphaned_for = OrphanedFilter(
help_text="Minutes Content has been orphaned for. -1 uses ORPHAN_PROTECTION_TIME."
)
pulp_label_select = LabelFilter()


class BaseContentViewSet(NamedModelViewSet):
Expand Down Expand Up @@ -199,7 +203,11 @@ def routable(cls):


class ContentViewSet(
BaseContentViewSet, mixins.CreateModelMixin, mixins.RetrieveModelMixin, mixins.ListModelMixin
BaseContentViewSet,
mixins.CreateModelMixin,
mixins.RetrieveModelMixin,
mixins.ListModelMixin,
LabelsMixin,
):
"""
Content viewset that supports POST and GET by default.
Expand Down
1 change: 1 addition & 0 deletions pulpcore/plugin/importexport.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,5 @@ class Meta:
"content",
"content_ptr",
"timestamp_of_interest",
"pulp_labels",
)
Loading