From be57c83da0e5693ac857e3f2d68831d68e6b572f Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Fri, 21 Feb 2025 07:56:33 -0800 Subject: [PATCH 1/3] Add regression test and fix bug where license_descriptions were not synced with license_id. --- .../contentcuration/tests/test_sync.py | 38 +++++++++++++++++++ contentcuration/contentcuration/utils/sync.py | 1 + 2 files changed, 39 insertions(+) diff --git a/contentcuration/contentcuration/tests/test_sync.py b/contentcuration/contentcuration/tests/test_sync.py index 3a2ba590c5..923d8ad541 100644 --- a/contentcuration/contentcuration/tests/test_sync.py +++ b/contentcuration/contentcuration/tests/test_sync.py @@ -19,6 +19,7 @@ from contentcuration.models import Channel from contentcuration.models import ContentTag from contentcuration.models import File +from contentcuration.models import License from contentcuration.tests import testdata from contentcuration.tests.base import StudioAPITestCase from contentcuration.tests.viewsets.base import generate_create_event @@ -346,6 +347,43 @@ def test_sync_channel_titles_and_descriptions(self): for key, value in labels.items(): self.assertEqual(getattr(target_child, key), value) + def test_sync_license_description(self): + """ + Test that the license description field is synced correctly + Added as a regression test, as this was previously omitted. + """ + self.assertFalse(self.channel.has_changes()) + self.assertFalse(self.derivative_channel.has_changes()) + + contentnode = ( + self.channel.main_tree.get_descendants() + .exclude(kind_id=content_kinds.TOPIC) + .first() + ) + + special_permissions_license = License.objects.get(license_name="Special Permissions") + + contentnode.license = special_permissions_license + contentnode.license_description = "You cannot use this content on a Thursday" + contentnode.copyright_holder = "Thursday's child has far to go" + contentnode.save() + + sync_channel( + self.derivative_channel, + sync_titles_and_descriptions=False, + sync_resource_details=True, + sync_files=False, + sync_assessment_items=False, + ) + + target_child = self.derivative_channel.main_tree.get_descendants().get( + source_node_id=contentnode.node_id + ) + + self.assertEqual(target_child.license, special_permissions_license) + self.assertEqual(target_child.license_description, "You cannot use this content on a Thursday") + self.assertEqual(target_child.copyright_holder, "Thursday's child has far to go") + def test_sync_channel_other_metadata_labels(self): """ Test that calling sync channel will also bring in other metadata label updates. diff --git a/contentcuration/contentcuration/utils/sync.py b/contentcuration/contentcuration/utils/sync.py index 5b3664002a..1fb2dda566 100644 --- a/contentcuration/contentcuration/utils/sync.py +++ b/contentcuration/contentcuration/utils/sync.py @@ -70,6 +70,7 @@ def sync_node( if sync_resource_details: fields = [ "license_id", + "license_description", "copyright_holder", "author", "extra_fields", From 2f70517496f66ca0b960a00bdea02f01c834b27f Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Fri, 21 Feb 2025 12:05:12 -0800 Subject: [PATCH 2/3] Reinstate source field rectification but only for license description. --- Makefile | 2 +- ..._rectify_source_field_migraiton_command.py | 167 ++++++++++++++++++ ...ify_incorrect_contentnode_source_fields.py | 127 +++++++++++++ 3 files changed, 295 insertions(+), 1 deletion(-) create mode 100644 contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py create mode 100644 contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py diff --git a/Makefile b/Makefile index 051053bab3..619fcee41e 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ migrate: # 4) Remove the management command from this `deploy-migrate` recipe # 5) Repeat! deploy-migrate: - echo "Nothing to do here!" + python contentcuration/manage.py rectify_incorrect_contentnode_source_fields contentnodegc: python contentcuration/manage.py garbage_collect diff --git a/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py b/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py new file mode 100644 index 0000000000..f4643a87ac --- /dev/null +++ b/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py @@ -0,0 +1,167 @@ +# DELETE THIS FILE AFTER RUNNING THE MIGRATIONSSS +import datetime +import uuid + +from django.core.management import call_command +from django.utils import timezone +from le_utils.constants import content_kinds + +from contentcuration.models import Channel +from contentcuration.models import ContentNode +from contentcuration.models import License +from contentcuration.tests import testdata +from contentcuration.tests.base import StudioAPITestCase +from contentcuration.utils.publish import publish_channel + + +class TestRectifyMigrationCommand(StudioAPITestCase): + + @classmethod + def setUpClass(cls): + super(TestRectifyMigrationCommand, cls).setUpClass() + + def tearDown(self): + super(TestRectifyMigrationCommand, self).tearDown() + + def setUp(self): + super(TestRectifyMigrationCommand, self).setUp() + self.original_channel = testdata.channel() + self.license_original = License.objects.get(license_name="Special Permissions") + self.license_description_original = "License to chill" + self.original_contentnode = ContentNode.objects.create( + id=uuid.uuid4().hex, + title="Original Node", + parent=self.original_channel.main_tree, + license=self.license_original, + license_description=self.license_description_original, + original_channel_id=None, + source_channel_id=None, + author="old author" + ) + self.user = testdata.user() + self.original_channel.editors.add(self.user) + self.client.force_authenticate(user=self.user) + + def create_base_channel_and_contentnode(self, source_contentnode, source_channel): + base_channel = testdata.channel() + base_channel.public = True + base_channel.save() + base_node = ContentNode.objects.create( + id=uuid.uuid4().hex, + title="base contentnode", + parent=base_channel.main_tree, + kind_id=content_kinds.VIDEO, + original_channel_id=self.original_channel.id, + original_source_node_id=self.original_contentnode.node_id, + source_channel_id=source_channel.id, + source_node_id=source_contentnode.node_id, + author="source author", + license=self.license_original, + license_description=None, + ) + return base_node, base_channel + + def create_source_channel_and_contentnode(self): + source_channel = testdata.channel() + source_channel.public = True + source_channel.save() + source_node = ContentNode.objects.create( + id=uuid.uuid4().hex, + title="base contentnode", + parent=source_channel.main_tree, + kind_id=content_kinds.VIDEO, + license=self.license_original, + license_description="No chill", + original_channel_id=self.original_channel.id, + source_channel_id=self.original_channel.id, + source_node_id=self.original_contentnode.node_id, + original_source_node_id=self.original_contentnode.node_id, + author="source author", + ) + + return source_node, source_channel + + def run_migrations(self): + call_command('rectify_incorrect_contentnode_source_fields', user_id=self.user.id, is_test=True) + + def test_two_node_case(self): + base_node, base_channel = self.create_base_channel_and_contentnode(self.original_contentnode, self.original_channel) + + publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) + + # main_tree node still has changed=true even after the publish + for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): + node.changed = False + # This should probably again change the changed=true but suprisingly it doesnot + # Meaning the changed boolean doesnot change for the main_tree no matter what we do + # through ContentNode model methods like save. + node.save() + + ContentNode.objects.filter(pk=base_node.pk).update( + modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) + ) + + self.run_migrations() + updated_base_node = ContentNode.objects.get(pk=base_node.pk) + self.assertEqual(updated_base_node.license_description, self.original_contentnode.license_description) + self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) + + def test_three_node_case_implicit(self): + source_node, source_channel = self.create_source_channel_and_contentnode() + base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel) + source_node.aggregator = "Nami" + source_node.save() + # Implicit case + base_node.author = source_node.author + base_node.license = source_node.license + base_node.aggregator = source_node.aggregator + base_node.save() + + publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) + + for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): + node.changed = False + node.save() + + ContentNode.objects.filter(pk=base_node.pk).update( + modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) + ) + + ContentNode.objects.filter(pk=source_node.pk).update( + modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc) + ) + + self.run_migrations() + updated_base_node = ContentNode.objects.get(pk=base_node.pk) + updated_source_node = ContentNode.objects.get(pk=source_node.pk) + self.assertEqual(updated_base_node.license_description, self.original_contentnode.license_description) + self.assertEqual(updated_source_node.license_description, self.original_contentnode.license_description) + self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) + + def test_three_node_case_explicit(self): + source_node, source_channel = self.create_source_channel_and_contentnode() + base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel) + source_node.license_description = "luffy" + base_node.license_description = "zoro" + base_node.save() + source_node.save() + publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) + + for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): + node.changed = False + node.save() + + ContentNode.objects.filter(pk=base_node.pk).update( + modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) + ) + + ContentNode.objects.filter(pk=source_node.pk).update( + modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc) + ) + + self.run_migrations() + updated_base_node = ContentNode.objects.get(pk=base_node.pk) + updated_source_node = ContentNode.objects.get(pk=source_node.pk) + self.assertEqual(updated_base_node.license_description, self.original_contentnode.license_description) + self.assertEqual(updated_source_node.license_description, self.original_contentnode.license_description) + self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py new file mode 100644 index 0000000000..24d41b961e --- /dev/null +++ b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py @@ -0,0 +1,127 @@ +import logging + +from django.core.management.base import BaseCommand +from django.db.models import Exists +from django.db.models import F +from django.db.models import OuterRef +from django.db.models import Value +from django.db.models.functions import Coalesce +from django_cte import With + +from contentcuration.models import Channel +from contentcuration.models import ContentNode +from contentcuration.models import User +from contentcuration.utils.publish import publish_channel + +logger = logging.getLogger(__file__) + + +class Command(BaseCommand): + + def add_arguments(self, parser): + + parser.add_argument( + '--is_test', + action='store_true', + help="Indicate if the command is running in a test environment.", + ) + + parser.add_argument( + '--user_id', + type=int, + help="User ID for the operation", + ) + + def handle(self, *args, **options): + + is_test = options['is_test'] + user_id = options['user_id'] + + if not is_test: + user_id = User.objects.get(email='channeladmin@learningequality.org').pk + + main_trees_cte = With( + ( + Channel.objects.filter( + main_tree__isnull=False + ) + .annotate(channel_id=F("id")) + .values("channel_id", "deleted", tree_id=F("main_tree__tree_id")) + ), + name="main_trees", + ) + + nodes = main_trees_cte.join( + ContentNode.objects.all(), + tree_id=main_trees_cte.col.tree_id, + ).annotate(channel_id=main_trees_cte.col.channel_id, deleted=main_trees_cte.col.deleted) + + original_source_nodes = ( + nodes.with_cte(main_trees_cte) + .filter( + node_id=OuterRef("original_source_node_id"), + ) + .exclude( + tree_id=OuterRef("tree_id"), + ) + .annotate( + coalesced_license_description=Coalesce("license_description", Value("")), + ) + ) + diff = ( + nodes.with_cte(main_trees_cte).filter( + deleted=False, # we dont want the channel to be deleted or else we are fixing ghost nodes + source_node_id__isnull=False, + original_source_node_id__isnull=False, + ) + ).annotate( + coalesced_license_description=Coalesce("license_description", Value("")), + ) + diff_combined = diff.annotate( + original_source_node_f_changed=Exists( + original_source_nodes.exclude( + coalesced_license_description=OuterRef("coalesced_license_description") + ) + ) + ).filter(original_source_node_f_changed=True) + + final_nodes = diff_combined.values( + "id", + "channel_id", + "original_channel_id", + "original_source_node_id", + ).order_by() + + channel_ids_to_republish = set() + + for item in final_nodes: + base_node = ContentNode.objects.get(pk=item["id"]) + + original_source_channel_id = item["original_channel_id"] + original_source_node_id = item["original_source_node_id"] + tree_id = ( + Channel.objects.filter(pk=original_source_channel_id) + .values_list("main_tree__tree_id", flat=True) + .get() + ) + original_source_node = ContentNode.objects.filter( + tree_id=tree_id, node_id=original_source_node_id + ) + + base_channel = Channel.objects.get(pk=item['channel_id']) + + to_be_republished = not (base_channel.main_tree.get_family().filter(changed=True).exists()) + + if original_source_channel_id is not None and original_source_node.exists(): + # original source node exists and its license_description doesn't match + # update the base node + if base_node.license_description != original_source_node[0].license_description: + base_node.license_description = original_source_node[0].license_description + base_node.save() + + if to_be_republished and base_channel.last_published is not None: + channel_ids_to_republish.add(base_channel.id) + + # we would republish the channel + for channel_id in channel_ids_to_republish: + publish_channel(user_id, channel_id) From 6a366e29c73c093850cd113fb8748a65e4e20dc1 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Wed, 26 Feb 2025 07:38:57 -0800 Subject: [PATCH 3/3] Don't republish automatically. --- ..._rectify_source_field_migraiton_command.py | 8 ++--- ...ify_incorrect_contentnode_source_fields.py | 35 ------------------- 2 files changed, 4 insertions(+), 39 deletions(-) diff --git a/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py b/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py index f4643a87ac..96382e25af 100644 --- a/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py +++ b/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py @@ -82,7 +82,7 @@ def create_source_channel_and_contentnode(self): return source_node, source_channel def run_migrations(self): - call_command('rectify_incorrect_contentnode_source_fields', user_id=self.user.id, is_test=True) + call_command('rectify_incorrect_contentnode_source_fields') def test_two_node_case(self): base_node, base_channel = self.create_base_channel_and_contentnode(self.original_contentnode, self.original_channel) @@ -104,7 +104,7 @@ def test_two_node_case(self): self.run_migrations() updated_base_node = ContentNode.objects.get(pk=base_node.pk) self.assertEqual(updated_base_node.license_description, self.original_contentnode.license_description) - self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) + self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), True) def test_three_node_case_implicit(self): source_node, source_channel = self.create_source_channel_and_contentnode() @@ -136,7 +136,7 @@ def test_three_node_case_implicit(self): updated_source_node = ContentNode.objects.get(pk=source_node.pk) self.assertEqual(updated_base_node.license_description, self.original_contentnode.license_description) self.assertEqual(updated_source_node.license_description, self.original_contentnode.license_description) - self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) + self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), True) def test_three_node_case_explicit(self): source_node, source_channel = self.create_source_channel_and_contentnode() @@ -164,4 +164,4 @@ def test_three_node_case_explicit(self): updated_source_node = ContentNode.objects.get(pk=source_node.pk) self.assertEqual(updated_base_node.license_description, self.original_contentnode.license_description) self.assertEqual(updated_source_node.license_description, self.original_contentnode.license_description) - self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) + self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), True) diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py index 24d41b961e..c4d40af4c0 100644 --- a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py +++ b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py @@ -10,36 +10,14 @@ from contentcuration.models import Channel from contentcuration.models import ContentNode -from contentcuration.models import User -from contentcuration.utils.publish import publish_channel logger = logging.getLogger(__file__) class Command(BaseCommand): - def add_arguments(self, parser): - - parser.add_argument( - '--is_test', - action='store_true', - help="Indicate if the command is running in a test environment.", - ) - - parser.add_argument( - '--user_id', - type=int, - help="User ID for the operation", - ) - def handle(self, *args, **options): - is_test = options['is_test'] - user_id = options['user_id'] - - if not is_test: - user_id = User.objects.get(email='channeladmin@learningequality.org').pk - main_trees_cte = With( ( Channel.objects.filter( @@ -92,8 +70,6 @@ def handle(self, *args, **options): "original_source_node_id", ).order_by() - channel_ids_to_republish = set() - for item in final_nodes: base_node = ContentNode.objects.get(pk=item["id"]) @@ -108,20 +84,9 @@ def handle(self, *args, **options): tree_id=tree_id, node_id=original_source_node_id ) - base_channel = Channel.objects.get(pk=item['channel_id']) - - to_be_republished = not (base_channel.main_tree.get_family().filter(changed=True).exists()) - if original_source_channel_id is not None and original_source_node.exists(): # original source node exists and its license_description doesn't match # update the base node if base_node.license_description != original_source_node[0].license_description: base_node.license_description = original_source_node[0].license_description base_node.save() - - if to_be_republished and base_channel.last_published is not None: - channel_ids_to_republish.add(base_channel.id) - - # we would republish the channel - for channel_id in channel_ids_to_republish: - publish_channel(user_id, channel_id)