diff --git a/CHANGELOG.md b/CHANGELOG.md index 5419869b..73a35692 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ #### Breaking Changes -- feat: Change `LogEntry.change` field type to `JSONField` rather than `TextField`. This change include a migration that may take time to run depending on the number of records on your `LogEntry` table ([#407](https://github.com/jazzband/django-auditlog/pull/407)) +- feat: Change `LogEntry.change` field type to `JSONField` rather than `TextField`. This change include a migration that may take time to run depending on the number of records on your `LogEntry` table ([#407](https://github.com/jazzband/django-auditlog/pull/407))([#495](https://github.com/jazzband/django-auditlog/pull/495)) - Python: Drop support for Python 3.7 ([#546](https://github.com/jazzband/django-auditlog/pull/546)) #### Improvements diff --git a/auditlog/apps.py b/auditlog/apps.py index f6bf3fbd..aaae2768 100644 --- a/auditlog/apps.py +++ b/auditlog/apps.py @@ -11,3 +11,7 @@ def ready(self): from auditlog.registry import auditlog auditlog.register_from_settings() + + from auditlog import models + + models.changes_func = models._changes_func() diff --git a/auditlog/conf.py b/auditlog/conf.py index 2050677a..9046669a 100644 --- a/auditlog/conf.py +++ b/auditlog/conf.py @@ -32,3 +32,11 @@ settings, "AUDITLOG_CID_HEADER", "x-correlation-id" ) settings.AUDITLOG_CID_GETTER = getattr(settings, "AUDITLOG_CID_GETTER", None) + +# migration +settings.AUDITLOG_TWO_STEP_MIGRATION = getattr( + settings, "AUDITLOG_TWO_STEP_MIGRATION", False +) +settings.AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT = getattr( + settings, "AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT", False +) diff --git a/auditlog/management/commands/auditlogmigratejson.py b/auditlog/management/commands/auditlogmigratejson.py new file mode 100644 index 00000000..db27ef6c --- /dev/null +++ b/auditlog/management/commands/auditlogmigratejson.py @@ -0,0 +1,114 @@ +from math import ceil + +from django.conf import settings +from django.core.management.base import BaseCommand + +from auditlog.models import LogEntry + + +class Command(BaseCommand): + help = "Migrates changes from changes_text to json changes." + + def add_arguments(self, parser): + parser.add_argument( + "-d", + "--database", + default=None, + help="If provided, the script will use native db operations. " + "Otherwise, it will use LogEntry.objects.bulk_create", + dest="db", + type=str, + choices=["postgres", "mysql", "oracle"], + ) + parser.add_argument( + "-b", + "--bactch-size", + default=500, + help="Split the migration into multiple batches. If 0, then no batching will be done. " + "When passing a -d/database, the batch value will be ignored.", + dest="batch_size", + type=int, + ) + + def handle(self, *args, **options): + database = options["db"] + batch_size = options["batch_size"] + + if not self.check_logs(): + return + + if database: + result = self.migrate_using_sql(database) + self.stdout.write( + f"Updated {result} records using native database operations." + ) + else: + result = self.migrate_using_django(batch_size) + self.stdout.write(f"Updated {result} records using django operations.") + + self.check_logs() + + def check_logs(self): + count = self.get_logs().count() + if count: + self.stdout.write(f"There are {count} records that needs migration.") + return True + + self.stdout.write("All records are have been migrated.") + if settings.AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT: + self.stdout.write( + "You can now set AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT to False." + ) + + return False + + def get_logs(self): + return LogEntry.objects.filter( + changes_text__isnull=False, changes__isnull=True + ).exclude(changes_text__exact="") + + def migrate_using_django(self, batch_size): + def _apply_django_migration(_logs) -> int: + import json + + updated = [] + for log in _logs: + try: + log.changes = json.loads(log.changes_text) + except ValueError: + self.stderr.write( + f"ValueError was raised while migrating the log with id {log.id}." + ) + else: + updated.append(log) + + LogEntry.objects.bulk_update(updated, fields=["changes"]) + return len(updated) + + logs = self.get_logs() + + if not batch_size: + return _apply_django_migration(logs) + + total_updated = 0 + for _ in range(ceil(logs.count() / batch_size)): + total_updated += _apply_django_migration(self.get_logs()[:batch_size]) + return total_updated + + def migrate_using_sql(self, database): + from django.db import connection + + def postgres(): + with connection.cursor() as cursor: + cursor.execute( + 'UPDATE auditlog_logentry SET changes="changes_text"::jsonb' + ) + return cursor.cursor.rowcount + + if database == "postgres": + return postgres() + else: + self.stderr.write( + "Not yet implemented. Run this management command without passing a -d/--database argument." + ) + return 0 diff --git a/auditlog/migrations/0015_alter_logentry_changes.py b/auditlog/migrations/0015_alter_logentry_changes.py index a2a04dd4..ec511e23 100644 --- a/auditlog/migrations/0015_alter_logentry_changes.py +++ b/auditlog/migrations/0015_alter_logentry_changes.py @@ -1,17 +1,42 @@ # Generated by Django 4.0 on 2022-08-04 15:41 +from typing import List +from django.conf import settings from django.db import migrations, models -class Migration(migrations.Migration): - dependencies = [ - ("auditlog", "0014_logentry_cid"), - ] +def two_step_migrations() -> List: + if settings.AUDITLOG_TWO_STEP_MIGRATION: + return [ + migrations.RenameField( + model_name="logentry", + old_name="changes", + new_name="changes_text", + ), + migrations.AddField( + model_name="logentry", + name="changes", + field=models.JSONField(null=True, verbose_name="change message"), + ), + ] - operations = [ + return [ + migrations.AddField( + model_name="logentry", + name="changes_text", + field=models.TextField(blank=True, verbose_name="text change message"), + ), migrations.AlterField( model_name="logentry", name="changes", field=models.JSONField(null=True, verbose_name="change message"), ), ] + + +class Migration(migrations.Migration): + dependencies = [ + ("auditlog", "0014_logentry_cid"), + ] + + operations = [*two_step_migrations()] diff --git a/auditlog/models.py b/auditlog/models.py index 880d71b6..23ba1443 100644 --- a/auditlog/models.py +++ b/auditlog/models.py @@ -1,8 +1,9 @@ import ast +import contextlib import json from copy import deepcopy from datetime import timezone -from typing import Any, Dict, List, Union +from typing import Any, Callable, Dict, List, Union from dateutil import parser from dateutil.tz import gettz @@ -66,7 +67,7 @@ def log_create(self, instance, force_log: bool = False, **kwargs): kwargs.setdefault("additional_data", get_additional_data()) # Delete log entries with the same pk as a newly created model. - # This should only be necessary when an pk is used twice. + # This should only be necessary when a pk is used twice. if kwargs.get("action", None) is LogEntry.Action.CREATE: if ( kwargs.get("object_id", None) is not None @@ -225,7 +226,7 @@ def _get_pk_value(self, instance): pk_field = instance._meta.pk.name pk = getattr(instance, pk_field, None) - # Check to make sure that we got an pk not a model object. + # Check to make sure that we got a pk not a model object. if isinstance(pk, models.Model): pk = self._get_pk_value(pk) return pk @@ -358,6 +359,7 @@ class Action: action = models.PositiveSmallIntegerField( choices=Action.choices, verbose_name=_("action"), db_index=True ) + changes_text = models.TextField(blank=True, verbose_name=_("change message")) changes = models.JSONField(null=True, verbose_name=_("change message")) actor = models.ForeignKey( to=settings.AUTH_USER_MODEL, @@ -411,7 +413,7 @@ def changes_dict(self): """ :return: The changes recorded in this log entry as a dictionary object. """ - return self.changes or {} + return changes_func(self) @property def changes_str(self, colon=": ", arrow=" \u2192 ", separator="; "): @@ -459,7 +461,7 @@ def changes_display_dict(self): changes_display_dict[field_name] = values continue values_display = [] - # handle choices fields and Postgres ArrayField to get human readable version + # handle choices fields and Postgres ArrayField to get human-readable version choices_dict = None if getattr(field, "choices", []): choices_dict = dict(field.choices) @@ -546,7 +548,7 @@ class AuditlogHistoryField(GenericRelation): A subclass of py:class:`django.contrib.contenttypes.fields.GenericRelation` that sets some default variables. This makes it easier to access Auditlog's log entries, for example in templates. - By default this field will assume that your primary keys are numeric, simply because this is the most + By default, this field will assume that your primary keys are numeric, simply because this is the most common case. However, if you have a non-integer primary key, you can simply pass ``pk_indexable=False`` to the constructor, and Auditlog will fall back to using a non-indexed text based field for this model. @@ -585,3 +587,24 @@ def bulk_related_objects(self, objs, using=DEFAULT_DB_ALIAS): # method. However, because we don't want to delete these related # objects, we simply return an empty list. return [] + + +# should I add a signal receiver for setting_changed? +changes_func = None + + +def _changes_func() -> Callable[[LogEntry], Dict]: + def json_then_text(instance: LogEntry) -> Dict: + if instance.changes: + return instance.changes + elif instance.changes_text: + with contextlib.suppress(ValueError): + return json.loads(instance.changes_text) + return {} + + def default(instance: LogEntry) -> Dict: + return instance.changes or {} + + if settings.AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT: + return json_then_text + return default diff --git a/auditlog_tests/test_two_step_json_migration.py b/auditlog_tests/test_two_step_json_migration.py new file mode 100644 index 00000000..8470cbf7 --- /dev/null +++ b/auditlog_tests/test_two_step_json_migration.py @@ -0,0 +1,151 @@ +import json +from io import StringIO +from unittest.mock import patch + +from django.core.management import call_command +from django.test import TestCase, override_settings + +from auditlog.models import LogEntry +from auditlog_tests.models import SimpleModel + + +class TwoStepMigrationTest(TestCase): + def test_use_text_changes_first(self): + text_obj = '{"field": "changes_text"}' + json_obj = {"field": "changes"} + _params = [ + (True, None, text_obj, {"field": "changes_text"}), + (True, json_obj, text_obj, json_obj), + (True, None, "not json", {}), + (False, json_obj, text_obj, json_obj), + ] + + for setting_value, changes_value, changes_text_value, expected in _params: + with self.subTest(): + entry = LogEntry(changes=changes_value, changes_text=changes_text_value) + with self.settings( + AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT=setting_value + ): + from auditlog import models + + changes_dict = models._changes_func()(entry) + self.assertEqual(changes_dict, expected) + + +class AuditlogMigrateJsonTest(TestCase): + def make_logentry(self): + model = SimpleModel.objects.create(text="I am a simple model.") + log_entry: LogEntry = model.history.first() + log_entry.changes_text = json.dumps(log_entry.changes) + log_entry.changes = None + log_entry.save() + return log_entry + + def call_command(self, *args, **kwargs): + outbuf = StringIO() + errbuf = StringIO() + call_command( + "auditlogmigratejson", *args, stdout=outbuf, stderr=errbuf, **kwargs + ) + return outbuf.getvalue().strip(), errbuf.getvalue().strip() + + def test_nothing_to_migrate(self): + outbuf, errbuf = self.call_command() + + msg = "All records are have been migrated." + self.assertEqual(outbuf, msg) + + @override_settings(AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT=True) + def test_nothing_to_migrate_with_conf_true(self): + outbuf, errbuf = self.call_command() + + msg = ( + "All records are have been migrated.\n" + "You can now set AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT to False." + ) + + self.assertEqual(outbuf, msg) + + def test_using_django(self): + # Arrange + log_entry = self.make_logentry() + + # Act + outbuf, errbuf = self.call_command("-b=0") + log_entry.refresh_from_db() + + # Assert + self.assertEqual(errbuf, "") + self.assertIsNotNone(log_entry.changes) + + def test_using_django_batched(self): + # Arrange + log_entry_1 = self.make_logentry() + log_entry_2 = self.make_logentry() + + # Act + outbuf, errbuf = self.call_command("-b=1") + log_entry_1.refresh_from_db() + log_entry_2.refresh_from_db() + + # Assert + self.assertEqual(errbuf, "") + self.assertIsNotNone(log_entry_1.changes) + self.assertIsNotNone(log_entry_2.changes) + + def test_using_django_batched_call_count(self): + """ + This is split into a different test because I couldn't figure out how to properly patch bulk_update. + For some reason, then I + """ + # Arrange + self.make_logentry() + self.make_logentry() + + # Act + with patch("auditlog.models.LogEntry.objects.bulk_update") as bulk_update: + outbuf, errbuf = self.call_command("-b=1") + call_count = bulk_update.call_count + + # Assert + self.assertEqual(call_count, 2) + + def test_native_postgres(self): + # Arrange + log_entry = self.make_logentry() + + # Act + outbuf, errbuf = self.call_command("-d=postgres") + log_entry.refresh_from_db() + + # Assert + self.assertEqual(errbuf, "") + self.assertIsNotNone(log_entry.changes) + + def test_native_unsupported(self): + # Arrange + log_entry = self.make_logentry() + + # Act + outbuf, errbuf = self.call_command("-d=oracle") + log_entry.refresh_from_db() + + # Assert + msg = "Not yet implemented. Run this management command without passing a -d/--database argument." + self.assertEqual(errbuf, msg) + self.assertIsNone(log_entry.changes) + + def test_using_django_with_error(self): + # Arrange + log_entry = self.make_logentry() + log_entry.changes_text = "not json" + log_entry.save() + + # Act + outbuf, errbuf = self.call_command() + log_entry.refresh_from_db() + + # Assert + msg = f"ValueError was raised while migrating the log with id {log_entry.id}." + self.assertEqual(errbuf, msg) + self.assertIsNone(log_entry.changes) diff --git a/docs/source/index.rst b/docs/source/index.rst index bf58a72d..d4f1a469 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -21,6 +21,7 @@ Contents installation usage + upgrade internals diff --git a/docs/source/upgrade.rst b/docs/source/upgrade.rst new file mode 100644 index 00000000..19410252 --- /dev/null +++ b/docs/source/upgrade.rst @@ -0,0 +1,21 @@ +Upgrading to version 3 +====================== + +Version 3.0.0 introduces breaking changes. Please review the migration guide below before upgrading. +If you're new to django-auditlog, you can ignore this part. + +The major change in the version is that we're finally storing changes as json instead of json-text. +To convert the existing records, this version has a database migration that does just that. +However, this migration will take a long time if you have a huge amount of records, +causing your database and application to be out of sync until the migration is complete. + +To avoid this, follow these steps: + +1. Before upgrading the package, add these two variables to ``settings.py``: + + * ``AUDITLOG_TWO_STEP_MIGRATION = True`` + * ``AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT = True`` + +2. Upgrade the package. Your app will now start storing new records as JSON, but the old records will accessible via ``LogEntry.changes_text``. +3. Use the newly added ``auditlogmigratejson`` command to migrate your records. Run ``django-admin auditlogmigratejson --help`` to get more information. +4. Once all records are migrated, remove the variables listed above, or set their values to ``False``.