Skip to content

Commit

Permalink
feat: give users the option to run the json migration asyncly (#495)
Browse files Browse the repository at this point in the history
  • Loading branch information
aqeelat committed Aug 13, 2023
1 parent f2591e4 commit 134ef73
Show file tree
Hide file tree
Showing 9 changed files with 359 additions and 12 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#### Breaking Changes

- feat: Change `LogEntry.change` field type to `JSONField` rather than `TextField`. This change include a migration that may take time to run depending on the number of records on your `LogEntry` table ([#407](https://github.com/jazzband/django-auditlog/pull/407))
- feat: Change `LogEntry.change` field type to `JSONField` rather than `TextField`. This change include a migration that may take time to run depending on the number of records on your `LogEntry` table ([#407](https://github.com/jazzband/django-auditlog/pull/407))([#495](https://github.com/jazzband/django-auditlog/pull/495))
- Python: Drop support for Python 3.7 ([#546](https://github.com/jazzband/django-auditlog/pull/546))

#### Improvements
Expand Down
4 changes: 4 additions & 0 deletions auditlog/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,7 @@ def ready(self):
from auditlog.registry import auditlog

auditlog.register_from_settings()

from auditlog import models

models.changes_func = models._changes_func()
8 changes: 8 additions & 0 deletions auditlog/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,11 @@
settings, "AUDITLOG_CID_HEADER", "x-correlation-id"
)
settings.AUDITLOG_CID_GETTER = getattr(settings, "AUDITLOG_CID_GETTER", None)

# migration
settings.AUDITLOG_TWO_STEP_MIGRATION = getattr(
settings, "AUDITLOG_TWO_STEP_MIGRATION", False
)
settings.AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT = getattr(
settings, "AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT", False
)
114 changes: 114 additions & 0 deletions auditlog/management/commands/auditlogmigratejson.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from math import ceil

from django.conf import settings
from django.core.management.base import BaseCommand

from auditlog.models import LogEntry


class Command(BaseCommand):
help = "Migrates changes from changes_text to json changes."

def add_arguments(self, parser):
parser.add_argument(
"-d",
"--database",
default=None,
help="If provided, the script will use native db operations. "
"Otherwise, it will use LogEntry.objects.bulk_create",
dest="db",
type=str,
choices=["postgres", "mysql", "oracle"],
)
parser.add_argument(
"-b",
"--bactch-size",
default=500,
help="Split the migration into multiple batches. If 0, then no batching will be done. "
"When passing a -d/database, the batch value will be ignored.",
dest="batch_size",
type=int,
)

def handle(self, *args, **options):
database = options["db"]
batch_size = options["batch_size"]

if not self.check_logs():
return

if database:
result = self.migrate_using_sql(database)
self.stdout.write(
f"Updated {result} records using native database operations."
)
else:
result = self.migrate_using_django(batch_size)
self.stdout.write(f"Updated {result} records using django operations.")

self.check_logs()

def check_logs(self):
count = self.get_logs().count()
if count:
self.stdout.write(f"There are {count} records that needs migration.")
return True

self.stdout.write("All records are have been migrated.")
if settings.AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT:
self.stdout.write(
"You can now set AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT to False."
)

return False

def get_logs(self):
return LogEntry.objects.filter(
changes_text__isnull=False, changes__isnull=True
).exclude(changes_text__exact="")

def migrate_using_django(self, batch_size):
def _apply_django_migration(_logs) -> int:
import json

updated = []
for log in _logs:
try:
log.changes = json.loads(log.changes_text)
except ValueError:
self.stderr.write(
f"ValueError was raised while migrating the log with id {log.id}."
)
else:
updated.append(log)

LogEntry.objects.bulk_update(updated, fields=["changes"])
return len(updated)

logs = self.get_logs()

if not batch_size:
return _apply_django_migration(logs)

total_updated = 0
for _ in range(ceil(logs.count() / batch_size)):
total_updated += _apply_django_migration(self.get_logs()[:batch_size])
return total_updated

def migrate_using_sql(self, database):
from django.db import connection

def postgres():
with connection.cursor() as cursor:
cursor.execute(
'UPDATE auditlog_logentry SET changes="changes_text"::jsonb'
)
return cursor.cursor.rowcount

if database == "postgres":
return postgres()
else:
self.stderr.write(
"Not yet implemented. Run this management command without passing a -d/--database argument."
)
return 0
35 changes: 30 additions & 5 deletions auditlog/migrations/0015_alter_logentry_changes.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,42 @@
# Generated by Django 4.0 on 2022-08-04 15:41
from typing import List

from django.conf import settings
from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
("auditlog", "0014_logentry_cid"),
]
def two_step_migrations() -> List:
if settings.AUDITLOG_TWO_STEP_MIGRATION:
return [
migrations.RenameField(
model_name="logentry",
old_name="changes",
new_name="changes_text",
),
migrations.AddField(
model_name="logentry",
name="changes",
field=models.JSONField(null=True, verbose_name="change message"),
),
]

operations = [
return [
migrations.AddField(
model_name="logentry",
name="changes_text",
field=models.TextField(blank=True, verbose_name="text change message"),
),
migrations.AlterField(
model_name="logentry",
name="changes",
field=models.JSONField(null=True, verbose_name="change message"),
),
]


class Migration(migrations.Migration):
dependencies = [
("auditlog", "0014_logentry_cid"),
]

operations = [*two_step_migrations()]
35 changes: 29 additions & 6 deletions auditlog/models.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import ast
import contextlib
import json
from copy import deepcopy
from datetime import timezone
from typing import Any, Dict, List, Union
from typing import Any, Callable, Dict, List, Union

from dateutil import parser
from dateutil.tz import gettz
Expand Down Expand Up @@ -66,7 +67,7 @@ def log_create(self, instance, force_log: bool = False, **kwargs):
kwargs.setdefault("additional_data", get_additional_data())

# Delete log entries with the same pk as a newly created model.
# This should only be necessary when an pk is used twice.
# This should only be necessary when a pk is used twice.
if kwargs.get("action", None) is LogEntry.Action.CREATE:
if (
kwargs.get("object_id", None) is not None
Expand Down Expand Up @@ -225,7 +226,7 @@ def _get_pk_value(self, instance):
pk_field = instance._meta.pk.name
pk = getattr(instance, pk_field, None)

# Check to make sure that we got an pk not a model object.
# Check to make sure that we got a pk not a model object.
if isinstance(pk, models.Model):
pk = self._get_pk_value(pk)
return pk
Expand Down Expand Up @@ -358,6 +359,7 @@ class Action:
action = models.PositiveSmallIntegerField(
choices=Action.choices, verbose_name=_("action"), db_index=True
)
changes_text = models.TextField(blank=True, verbose_name=_("change message"))
changes = models.JSONField(null=True, verbose_name=_("change message"))
actor = models.ForeignKey(
to=settings.AUTH_USER_MODEL,
Expand Down Expand Up @@ -411,7 +413,7 @@ def changes_dict(self):
"""
:return: The changes recorded in this log entry as a dictionary object.
"""
return self.changes or {}
return changes_func(self)

@property
def changes_str(self, colon=": ", arrow=" \u2192 ", separator="; "):
Expand Down Expand Up @@ -459,7 +461,7 @@ def changes_display_dict(self):
changes_display_dict[field_name] = values
continue
values_display = []
# handle choices fields and Postgres ArrayField to get human readable version
# handle choices fields and Postgres ArrayField to get human-readable version
choices_dict = None
if getattr(field, "choices", []):
choices_dict = dict(field.choices)
Expand Down Expand Up @@ -546,7 +548,7 @@ class AuditlogHistoryField(GenericRelation):
A subclass of py:class:`django.contrib.contenttypes.fields.GenericRelation` that sets some default
variables. This makes it easier to access Auditlog's log entries, for example in templates.
By default this field will assume that your primary keys are numeric, simply because this is the most
By default, this field will assume that your primary keys are numeric, simply because this is the most
common case. However, if you have a non-integer primary key, you can simply pass ``pk_indexable=False``
to the constructor, and Auditlog will fall back to using a non-indexed text based field for this model.
Expand Down Expand Up @@ -585,3 +587,24 @@ def bulk_related_objects(self, objs, using=DEFAULT_DB_ALIAS):
# method. However, because we don't want to delete these related
# objects, we simply return an empty list.
return []


# should I add a signal receiver for setting_changed?
changes_func = None


def _changes_func() -> Callable[[LogEntry], Dict]:
def json_then_text(instance: LogEntry) -> Dict:
if instance.changes:
return instance.changes
elif instance.changes_text:
with contextlib.suppress(ValueError):
return json.loads(instance.changes_text)
return {}

def default(instance: LogEntry) -> Dict:
return instance.changes or {}

if settings.AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT:
return json_then_text
return default
Loading

0 comments on commit 134ef73

Please sign in to comment.