Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Group submission access logs #5113

Merged
merged 15 commits into from
Oct 2, 2024
63 changes: 62 additions & 1 deletion kobo/apps/audit_log/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from django.conf import settings
from django.db import models
from django.db.models import Case, Count, F, Min, Value, When
from django.db.models.functions import Cast, Concat, Trunc
from django.utils import timezone

from kobo.apps.kobo_auth.shortcuts import User
Expand All @@ -10,6 +12,7 @@
from kpi.constants import (
ACCESS_LOG_LOGINAS_AUTH_TYPE,
ACCESS_LOG_SUBMISSION_AUTH_TYPE,
ACCESS_LOG_SUBMISSION_GROUP_AUTH_TYPE,
ACCESS_LOG_UNKNOWN_AUTH_TYPE,
)
from kpi.fields.kpi_uid import UUID_LENGTH
Expand Down Expand Up @@ -124,6 +127,64 @@ def create(self, **kwargs):
**kwargs,
)

def with_group_key(self):
"""
Adds a group key to every access log. Used for grouping submissions.
"""
# add a group key to every access log
return self.annotate(
group_key=Case(
# for submissions, the group key is hour created + user_uid
# this enables us to group submissions by user by hour
When(
metadata__auth_type=ACCESS_LOG_SUBMISSION_AUTH_TYPE,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may need an index on metadata__auth_type.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're not doing any filtering by auth type in this query. It ends up in Postgres as:

SELECT "auth_user"."username",
kpi-1  |        "audit_log_auditlog"."object_id",
kpi-1  |        "audit_log_auditlog"."user_uid",
kpi-1  |        CASE WHEN ("audit_log_auditlog"."metadata" -> 'auth_type') = '"submission"'::jsonb THEN CONCAT(((DATE_TRUNC('hour', "audit_log_auditlog"."date_created" AT TIME ZONE 'UTC'))::varchar)::text, ("audit_log_auditlog"."user_uid")::text)
kpi-1  |             ELSE ("audit_log_auditlog"."id")::varchar
kpi-1  |              END AS "group_key",
kpi-1  |        COUNT("audit_log_auditlog"."id") AS "count",
kpi-1  |        CASE WHEN ("audit_log_auditlog"."metadata" -> 'auth_type') = '"submission"'::jsonb THEN '{"auth_type": "submission-group"}'::jsonb
kpi-1  |             ELSE "audit_log_auditlog"."metadata"
kpi-1  |              END AS "metadata",
kpi-1  |        MIN("audit_log_auditlog"."date_created") AS "date_created"
kpi-1  |   FROM "audit_log_auditlog"
kpi-1  |   LEFT OUTER JOIN "auth_user"
kpi-1  |     ON ("audit_log_auditlog"."user_id" = "auth_user"."id")
kpi-1  |  WHERE ("audit_log_auditlog"."log_type" = 'access' AND "audit_log_auditlog"."user_uid" = 'ubVyXzUy7S9VdR2PgKkbdR')
kpi-1  |  GROUP BY "auth_user"."username",
kpi-1  |           "audit_log_auditlog"."object_id",
kpi-1  |           "audit_log_auditlog"."user_uid",
kpi-1  |           4,
kpi-1  |           6
kpi-1  |  ORDER BY 7 DESC
kpi-1  |  LIMIT 100

I don't think an index will do anything for us.

then=Concat(
# get the time, rounded down to the hour, as a string
Cast(
Trunc('date_created', 'hour'),
output_field=models.CharField(),
),
'user_uid',
),
),
# for everything else, the group key is just the id
# since they won't be grouped
default=Cast('id', output_field=models.CharField()),
)
)

def with_submissions_grouped(self):
    """
    Return a minimal, dict-based view of the logs with submissions collapsed.

    Rows are grouped on the columns passed to values(), including the
    'group_key' annotation from with_group_key(), and each row carries:

    - count: number of logs folded into the row
    - metadata: replaced by a submission-group marker for submissions,
      otherwise the log's own metadata
    - date_created: the earliest date_created within the row
    """
    # Literal metadata reported in place of each submission's own metadata.
    group_metadata = Value(
        {'auth_type': ACCESS_LOG_SUBMISSION_GROUP_AUTH_TYPE},
        models.JSONField(),
    )
    is_submission = When(
        metadata__auth_type=ACCESS_LOG_SUBMISSION_AUTH_TYPE,
        then=group_metadata,
    )

    keyed_logs = self.with_group_key().select_related('user')
    # Listing 'group_key' in values() is what lets submissions be grouped.
    # Fields such as action and log_type are deliberately left out: they
    # are the same for every log here, so dropping them keeps the query
    # smaller and easier to read.
    grouped_rows = keyed_logs.values(
        'user__username', 'object_id', 'user_uid', 'group_key'
    )

    # Keep this as a single annotate() call with the kwargs in this order.
    return grouped_rows.annotate(
        # submissions per group; '1' for logs that are not grouped
        count=Count('pk'),
        # anything that is not a submission keeps its original metadata
        metadata=Case(is_submission, default=F('metadata')),
        # a submission group is dated by its earliest submission
        date_created=Min('date_created'),
    )


class AccessLog(AuditLog):
objects = AccessLogManager()
Expand Down Expand Up @@ -154,7 +215,7 @@ def create_from_request(
)
is_submission = (
request.resolver_match is not None
and request.resolver_match.url_name == 'submissions'
and request.resolver_match.url_name in ['submissions', 'submissions-list']
and request.method == 'POST'
)
# a regular login may have an anonymous user as _cached_user, ignore that
Expand Down
21 changes: 20 additions & 1 deletion kobo/apps/audit_log/serializers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from django.contrib.auth import get_user_model
from rest_framework import serializers

from .models import AuditAction, AuditLog
from kpi.fields import RelativePrefixHyperlinkedRelatedField
from .models import AuditLog


class AuditLogSerializer(serializers.ModelSerializer):
Expand Down Expand Up @@ -45,3 +46,21 @@ def get_date_created(self, audit_log):

def get_username(self, audit_log):
return audit_log.user.username


class AccessLogSerializer(serializers.Serializer):
    """
    Serializer for access-log rows that are accessed by key rather than
    by attribute (see get_date_created).

    NOTE(review): presumably these are the dict rows produced by
    with_submissions_grouped() — confirm against the calling view.
    """

    # Hyperlink to the user, resolved from the 'user__username' value.
    user = RelativePrefixHyperlinkedRelatedField(
        source='user__username',
        read_only=True,
        view_name='user-kpi-detail',
        lookup_url_kwarg='username',
        lookup_field='user__username',
    )
    date_created = serializers.SerializerMethodField()
    username = serializers.CharField(source='user__username')
    metadata = serializers.JSONField()
    user_uid = serializers.CharField()
    count = serializers.IntegerField()

    def get_date_created(self, audit_log):
        """
        Format the row's 'date_created' value as 'YYYY-MM-DDTHH:MM:SSZ'.
        """
        created = audit_log['date_created']
        # The trailing 'Z' is a literal in the format string.
        # NOTE(review): this assumes the value is already in UTC — confirm.
        return created.strftime('%Y-%m-%dT%H:%M:%SZ')
Loading
Loading