Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: hydrater la table du dernier évènement connu pour un email à partir des évènements passés #896

Open
wants to merge 1 commit into
base: 893-enregistrer-le-dernier-evenement-pour-un-email-dans-emaillastseen
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions lacommunaute/users/management/commands/populate_emaillastseen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import sys

from django.core.management.base import BaseCommand
from django.db.models import Value

from lacommunaute.event.models import Event
from lacommunaute.forum.models import ForumRating
from lacommunaute.forum_conversation.models import Post
from lacommunaute.forum_upvote.models import UpVote
from lacommunaute.surveys.models import DSP
from lacommunaute.users.enums import EmailLastSeenKind
from lacommunaute.users.models import EmailLastSeen, User


def collect_users_logged_in():
    """Return ``(email, last_login, kind)`` tuples for users that have logged in.

    Users whose ``last_login`` is ``None`` are excluded. Every tuple carries
    ``EmailLastSeenKind.LOGGED`` as its third item.
    """
    logged_users = User.objects.exclude(last_login=None)
    tagged = logged_users.annotate(kind=Value(EmailLastSeenKind.LOGGED))
    return list(tagged.values_list("email", "last_login", "kind"))


def collect_event():
    """Return ``(poster email, created, kind)`` tuples, one per ``Event`` row.

    Every tuple carries ``EmailLastSeenKind.EVENT`` as its third item.
    """
    tagged = Event.objects.all().annotate(kind=Value(EmailLastSeenKind.EVENT))
    rows = tagged.values_list("poster__email", "created", "kind")
    return list(rows)


def collect_DSP():
    """Return ``(user email, created, kind)`` tuples, one per ``DSP`` row.

    Every tuple carries ``EmailLastSeenKind.DSP`` as its third item.
    """
    tagged = DSP.objects.all().annotate(kind=Value(EmailLastSeenKind.DSP))
    rows = tagged.values_list("user__email", "created", "kind")
    return list(rows)


def collect_upvote():
    """Return ``(voter email, created_at, kind)`` tuples for upvotes that have a voter.

    Upvotes whose ``voter`` is ``None`` are excluded. Every tuple carries
    ``EmailLastSeenKind.UPVOTE`` as its third item.
    """
    with_voter = UpVote.objects.exclude(voter=None)
    tagged = with_voter.annotate(kind=Value(EmailLastSeenKind.UPVOTE))
    return list(tagged.values_list("voter__email", "created_at", "kind"))


def collect_forum_rating():
    """Return ``(user email, created, kind)`` tuples for forum ratings that have a user.

    Ratings whose ``user`` is ``None`` are excluded. Every tuple carries
    ``EmailLastSeenKind.FORUM_RATING`` as its third item.
    """
    with_user = ForumRating.objects.exclude(user=None)
    tagged = with_user.annotate(kind=Value(EmailLastSeenKind.FORUM_RATING))
    return list(tagged.values_list("user__email", "created", "kind"))


def collect_post():
    """Return ``(identifier, created, kind)`` tuples for every post.

    Posts with a poster contribute the poster's email; posts without one
    contribute their ``username`` field instead (presumably the address typed
    by the anonymous author -- verify against the ``Post`` model). The result
    lists posts with a poster first, then posts without one. Every tuple
    carries ``EmailLastSeenKind.POST`` as its third item.
    """
    post_kind = Value(EmailLastSeenKind.POST)

    authenticated_rows = list(
        Post.objects.exclude(poster=None).annotate(kind=post_kind).values_list("poster__email", "created", "kind")
    )
    anonymous_rows = list(
        Post.objects.filter(poster=None).annotate(kind=post_kind).values_list("username", "created", "kind")
    )
    return authenticated_rows + anonymous_rows


def collect_clicked_notifs():
    """Placeholder collector for clicked notifications.

    Writes a "pending" notice to standard output and returns an empty list.
    """
    # TODO VincentPorte: pending #891
    nothing_collected = []
    sys.stdout.write("collect_clicked_notifs: pending #891\n")
    return nothing_collected


def deduplicate(last_seen):
    """Keep only the most recent ``(email, timestamp, kind)`` tuple for each email.

    Args:
        last_seen: iterable of tuples whose first item is an email and whose
            second item is a comparable timestamp.

    Returns:
        A dict mapping each email to the tuple with the greatest timestamp.
        When two tuples for the same email share a timestamp, the one that
        appears later in ``last_seen`` wins. Keys are in ascending email order.

    Tuples whose email is missing (``None`` or empty) are skipped: they cannot
    identify anyone, and a ``None`` email cannot be ordered against string
    emails.
    """
    latest = {}
    for entry in last_seen:
        email, seen_at = entry[0], entry[1]
        if not email:
            continue
        current = latest.get(email)
        # ">=" so that, on equal timestamps, the later input entry replaces the earlier one.
        if current is None or seen_at >= current[1]:
            latest[email] = entry
    # Only the unique emails are sorted, to keep a deterministic key order.
    return {email: latest[email] for email in sorted(latest)}


def remove_known_last_seen(dedup_last_seen_dict):
    """Drop the entries whose email already has an ``EmailLastSeen`` row.

    Args:
        dedup_last_seen_dict: dict keyed by email.

    Returns:
        A new dict holding only the items whose key is not an email already
        stored in ``EmailLastSeen``. The input dict is not modified.
    """
    # Materialise the known emails once into a set: testing membership against
    # the queryset itself would scan all fetched rows for every key.
    known_emails = set(EmailLastSeen.objects.values_list("email", flat=True))
    return {email: entry for email, entry in dedup_last_seen_dict.items() if email not in known_emails}


def insert_last_seen(dedup_last_seen_dict):
    """Bulk-create one ``EmailLastSeen`` row per entry of the given dict.

    Args:
        dedup_last_seen_dict: dict mapping an email to a tuple whose item 1 is
            the last-seen timestamp and item 2 the last-seen kind.

    Returns:
        Whatever ``EmailLastSeen.objects.bulk_create`` returns for the new rows.
    """
    rows = []
    for email, entry in dedup_last_seen_dict.items():
        rows.append(EmailLastSeen(email=email, last_seen_at=entry[1], last_seen_kind=entry[2]))
    # Rows are sent to the database in batches of 1000.
    return EmailLastSeen.objects.bulk_create(rows, batch_size=1000)


class Command(BaseCommand):
    """Backfill ``EmailLastSeen`` from past activity.

    Collects ``(email, timestamp, kind)`` tuples from each source, keeps the
    most recent one per email, discards emails already present in
    ``EmailLastSeen`` and bulk-inserts the rest.
    """

    help = "hydratation de la table EmailLastSeen avec la date de dernière visite des emails connus"

    def handle(self, *args, **options):
        """Run the collect / deduplicate / filter / insert pipeline, reporting each step.

        Progress goes through ``self.stdout`` so that callers of
        ``call_command(..., stdout=...)`` can capture it.
        """
        collectors = (
            ("users logged in", collect_users_logged_in),
            ("events", collect_event),
            ("DSP", collect_DSP),
            ("UpVotes", collect_upvote),
            ("forum ratings", collect_forum_rating),
            ("posts", collect_post),
            ("clicked notifications", collect_clicked_notifs),
        )

        last_seen = []
        for label, collect in collectors:
            last_seen += collect()
            # The figure reported is the running total, not the per-source count.
            self.stdout.write(f"{label}: collected {len(last_seen)}\n")

        dedup_last_seen_dict = deduplicate(last_seen)
        self.stdout.write(f"deduplication: {len(dedup_last_seen_dict)}\n")

        dedup_last_seen_dict = remove_known_last_seen(dedup_last_seen_dict)
        self.stdout.write(f"remove known last seen: {len(dedup_last_seen_dict)}\n")

        res = insert_last_seen(dedup_last_seen_dict)
        self.stdout.write(f"insert last seen: {len(res)}\n")

        self.stdout.write("that's all folks!\n")
        self.stdout.flush()
Comment on lines +94 to +125
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

C’est violent niveau mémoire de tout charger, mais vu la taille de la commu 🤷

J’aurais plutôt utilisé l’identifiant utilisateur comme clé d’un dict[username, namedtuple(last_seen, kind)] et itéré sur les éléments petit à petit.

144 changes: 144 additions & 0 deletions lacommunaute/users/tests/tests_management_commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from datetime import datetime

from django.core.management import call_command
from django.utils import timezone

from lacommunaute.event.factories import EventFactory
from lacommunaute.forum.factories import ForumFactory, ForumRatingFactory
from lacommunaute.forum_conversation.factories import AnonymousTopicFactory, TopicFactory
from lacommunaute.forum_upvote.factories import UpVoteFactory
from lacommunaute.surveys.factories import DSPFactory
from lacommunaute.users.enums import EmailLastSeenKind
from lacommunaute.users.factories import EmailLastSeenFactory, UserFactory
from lacommunaute.users.management.commands.populate_emaillastseen import (
collect_DSP,
collect_event,
collect_forum_rating,
collect_post,
collect_upvote,
collect_users_logged_in,
deduplicate,
insert_last_seen,
remove_known_last_seen,
)
from lacommunaute.users.models import EmailLastSeen


def test_collect_users_logged_in(db):
    """Only the user with a ``last_login`` is collected, tagged ``LOGGED``."""
    login_date = timezone.make_aware(datetime(2024, 10, 22))
    logged_user = UserFactory(last_login=login_date)
    UserFactory(last_login=None)

    expected_row = (logged_user.email, logged_user.last_login, EmailLastSeenKind.LOGGED)
    assert collect_users_logged_in() == [expected_row]


def test_collect_event(db):
    """An event is collected with its poster's email, creation date and ``EVENT`` kind."""
    event = EventFactory()
    expected_row = (event.poster.email, event.created, EmailLastSeenKind.EVENT)
    assert collect_event() == [expected_row]


def test_collect_DSP(db):
    """A DSP is collected with its user's email, creation date and ``DSP`` kind."""
    dsp = DSPFactory()
    # Compare against the enum member, as the sibling tests do, rather than a raw string.
    assert collect_DSP() == [(dsp.user.email, dsp.created, EmailLastSeenKind.DSP)]


def test_upvote(db):
    """An upvote is collected with its voter's email, ``created_at`` and ``UPVOTE`` kind."""
    upvote = UpVoteFactory(content_object=ForumFactory(), voter=UserFactory())
    expected_row = (upvote.voter.email, upvote.created_at, EmailLastSeenKind.UPVOTE)
    assert collect_upvote() == [expected_row]


def test_forum_rating(db):
    """Only the rating that has a user is collected, tagged ``FORUM_RATING``."""
    ForumRatingFactory(user=None)
    forum_rating = ForumRatingFactory(user=UserFactory())

    expected_row = (forum_rating.user.email, forum_rating.created, EmailLastSeenKind.FORUM_RATING)
    assert collect_forum_rating() == [expected_row]


def test_collect_post(db):
    """Posts with a poster come first (by email), then anonymous posts (by username)."""
    topic = TopicFactory(with_post=True)
    anonymous_topic = AnonymousTopicFactory(with_post=True)

    authenticated_post = topic.first_post
    anonymous_post = anonymous_topic.first_post
    expected_rows = [
        (authenticated_post.poster.email, authenticated_post.created, EmailLastSeenKind.POST),
        (anonymous_post.username, anonymous_post.created, EmailLastSeenKind.POST),
    ]

    assert collect_post() == expected_rows


def test_collect_clicked_notifs():
    """Placeholder that fails unconditionally until the collector is implemented."""
    # TODO VincentPorte: pending #891
    assert False


def test_deduplicate():
    """Each email keeps a single tuple: the one generated last for that email."""
    emails = ["toby@roberts.com", "adam@ondra.com", "jakob@schubert.com"]

    # One tuple per (email, kind) pair, timestamped in generation order.
    last_seen = []
    for email in emails:
        for kind in EmailLastSeenKind.values:
            last_seen.append((email, timezone.now(), kind))

    deduplicated = deduplicate(last_seen)

    # NOTE(review): expecting LOGGED presumably relies on it being the last
    # entry of EmailLastSeenKind.values -- confirm against the enum.
    for email in set(emails):
        kept = deduplicated[email]
        assert kept[0] == email
        assert kept[2] == EmailLastSeenKind.LOGGED


def test_remove_known_last_seen(db):
    """An email that already has an ``EmailLastSeen`` row is filtered out; others are kept."""
    unknown_email, known_email = "oriane@bertone.com", "catherine@destivelle.com"
    EmailLastSeenFactory(email=known_email)

    deduplicated = {}
    for email in (unknown_email, known_email):
        deduplicated[email] = (email, datetime(2024, 10, 22), EmailLastSeenKind.FORUM_RATING)

    output = remove_known_last_seen(deduplicated)

    assert unknown_email in output
    assert known_email not in output


def test_insert_last_seen(db):
    """Every entry of the dict becomes an ``EmailLastSeen`` row with the matching kind."""
    emails = ["brooke@raboutou.com", "natalia@grossman.com"]
    kinds = [EmailLastSeenKind.POST, EmailLastSeenKind.LOGGED]
    # Timezone-aware, like the other datetimes this module writes to the
    # database, so no naive datetime reaches a DateTimeField.
    seen_at = timezone.make_aware(datetime(2024, 10, 22))
    deduplicated = {email: (email, seen_at, kind) for email, kind in zip(emails, kinds)}

    insert_last_seen(deduplicated)

    assert EmailLastSeen.objects.count() == 2
    for email, kind in zip(emails, kinds):
        email_last_seen = EmailLastSeen.objects.get(email=email)
        assert email_last_seen.last_seen_kind == kind


def test_populate_emaillastseen_command(db):
    """End-to-end run: one row per email, with the expected kind for each."""
    user = UserFactory(last_login=timezone.make_aware(datetime(2024, 10, 22)))
    event = EventFactory()
    dsp = DSPFactory()
    upvote = UpVoteFactory(content_object=ForumFactory(), voter=UserFactory())
    forum_rating = ForumRatingFactory(user=UserFactory())
    topic = TopicFactory(with_post=True)
    anonymous_topic = AnonymousTopicFactory(with_post=True)
    # TODO VincentPorte: pending #891
    # clicked_notification = NotificationFactory(visited_at=timezone.now())

    # Same email seen through two sources (an event, then a DSP).
    event_for_duplicated = EventFactory()
    DSPFactory(user=event_for_duplicated.poster)

    # Email that already has an EmailLastSeen row before the command runs.
    event_for_known = EventFactory()
    # Start from an empty table (presumably rows may have been created while
    # building the fixtures above -- verify), then register the known email.
    EmailLastSeen.objects.all().delete()
    EmailLastSeenFactory(email=event_for_known.poster.email, last_seen_kind=EmailLastSeenKind.FORUM_RATING)

    call_command("populate_emaillastseen")

    expected_rows = [
        (user.email, EmailLastSeenKind.LOGGED),
        (event.poster.email, EmailLastSeenKind.EVENT),
        (dsp.user.email, EmailLastSeenKind.DSP),
        (upvote.voter.email, EmailLastSeenKind.UPVOTE),
        (forum_rating.user.email, EmailLastSeenKind.FORUM_RATING),
        (topic.first_post.poster.email, EmailLastSeenKind.POST),
        (anonymous_topic.first_post.username, EmailLastSeenKind.POST),
        # TODO VincentPorte: pending #891
        # (clicked_notification.recipient, XXXX),
        (event_for_duplicated.poster.email, EmailLastSeenKind.DSP),
        (event_for_known.poster.email, EmailLastSeenKind.FORUM_RATING),
    ]

    assert EmailLastSeen.objects.count() == 9
    for email, kind in expected_rows:
        assert EmailLastSeen.objects.filter(email=email, last_seen_kind=kind).exists()
Loading