feat(api): weekly stamp data dump (#324)
* feat(api): create scheduled task definition

* chore(api): model updates for data dump

* feat(api): data dump export script

* fix(infra): fix staging deployment of scheduled task

* fix(infra): fix review deployment of scheduled task

* chore(api): update pipfile lock after resolving pipfile merge conflict

* chore(infra): add aws key values

* chore(api): further environment configuration for weekly data dumps

* chore(infra): comment out scheduled task until script is merged

* refactor(api): paginate stamp query and save count and last_export

---------

Co-authored-by: Gerald Iakobinyi-Pich <nutrina9@gmail.com>
schultztimothy and nutrina authored Jul 24, 2023
1 parent 4ab9804 commit 1e6694e
Showing 12 changed files with 254 additions and 79 deletions.
90 changes: 11 additions & 79 deletions api/Pipfile.lock


Empty file.
59 changes: 59 additions & 0 deletions api/ceramic_cache/management/commands/dump_stamp_data.py
@@ -0,0 +1,59 @@
import datetime
import json
import os

import boto3
from ceramic_cache.models import CeramicCache, StampExports
from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.paginator import Paginator
from django.utils import timezone

s3 = boto3.client(
    "s3",
    aws_access_key_id=settings.S3_DATA_AWS_SECRET_KEY_ID,
    aws_secret_access_key=settings.S3_DATA_AWS_SECRET_ACCESS_KEY,
)


class Command(BaseCommand):
    help = "Weekly data dump of new Stamp data since the last dump."

    def handle(self, *args, **options):
        print("Starting dump_stamp_data.py")

        latest_export = StampExports.objects.order_by("-last_export_ts").first()

        if not latest_export:
            print("No previous exports found. Exporting all data.")
            latest_export = StampExports.objects.create(
                last_export_ts=timezone.now() - datetime.timedelta(days=7)
            )

        paginator = Paginator(
            CeramicCache.objects.filter(
                created_at__gt=latest_export.last_export_ts
            ).values_list("stamp", flat=True),
            1000,
        )

        # Generate the dump file name
        file_name = f'stamps_{latest_export.last_export_ts.strftime("%Y%m%d_%H%M%S")}_{timezone.now().strftime("%Y%m%d_%H%M%S")}.jsonl'

        # Write serialized data to the file
        with open(file_name, "w") as f:
            for page in paginator.page_range:
                for stamp in paginator.page(page).object_list:
                    f.write(json.dumps({"stamp": stamp}) + "\n")

        # Upload to S3 bucket
        s3.upload_file(file_name, settings.S3_WEEKLY_BACKUP_BUCKET_NAME, file_name)

        # Delete local file after upload
        os.remove(file_name)

        StampExports.objects.create(
            last_export_ts=timezone.now(), stamp_total=paginator.count
        )

        print(f"Data dump completed and uploaded to S3 as {file_name}")
29 changes: 29 additions & 0 deletions api/ceramic_cache/migrations/0009_stampexports.py
@@ -0,0 +1,29 @@
# Generated by Django 4.2.3 on 2023-07-21 22:00

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ("ceramic_cache", "0008_remove_ceramiccache_deleted_at"),
    ]

    operations = [
        migrations.CreateModel(
            name="StampExports",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("last_export_ts", models.DateTimeField(auto_now_add=True)),
                ("stamp_total", models.IntegerField(default=0)),
            ],
        ),
    ]
18 changes: 18 additions & 0 deletions api/ceramic_cache/migrations/0010_ceramiccache_created_at.py
@@ -0,0 +1,18 @@
# Generated by Django 4.2.3 on 2023-07-21 22:30

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ("ceramic_cache", "0009_stampexports"),
    ]

    operations = [
        migrations.AddField(
            model_name="ceramiccache",
            name="created_at",
            field=models.DateTimeField(blank=True, null=True),
        ),
    ]
18 changes: 18 additions & 0 deletions api/ceramic_cache/migrations/0011_alter_ceramiccache_created_at.py
@@ -0,0 +1,18 @@
# Generated by Django 4.2.3 on 2023-07-21 22:31

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ("ceramic_cache", "0010_ceramiccache_created_at"),
    ]

    operations = [
        migrations.AlterField(
            model_name="ceramiccache",
            name="created_at",
            field=models.DateTimeField(auto_now_add=True, null=True),
        ),
    ]
6 changes: 6 additions & 0 deletions api/ceramic_cache/models.py
@@ -10,6 +10,12 @@ class CeramicCache(models.Model):
        null=False, blank=False, default="", max_length=256, db_index=True
    )
    stamp = models.JSONField(default=dict)
    created_at = models.DateTimeField(auto_now_add=True, blank=True, null=True)

    class Meta:
        unique_together = ["address", "provider"]


class StampExports(models.Model):
    last_export_ts = models.DateTimeField(auto_now_add=True)
    stamp_total = models.IntegerField(default=0)
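
Because `created_at` is introduced as a nullable column (migrations 0010/0011 above), stamps written before this deploy keep `created_at = NULL` and are never matched by the `created_at__gt` filter in `dump_stamp_data`. A quick way to gauge how many rows that affects, as a sketch for a Django shell:

# Sketch: rows predating the created_at column stay NULL and fall
# outside any created_at__gt export window.
from ceramic_cache.models import CeramicCache

legacy = CeramicCache.objects.filter(created_at__isnull=True).count()
print(f"{legacy} stamps have no created_at and will be skipped by the export")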
1 change: 1 addition & 0 deletions api/scorer/settings/s3.py
@@ -5,3 +5,4 @@
# data. See the `import_allo_votes` command for an example.
S3_DATA_AWS_SECRET_KEY_ID = env("S3_DATA_AWS_SECRET_KEY_ID", default=None)
S3_DATA_AWS_SECRET_ACCESS_KEY = env("S3_DATA_AWS_SECRET_ACCESS_KEY", default=None)
S3_WEEKLY_BACKUP_BUCKET_NAME = env("S3_WEEKLY_BACKUP_BUCKET_NAME", default=None)
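
All three settings default to None, so the export only works in environments where the corresponding variables are set. A minimal pre-flight check before running the command locally — the variable names come from the settings above, everything else is illustrative:

import os

# Illustrative check; names mirror api/scorer/settings/s3.py.
required = [
    "S3_DATA_AWS_SECRET_KEY_ID",
    "S3_DATA_AWS_SECRET_ACCESS_KEY",
    "S3_WEEKLY_BACKUP_BUCKET_NAME",
]
missing = [name for name in required if not os.environ.get(name)]
if missing:
    raise SystemExit(f"missing environment variables: {', '.join(missing)}")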
12 changes: 12 additions & 0 deletions infra/prod/index.ts
@@ -384,6 +384,18 @@ const secrets = [
name: "CGRANTS_API_TOKEN",
valueFrom: `${SCORER_SERVER_SSM_ARN}:CGRANTS_API_TOKEN::`,
},
{
name: "S3_DATA_AWS_SECRET_KEY_ID",
valueFrom: `${SCORER_SERVER_SSM_ARN}:S3_DATA_AWS_SECRET_KEY_ID::`,
},
{
name: "S3_DATA_AWS_SECRET_ACCESS_KEY",
valueFrom: `${SCORER_SERVER_SSM_ARN}:S3_DATA_AWS_SECRET_ACCESS_KEY::`,
},
{
name: "S3_WEEKLY_BACKUP_BUCKET_NAME",
valueFrom: `${SCORER_SERVER_SSM_ARN}:S3_WEEKLY_BACKUP_BUCKET_NAME::`,
},
];
const environment = [
{
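
The new `valueFrom` entries resolve the S3 values from the same secret as the existing ones. Despite the `SSM` in the variable name, the `:KEY::` suffix matches the ECS JSON-key convention, where a single secret holds a JSON object of key/value pairs. Assuming that reading, seeding such a secret might look like the sketch below; the secret name and values are placeholders, not from this commit:

import json

import boto3

# Hypothetical: write the JSON-keyed secret that the task definition
# references via `${SCORER_SERVER_SSM_ARN}:KEY::`. SecretId is a placeholder.
sm = boto3.client("secretsmanager")
sm.put_secret_value(
    SecretId="scorer-server",
    SecretString=json.dumps(
        {
            "S3_DATA_AWS_SECRET_KEY_ID": "<access-key-id>",
            "S3_DATA_AWS_SECRET_ACCESS_KEY": "<secret-access-key>",
            "S3_WEEKLY_BACKUP_BUCKET_NAME": "<bucket-name>",
        }
    ),
)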