feat(cgrants): adding command to import contributions made on the allo protocol
passportxyz · Jul 24, 2023 · c49787f · c49787f
1 parent 838a6d3
commit c49787f
Show file tree

Hide file tree

Showing 11 changed files with 623 additions and 343 deletions.
diff --git a/api/Pipfile b/api/Pipfile
@@ -34,6 +34,8 @@ django-structlog = "*"
 django-ipware = "*"
 django-debug-toolbar = "*"
 django-filter = "*"
+boto3 = "*"
+tqdm = "*"
 
 [dev-packages]
 black = "*"

diff --git a/api/Pipfile.lock b/api/Pipfile.lock
diff --git a/api/cgrants/admin.py b/api/cgrants/admin.py
@@ -1,4 +1,5 @@
 from django.contrib import admin
+from scorer.scorer_admin import ScorerModelAdmin
 
 from .models import (
     Contribution,
@@ -7,57 +8,65 @@
     GrantCLRCalculation,
     GrantContributionIndex,
     Profile,
+    ProtocolContributions,
     SquelchProfile,
     Subscription,
 )
 
 
 @admin.register(Profile)
-class ProfileAdmin(admin.ModelAdmin):
+class ProfileAdmin(ScorerModelAdmin):
     list_display = ("handle",)
     search_fields = ("handle",)
 
 
 @admin.register(Grant)
-class GrantAdmin(admin.ModelAdmin):
+class GrantAdmin(ScorerModelAdmin):
     list_display = ("admin_profile", "hidden", "active", "is_clr_eligible")
     list_filter = ("hidden", "active", "is_clr_eligible")
     search_fields = ("admin_profile__handle",)
 
 
 @admin.register(Subscription)
-class SubscriptionAdmin(admin.ModelAdmin):
+class SubscriptionAdmin(ScorerModelAdmin):
     list_display = ("grant", "contributor_profile")
     search_fields = ("grant__admin_profile__handle", "contributor_profile__handle")
 
 
 @admin.register(Contribution)
-class ContributionAdmin(admin.ModelAdmin):
+class ContributionAdmin(ScorerModelAdmin):
     list_display = ("subscription",)
 
 
 @admin.register(GrantCLR)
-class GrantCLRAdmin(admin.ModelAdmin):
+class GrantCLRAdmin(ScorerModelAdmin):
     list_display = ("type",)
     list_filter = ("type",)
 
 
 @admin.register(GrantCLRCalculation)
-class GrantCLRCalculationAdmin(admin.ModelAdmin):
+class GrantCLRCalculationAdmin(ScorerModelAdmin):
     list_display = ("active", "latest", "grant", "grantclr")
     list_filter = ("active", "latest")
     search_fields = ("grant__admin_profile__handle", "grantclr__type")
 
 
 @admin.register(SquelchProfile)
-class SquelchProfileAdmin(admin.ModelAdmin):
+class SquelchProfileAdmin(ScorerModelAdmin):
     list_display = ("profile", "active")
     list_filter = ("active",)
     search_fields = ("profile__handle",)
 
 
 @admin.register(GrantContributionIndex)
-class GrantContributionIndexAdmin(admin.ModelAdmin):
+class GrantContributionIndexAdmin(ScorerModelAdmin):
     list_display = ("profile", "contribution", "grant", "round_num", "amount")
     list_filter = ("round_num",)
     search_fields = ("profile__handle", "grant__admin_profile__handle")
+
+
+@admin.register(ProtocolContributions)
+class ProtocolContributionsAdmin(ScorerModelAdmin):
+    """Admin list page for contributions imported from the Allo protocol."""
+
+    # Free-text search over the contributor, round and project columns
+    search_fields = ("contributor", "round", "project")
+    # Sidebar filter by round
+    list_filter = ("round",)
+    # Columns shown in the change list
+    list_display = ("ext_id", "round", "contributor", "amount")
diff --git a/api/cgrants/management/commands/import_allo_votes.py b/api/cgrants/management/commands/import_allo_votes.py
@@ -0,0 +1,148 @@
+import itertools
+import json
+from urllib.parse import urlparse
+
+import boto3
+from cgrants.models import ProtocolContributions
+from django.conf import settings
+from django.core.management.base import BaseCommand
+from tqdm import tqdm
+
+
+def iterate_array_in_chunks(arr, chunk_size):
+    """Yield consecutive slices of ``arr`` holding at most ``chunk_size`` items each.
+
+    The final slice is shorter when ``len(arr)`` is not a multiple of
+    ``chunk_size``; an empty ``arr`` yields nothing.
+    """
+    offsets = range(0, len(arr), chunk_size)
+    yield from (arr[start : start + chunk_size] for start in offsets)
+
+
+def batch_iterator(iterable, batch_size):
+    """Yield lists of up to ``batch_size`` items drawn lazily from ``iterable``.
+
+    Works on any iterable (including one-shot iterators); iteration stops at
+    the first empty batch, so an exhausted or empty input yields nothing.
+    """
+    source = iter(iterable)
+    while batch := list(itertools.islice(source, batch_size)):
+        yield batch
+
+
+def get_prodocol_contribution_for_json(json_data):
+    """Build an unsaved ``ProtocolContributions`` instance from one parsed record.
+
+    Reads the ``id``, ``voter``, ``amountUSD``, ``projectId`` and ``roundId``
+    keys of ``json_data`` (a missing key raises ``KeyError``) and stores the
+    complete record in the ``data`` field.
+    """
+    field_values = {
+        "ext_id": json_data["id"],
+        "contributor": json_data["voter"],
+        "amount": json_data["amountUSD"],
+        "project": json_data["projectId"],
+        "round": json_data["roundId"],
+        "data": json_data,
+    }
+    return ProtocolContributions(**field_values)
+
+
+class Command(BaseCommand):
+    """Management command that imports Allo protocol votes from a JSONL file in S3.
+
+    Every input line is one JSON record. Records are converted with
+    ``get_prodocol_contribution_for_json`` and inserted in batches through
+    ``bulk_create(..., ignore_conflicts=True)``.
+    """
+
+    help = (
+        "This command will import votes and contribution amounts for the Allo protocol."
+    )
+
+    def add_arguments(self, parser):
+        """Register the required ``--in`` option holding the S3 URI of the input."""
+        parser.add_argument(
+            "--in",
+            required=True,
+            help="""S3 uri for input file, for example 's3://your_bucket_name/your_folder_name/your_file_name.txt'.
+            Input file must be in JSONL format (that is 1 JSON record per line).)""",
+        )
+
+    def _s3_client_and_location(self, s3_uri):
+        """Return ``(client, bucket_name, key)`` for an ``s3://bucket/key`` URI."""
+        parsed_uri = urlparse(s3_uri)
+        # The object key is the whole path. Splitting it into folder/file and
+        # joining it back raised ValueError for objects stored at the bucket
+        # root (no "/" in the key), so the path is used as-is.
+        key = parsed_uri.path.strip("/")
+        s3 = boto3.client(
+            "s3",
+            aws_access_key_id=settings.S3_DATA_AWS_SECRET_KEY_ID,
+            aws_secret_access_key=settings.S3_DATA_AWS_SECRET_ACCESS_KEY,
+        )
+        return s3, parsed_uri.netloc, key
+
+    def stream_jsonl_from_s3_uri(self, s3_uri):
+        """Return the streaming body of the S3 object at ``s3_uri``.
+
+        Returns ``None`` (after writing the error to stdout) when the object
+        cannot be fetched.
+        """
+        s3, bucket_name, key = self._s3_client_and_location(s3_uri)
+
+        try:
+            response = s3.get_object(Bucket=bucket_name, Key=key)
+            return response["Body"]
+        except Exception as e:
+            self.stdout.write(self.style.ERROR(f"Error reading file from S3: {e}"))
+            return None
+
+    def read_file_contents_from_s3_uri(self, s3_uri):
+        """Return the whole S3 object at ``s3_uri`` decoded as UTF-8 text.
+
+        Returns ``None`` (after writing the error to stdout) when the object
+        cannot be fetched or decoded.
+        """
+        s3, bucket_name, key = self._s3_client_and_location(s3_uri)
+
+        try:
+            response = s3.get_object(Bucket=bucket_name, Key=key)
+            return response["Body"].read().decode("utf-8")
+        except Exception as e:
+            self.stdout.write(self.style.ERROR(f"Error reading file from S3: {e}"))
+            return None
+
+    def _import_lines(self, stream, chunk_size=1000):
+        """Insert the records of ``stream`` in batches; return the number of bad lines."""
+        num_errors = 0
+
+        # The total size is not known while streaming, so the bar has no total
+        with tqdm(
+            total=None, unit="B", unit_scale=True, desc="Processing JSONL"
+        ) as pbar:
+            self.stdout.write("reading lines ...")
+
+            for dataset in batch_iterator(stream.iter_lines(), chunk_size):
+                protocol_contributions = []
+                for line in dataset:
+                    try:
+                        json_data = json.loads(line)
+                        protocol_contributions.append(
+                            get_prodocol_contribution_for_json(json_data)
+                        )
+                    except (ValueError, KeyError, TypeError) as e:
+                        # ValueError covers JSONDecodeError and undecodable bytes;
+                        # KeyError/TypeError cover records that are valid JSON but
+                        # lack a required key or are not JSON objects. One bad
+                        # record must not abort the whole import.
+                        self.stdout.write(
+                            self.style.ERROR(f"Error parsing JSON line: '{line}'")
+                        )
+                        self.stdout.write(self.style.ERROR(f"Error: '{e}'"))
+                        num_errors += 1
+
+                    # Update the progress bar with the number of bytes read
+                    pbar.update(len(line))
+
+                # ignore_conflicts lets a re-run skip rows that already exist
+                ProtocolContributions.objects.bulk_create(
+                    protocol_contributions,
+                    ignore_conflicts=True,
+                )
+
+        return num_errors
+
+    def handle(self, *args, **options):
+        """Import the JSONL file named by ``--in`` and report the outcome on stdout."""
+        s3_uri = options["in"]
+        self.stdout.write(f'Input file "{s3_uri}"')
+
+        stream = self.stream_jsonl_from_s3_uri(s3_uri)
+        if not stream:
+            self.stdout.write(self.style.ERROR(f"Empty file read from S3: {s3_uri}"))
+            # Nothing was imported, so the success summary below must not be printed
+            return
+
+        self.stdout.write(self.style.SUCCESS("Got stream, processing JSONL"))
+        try:
+            num_errors = self._import_lines(stream)
+        finally:
+            # Release the underlying HTTP connection even if the import fails
+            stream.close()
+
+        if num_errors == 0:
+            self.stdout.write(
+                self.style.SUCCESS(
+                    "JSONL loading status: All records loaded succefully!"
+                )
+            )
+        else:
+            self.stdout.write(
+                self.style.ERROR(
+                    f"JSONL loading status: {num_errors} records failed to parse"
+                )
+            )
diff --git a/.../migrations/0005_protocolcontributions.py → ...ns/0005_protocolcontributions_and_more.py b/.../migrations/0005_protocolcontributions.py → ...ns/0005_protocolcontributions_and_more.py
@@ -1,4 +1,4 @@
-# Generated by Django 4.2.3 on 2023-07-20 14:29
+# Generated by Django 4.2.3 on 2023-07-24 11:02
 
 import account.models
 from django.db import migrations, models
@@ -22,26 +22,32 @@ class Migration(migrations.Migration):
                         verbose_name="ID",
                     ),
                 ),
+                (
+                    "ext_id",
+                    models.CharField(
+                        db_index=True, default="", max_length=66, unique=True
+                    ),
+                ),
                 (
                     "contributor",
                     account.models.EthAddressField(
-                        db_index=True, max_length=100, null=True
+                        db_index=True, default="", max_length=100
                     ),
                 ),
                 (
                     "round",
                     account.models.EthAddressField(
-                        db_index=True, max_length=100, null=True
+                        db_index=True, default="", max_length=100
                     ),
                 ),
                 (
                     "project",
                     account.models.EthAddressField(
-                        db_index=True, max_length=100, null=True
+                        db_index=True, default="", max_length=100
                     ),
                 ),
                 (
-                    "amount_usd",
+                    "amount",
                     models.DecimalField(
                         db_index=True,
                         decimal_places=18,
@@ -50,6 +56,24 @@ class Migration(migrations.Migration):
                         max_digits=64,
                     ),
                 ),
+                (
+                    "data",
+                    models.JSONField(
+                        default=dict,
+                        help_text="Original contribution data in JSON format",
+                    ),
+                ),
             ],
         ),
+        migrations.AlterField(
+            model_name="grantcontributionindex",
+            name="amount",
+            field=models.DecimalField(
+                db_index=True,
+                decimal_places=18,
+                default=0,
+                help_text="The USD amount contributed",
+                max_digits=64,
+            ),
+        ),
     ]
diff --git a/api/cgrants/models.py b/api/cgrants/models.py
@@ -195,7 +195,7 @@ class GrantContributionIndex(models.Model):
         decimal_places=18,
         max_digits=64,
         db_index=True,
-        help_text=_("The amount contributed"),
+        help_text=_("The USD amount contributed"),
     )
 
 
@@ -205,14 +205,25 @@ class ProtocolContributions(models.Model):
     The data in this table was produced by the allo indexer, see: https://github.com/gitcoinco/allo-indexer
     """
 
-    contributor = EthAddressField(null=True, blank=False, max_length=100, db_index=True)
-    round = EthAddressField(null=True, blank=False, max_length=100, db_index=True)
-    project = EthAddressField(null=True, blank=False, max_length=100, db_index=True)
-    amount_usd = models.DecimalField(
+    ext_id = models.CharField(
+        null=False, blank=False, max_length=66, db_index=True, unique=True, default=""
+    )
+    contributor = EthAddressField(
+        null=False, blank=False, max_length=100, db_index=True, default=""
+    )
+    round = EthAddressField(
+        null=False, blank=False, max_length=100, db_index=True, default=""
+    )
+    project = EthAddressField(
+        null=False, blank=False, max_length=100, db_index=True, default=""
+    )
+    amount = models.DecimalField(
         default=0,
         decimal_places=18,
         max_digits=64,
         db_index=True,
         help_text=_("The USD amount contributed"),
     )
-    # TODO: add also other fields, like transaction hash, etc.
+    data = models.JSONField(
+        help_text=_("Original contribution data in JSON format"), default=dict
+    )
diff --git a/api/scorer/scorer_admin.py b/api/scorer/scorer_admin.py
@@ -0,0 +1,27 @@
+from django.contrib import admin
+from django.core.paginator import Paginator
+from django.utils.functional import cached_property
+
+# class NoCountPaginator(Paginator):
+#     @cached_property
+#     def count(self):
+#         """Return a fixed number for the total count. We want to avoid slow loading of the page ..."""
+#         from django.apps import apps
+
+#         model = apps.get_model("app_name", "model_name")
+#         return 1000
+
+
+class ScorerModelAdmin(admin.ModelAdmin):
+    """
+    This extends the default ModelAdmin in django and sets `show_full_result_count`
+    to `False`.
+
+    The reasoning here is that having the count slows queries down a lot, making the
+    admin list page unusable and issuing a Gateway timeout.
+    Also, that count & pagination have no real value. Users should rely on the search function,
+    to narrow down the list of results to a small enough number.
+
+    Note: replacing `paginator` with a `NoCountPaginator` is NOT active yet; the
+    assignment is still commented out (see the TODO below).
+    """
+
+    show_full_result_count = False
+    # TODO: holding back on changing the NoCountPaginator for now ...
+    # paginator = NoCountPaginator
diff --git a/api/scorer/settings/__init__.py b/api/scorer/settings/__init__.py
@@ -5,3 +5,4 @@
 from .gitcoin_passport_weights import *
 from .ninja_jwt import *
 from .ratelimit import *
+from .s3 import *
diff --git a/api/scorer/settings/base.py b/api/scorer/settings/base.py
@@ -327,6 +327,16 @@
                 "handlers": [],
                 "propagate": False,
             },
+            "urllib3": {
+                "level": "DEBUG",
+                "handlers": [],
+                "propagate": False,
+            },
+            "botocore": {
+                "level": "DEBUG",
+                "handlers": [],
+                "propagate": False,
+            },
         },
     }