Skip to content

Commit

Permalink
feat(cgrants): adding command to import contributions made on the all…
Browse files Browse the repository at this point in the history
…o protocol
  • Loading branch information
nutrina committed Jul 24, 2023
1 parent 838a6d3 commit c49787f
Show file tree
Hide file tree
Showing 11 changed files with 623 additions and 343 deletions.
2 changes: 2 additions & 0 deletions api/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ django-structlog = "*"
django-ipware = "*"
django-debug-toolbar = "*"
django-filter = "*"
boto3 = "*"
tqdm = "*"

[dev-packages]
black = "*"
Expand Down
683 changes: 359 additions & 324 deletions api/Pipfile.lock

Large diffs are not rendered by default.

25 changes: 17 additions & 8 deletions api/cgrants/admin.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from django.contrib import admin
from scorer.scorer_admin import ScorerModelAdmin

from .models import (
Contribution,
Expand All @@ -7,57 +8,65 @@
GrantCLRCalculation,
GrantContributionIndex,
Profile,
ProtocolContributions,
SquelchProfile,
Subscription,
)


@admin.register(Profile)
class ProfileAdmin(ScorerModelAdmin):
    """Admin list view for `Profile`: shows and searches by `handle`."""

    list_display = ("handle",)
    search_fields = ("handle",)


@admin.register(Grant)
class GrantAdmin(ScorerModelAdmin):
    """Admin list view for `Grant`.

    Lists the admin profile together with the `hidden`, `active` and
    `is_clr_eligible` flags, filters on those flags and searches by the
    handle of the admin profile.
    """

    list_display = ("admin_profile", "hidden", "active", "is_clr_eligible")
    list_filter = ("hidden", "active", "is_clr_eligible")
    search_fields = ("admin_profile__handle",)


@admin.register(Subscription)
class SubscriptionAdmin(ScorerModelAdmin):
    """Admin list view for `Subscription`.

    Lists the grant and the contributor profile; searchable by the handle of
    the grant's admin profile or of the contributor profile.
    """

    list_display = ("grant", "contributor_profile")
    search_fields = ("grant__admin_profile__handle", "contributor_profile__handle")


@admin.register(Contribution)
class ContributionAdmin(ScorerModelAdmin):
    """Admin list view for `Contribution`: shows the related subscription."""

    list_display = ("subscription",)


@admin.register(GrantCLR)
class GrantCLRAdmin(ScorerModelAdmin):
    """Admin list view for `GrantCLR`: shows and filters by `type`."""

    list_display = ("type",)
    list_filter = ("type",)


@admin.register(GrantCLRCalculation)
class GrantCLRCalculationAdmin(ScorerModelAdmin):
    """Admin list view for `GrantCLRCalculation`.

    Lists the `active` / `latest` flags with the related grant and grantclr,
    filters on the two flags and searches by the grant admin's handle or the
    grantclr type.
    """

    list_display = ("active", "latest", "grant", "grantclr")
    list_filter = ("active", "latest")
    search_fields = ("grant__admin_profile__handle", "grantclr__type")


@admin.register(SquelchProfile)
class SquelchProfileAdmin(ScorerModelAdmin):
    """Admin list view for `SquelchProfile`.

    Lists the profile and its `active` flag, filters on `active` and searches
    by the profile handle.
    """

    list_display = ("profile", "active")
    list_filter = ("active",)
    search_fields = ("profile__handle",)


@admin.register(GrantContributionIndex)
class GrantContributionIndexAdmin(ScorerModelAdmin):
    """Admin list view for `GrantContributionIndex`.

    Lists profile, contribution, grant, round number and amount; filters on
    `round_num` and searches by the profile handle or the grant admin's handle.
    """

    list_display = ("profile", "contribution", "grant", "round_num", "amount")
    list_filter = ("round_num",)
    search_fields = ("profile__handle", "grant__admin_profile__handle")


@admin.register(ProtocolContributions)
class ProtocolContributionsAdmin(ScorerModelAdmin):
    """Admin list view for `ProtocolContributions`.

    Searchable by contributor, round or project; filterable by round; the
    list shows the external id, round, contributor and amount.
    """

    search_fields = ("contributor", "round", "project")
    list_filter = ("round",)
    list_display = ("ext_id", "round", "contributor", "amount")
148 changes: 148 additions & 0 deletions api/cgrants/management/commands/import_allo_votes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import itertools
import json
from urllib.parse import urlparse

import boto3
from cgrants.models import ProtocolContributions
from django.conf import settings
from django.core.management.base import BaseCommand
from tqdm import tqdm


def iterate_array_in_chunks(arr, chunk_size):
    """Yield consecutive slices of ``arr`` with at most ``chunk_size`` items each.

    ``arr`` must support ``len()`` and slicing; the last slice may be shorter
    than ``chunk_size``.
    """
    yield from (
        arr[offset : offset + chunk_size]
        for offset in range(0, len(arr), chunk_size)
    )


def batch_iterator(iterable, batch_size):
    """Yield lists of up to ``batch_size`` items drawn lazily from ``iterable``.

    Works with any iterable (including one-shot iterators); iteration stops
    as soon as no further items are available.
    """
    source = iter(iterable)

    def take_next_batch():
        return list(itertools.islice(source, batch_size))

    # Two-argument iter() keeps calling the helper until it returns the
    # sentinel, i.e. an empty list once the source is exhausted.
    yield from iter(take_next_batch, [])


def get_prodocol_contribution_for_json(json_data):
    """Build an unsaved ``ProtocolContributions`` instance from one decoded record.

    Reads the ``id``, ``voter``, ``amountUSD``, ``projectId`` and ``roundId``
    keys of ``json_data`` (a missing key raises ``KeyError``) and stores the
    complete record in the ``data`` field.
    """
    field_values = {
        "ext_id": json_data["id"],
        "contributor": json_data["voter"],
        "amount": json_data["amountUSD"],
        "project": json_data["projectId"],
        "round": json_data["roundId"],
        "data": json_data,
    }
    return ProtocolContributions(**field_values)


class Command(BaseCommand):
    """Management command that imports Allo protocol votes from a JSONL file in S3.

    Every line of the input file is decoded as JSON, converted with
    ``get_prodocol_contribution_for_json`` and inserted in batches through
    ``bulk_create(..., ignore_conflicts=True)``.
    """

    help = (
        "This command will import votes and contribution amounts for the Allo protocol."
    )

    def add_arguments(self, parser):
        """Register the required ``--in`` option holding the S3 URI of the input file."""
        parser.add_argument(
            "--in",
            required=True,
            help="""S3 uri for input file, for example 's3://your_bucket_name/your_folder_name/your_file_name.txt'.
            Input file must be in JSONL format (that is 1 JSON record per line).)""",
        )

    def _get_s3_object(self, s3_uri):
        """Fetch the object addressed by ``s3_uri``.

        Returns the ``get_object`` response, or ``None`` after writing an
        error message to stdout when the request fails.
        """
        parsed_uri = urlparse(s3_uri)
        bucket_name = parsed_uri.netloc
        # The whole path is the object key. Splitting it into folder and file
        # (and re-joining them) raised ValueError for keys at the bucket root.
        key = parsed_uri.path.strip("/")

        s3 = boto3.client(
            "s3",
            aws_access_key_id=settings.S3_DATA_AWS_SECRET_KEY_ID,
            aws_secret_access_key=settings.S3_DATA_AWS_SECRET_ACCESS_KEY,
        )

        try:
            return s3.get_object(Bucket=bucket_name, Key=key)
        except Exception as e:
            # Command-level boundary: report any S3 failure and let the caller
            # decide what to do with the missing object.
            self.stdout.write(self.style.ERROR(f"Error reading file from S3: {e}"))
            return None

    def stream_jsonl_from_s3_uri(self, s3_uri):
        """Return the streaming body of the S3 object, or ``None`` on failure."""
        response = self._get_s3_object(s3_uri)
        if response is None:
            return None
        return response["Body"]

    def read_file_contents_from_s3_uri(self, s3_uri):
        """Return the S3 object's content decoded as UTF-8, or ``None`` on failure."""
        response = self._get_s3_object(s3_uri)
        if response is None:
            return None

        try:
            return response["Body"].read().decode("utf-8")
        except Exception as e:
            self.stdout.write(self.style.ERROR(f"Error reading file from S3: {e}"))
            return None

    def handle(self, *args, **options):
        """Stream the JSONL file given by ``--in`` and bulk-insert its records.

        Lines that cannot be decoded or converted are reported and counted;
        the remaining lines are still imported. A final status line states
        whether any record failed.
        """
        s3_uri = options["in"]
        self.stdout.write(f'Input file "{s3_uri}"')

        stream = self.stream_jsonl_from_s3_uri(s3_uri)
        if stream is None:
            # Stop here: nothing was loaded, so the final status line (which
            # would report success because no parse error occurred) must not run.
            self.stdout.write(self.style.ERROR(f"Empty file read from S3: {s3_uri}"))
            return

        self.stdout.write(self.style.SUCCESS("Got stream, processing JSONL"))

        num_errors = 0
        chunk_size = 1000
        # The total size is not known up front, so the bar only shows a running count.
        total_size = None

        try:
            # Process each line of the JSONL file with a progress bar
            with tqdm(
                total=total_size, unit="B", unit_scale=True, desc="Processing JSONL"
            ) as pbar:
                self.stdout.write("reading lines ...")

                for dataset in batch_iterator(stream.iter_lines(), chunk_size):
                    protocol_contributions = []
                    for line in dataset:
                        try:
                            json_data = json.loads(line)
                            protocol_contributions.append(
                                get_prodocol_contribution_for_json(json_data)
                            )
                        except (json.JSONDecodeError, KeyError, TypeError) as e:
                            # KeyError / TypeError: valid JSON that is not a
                            # record with the expected fields. Count it as a
                            # failed record instead of aborting the import.
                            self.stdout.write(
                                self.style.ERROR(f"Error parsing JSON line: '{line}'")
                            )
                            self.stdout.write(self.style.ERROR(f"Error: '{e}'"))
                            num_errors = num_errors + 1

                        # Update the progress bar with the number of bytes read
                        pbar.update(len(line))

                    # ignore_conflicts skips rows that violate a constraint
                    # (e.g. an already imported ext_id) instead of failing the batch.
                    ProtocolContributions.objects.bulk_create(
                        protocol_contributions,
                        ignore_conflicts=True,
                    )
        finally:
            # Release the underlying HTTP connection even if the import fails.
            stream.close()

        if num_errors == 0:
            self.stdout.write(
                self.style.SUCCESS(
                    "JSONL loading status: All records loaded succefully!"
                )
            )
        else:
            self.stdout.write(
                self.style.ERROR(
                    f"JSONL loading status: {num_errors} records failed to parse"
                )
            )
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Django 4.2.3 on 2023-07-20 14:29
# Generated by Django 4.2.3 on 2023-07-24 11:02

import account.models
from django.db import migrations, models
Expand All @@ -22,26 +22,32 @@ class Migration(migrations.Migration):
verbose_name="ID",
),
),
(
"ext_id",
models.CharField(
db_index=True, default="", max_length=66, unique=True
),
),
(
"contributor",
account.models.EthAddressField(
db_index=True, max_length=100, null=True
db_index=True, default="", max_length=100
),
),
(
"round",
account.models.EthAddressField(
db_index=True, max_length=100, null=True
db_index=True, default="", max_length=100
),
),
(
"project",
account.models.EthAddressField(
db_index=True, max_length=100, null=True
db_index=True, default="", max_length=100
),
),
(
"amount_usd",
"amount",
models.DecimalField(
db_index=True,
decimal_places=18,
Expand All @@ -50,6 +56,24 @@ class Migration(migrations.Migration):
max_digits=64,
),
),
(
"data",
models.JSONField(
default=dict,
help_text="Original contribution data in JSON format",
),
),
],
),
migrations.AlterField(
model_name="grantcontributionindex",
name="amount",
field=models.DecimalField(
db_index=True,
decimal_places=18,
default=0,
help_text="The USD amount contributed",
max_digits=64,
),
),
]
23 changes: 17 additions & 6 deletions api/cgrants/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ class GrantContributionIndex(models.Model):
decimal_places=18,
max_digits=64,
db_index=True,
help_text=_("The amount contributed"),
help_text=_("The USD amount contributed"),
)


Expand All @@ -205,14 +205,25 @@ class ProtocolContributions(models.Model):
The data in this table was produced by the allo indexer, see: https://github.com/gitcoinco/allo-indexer
"""

contributor = EthAddressField(null=True, blank=False, max_length=100, db_index=True)
round = EthAddressField(null=True, blank=False, max_length=100, db_index=True)
project = EthAddressField(null=True, blank=False, max_length=100, db_index=True)
amount_usd = models.DecimalField(
ext_id = models.CharField(
null=False, blank=False, max_length=66, db_index=True, unique=True, default=""
)
contributor = EthAddressField(
null=False, blank=False, max_length=100, db_index=True, default=""
)
round = EthAddressField(
null=False, blank=False, max_length=100, db_index=True, default=""
)
project = EthAddressField(
null=False, blank=False, max_length=100, db_index=True, default=""
)
amount = models.DecimalField(
default=0,
decimal_places=18,
max_digits=64,
db_index=True,
help_text=_("The USD amount contributed"),
)
# TODO: add also other fields, like transaction hash, etc.
data = models.JSONField(
help_text=_("Original contribution data in JSON format"), default=dict
)
27 changes: 27 additions & 0 deletions api/scorer/scorer_admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from django.contrib import admin
from django.core.paginator import Paginator
from django.utils.functional import cached_property

# class NoCountPaginator(Paginator):
# @cached_property
# def count(self):
# """Return a fixed number for the total count. We want to avoid slow loading of page ..."""
# from django.apps import apps

# model = apps.get_model("app_name", "model_name")
# return 1000


class ScorerModelAdmin(admin.ModelAdmin):
    """
    Base ModelAdmin for this project's admin classes.

    This extends the default ModelAdmin in django and sets
    `show_full_result_count` to `False`.

    The reasoning here is that having the count slows queries down a lot,
    making the admin list page unusable and issuing a Gateway timeout.
    Also, that count & pagination have no real value. Users should rely on
    the search function, to narrow down the list of results to a small
    enough number.

    NOTE: replacing the paginator with a `NoCountPaginator` is intended as
    well, but is not enabled yet (see the TODO at the end of the class body).
    """

    show_full_result_count = False
    # TODO: holding back on changing the NoCountPaginator for now ...
    # paginator = NoCountPaginator
1 change: 1 addition & 0 deletions api/scorer/settings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
from .gitcoin_passport_weights import *
from .ninja_jwt import *
from .ratelimit import *
from .s3 import *
10 changes: 10 additions & 0 deletions api/scorer/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,16 @@
"handlers": [],
"propagate": False,
},
"urllib3": {
"level": "DEBUG",
"handlers": [],
"propagate": False,
},
"botocore": {
"level": "DEBUG",
"handlers": [],
"propagate": False,
},
},
}

Expand Down
Loading

0 comments on commit c49787f

Please sign in to comment.