diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index 7d2be7f11..87ea6f167 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -155,7 +155,7 @@ jobs: with: python-version: 3.11.2 - run: pip install poetry - - run: poetry lock --no-update + - run: poetry lock - run: poetry install - run: poetry run python manage.py collectstatic --noinput - name: Run tests @@ -186,8 +186,8 @@ jobs: docker run -d --name my-container my-app - run: docker exec my-container pip install poetry - - run: docker exec my-container poetry lock --no-update - - run: docker exec my-container poetry install --no-dev --no-interaction + - run: docker exec my-container poetry lock + - run: docker exec my-container poetry install --without dev --no-interaction - name: Clean up run: | diff --git a/Dockerfile b/Dockerfile index 048b84014..80f86acfc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,8 +33,8 @@ RUN ln -s /usr/bin/google-chrome-stable /usr/local/bin/google-chrome RUN pip install poetry RUN poetry config virtualenvs.create false COPY pyproject.toml poetry.lock* ./ -RUN poetry lock --no-update -RUN poetry install +RUN poetry lock +RUN poetry install --no-root # Install additional Python packages RUN pip install opentelemetry-api opentelemetry-instrumentation diff --git a/poetry.lock b/poetry.lock index 414edc6b8..716435368 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -2651,6 +2651,17 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "ndjson" +version = "0.3.1" +description = "JsonDecoder for ndjson" +optional = false +python-versions = "*" +files = [ + {file = "ndjson-0.3.1-py2.py3-none-any.whl", hash = "sha256:839c22275e6baa3040077b83c005ac24199b94973309a8a1809be962c753a410"}, + {file = "ndjson-0.3.1.tar.gz", hash = "sha256:bf9746cb6bb1cb53d172cda7f154c07c786d665ff28341e4e689b796b229e5d6"}, +] + [[package]] name = "nest-asyncio" version = "1.6.0" @@ -3227,6 +3238,7 @@ files = [ {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"}, {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"}, {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"}, @@ -3817,9 +3829,6 @@ files = [ {file = "redis-5.2.1.tar.gz", hash = "sha256:16f2e22dff21d5125e8481515e386711a34cbec50f0e44413dd7d9c060a54e0f"}, ] -[package.dependencies] -async-timeout = {version = ">=4.0.3", markers = "python_full_version < \"3.11.3\""} - [package.extras] hiredis = ["hiredis (>=3.0.0)"] ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==23.2.1)", "requests (>=2.31.0)"] @@ -3996,29 +4005,29 @@ pyasn1 = ">=0.1.3" [[package]] name = "ruff" -version = "0.8.4" +version = "0.8.6" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.8.4-py3-none-linux_armv6l.whl", hash = "sha256:58072f0c06080276804c6a4e21a9045a706584a958e644353603d36ca1eb8a60"}, - {file = "ruff-0.8.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ffb60904651c00a1e0b8df594591770018a0f04587f7deeb3838344fe3adabac"}, - {file = "ruff-0.8.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:6ddf5d654ac0d44389f6bf05cee4caeefc3132a64b58ea46738111d687352296"}, - {file = "ruff-0.8.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e248b1f0fa2749edd3350a2a342b67b43a2627434c059a063418e3d375cfe643"}, - {file = "ruff-0.8.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bf197b98ed86e417412ee3b6c893f44c8864f816451441483253d5ff22c0e81e"}, - {file = "ruff-0.8.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c41319b85faa3aadd4d30cb1cffdd9ac6b89704ff79f7664b853785b48eccdf3"}, - {file = "ruff-0.8.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:9f8402b7c4f96463f135e936d9ab77b65711fcd5d72e5d67597b543bbb43cf3f"}, - {file = "ruff-0.8.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4e56b3baa9c23d324ead112a4fdf20db9a3f8f29eeabff1355114dd96014604"}, - {file = "ruff-0.8.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:736272574e97157f7edbbb43b1d046125fce9e7d8d583d5d65d0c9bf2c15addf"}, - {file = "ruff-0.8.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5fe710ab6061592521f902fca7ebcb9fabd27bc7c57c764298b1c1f15fff720"}, - {file = "ruff-0.8.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:13e9ec6d6b55f6da412d59953d65d66e760d583dd3c1c72bf1f26435b5bfdbae"}, - {file = "ruff-0.8.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:97d9aefef725348ad77d6db98b726cfdb075a40b936c7984088804dfd38268a7"}, - {file = "ruff-0.8.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:ab78e33325a6f5374e04c2ab924a3367d69a0da36f8c9cb6b894a62017506111"}, - {file = "ruff-0.8.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:8ef06f66f4a05c3ddbc9121a8b0cecccd92c5bf3dd43b5472ffe40b8ca10f0f8"}, - {file = "ruff-0.8.4-py3-none-win32.whl", hash = "sha256:552fb6d861320958ca5e15f28b20a3d071aa83b93caee33a87b471f99a6c0835"}, - {file = "ruff-0.8.4-py3-none-win_amd64.whl", hash = "sha256:f21a1143776f8656d7f364bd264a9d60f01b7f52243fbe90e7670c0dfe0cf65d"}, - {file = "ruff-0.8.4-py3-none-win_arm64.whl", hash = "sha256:9183dd615d8df50defa8b1d9a074053891ba39025cf5ae88e8bcb52edcc4bf08"}, - {file = "ruff-0.8.4.tar.gz", hash = "sha256:0d5f89f254836799af1615798caa5f80b7f935d7a670fad66c5007928e57ace8"}, + {file = "ruff-0.8.6-py3-none-linux_armv6l.whl", hash = "sha256:defed167955d42c68b407e8f2e6f56ba52520e790aba4ca707a9c88619e580e3"}, + {file = "ruff-0.8.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:54799ca3d67ae5e0b7a7ac234baa657a9c1784b48ec954a094da7c206e0365b1"}, + {file = "ruff-0.8.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e88b8f6d901477c41559ba540beeb5a671e14cd29ebd5683903572f4b40a9807"}, + {file = "ruff-0.8.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0509e8da430228236a18a677fcdb0c1f102dd26d5520f71f79b094963322ed25"}, + {file = "ruff-0.8.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:91a7ddb221779871cf226100e677b5ea38c2d54e9e2c8ed847450ebbdf99b32d"}, + {file = "ruff-0.8.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:248b1fb3f739d01d528cc50b35ee9c4812aa58cc5935998e776bf8ed5b251e75"}, + {file = "ruff-0.8.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:bc3c083c50390cf69e7e1b5a5a7303898966be973664ec0c4a4acea82c1d4315"}, + {file = "ruff-0.8.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52d587092ab8df308635762386f45f4638badb0866355b2b86760f6d3c076188"}, + {file = "ruff-0.8.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:61323159cf21bc3897674e5adb27cd9e7700bab6b84de40d7be28c3d46dc67cf"}, + {file = "ruff-0.8.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ae4478b1471fc0c44ed52a6fb787e641a2ac58b1c1f91763bafbc2faddc5117"}, + {file = "ruff-0.8.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0c000a471d519b3e6cfc9c6680025d923b4ca140ce3e4612d1a2ef58e11f11fe"}, + {file = "ruff-0.8.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:9257aa841e9e8d9b727423086f0fa9a86b6b420fbf4bf9e1465d1250ce8e4d8d"}, + {file = "ruff-0.8.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:45a56f61b24682f6f6709636949ae8cc82ae229d8d773b4c76c09ec83964a95a"}, + {file = "ruff-0.8.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:496dd38a53aa173481a7d8866bcd6451bd934d06976a2505028a50583e001b76"}, + {file = "ruff-0.8.6-py3-none-win32.whl", hash = "sha256:e169ea1b9eae61c99b257dc83b9ee6c76f89042752cb2d83486a7d6e48e8f764"}, + {file = "ruff-0.8.6-py3-none-win_amd64.whl", hash = "sha256:f1d70bef3d16fdc897ee290d7d20da3cbe4e26349f62e8a0274e7a3f4ce7a905"}, + {file = "ruff-0.8.6-py3-none-win_arm64.whl", hash = "sha256:7d7fc2377a04b6e04ffe588caad613d0c460eb2ecba4c0ccbbfe2bc973cbc162"}, + {file = "ruff-0.8.6.tar.gz", hash = "sha256:dcad24b81b62650b0eb8814f576fc65cfee8674772a6e24c9b747911801eeaa5"}, ] [[package]] @@ -4498,13 +4507,13 @@ files = [ [[package]] name = "unstructured" -version = "0.16.11" +version = "0.16.12" description = "A library that prepares raw documents for downstream ML tasks." optional = false python-versions = "<3.13,>=3.9.0" files = [ - {file = "unstructured-0.16.11-py3-none-any.whl", hash = "sha256:a92d5bc2c2b7bb23369641fb7a7f0daba1775639199306ce4cd83ca564a03763"}, - {file = "unstructured-0.16.11.tar.gz", hash = "sha256:33ebf68aae11ce33c8a96335296557b5abd8ba96eaba3e5a1554c0b9eee40bb5"}, + {file = "unstructured-0.16.12-py3-none-any.whl", hash = "sha256:bcac29ac1b38fba4228c5a1a7721d1aa7c48220f7c1dd43b563645c56e978c49"}, + {file = "unstructured-0.16.12.tar.gz", hash = "sha256:c3133731c6edb9c2f474e62cb2b560cd0a8d578c4532ec14d8c0941e401770b0"}, ] [package.dependencies] @@ -4517,6 +4526,7 @@ filetype = "*" html5lib = "*" langdetect = "*" lxml = "*" +ndjson = "*" nltk = "*" numpy = "<2" psutil = "*" @@ -5009,4 +5019,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "3.11.2" -content-hash = "d614aeb15753a479822bb32554445a54e6d3188ac1c97b064639ce2b74bed759" +content-hash = "807c866bfcd861d24cbd2a95912e12845ca06a689199504ba971618fa27d3b62" diff --git a/pyproject.toml b/pyproject.toml index e8119dfe8..f74479c75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,7 @@ sentry-sdk = "^2.19.0" bitcash = "^1.0.2" pydantic = "^2.7.3" pydantic_core = "^2.18.4" -unstructured = "^0.16.8" +unstructured = "^0.16.12" Markdown = "^3.6" faiss-cpu = "^1.8.0" psutil = "^5.9.8" @@ -87,7 +87,7 @@ newrelic = "^10.4.0" [tool.poetry.group.dev.dependencies] black = "^24.8.0" isort = "^5.13.2" -ruff = "^0.8.4" +ruff = "^0.8.6" pre-commit = "^3.8.0" [tool.isort] diff --git a/website/management/commands/sync_repo_contributors.py b/website/management/commands/sync_repo_contributors.py new file mode 100644 index 000000000..096639d37 --- /dev/null +++ b/website/management/commands/sync_repo_contributors.py @@ -0,0 +1,82 @@ +import logging +import time + +import requests +from django.conf import settings +from django.core.management.base import BaseCommand + +from website.models import Contributor, Repo + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + help = "Synchronize all contributors for a repository" + + def add_arguments(self, parser): + parser.add_argument("--repo_id", type=int, help="Repository ID to sync") + + def handle(self, *args, **options): + repo_id = options.get("repo_id") + if not repo_id: + return + + repo = Repo.objects.get(id=repo_id) + owner_repo = repo.repo_url.rstrip("/").split("github.com/")[-1] + + headers = { + "Authorization": f"token {settings.GITHUB_TOKEN}", + "Accept": "application/vnd.github.v3+json", + } + + # Get all contributors with pagination + page = 1 + all_contributors = [] + + while True: + api_url = f"https://api.github.com/repos/{owner_repo}/contributors?anon=true&per_page=100&page={page}" + response = requests.get(api_url, headers=headers) + + if response.status_code == 403: + reset_time = int(response.headers.get("X-RateLimit-Reset", 0)) + wait_time = reset_time - int(time.time()) + if wait_time > 0: + logger.info(f"Rate limit hit, waiting {wait_time} seconds") + time.sleep(wait_time) + continue + + if response.status_code != 200: + break + + contributors_page = response.json() + if not contributors_page: + break + + all_contributors.extend(contributors_page) + page += 1 + + # Be nice to GitHub API + time.sleep(1) + + # Batch create/update contributors + for contrib_data in all_contributors: + github_id = contrib_data.get("id") + if not github_id: + # skip if 'id' is missing + continue + contributor, created = Contributor.objects.update_or_create( + github_id=github_id, + defaults={ + "name": contrib_data.get("login", "unknown"), + "github_url": contrib_data.get("html_url", ""), + "avatar_url": contrib_data.get("avatar_url", ""), + "contributions": contrib_data.get("contributions", 0), + "contributor_type": contrib_data.get("type", "User"), + }, + ) + repo.contributor.add(contributor) + + repo.contributor_count = len(all_contributors) + repo.save() + + logger.info(f"Synced {len(all_contributors)} contributors for {repo.name}") diff --git a/website/migrations/0179_contributorstats.py b/website/migrations/0179_contributorstats.py new file mode 100644 index 000000000..08fea502b --- /dev/null +++ b/website/migrations/0179_contributorstats.py @@ -0,0 +1,60 @@ +# Generated by Django 5.1.3 on 2025-01-06 13:17 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("website", "0178_alter_ip_agent"), + ] + + operations = [ + migrations.CreateModel( + name="ContributorStats", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("date", models.DateField()), + ("commits", models.PositiveIntegerField(default=0)), + ("issues_opened", models.PositiveIntegerField(default=0)), + ("issues_closed", models.PositiveIntegerField(default=0)), + ("pull_requests", models.PositiveIntegerField(default=0)), + ("comments", models.PositiveIntegerField(default=0)), + ( + "granularity", + models.CharField( + choices=[("day", "Day"), ("month", "Month")], + default="day", + max_length=10, + ), + ), + ( + "contributor", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="stats", + to="website.contributor", + ), + ), + ( + "repo", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="stats", + to="website.repo", + ), + ), + ], + options={ + "unique_together": {("contributor", "repo", "date", "granularity")}, + }, + ), + ] diff --git a/website/models.py b/website/models.py index 5e163ed93..2d0168710 100644 --- a/website/models.py +++ b/website/models.py @@ -936,17 +936,6 @@ def __str__(self): return self.name -# class ContributorStats(models.Model): -# username = models.CharField(max_length=255, unique=True) -# commits = models.IntegerField(default=0) -# issues_opened = models.IntegerField(default=0) -# issues_closed = models.IntegerField(default=0) -# prs = models.IntegerField(default=0) -# comments = models.IntegerField(default=0) -# assigned_issues = models.IntegerField(default=0) -# created = models.DateTimeField(auto_now_add=True) - - class Contribution(models.Model): CONTRIBUTION_TYPES = [ ("commit", "Commit"), @@ -1318,3 +1307,32 @@ def save(self, *args, **kwargs): def __str__(self): return f"{self.project.name}/{self.name}" + + +class ContributorStats(models.Model): + contributor = models.ForeignKey(Contributor, on_delete=models.CASCADE, related_name="stats") + repo = models.ForeignKey(Repo, on_delete=models.CASCADE, related_name="stats") + + # This will represent either a specific day or the first day of a month. + date = models.DateField() + + # Store counts + commits = models.PositiveIntegerField(default=0) + issues_opened = models.PositiveIntegerField(default=0) + issues_closed = models.PositiveIntegerField(default=0) + pull_requests = models.PositiveIntegerField(default=0) + comments = models.PositiveIntegerField(default=0) + + # "day" for daily entries, "month" for monthly entries + granularity = models.CharField( + max_length=10, choices=[("day", "Day"), ("month", "Month")], default="day" + ) + + class Meta: + # You can't have two different stats for the same date+granularity + unique_together = ("contributor", "repo", "date", "granularity") + + def __str__(self): + return ( + f"{self.contributor.name} in {self.repo.name} " f"on {self.date} [{self.granularity}]" + ) diff --git a/website/templates/projects/repo_detail.html b/website/templates/projects/repo_detail.html index 1a3b911c3..83ed4ab96 100644 --- a/website/templates/projects/repo_detail.html +++ b/website/templates/projects/repo_detail.html @@ -314,25 +314,28 @@