From a36883cf37c537435fd9230088700e28496ecef2 Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Mon, 11 Aug 2025 20:58:02 +0530 Subject: [PATCH 01/20] Add bi-directional sync feature --- apps/base/utils.py | 7 + apps/challenges/github_utils.py | 377 +++++++++++++++++++++ apps/challenges/models.py | 29 ++ celerybeat.pid | 1 + requirements/common.txt | 1 + tests/unit/challenges/test_github_utils.py | 234 +++++++++++++ 6 files changed, 649 insertions(+) create mode 100644 apps/challenges/github_utils.py create mode 100644 celerybeat.pid create mode 100644 tests/unit/challenges/test_github_utils.py diff --git a/apps/base/utils.py b/apps/base/utils.py index 4d487f5c8c..c970ae20b9 100644 --- a/apps/base/utils.py +++ b/apps/base/utils.py @@ -333,3 +333,10 @@ def is_user_a_staff(user): {bool} : True/False if the user is staff or not """ return user.is_staff + + +def deserialize_object(object): + deserialized_object = None + for obj in serializers.deserialize("json", object): + deserialized_object = obj.object + return deserialized_object diff --git a/apps/challenges/github_utils.py b/apps/challenges/github_utils.py new file mode 100644 index 0000000000..65939149ab --- /dev/null +++ b/apps/challenges/github_utils.py @@ -0,0 +1,377 @@ +import base64 +import json +import logging +from typing import Any, Dict, Optional, Tuple +from urllib.parse import urljoin + +import requests + +logger = logging.getLogger(__name__) + +URLS = {"contents": "/repos/{}/contents/{}", "repos": "/repos/{}"} + + +class GithubInterface: + """ + Interface for GitHub API operations + """ + + def __init__(self, token, repo): + self.token = token + self.repo = repo + self.base_url = "https://api.github.com" + self.headers = { + "Authorization": f"token {token}", + "Accept": "application/vnd.github.v3+json", + } + + def get_github_url(self, url): + """Get the full GitHub API URL""" + return urljoin(self.base_url, url) + + def get_file_contents(self, path: str) -> Optional[Dict[str, Any]]: + """ + Get file 
contents from GitHub repository + """ + url = URLS["contents"].format(self.repo, path) + full_url = self.get_github_url(url) + + # No ref -> GitHub uses default branch + response = requests.get(full_url, headers=self.headers) + + if response.status_code == 200: + return response.json() + elif response.status_code == 404: + return None + else: + logger.error(f"Failed to get file contents: {response.status_code}") + return None + + def _put_file(self, path: str, base64_content: str, message: str, sha: Optional[str] = None) -> Optional[Dict[str, Any]]: + """ + Create or update file in GitHub repository with base64 content + """ + url = URLS["contents"].format(self.repo, path) + full_url = self.get_github_url(url) + + data = { + "message": message, + "content": base64_content, + } + + if sha: + data["sha"] = sha + + response = requests.put(full_url, headers=self.headers, json=data) + + if response.status_code in [200, 201]: + return response.json() + else: + logger.error(f"Failed to update file: {response.status_code}") + return None + + def update_text_file(self, path: str, content: str, message: str, sha: Optional[str] = None) -> Optional[Dict[str, Any]]: + return self._put_file(path, base64.b64encode(content.encode()).decode(), message, sha) + + def update_binary_file(self, path: str, content_bytes: bytes, message: str, sha: Optional[str] = None) -> Optional[Dict[str, Any]]: + return self._put_file(path, base64.b64encode(content_bytes).decode(), message, sha) + + @staticmethod + def _decode_github_file_content(file_data: Dict[str, Any]) -> Optional[bytes]: + try: + encoded = file_data.get("content") + if not encoded: + return None + # GitHub may include newlines in base64 content + return base64.b64decode(encoded) + except Exception as exc: + logger.error(f"Failed to decode GitHub content: {exc}") + return None + + @staticmethod + def _diff_top_level_keys(old: Dict[str, Any], new: Dict[str, Any]) -> Tuple[bool, Tuple[str, ...]]: + changed_keys = [] + all_keys = 
set(old.keys()) | set(new.keys()) + for key in sorted(all_keys): + if old.get(key) != new.get(key): + changed_keys.append(key) + return (len(changed_keys) > 0, tuple(changed_keys)) + + def update_json_if_changed(self, path: str, data: Dict[str, Any]) -> Optional[Dict[str, Any]]: + """ + Create/update a JSON file only if content changed. Commit message lists changed keys. + """ + try: + file_data = self.get_file_contents(path) + new_json_text = json.dumps(data, indent=2, sort_keys=True) + if file_data: + old_bytes = self._decode_github_file_content(file_data) or b"" + try: + old_json = json.loads(old_bytes.decode() or "{}") + except Exception: + old_json = {} + changed, changed_keys = self._diff_top_level_keys(old_json, data) + if not changed: + logger.info(f"No changes for {path}. Skipping commit.") + return None + message = f"Update {path}: keys updated: {', '.join(changed_keys)}" + return self.update_text_file(path, new_json_text, message, file_data.get("sha")) + else: + message = f"Create {path} via EvalAI sync" + return self.update_text_file(path, new_json_text, message) + except Exception as exc: + logger.error(f"Error updating JSON at {path}: {exc}") + return None + + def update_binary_if_changed(self, path: str, content_bytes: bytes, create_message: str, update_message_prefix: str) -> Optional[Dict[str, Any]]: + try: + file_data = self.get_file_contents(path) + if file_data: + old_bytes = self._decode_github_file_content(file_data) or b"" + if old_bytes == content_bytes: + logger.info(f"No binary changes for {path}. 
Skipping commit.") + return None + message = f"{update_message_prefix} {path}" + return self.update_binary_file(path, content_bytes, message, file_data.get("sha")) + else: + return self.update_binary_file(path, content_bytes, create_message) + except Exception as exc: + logger.error(f"Error updating binary at {path}: {exc}") + return None + + +def _fetch_auth_token_from_repo(repo: str, candidate_token: Optional[str] = None) -> Optional[str]: + """ + Try to read AUTH_TOKEN file from the repository. This supports two modes: + 1) Unauthenticated read (works only for public repos) + 2) Authenticated read using a candidate token if provided + + Returns the token string if found, else None. + """ + base_url = "https://api.github.com" + path = URLS["contents"].format(repo, "AUTH_TOKEN") + url = urljoin(base_url, path) + + def _request(headers: Optional[Dict[str, str]]) -> Optional[str]: + try: + # No ref -> default branch + response = requests.get(url, headers=headers or {}) + if response.status_code == 200: + data = response.json() + content_b64 = data.get("content") + if not content_b64: + return None + try: + value = base64.b64decode(content_b64).decode().strip() + except Exception: + return None + return value or None + return None + except Exception: + return None + + # Try unauthenticated first (public repo) + token_value = _request(None) + if token_value: + return token_value + + # Try with candidate token if provided (for private repo) + if candidate_token: + headers = { + "Authorization": f"token {candidate_token}", + "Accept": "application/vnd.github.v3+json", + } + token_value = _request(headers) + if token_value: + return token_value + + return None + + +def sync_challenge_to_github(challenge): + """ + Sync challenge data to GitHub repository + """ + if not challenge.github_repository: + logger.warning("GitHub repository not configured for challenge") + return + + try: + repo = challenge.github_repository + + # Enforce AUTH_TOKEN from repo if available; 
fallback to challenge.github_token + repo_auth_token = _fetch_auth_token_from_repo(repo, candidate_token=challenge.github_token) + resolved_token = repo_auth_token or challenge.github_token + if not resolved_token: + logger.warning("Neither repo AUTH_TOKEN nor challenge.github_token is available; skipping sync") + return + + github = GithubInterface(resolved_token, repo) + + # Sync non-file fields + challenge_data = { + "title": challenge.title, + "description": challenge.description, + "short_description": challenge.short_description, + "terms_and_conditions": challenge.terms_and_conditions, + "submission_guidelines": challenge.submission_guidelines, + "evaluation_details": challenge.evaluation_details, + "start_date": challenge.start_date.isoformat() if challenge.start_date else None, + "end_date": challenge.end_date.isoformat() if challenge.end_date else None, + "domain": challenge.domain, + "list_tags": challenge.list_tags, + "has_prize": challenge.has_prize, + "has_sponsors": challenge.has_sponsors, + "published": challenge.published, + "submission_time_limit": challenge.submission_time_limit, + "is_registration_open": challenge.is_registration_open, + "enable_forum": challenge.enable_forum, + "forum_url": challenge.forum_url, + "leaderboard_description": challenge.leaderboard_description, + "anonymous_leaderboard": challenge.anonymous_leaderboard, + "manual_participant_approval": challenge.manual_participant_approval, + "is_disabled": challenge.is_disabled, + "approved_by_admin": challenge.approved_by_admin, + "uses_ec2_worker": challenge.uses_ec2_worker, + "ec2_storage": challenge.ec2_storage, + "ephemeral_storage": challenge.ephemeral_storage, + "featured": challenge.featured, + "allowed_email_domains": challenge.allowed_email_domains, + "blocked_email_domains": challenge.blocked_email_domains, + "banned_email_ids": challenge.banned_email_ids, + "remote_evaluation": challenge.remote_evaluation, + "queue": challenge.queue, + "sqs_retention_period": 
challenge.sqs_retention_period, + "is_docker_based": challenge.is_docker_based, + "is_static_dataset_code_upload": challenge.is_static_dataset_code_upload, + "max_docker_image_size": challenge.max_docker_image_size, + "max_concurrent_submission_evaluation": challenge.max_concurrent_submission_evaluation, + "use_host_credentials": challenge.use_host_credentials, + "use_host_sqs": challenge.use_host_sqs, + "allow_resuming_submissions": challenge.allow_resuming_submissions, + "allow_host_cancel_submissions": challenge.allow_host_cancel_submissions, + "allow_cancel_running_submissions": challenge.allow_cancel_running_submissions, + "allow_participants_resubmissions": challenge.allow_participants_resubmissions, + "cli_version": challenge.cli_version, + "workers": challenge.workers, + "task_def_arn": challenge.task_def_arn, + "slack_webhook_url": challenge.slack_webhook_url, + "worker_cpu_cores": challenge.worker_cpu_cores, + "worker_memory": challenge.worker_memory, + "inform_hosts": challenge.inform_hosts, + "vpc_cidr": challenge.vpc_cidr, + "subnet_1_cidr": challenge.subnet_1_cidr, + "subnet_2_cidr": challenge.subnet_2_cidr, + "worker_instance_type": challenge.worker_instance_type, + "worker_ami_type": challenge.worker_ami_type, + "worker_disk_size": challenge.worker_disk_size, + "max_worker_instance": challenge.max_worker_instance, + "min_worker_instance": challenge.min_worker_instance, + "desired_worker_instance": challenge.desired_worker_instance, + "cpu_only_jobs": challenge.cpu_only_jobs, + "job_cpu_cores": challenge.job_cpu_cores, + "job_memory": challenge.job_memory, + "worker_image_url": challenge.worker_image_url, + "evaluation_module_error": challenge.evaluation_module_error, + } + + # Update challenge data + github.update_json_if_changed("challenge.json", challenge_data) + + # Sync file fields if they exist + if challenge.evaluation_script: + try: + field_file = challenge.evaluation_script + field_file.open("rb") + try: + content_bytes = field_file.read() + 
finally: + field_file.close() + file_name = "evaluation_script.zip" + github.update_binary_if_changed( + file_name, + content_bytes, + create_message=f"Add {file_name} via EvalAI sync", + update_message_prefix="Update", + ) + except Exception as exc: + logger.error(f"Failed to sync evaluation_script for challenge {challenge.id}: {exc}") + + logger.info(f"Successfully synced challenge {challenge.id} to GitHub") + + except Exception as e: + logger.error(f"Error syncing challenge {challenge.id} to GitHub: {str(e)}") + + +def sync_challenge_phase_to_github(challenge_phase): + """ + Sync challenge phase data to GitHub repository + """ + challenge = challenge_phase.challenge + if not challenge.github_repository: + logger.warning("GitHub repository not configured for challenge") + return + + try: + repo = challenge.github_repository + repo_auth_token = _fetch_auth_token_from_repo(repo, candidate_token=challenge.github_token) + resolved_token = repo_auth_token or challenge.github_token + if not resolved_token: + logger.warning("Neither repo AUTH_TOKEN nor challenge.github_token is available; skipping phase sync") + return + + github = GithubInterface(resolved_token, repo) + + # Sync non-file fields + phase_data = { + "name": challenge_phase.name, + "description": challenge_phase.description, + "leaderboard_public": challenge_phase.leaderboard_public, + "start_date": challenge_phase.start_date.isoformat() if challenge_phase.start_date else None, + "end_date": challenge_phase.end_date.isoformat() if challenge_phase.end_date else None, + "is_public": challenge_phase.is_public, + "is_submission_public": challenge_phase.is_submission_public, + "annotations_uploaded_using_cli": challenge_phase.annotations_uploaded_using_cli, + "max_submissions_per_day": challenge_phase.max_submissions_per_day, + "max_submissions_per_month": challenge_phase.max_submissions_per_month, + "max_submissions": challenge_phase.max_submissions, + "max_concurrent_submissions_allowed": 
challenge_phase.max_concurrent_submissions_allowed, + "codename": challenge_phase.codename, + "allowed_email_ids": challenge_phase.allowed_email_ids, + "environment_image": challenge_phase.environment_image, + "allowed_submission_file_types": challenge_phase.allowed_submission_file_types, + "is_restricted_to_select_one_submission": challenge_phase.is_restricted_to_select_one_submission, + "submission_meta_attributes": challenge_phase.submission_meta_attributes, + "is_partial_submission_evaluation_enabled": challenge_phase.is_partial_submission_evaluation_enabled, + "config_id": challenge_phase.config_id, + "default_submission_meta_attributes": challenge_phase.default_submission_meta_attributes, + "disable_logs": challenge_phase.disable_logs, + } + + # Update phase data + phase_path = f"phases/{challenge_phase.codename}.json" + github.update_json_if_changed(phase_path, phase_data) + + # Sync file fields if they exist + if challenge_phase.test_annotation: + try: + field_file = challenge_phase.test_annotation + field_file.open("rb") + try: + content_bytes = field_file.read() + finally: + field_file.close() + file_name = f"annotations/{challenge_phase.codename}.json" + github.update_binary_if_changed( + file_name, + content_bytes, + create_message=f"Add {file_name} via EvalAI sync", + update_message_prefix="Update", + ) + except Exception as exc: + logger.error(f"Failed to sync test_annotation for phase {challenge_phase.id}: {exc}") + + logger.info(f"Successfully synced challenge phase {challenge_phase.id} to GitHub") + + except Exception as e: + logger.error(f"Error syncing challenge phase {challenge_phase.id} to GitHub: {str(e)}") \ No newline at end of file diff --git a/apps/challenges/models.py b/apps/challenges/models.py index 823309d9d1..ca832240fa 100644 --- a/apps/challenges/models.py +++ b/apps/challenges/models.py @@ -1,4 +1,5 @@ from __future__ import unicode_literals +import logging from base.models import TimeStampedModel, model_field_name from base.utils 
import RandomFileName, get_slug, is_model_field_changed @@ -13,6 +14,8 @@ from hosts.models import ChallengeHost from participants.models import ParticipantTeam +logger = logging.getLogger(__name__) + @receiver(pre_save, sender="challenges.Challenge") def save_challenge_slug(sender, instance, **kwargs): @@ -190,6 +193,10 @@ def __init__(self, *args, **kwargs): github_branch = models.CharField( max_length=200, null=True, blank=True, default="" ) + # The github token used for authentication + github_token = models.CharField( + max_length=200, null=True, blank=True, default="" + ) # The number of vCPU for a Fargate worker for the challenge. Default value # is 0.25 vCPU. worker_cpu_cores = models.IntegerField(null=True, blank=True, default=512) @@ -315,6 +322,17 @@ def update_sqs_retention_period_for_challenge( challenge.save() +@receiver(signals.post_save, sender="challenges.Challenge") +def challenge_details_sync(sender, instance, created, **kwargs): + """Sync challenge details to GitHub when challenge is updated""" + if not created and instance.github_token and instance.github_repository: + try: + from challenges.github_utils import sync_challenge_to_github + sync_challenge_to_github(instance) + except Exception as e: + logger.error(f"Error in challenge_details_sync: {str(e)}") + + class DatasetSplit(TimeStampedModel): name = models.CharField(max_length=100) codename = models.CharField(max_length=100) @@ -441,6 +459,17 @@ def save(self, *args, **kwargs): return challenge_phase_instance +@receiver(signals.post_save, sender="challenges.ChallengePhase") +def challenge_phase_details_sync(sender, instance, created, **kwargs): + """Sync challenge phase details to GitHub when challenge phase is updated""" + if not created and instance.challenge.github_token and instance.challenge.github_repository: + try: + from challenges.github_utils import sync_challenge_phase_to_github + sync_challenge_phase_to_github(instance) + except Exception as e: + logger.error(f"Error in 
challenge_phase_details_sync: {str(e)}") + + def post_save_connect(field_name, sender): import challenges.aws_utils as aws diff --git a/celerybeat.pid b/celerybeat.pid new file mode 100644 index 0000000000..45a4fb75db --- /dev/null +++ b/celerybeat.pid @@ -0,0 +1 @@ +8 diff --git a/requirements/common.txt b/requirements/common.txt index ffafed0a70..e51464faf7 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -31,5 +31,6 @@ pycurl==7.43.0.6 PyJWT==2.1.0 PyYaml==5.1 rstr==2.2.6 +requests==2.25.1 sendgrid==6.4.8 vine==1.3.0 diff --git a/tests/unit/challenges/test_github_utils.py b/tests/unit/challenges/test_github_utils.py new file mode 100644 index 0000000000..4c0f9a3821 --- /dev/null +++ b/tests/unit/challenges/test_github_utils.py @@ -0,0 +1,234 @@ +import json +import logging +from unittest.mock import Mock, patch + +from django.test import TestCase +from django.utils import timezone + +from challenges.models import Challenge, ChallengePhase +from challenges.github_utils import ( + GithubInterface, + sync_challenge_to_github, + sync_challenge_phase_to_github, +) + + +class TestGithubInterface(TestCase): + """Test cases for GithubInterface class""" + + def setUp(self): + self.token = "test_token" + self.repo = "test/repo" + self.branch = "master" + self.github = GithubInterface(self.token, self.repo, self.branch) + + def test_init(self): + """Test GithubInterface initialization""" + self.assertEqual(self.github.token, self.token) + self.assertEqual(self.github.repo, self.repo) + self.assertEqual(self.github.branch, self.branch) + self.assertEqual(self.github.base_url, "https://api.github.com") + self.assertIn("Authorization", self.github.headers) + self.assertIn("Accept", self.github.headers) + + def test_get_github_url(self): + """Test get_github_url method""" + url = "/test/path" + expected = "https://api.github.com/test/path" + result = self.github.get_github_url(url) + self.assertEqual(result, expected) + + 
@patch("challenges.github_utils.requests.get") + def test_get_file_contents_success(self, mock_get): + """Test get_file_contents with successful response""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"content": "test content"} + mock_get.return_value = mock_response + + result = self.github.get_file_contents("test.json") + + self.assertEqual(result, {"content": "test content"}) + mock_get.assert_called_once() + + @patch("challenges.github_utils.requests.get") + def test_get_file_contents_not_found(self, mock_get): + """Test get_file_contents with 404 response""" + mock_response = Mock() + mock_response.status_code = 404 + mock_get.return_value = mock_response + + result = self.github.get_file_contents("test.json") + + self.assertIsNone(result) + + @patch("challenges.github_utils.requests.put") + def test_update_text_file_success(self, mock_put): + """Test update_text_file with successful response""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"sha": "test_sha"} + mock_put.return_value = mock_response + + result = self.github.update_text_file("test.json", "content", "message", "sha") + + self.assertEqual(result, {"sha": "test_sha"}) + mock_put.assert_called_once() + + @patch("challenges.github_utils.requests.put") + def test_update_text_file_failure(self, mock_put): + """Test update_text_file with failure response""" + mock_response = Mock() + mock_response.status_code = 400 + mock_put.return_value = mock_response + + result = self.github.update_text_file("test.json", "content", "message", "sha") + + self.assertIsNone(result) + + @patch("challenges.github_utils.GithubInterface.get_file_contents") + @patch("challenges.github_utils.GithubInterface.update_text_file") + def test_update_json_if_changed_existing_changed(self, mock_update_text, mock_get): + """When existing JSON differs, it should commit with updated content""" + old_data = {"a": 1} + old_text = 
json.dumps(old_data, sort_keys=True) + mock_get.return_value = {"sha": "test_sha", "content": old_text.encode("utf-8").decode("utf-8")} # content will be base64-decoded internally; provide as raw for simplicity + mock_update_text.return_value = {"sha": "new_sha"} + + new_data = {"a": 2} + result = self.github.update_json_if_changed("test.json", new_data) + + self.assertEqual(result, {"sha": "new_sha"}) + mock_get.assert_called_once_with("test.json") + mock_update_text.assert_called_once() + + @patch("challenges.github_utils.GithubInterface.get_file_contents") + @patch("challenges.github_utils.GithubInterface.update_text_file") + def test_update_json_if_changed_new_file(self, mock_update_text, mock_get): + """When file doesn't exist, it should create it""" + mock_get.return_value = None + mock_update_text.return_value = {"sha": "new_sha"} + + data = {"test": "data"} + result = self.github.update_json_if_changed("test.json", data) + + self.assertEqual(result, {"sha": "new_sha"}) + mock_get.assert_called_once_with("test.json") + mock_update_text.assert_called_once() + + +class TestGithubSync(TestCase): + """Test cases for GitHub sync functionality""" + + def setUp(self): + # Create a test challenge with GitHub configuration + self.challenge = Challenge.objects.create( + title="Test Challenge", + description="Test Description", + github_token="test_token", + github_repository="test/repo", + github_branch="master", + start_date=timezone.now(), + end_date=timezone.now() + timezone.timedelta(days=30), + ) + + # Create a test challenge phase + self.challenge_phase = ChallengePhase.objects.create( + name="Test Phase", + description="Test Phase Description", + challenge=self.challenge, + codename="test_phase", + ) + + @patch("challenges.github_utils.GithubInterface") + def test_sync_challenge_to_github_success(self, mock_github_class): + """Test successful challenge sync to GitHub""" + mock_github = Mock() + mock_github_class.return_value = mock_github + 
mock_github.update_json_if_changed.return_value = {"sha": "test_sha"} + + sync_challenge_to_github(self.challenge) + + mock_github_class.assert_called_once_with( + "test_token", "test/repo", "master" + ) + mock_github.update_json_if_changed.assert_called_once() + + def test_sync_challenge_to_github_no_token(self): + """Test challenge sync when no GitHub token is configured""" + self.challenge.github_token = "" + + with self.assertLogs(level=logging.WARNING): + sync_challenge_to_github(self.challenge) + + def test_sync_challenge_to_github_no_repo(self): + """Test challenge sync when no GitHub repository is configured""" + self.challenge.github_repository = "" + + with self.assertLogs(level=logging.WARNING): + sync_challenge_to_github(self.challenge) + + @patch("challenges.github_utils.GithubInterface") + def test_sync_challenge_phase_to_github_success(self, mock_github_class): + """Test successful challenge phase sync to GitHub""" + mock_github = Mock() + mock_github_class.return_value = mock_github + mock_github.update_json_if_changed.return_value = {"sha": "test_sha"} + + sync_challenge_phase_to_github(self.challenge_phase) + + mock_github_class.assert_called_once_with( + "test_token", "test/repo", "master" + ) + mock_github.update_json_if_changed.assert_called_once() + + def test_sync_challenge_phase_to_github_no_token(self): + """Test challenge phase sync when no GitHub token is configured""" + self.challenge.github_token = "" + + with self.assertLogs(level=logging.WARNING): + sync_challenge_phase_to_github(self.challenge_phase) + + def test_sync_challenge_phase_to_github_no_repo(self): + """Test challenge phase sync when no GitHub repository is configured""" + self.challenge.github_repository = "" + + with self.assertLogs(level=logging.WARNING): + sync_challenge_phase_to_github(self.challenge_phase) + + @patch("challenges.github_utils.GithubInterface") + def test_sync_challenge_data_structure(self, mock_github_class): + """Test that challenge data is properly 
structured for GitHub sync""" + mock_github = Mock() + mock_github_class.return_value = mock_github + + sync_challenge_to_github(self.challenge) + + # Verify that update_data_from_path was called with challenge data + call_args = mock_github.update_json_if_changed.call_args + self.assertEqual(call_args[0][0], "challenge.json") # path + challenge_data = call_args[0][1] # data + + # Check that key fields are present + self.assertEqual(challenge_data["title"], "Test Challenge") + self.assertEqual(challenge_data["description"], "Test Description") + self.assertIn("start_date", challenge_data) + self.assertIn("end_date", challenge_data) + + @patch("challenges.github_utils.GithubInterface") + def test_sync_challenge_phase_data_structure(self, mock_github_class): + """Test that challenge phase data is properly structured for GitHub sync""" + mock_github = Mock() + mock_github_class.return_value = mock_github + + sync_challenge_phase_to_github(self.challenge_phase) + + # Verify that update_data_from_path was called with phase data + call_args = mock_github.update_json_if_changed.call_args + self.assertEqual(call_args[0][0], "phases/test_phase.json") # path + phase_data = call_args[0][1] # data + + # Check that key fields are present + self.assertEqual(phase_data["name"], "Test Phase") + self.assertEqual(phase_data["description"], "Test Phase Description") + self.assertEqual(phase_data["codename"], "test_phase") \ No newline at end of file From 3c2ca3d03dcc941b0dff9c6fa7e73cafe7dc7d55 Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Sat, 16 Aug 2025 20:46:30 +0530 Subject: [PATCH 02/20] Clean up files --- apps/challenges/github_interface.py | 96 ++++++ apps/challenges/github_sync_config.py | 37 ++ apps/challenges/github_utils.py | 466 ++++++-------------------- 3 files changed, 237 insertions(+), 362 deletions(-) create mode 100644 apps/challenges/github_interface.py create mode 100644 apps/challenges/github_sync_config.py diff --git a/apps/challenges/github_interface.py 
b/apps/challenges/github_interface.py new file mode 100644 index 0000000000..e2c24c65f8 --- /dev/null +++ b/apps/challenges/github_interface.py @@ -0,0 +1,96 @@ +import requests +import base64 +import logging + +logger = logging.getLogger(__name__) + +URLS = {"contents": "/repos/{}/contents/{}", "repos": "/repos/{}"} + + +class GithubInterface: + def __init__(self, GITHUB_AUTH_TOKEN, GITHUB_REPOSITORY): + self.GITHUB_AUTH_TOKEN = GITHUB_AUTH_TOKEN + self.GITHUB_REPOSITORY = GITHUB_REPOSITORY + self.BRANCH = "challenge" + self.COMMIT_PREFIX = "evalai_bot: Update {}" + + def get_request_headers(self): + headers = {"Authorization": "token {}".format(self.GITHUB_AUTH_TOKEN)} + return headers + + def make_request(self, url, method, params={}, data={}): + url = self.get_github_url(url) + headers = self.get_request_headers() + try: + response = requests.request( + method=method, + url=url, + headers=headers, + params=params, + json=data, + ) + response.raise_for_status() + except requests.exceptions.RequestException: + logger.info( + "EvalAI is not able to establish connection with github {}".format( + response.json() + ) + ) + return None + return response.json() + + def get_github_url(self, url): + base_url = "https://api.github.com" + url = "{0}{1}".format(base_url, url) + return url + + def get_content_from_path(self, path): + """ + Gets the file content, information in json format in the repository at particular path + Ref: https://docs.github.com/en/rest/reference/repos#contents + """ + url = URLS.get("contents").format(self.GITHUB_REPOSITORY, path) + params = {"ref": self.BRANCH} + response = self.make_request(url, "GET", params) + return response + + def get_data_from_path(self, path): + """ + Gets the file data in string format in the repository at particular path + Calls get_content_from_path and encode the base64 content + """ + content_response = self.get_content_from_path(path) + string_data = None + if content_response and content_response.get("content"): + 
string_data = base64.b64decode(content_response["content"]).decode( + "utf-8" + ) + return string_data + + def update_content_from_path(self, path, content): + """ + Updates the file content, creates a commit in the repository at particular path + Ref: https://docs.github.com/en/rest/reference/repos#create-or-update-file-contents + """ + url = URLS.get("contents").format(self.GITHUB_REPOSITORY, path) + data = { + "message": self.COMMIT_PREFIX.format(path), + "branch": self.BRANCH, + "sha": self.get_content_from_path(path).get("sha"), + "content": content, + } + response = self.make_request(url, "PUT", data=data) + return response + + def update_data_from_path(self, path, data): + """ + Updates the file data to the data(string) provided, at particular path + Call update_content_from_path with decoded base64 content + """ + content = base64.b64encode(bytes(data, "utf-8")).decode("utf-8") + return self.update_content_from_path(path, content) + + def is_repository(self): + url = URLS.get("repos").format(self.GITHUB_REPOSITORY) + repo_response = self.make_request(url, "GET") + return True if repo_response else False \ No newline at end of file diff --git a/apps/challenges/github_sync_config.py b/apps/challenges/github_sync_config.py new file mode 100644 index 0000000000..a604ba79df --- /dev/null +++ b/apps/challenges/github_sync_config.py @@ -0,0 +1,37 @@ +# Fields from Challenge, ChallengePhase model to be considered for github_sync + +challenge_non_file_fields = [ + "title", + "short_description", + "leaderboard_description", + "remote_evaluation", + "is_docker_based", + "is_static_dataset_code_upload", + "start_date", + "end_date", + "published", +] + +challenge_file_fields = [ + "description", + "evaluation_details", + "terms_and_conditions", + "submission_guidelines", +] + +challenge_phase_non_file_fields = [ + "name", + "leaderboard_public", + "is_public", + "is_submission_public", + "start_date", + "end_date", + "max_submissions_per_day", + 
"max_submissions_per_month", + "max_submissions", + "is_restricted_to_select_one_submission", + "is_partial_submission_evaluation_enabled", + "allowed_submission_file_types", +] + +challenge_phase_file_fields = ["description"] \ No newline at end of file diff --git a/apps/challenges/github_utils.py b/apps/challenges/github_utils.py index 65939149ab..c19cfa5e87 100644 --- a/apps/challenges/github_utils.py +++ b/apps/challenges/github_utils.py @@ -1,377 +1,119 @@ -import base64 -import json import logging -from typing import Any, Dict, Optional, Tuple -from urllib.parse import urljoin - -import requests +import yaml + +from base.utils import deserialize_object +from .github_sync_config import ( + challenge_non_file_fields, + challenge_file_fields, + challenge_phase_non_file_fields, + challenge_phase_file_fields, +) +from .github_interface import GithubInterface +from evalai.celery import app logger = logging.getLogger(__name__) -URLS = {"contents": "/repos/{}/contents/{}", "repos": "/repos/{}"} - - -class GithubInterface: - """ - Interface for GitHub API operations - """ - - def __init__(self, token, repo): - self.token = token - self.repo = repo - self.base_url = "https://api.github.com" - self.headers = { - "Authorization": f"token {token}", - "Accept": "application/vnd.github.v3+json", - } - - def get_github_url(self, url): - """Get the full GitHub API URL""" - return urljoin(self.base_url, url) - - def get_file_contents(self, path: str) -> Optional[Dict[str, Any]]: - """ - Get file contents from GitHub repository - """ - url = URLS["contents"].format(self.repo, path) - full_url = self.get_github_url(url) - - # No ref -> GitHub uses default branch - response = requests.get(full_url, headers=self.headers) - - if response.status_code == 200: - return response.json() - elif response.status_code == 404: - return None - else: - logger.error(f"Failed to get file contents: {response.status_code}") - return None - - def _put_file(self, path: str, base64_content: str, 
message: str, sha: Optional[str] = None) -> Optional[Dict[str, Any]]: - """ - Create or update file in GitHub repository with base64 content - """ - url = URLS["contents"].format(self.repo, path) - full_url = self.get_github_url(url) - - data = { - "message": message, - "content": base64_content, - } - - if sha: - data["sha"] = sha - - response = requests.put(full_url, headers=self.headers, json=data) - - if response.status_code in [200, 201]: - return response.json() - else: - logger.error(f"Failed to update file: {response.status_code}") - return None - - def update_text_file(self, path: str, content: str, message: str, sha: Optional[str] = None) -> Optional[Dict[str, Any]]: - return self._put_file(path, base64.b64encode(content.encode()).decode(), message, sha) - - def update_binary_file(self, path: str, content_bytes: bytes, message: str, sha: Optional[str] = None) -> Optional[Dict[str, Any]]: - return self._put_file(path, base64.b64encode(content_bytes).decode(), message, sha) - - @staticmethod - def _decode_github_file_content(file_data: Dict[str, Any]) -> Optional[bytes]: - try: - encoded = file_data.get("content") - if not encoded: - return None - # GitHub may include newlines in base64 content - return base64.b64decode(encoded) - except Exception as exc: - logger.error(f"Failed to decode GitHub content: {exc}") - return None - - @staticmethod - def _diff_top_level_keys(old: Dict[str, Any], new: Dict[str, Any]) -> Tuple[bool, Tuple[str, ...]]: - changed_keys = [] - all_keys = set(old.keys()) | set(new.keys()) - for key in sorted(all_keys): - if old.get(key) != new.get(key): - changed_keys.append(key) - return (len(changed_keys) > 0, tuple(changed_keys)) - - def update_json_if_changed(self, path: str, data: Dict[str, Any]) -> Optional[Dict[str, Any]]: - """ - Create/update a JSON file only if content changed. Commit message lists changed keys. 
- """ - try: - file_data = self.get_file_contents(path) - new_json_text = json.dumps(data, indent=2, sort_keys=True) - if file_data: - old_bytes = self._decode_github_file_content(file_data) or b"" - try: - old_json = json.loads(old_bytes.decode() or "{}") - except Exception: - old_json = {} - changed, changed_keys = self._diff_top_level_keys(old_json, data) - if not changed: - logger.info(f"No changes for {path}. Skipping commit.") - return None - message = f"Update {path}: keys updated: {', '.join(changed_keys)}" - return self.update_text_file(path, new_json_text, message, file_data.get("sha")) - else: - message = f"Create {path} via EvalAI sync" - return self.update_text_file(path, new_json_text, message) - except Exception as exc: - logger.error(f"Error updating JSON at {path}: {exc}") - return None - - def update_binary_if_changed(self, path: str, content_bytes: bytes, create_message: str, update_message_prefix: str) -> Optional[Dict[str, Any]]: - try: - file_data = self.get_file_contents(path) - if file_data: - old_bytes = self._decode_github_file_content(file_data) or b"" - if old_bytes == content_bytes: - logger.info(f"No binary changes for {path}. Skipping commit.") - return None - message = f"{update_message_prefix} {path}" - return self.update_binary_file(path, content_bytes, message, file_data.get("sha")) - else: - return self.update_binary_file(path, content_bytes, create_message) - except Exception as exc: - logger.error(f"Error updating binary at {path}: {exc}") - return None - -def _fetch_auth_token_from_repo(repo: str, candidate_token: Optional[str] = None) -> Optional[str]: - """ - Try to read AUTH_TOKEN file from the repository. This supports two modes: - 1) Unauthenticated read (works only for public repos) - 2) Authenticated read using a candidate token if provided - - Returns the token string if found, else None. 
- """ - base_url = "https://api.github.com" - path = URLS["contents"].format(repo, "AUTH_TOKEN") - url = urljoin(base_url, path) - - def _request(headers: Optional[Dict[str, str]]) -> Optional[str]: - try: - # No ref -> default branch - response = requests.get(url, headers=headers or {}) - if response.status_code == 200: - data = response.json() - content_b64 = data.get("content") - if not content_b64: - return None - try: - value = base64.b64decode(content_b64).decode().strip() - except Exception: - return None - return value or None - return None - except Exception: - return None - - # Try unauthenticated first (public repo) - token_value = _request(None) - if token_value: - return token_value - - # Try with candidate token if provided (for private repo) - if candidate_token: - headers = { - "Authorization": f"token {candidate_token}", - "Accept": "application/vnd.github.v3+json", - } - token_value = _request(headers) - if token_value: - return token_value - - return None - - -def sync_challenge_to_github(challenge): - """ - Sync challenge data to GitHub repository - """ - if not challenge.github_repository: - logger.warning("GitHub repository not configured for challenge") +@app.task +def github_challenge_sync(challenge): + challenge = deserialize_object(challenge) + github = GithubInterface( + GITHUB_REPOSITORY=getattr(challenge, "github_repository"), + GITHUB_AUTH_TOKEN=getattr(challenge, "github_token"), + ) + if not github.is_repository(): return - try: - repo = challenge.github_repository - - # Enforce AUTH_TOKEN from repo if available; fallback to challenge.github_token - repo_auth_token = _fetch_auth_token_from_repo(repo, candidate_token=challenge.github_token) - resolved_token = repo_auth_token or challenge.github_token - if not resolved_token: - logger.warning("Neither repo AUTH_TOKEN nor challenge.github_token is available; skipping sync") - return - - github = GithubInterface(resolved_token, repo) - - # Sync non-file fields - challenge_data = { - 
"title": challenge.title, - "description": challenge.description, - "short_description": challenge.short_description, - "terms_and_conditions": challenge.terms_and_conditions, - "submission_guidelines": challenge.submission_guidelines, - "evaluation_details": challenge.evaluation_details, - "start_date": challenge.start_date.isoformat() if challenge.start_date else None, - "end_date": challenge.end_date.isoformat() if challenge.end_date else None, - "domain": challenge.domain, - "list_tags": challenge.list_tags, - "has_prize": challenge.has_prize, - "has_sponsors": challenge.has_sponsors, - "published": challenge.published, - "submission_time_limit": challenge.submission_time_limit, - "is_registration_open": challenge.is_registration_open, - "enable_forum": challenge.enable_forum, - "forum_url": challenge.forum_url, - "leaderboard_description": challenge.leaderboard_description, - "anonymous_leaderboard": challenge.anonymous_leaderboard, - "manual_participant_approval": challenge.manual_participant_approval, - "is_disabled": challenge.is_disabled, - "approved_by_admin": challenge.approved_by_admin, - "uses_ec2_worker": challenge.uses_ec2_worker, - "ec2_storage": challenge.ec2_storage, - "ephemeral_storage": challenge.ephemeral_storage, - "featured": challenge.featured, - "allowed_email_domains": challenge.allowed_email_domains, - "blocked_email_domains": challenge.blocked_email_domains, - "banned_email_ids": challenge.banned_email_ids, - "remote_evaluation": challenge.remote_evaluation, - "queue": challenge.queue, - "sqs_retention_period": challenge.sqs_retention_period, - "is_docker_based": challenge.is_docker_based, - "is_static_dataset_code_upload": challenge.is_static_dataset_code_upload, - "max_docker_image_size": challenge.max_docker_image_size, - "max_concurrent_submission_evaluation": challenge.max_concurrent_submission_evaluation, - "use_host_credentials": challenge.use_host_credentials, - "use_host_sqs": challenge.use_host_sqs, - 
"allow_resuming_submissions": challenge.allow_resuming_submissions, - "allow_host_cancel_submissions": challenge.allow_host_cancel_submissions, - "allow_cancel_running_submissions": challenge.allow_cancel_running_submissions, - "allow_participants_resubmissions": challenge.allow_participants_resubmissions, - "cli_version": challenge.cli_version, - "workers": challenge.workers, - "task_def_arn": challenge.task_def_arn, - "slack_webhook_url": challenge.slack_webhook_url, - "worker_cpu_cores": challenge.worker_cpu_cores, - "worker_memory": challenge.worker_memory, - "inform_hosts": challenge.inform_hosts, - "vpc_cidr": challenge.vpc_cidr, - "subnet_1_cidr": challenge.subnet_1_cidr, - "subnet_2_cidr": challenge.subnet_2_cidr, - "worker_instance_type": challenge.worker_instance_type, - "worker_ami_type": challenge.worker_ami_type, - "worker_disk_size": challenge.worker_disk_size, - "max_worker_instance": challenge.max_worker_instance, - "min_worker_instance": challenge.min_worker_instance, - "desired_worker_instance": challenge.desired_worker_instance, - "cpu_only_jobs": challenge.cpu_only_jobs, - "job_cpu_cores": challenge.job_cpu_cores, - "job_memory": challenge.job_memory, - "worker_image_url": challenge.worker_image_url, - "evaluation_module_error": challenge.evaluation_module_error, - } - - # Update challenge data - github.update_json_if_changed("challenge.json", challenge_data) - - # Sync file fields if they exist - if challenge.evaluation_script: - try: - field_file = challenge.evaluation_script - field_file.open("rb") - try: - content_bytes = field_file.read() - finally: - field_file.close() - file_name = "evaluation_script.zip" - github.update_binary_if_changed( - file_name, - content_bytes, - create_message=f"Add {file_name} via EvalAI sync", - update_message_prefix="Update", - ) - except Exception as exc: - logger.error(f"Failed to sync evaluation_script for challenge {challenge.id}: {exc}") - - logger.info(f"Successfully synced challenge {challenge.id} to 
GitHub") - + # Challenge non-file field update + challenge_config_str = github.get_data_from_path( + "challenge_config.yaml" + ) + challenge_config_yaml = yaml.safe_load(challenge_config_str) + update_challenge_config = False + for field in challenge_non_file_fields: + # Ignoring commits when no update in field value + if challenge_config_yaml.get( + field + ) is not None and challenge_config_yaml[field] == getattr( + challenge, field + ): + continue + update_challenge_config = True + challenge_config_yaml[field] = getattr(challenge, field) + if update_challenge_config: + content_str = yaml.dump(challenge_config_yaml, sort_keys=False) + github.update_data_from_path("challenge_config.yaml", content_str) + + # Challenge file fields update + for field in challenge_file_fields: + if challenge_config_yaml.get(field) is None: + continue + field_path = challenge_config_yaml[field] + field_str = github.get_data_from_path(field_path) + if field_str is None or field_str == getattr(challenge, field): + continue + github.update_data_from_path(field_path, getattr(challenge, field)) except Exception as e: - logger.error(f"Error syncing challenge {challenge.id} to GitHub: {str(e)}") + logger.error("Github Sync unsuccessful due to {}".format(e)) -def sync_challenge_phase_to_github(challenge_phase): - """ - Sync challenge phase data to GitHub repository - """ +@app.task +def github_challenge_phase_sync(challenge_phase): + challenge_phase = deserialize_object(challenge_phase) challenge = challenge_phase.challenge - if not challenge.github_repository: - logger.warning("GitHub repository not configured for challenge") + github = GithubInterface( + GITHUB_REPOSITORY=getattr(challenge, "github_repository"), + GITHUB_AUTH_TOKEN=getattr(challenge, "github_token"), + ) + if not github.is_repository(): return - try: - repo = challenge.github_repository - repo_auth_token = _fetch_auth_token_from_repo(repo, candidate_token=challenge.github_token) - resolved_token = repo_auth_token or 
challenge.github_token - if not resolved_token: - logger.warning("Neither repo AUTH_TOKEN nor challenge.github_token is available; skipping phase sync") - return - - github = GithubInterface(resolved_token, repo) - - # Sync non-file fields - phase_data = { - "name": challenge_phase.name, - "description": challenge_phase.description, - "leaderboard_public": challenge_phase.leaderboard_public, - "start_date": challenge_phase.start_date.isoformat() if challenge_phase.start_date else None, - "end_date": challenge_phase.end_date.isoformat() if challenge_phase.end_date else None, - "is_public": challenge_phase.is_public, - "is_submission_public": challenge_phase.is_submission_public, - "annotations_uploaded_using_cli": challenge_phase.annotations_uploaded_using_cli, - "max_submissions_per_day": challenge_phase.max_submissions_per_day, - "max_submissions_per_month": challenge_phase.max_submissions_per_month, - "max_submissions": challenge_phase.max_submissions, - "max_concurrent_submissions_allowed": challenge_phase.max_concurrent_submissions_allowed, - "codename": challenge_phase.codename, - "allowed_email_ids": challenge_phase.allowed_email_ids, - "environment_image": challenge_phase.environment_image, - "allowed_submission_file_types": challenge_phase.allowed_submission_file_types, - "is_restricted_to_select_one_submission": challenge_phase.is_restricted_to_select_one_submission, - "submission_meta_attributes": challenge_phase.submission_meta_attributes, - "is_partial_submission_evaluation_enabled": challenge_phase.is_partial_submission_evaluation_enabled, - "config_id": challenge_phase.config_id, - "default_submission_meta_attributes": challenge_phase.default_submission_meta_attributes, - "disable_logs": challenge_phase.disable_logs, - } - - # Update phase data - phase_path = f"phases/{challenge_phase.codename}.json" - github.update_json_if_changed(phase_path, phase_data) - - # Sync file fields if they exist - if challenge_phase.test_annotation: - try: - field_file = 
challenge_phase.test_annotation - field_file.open("rb") - try: - content_bytes = field_file.read() - finally: - field_file.close() - file_name = f"annotations/{challenge_phase.codename}.json" - github.update_binary_if_changed( - file_name, - content_bytes, - create_message=f"Add {file_name} via EvalAI sync", - update_message_prefix="Update", + # Challenge phase non-file field update + challenge_phase_unique = "codename" + challenge_config_str = github.get_data_from_path( + "challenge_config.yaml" + ) + challenge_config_yaml = yaml.safe_load(challenge_config_str) + update_challenge_config = False + + for phase in challenge_config_yaml["challenge_phases"]: + if phase.get(challenge_phase_unique) != getattr( + challenge_phase, challenge_phase_unique + ): + continue + for field in challenge_phase_non_file_fields: + # Ignoring commits when no update in field value + if phase.get(field) is not None and phase[field] == getattr( + challenge_phase, field + ): + continue + update_challenge_config = True + phase[field] = getattr(challenge_phase, field) + break + if update_challenge_config: + content_str = yaml.dump(challenge_config_yaml, sort_keys=False) + github.update_data_from_path("challenge_config.yaml", content_str) + + # Challenge phase file fields update + for phase in challenge_config_yaml["challenge_phases"]: + if phase.get(challenge_phase_unique) != getattr( + challenge_phase, challenge_phase_unique + ): + continue + for field in challenge_phase_file_fields: + if phase.get(field) is None: + continue + field_path = phase[field] + field_str = github.get_data_from_path(field_path) + if field_str is None or field_str == getattr( + challenge_phase, field + ): + continue + github.update_data_from_path( + field_path, getattr(challenge_phase, field) ) - except Exception as exc: - logger.error(f"Failed to sync test_annotation for phase {challenge_phase.id}: {exc}") - - logger.info(f"Successfully synced challenge phase {challenge_phase.id} to GitHub") - + break except 
Exception as e: - logger.error(f"Error syncing challenge phase {challenge_phase.id} to GitHub: {str(e)}") \ No newline at end of file + logger.error( + "Github Sync Challenge Phase unsuccessful due to {}".format(e) + ) \ No newline at end of file From 45e637051d81214cc60d80631a229c03d4859163 Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Wed, 20 Aug 2025 12:22:17 +0530 Subject: [PATCH 03/20] Rename migration file --- .../migrations/0114_add_github_token_field.py | 18 ++++++++++++++++++ celerybeat.pid | 1 - 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 apps/challenges/migrations/0114_add_github_token_field.py delete mode 100644 celerybeat.pid diff --git a/apps/challenges/migrations/0114_add_github_token_field.py b/apps/challenges/migrations/0114_add_github_token_field.py new file mode 100644 index 0000000000..8376f6d642 --- /dev/null +++ b/apps/challenges/migrations/0114_add_github_token_field.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.20 on 2025-08-18 07:21 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('challenges', '0113_add_github_branch_field_and_unique_constraint'), + ] + + operations = [ + migrations.AddField( + model_name='challenge', + name='github_token', + field=models.CharField(blank=True, default='', max_length=200, null=True), + ), + ] diff --git a/celerybeat.pid b/celerybeat.pid deleted file mode 100644 index 45a4fb75db..0000000000 --- a/celerybeat.pid +++ /dev/null @@ -1 +0,0 @@ -8 From 61b0c24e6a58ca56b045a2389191cfe55f295c2a Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Sat, 23 Aug 2025 12:40:29 +0530 Subject: [PATCH 04/20] Add github_token to serializer fields and requests --- apps/challenges/serializers.py | 8 ++++++++ apps/challenges/views.py | 22 ++++++++++++++++------ celerybeat.pid | 1 + 3 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 celerybeat.pid diff --git a/apps/challenges/serializers.py b/apps/challenges/serializers.py 
index 4e933be0a6..c8393d47b4 100644 --- a/apps/challenges/serializers.py +++ b/apps/challenges/serializers.py @@ -34,6 +34,9 @@ def __init__(self, *args, **kwargs): if context and context.get("request").method != "GET": challenge_host_team = context.get("challenge_host_team") kwargs["data"]["creator"] = challenge_host_team.pk + github_token = context.get("github_token") + if github_token: + kwargs["data"]["github_token"] = github_token else: self.fields["creator"] = ChallengeHostTeamSerializer() @@ -96,6 +99,7 @@ class Meta: "sqs_retention_period", "github_repository", "github_branch", + "github_token", ) @@ -260,6 +264,9 @@ def __init__(self, *args, **kwargs): github_branch = context.get("github_branch") if github_branch: kwargs["data"]["github_branch"] = github_branch + github_token = context.get("github_token") + if github_token: + kwargs["data"]["github_token"] = github_token class Meta: model = Challenge @@ -299,6 +306,7 @@ class Meta: "cli_version", "github_repository", "github_branch", + "github_token", "vpc_cidr", "subnet_1_cidr", "subnet_2_cidr", diff --git a/apps/challenges/views.py b/apps/challenges/views.py index 97daf932cf..dfef103b85 100644 --- a/apps/challenges/views.py +++ b/apps/challenges/views.py @@ -205,6 +205,7 @@ def challenge_list(request, challenge_host_team_pk): context={ "challenge_host_team": challenge_host_team, "request": request, + "github_token": request.data.get("GITHUB_AUTH_TOKEN"), }, ) if serializer.is_valid(): @@ -256,6 +257,7 @@ def challenge_detail(request, challenge_host_team_pk, challenge_pk): context={ "challenge_host_team": challenge_host_team, "request": request, + "github_token": request.data.get("GITHUB_AUTH_TOKEN"), }, partial=True, ) @@ -271,6 +273,7 @@ def challenge_detail(request, challenge_host_team_pk, challenge_pk): context={ "challenge_host_team": challenge_host_team, "request": request, + "github_token": request.data.get("GITHUB_AUTH_TOKEN"), }, partial=True, ) @@ -286,6 +289,7 @@ def challenge_detail(request, 
challenge_host_team_pk, challenge_pk): context={ "challenge_host_team": challenge_host_team, "request": request, + "github_token": request.data.get("GITHUB_AUTH_TOKEN"), }, partial=True, ) @@ -301,6 +305,7 @@ def challenge_detail(request, challenge_host_team_pk, challenge_pk): context={ "challenge_host_team": challenge_host_team, "request": request, + "github_token": request.data.get("GITHUB_AUTH_TOKEN"), }, partial=True, ) @@ -311,18 +316,20 @@ def challenge_detail(request, challenge_host_team_pk, challenge_pk): context={ "challenge_host_team": challenge_host_team, "request": request, + "github_token": request.data.get("GITHUB_AUTH_TOKEN"), }, partial=True, ) else: serializer = ZipChallengeSerializer( challenge, - data=request.data, - context={ - "challenge_host_team": challenge_host_team, - "request": request, - }, - ) + data=request.data, + context={ + "challenge_host_team": challenge_host_team, + "request": request, + "github_token": request.data.get("GITHUB_AUTH_TOKEN"), + }, + ) if serializer.is_valid(): serializer.save() challenge = get_challenge_model(serializer.instance.pk) @@ -1667,6 +1674,7 @@ def create_challenge_using_zip_file(request, challenge_host_team_pk): "challenge_host_team": challenge_host_team, "image": challenge_image_file, "evaluation_script": challenge_evaluation_script_file, + "github_token": request.data.get("GITHUB_AUTH_TOKEN"), }, ) if serializer.is_valid(): @@ -3980,6 +3988,7 @@ def create_or_update_github_challenge(request, challenge_host_team_pk): "github_repository": request.data[ "GITHUB_REPOSITORY" ], + "github_token": request.data.get("GITHUB_AUTH_TOKEN"), "worker_image_url": worker_image_url, }, ) @@ -4283,6 +4292,7 @@ def create_or_update_github_challenge(request, challenge_host_team_pk): "evaluation_script": files[ "challenge_evaluation_script_file" ], + "github_token": request.data.get("GITHUB_AUTH_TOKEN"), "worker_image_url": worker_image_url, }, ) diff --git a/celerybeat.pid b/celerybeat.pid new file mode 100644 index 
0000000000..ec635144f6 --- /dev/null +++ b/celerybeat.pid @@ -0,0 +1 @@ +9 From c9db18aaadd2aab03bda2278e1f8834d8d142acf Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Mon, 25 Aug 2025 00:42:28 +0530 Subject: [PATCH 05/20] Update sync logic --- apps/challenges/github_interface.py | 338 +++++++++++++++++- apps/challenges/github_sync_config.py | 28 +- apps/challenges/github_utils.py | 184 +++++----- .../0115_add_last_github_sync_field.py | 18 + apps/challenges/models.py | 37 +- apps/challenges/urls.py | 3 +- apps/challenges/views.py | 4 + 7 files changed, 489 insertions(+), 123 deletions(-) create mode 100644 apps/challenges/migrations/0115_add_last_github_sync_field.py diff --git a/apps/challenges/github_interface.py b/apps/challenges/github_interface.py index e2c24c65f8..0ba06bb169 100644 --- a/apps/challenges/github_interface.py +++ b/apps/challenges/github_interface.py @@ -8,11 +8,26 @@ class GithubInterface: - def __init__(self, GITHUB_AUTH_TOKEN, GITHUB_REPOSITORY): + def __init__(self, GITHUB_REPOSITORY, GITHUB_BRANCH, GITHUB_AUTH_TOKEN): self.GITHUB_AUTH_TOKEN = GITHUB_AUTH_TOKEN self.GITHUB_REPOSITORY = GITHUB_REPOSITORY - self.BRANCH = "challenge" + self.BRANCH = GITHUB_BRANCH self.COMMIT_PREFIX = "evalai_bot: Update {}" + + def _serialize_field_value(self, field, value): + """ + Serialize field values appropriately, handling datetime fields to preserve naive format + """ + if value is None: + return None + + # Handle datetime fields to preserve naive format without timezone info + if field in ['start_date', 'end_date'] and hasattr(value, 'strftime'): + return value.strftime('%Y-%m-%d %H:%M:%S') + + return value + + def get_request_headers(self): headers = {"Authorization": "token {}".format(self.GITHUB_AUTH_TOKEN)} @@ -52,6 +67,20 @@ def get_content_from_path(self, path): url = URLS.get("contents").format(self.GITHUB_REPOSITORY, path) params = {"ref": self.BRANCH} response = self.make_request(url, "GET", params) + + # If branch doesn't exist, try to 
create it or use default branch + if response is None or (isinstance(response, dict) and response.get('message') == 'No commit found for the ref ' + self.BRANCH): + logger.warning(f"Branch '{self.BRANCH}' not found, trying to create it or use default branch") + # Try to get content from default branch (usually 'main' or 'master') + for default_branch in ['main', 'master']: + if default_branch != self.BRANCH: + logger.info(f"Trying default branch '{default_branch}'") + params = {"ref": default_branch} + response = self.make_request(url, "GET", params) + if response and not (isinstance(response, dict) and 'No commit found' in response.get('message', '')): + logger.info(f"Found content in default branch '{default_branch}'") + break + return response def get_data_from_path(self, path): @@ -73,12 +102,26 @@ def update_content_from_path(self, path, content): Ref: https://docs.github.com/en/rest/reference/repos#create-or-update-file-contents """ url = URLS.get("contents").format(self.GITHUB_REPOSITORY, path) - data = { - "message": self.COMMIT_PREFIX.format(path), - "branch": self.BRANCH, - "sha": self.get_content_from_path(path).get("sha"), - "content": content, - } + + # Get existing content to get SHA (required for updates) + existing_content = self.get_content_from_path(path) + + if existing_content and existing_content.get("sha"): + # File exists, update it + data = { + "message": self.COMMIT_PREFIX.format(path), + "branch": self.BRANCH, + "sha": existing_content.get("sha"), + "content": content, + } + else: + # File doesn't exist, create it + data = { + "message": self.COMMIT_PREFIX.format(path), + "branch": self.BRANCH, + "content": content, + } + response = self.make_request(url, "PUT", data=data) return response @@ -93,4 +136,281 @@ def update_data_from_path(self, path, data): def is_repository(self): url = URLS.get("repos").format(self.GITHUB_REPOSITORY) repo_response = self.make_request(url, "GET") - return True if repo_response else False \ No newline at end 
of file + return True if repo_response else False + + def update_challenge_config(self, challenge): + """ + Update challenge configuration in GitHub repository + Preserves existing structure and custom configuration while updating only EvalAI-managed fields + """ + try: + import yaml + from collections import OrderedDict + from challenges.github_sync_config import challenge_non_file_fields, challenge_file_fields + + # Get existing challenge config to preserve structure and custom fields + existing_config = self.get_data_from_path("challenge_config.yaml") + if existing_config: + try: + config_data = yaml.safe_load(existing_config) + if not isinstance(config_data, dict): + config_data = {} + except yaml.YAMLError: + logger.warning("Existing challenge_config.yaml is not valid YAML, starting fresh") + config_data = {} + else: + config_data = {} + + # Create ordered config with exact field order as specified + ordered_config = OrderedDict() + + # Pre-extract all field values once to avoid multiple hasattr/getattr calls + field_values = {} + challenge_fields = ['title', 'short_description', 'description', 'evaluation_details', 'terms_and_conditions', 'image', 'submission_guidelines', 'leaderboard_description', 'evaluation_script', 'remote_evaluation', 'start_date', 'end_date', 'published', 'tags'] + + for field in challenge_fields: + if field in config_data: + field_values[field] = config_data[field] + elif hasattr(challenge, field): + field_values[field] = getattr(challenge, field) + + # Add fields in the exact order specified with efficient processing + for field in challenge_fields: + if field in field_values: + value = field_values[field] + if value is None: + continue + + # Process field value based on type + if field in ['start_date', 'end_date'] and hasattr(value, 'strftime'): + ordered_config[field] = value.strftime('%Y-%m-%d %H:%M:%S') + elif field in ['description', 'evaluation_details', 'terms_and_conditions', 'submission_guidelines']: + # Extract content from 
HTML fields + if hasattr(value, 'read'): + try: + value.seek(0) + content = value.read().decode('utf-8') + ordered_config[field] = content + except: + ordered_config[field] = str(value) + else: + ordered_config[field] = str(value) + elif field in ['image', 'evaluation_script']: + # Handle file fields - extract filename/path + if hasattr(value, 'name'): + ordered_config[field] = value.name + else: + ordered_config[field] = str(value) + else: + # Handle other field types + if hasattr(value, 'pk'): # Django model instance + ordered_config[field] = value.pk + elif hasattr(value, 'id'): # Django model instance + ordered_config[field] = value.id + elif isinstance(value, (list, tuple)): # Handle lists/tuples + clean_list = [] + for item in value: + if hasattr(item, 'pk'): + clean_list.append(item.pk) + elif hasattr(item, 'id'): + clean_list.append(item.id) + else: + clean_list.append(item) + ordered_config[field] = clean_list + else: + ordered_config[field] = value + + # Sync additional sections in the correct order + try: + # Sync leaderboard information if available + if hasattr(challenge, 'leaderboard') and challenge.leaderboard: + ordered_config['leaderboard'] = challenge.leaderboard + elif 'leaderboard' in config_data: + ordered_config['leaderboard'] = config_data['leaderboard'] + + # Sync challenge phases if available + if hasattr(challenge, 'challenge_phases') and challenge.challenge_phases.exists(): + challenge_phases = [] + for phase in challenge.challenge_phases.all(): + phase_data = OrderedDict() + # Add phase fields in the exact order specified + phase_fields = ['id', 'name', 'description', 'leaderboard_public', 'is_public', 'challenge', 'is_active', 'max_concurrent_submissions_allowed', 'allowed_email_ids', 'disable_logs', 'is_submission_public', 'start_date', 'end_date', 'test_annotation_file', 'codename', 'max_submissions_per_day', 'max_submissions_per_month', 'max_submissions', 'default_submission_meta_attributes', 'submission_meta_attributes', 
'is_restricted_to_select_one_submission', 'is_partial_submission_evaluation_enabled', 'allowed_submission_file_types'] + + for field in phase_fields: + if hasattr(phase, field): + value = getattr(phase, field) + if field in ['start_date', 'end_date'] and hasattr(value, 'strftime'): + phase_data[field] = value.strftime('%Y-%m-%d %H:%M:%S') + elif field == 'description' and value: + # Extract the actual content from HTML fields + if hasattr(value, 'read'): + # It's a file-like object, read the content + try: + value.seek(0) + content = value.read().decode('utf-8') + phase_data[field] = content + except: + phase_data[field] = str(value) + else: + phase_data[field] = str(value) + elif value is not None: + # Handle Django model fields and related objects + if hasattr(value, 'pk'): # Django model instance + phase_data[field] = value.pk + elif hasattr(value, 'id'): # Django model instance + phase_data[field] = value.id + elif isinstance(value, (list, tuple)): # Handle lists/tuples + # Convert to list of simple values + clean_list = [] + for item in value: + if hasattr(item, 'pk'): + clean_list.append(item.pk) + elif hasattr(item, 'id'): + clean_list.append(item.id) + else: + clean_list.append(item) + phase_data[field] = clean_list + else: + phase_data[field] = value + + challenge_phases.append(phase_data) + ordered_config['challenge_phases'] = challenge_phases + elif 'challenge_phases' in config_data: + ordered_config['challenge_phases'] = config_data['challenge_phases'] + + # Sync dataset splits if available + if hasattr(challenge, 'dataset_splits') and challenge.dataset_splits.exists(): + dataset_splits = [] + for split in challenge.dataset_splits.all(): + split_data = OrderedDict([ + ('id', split.id), + ('name', split.name), + ('codename', split.codename), + ]) + dataset_splits.append(split_data) + ordered_config['dataset_splits'] = dataset_splits + elif 'dataset_splits' in config_data: + ordered_config['dataset_splits'] = config_data['dataset_splits'] + + # Sync 
challenge phase splits if available + if hasattr(challenge, 'challenge_phases') and challenge.challenge_phases.exists(): + phase_splits = [] + for phase in challenge.challenge_phases.all(): + if hasattr(phase, 'challenge_phase_splits') and phase.challenge_phase_splits.exists(): + for split in phase.challenge_phase_splits.all(): + split_data = OrderedDict([ + ('challenge_phase_id', split.challenge_phase.id), + ('leaderboard_id', split.leaderboard.id if split.leaderboard else None), + ('dataset_split_id', split.dataset_split.id if split.dataset_split else None), + ('visibility', split.visibility), + ('leaderboard_decimal_precision', split.leaderboard_decimal_precision), + ('is_leaderboard_order_descending', split.is_leaderboard_order_descending), + ('show_execution_time', split.show_execution_time), + ('show_leaderboard_by_latest_submission', split.show_leaderboard_by_latest_submission), + ]) + phase_splits.append(split_data) + ordered_config['challenge_phase_splits'] = phase_splits + elif 'challenge_phase_splits' in config_data: + ordered_config['challenge_phase_splits'] = config_data['challenge_phase_splits'] + + except Exception as e: + logger.warning(f"Error syncing additional sections: {str(e)}") + # Continue without failing the entire sync + + # Use the ordered config for YAML generation + config_data = ordered_config + + # Convert to YAML with custom representer to handle OrderedDict properly + def represent_ordereddict(dumper, data): + return dumper.represent_mapping('tag:yaml.org,2002:map', data.items()) + + yaml.add_representer(OrderedDict, represent_ordereddict) + yaml_content = yaml.dump(config_data, default_flow_style=False, allow_unicode=True) + + # Add the documentation header comment + header_comment = "# If you are not sure what all these fields mean, please refer our documentation here:\n# https://evalai.readthedocs.io/en/latest/configuration.html\n" + yaml_content = header_comment + yaml_content + + # Update the file in GitHub + success = 
self.update_data_from_path("challenge_config.yaml", yaml_content) + + if success: + logger.info(f"Successfully updated challenge config for challenge {challenge.id} while preserving existing structure") + return True + else: + logger.error(f"Failed to update challenge config for challenge {challenge.id}") + return False + + except Exception as e: + logger.error(f"Error updating challenge config: {str(e)}") + return False + + def update_challenge_phase_config(self, challenge_phase): + """ + Update challenge phase configuration in GitHub repository + Preserves existing structure and custom configuration while updating only phase information + """ + try: + import yaml + from challenges.github_sync_config import challenge_phase_non_file_fields, challenge_phase_file_fields + + # Get existing challenge config to preserve structure and custom fields + existing_config = self.get_data_from_path("challenge_config.yaml") + if existing_config: + try: + config_data = yaml.safe_load(existing_config) + if not isinstance(config_data, dict): + config_data = {} + except yaml.YAMLError: + logger.warning("Existing challenge_config.yaml is not valid YAML, starting fresh") + config_data = {} + else: + config_data = {} + + # Initialize challenge_phases section if it doesn't exist + if 'challenge_phases' not in config_data: + config_data['challenge_phases'] = [] + + # Find existing phase or create new one + phase_data = {} + for field in challenge_phase_non_file_fields: + if hasattr(challenge_phase, field): + value = getattr(challenge_phase, field) + serialized_value = self._serialize_field_value(field, value) + if serialized_value is not None: + phase_data[field] = serialized_value + + # Add file fields + for field in challenge_phase_file_fields: + if hasattr(challenge_phase, field): + value = getattr(challenge_phase, field) + if value: + phase_data[field] = str(value) + + # Update or add phase + phase_found = False + for i, phase in enumerate(config_data['challenge_phases']): + if 
phase.get('codename') == challenge_phase.codename: + config_data['challenge_phases'][i] = phase_data + phase_found = True + break + + if not phase_found: + config_data['challenge_phases'].append(phase_data) + + # Convert back to YAML + yaml_content = yaml.dump(config_data, default_flow_style=False, allow_unicode=True) + + # Update the file in GitHub + success = self.update_data_from_path("challenge_config.yaml", yaml_content) + + if success: + logger.info(f"Successfully updated challenge phase config for phase {challenge_phase.id} while preserving existing structure") + return True + else: + logger.error(f"Failed to update challenge phase config for phase {challenge_phase.id}") + return False + + except Exception as e: + logger.error(f"Error updating challenge phase config: {str(e)}") + return False \ No newline at end of file diff --git a/apps/challenges/github_sync_config.py b/apps/challenges/github_sync_config.py index a604ba79df..d1915c458e 100644 --- a/apps/challenges/github_sync_config.py +++ b/apps/challenges/github_sync_config.py @@ -1,8 +1,10 @@ # Fields from Challenge, ChallengePhase model to be considered for github_sync +# If you are not sure what all these fields mean, please refer our documentation here: +# https://evalai.readthedocs.io/en/latest/configuration.html challenge_non_file_fields = [ "title", - "short_description", + "short_description", "leaderboard_description", "remote_evaluation", "is_docker_based", @@ -10,6 +12,9 @@ "start_date", "end_date", "published", + "image", + "evaluation_script", + "tags", ] challenge_file_fields = [ @@ -20,18 +25,37 @@ ] challenge_phase_non_file_fields = [ + "id", "name", "leaderboard_public", "is_public", + "challenge", + "is_active", + "max_concurrent_submissions_allowed", + "allowed_email_ids", + "disable_logs", "is_submission_public", "start_date", "end_date", + "test_annotation_file", + "codename", "max_submissions_per_day", "max_submissions_per_month", "max_submissions", 
"is_restricted_to_select_one_submission", "is_partial_submission_evaluation_enabled", "allowed_submission_file_types", + "default_submission_meta_attributes", + "submission_meta_attributes", ] -challenge_phase_file_fields = ["description"] \ No newline at end of file +challenge_phase_file_fields = [ + "description", +] + +# Additional sections that should be synced +challenge_additional_sections = [ + "leaderboard", + "dataset_splits", + "challenge_phase_splits", +] \ No newline at end of file diff --git a/apps/challenges/github_utils.py b/apps/challenges/github_utils.py index c19cfa5e87..3e8531e285 100644 --- a/apps/challenges/github_utils.py +++ b/apps/challenges/github_utils.py @@ -1,119 +1,95 @@ import logging import yaml - -from base.utils import deserialize_object -from .github_sync_config import ( - challenge_non_file_fields, - challenge_file_fields, - challenge_phase_non_file_fields, - challenge_phase_file_fields, -) +from django.utils import timezone +from .models import Challenge, ChallengePhase from .github_interface import GithubInterface -from evalai.celery import app logger = logging.getLogger(__name__) -@app.task -def github_challenge_sync(challenge): - challenge = deserialize_object(challenge) - github = GithubInterface( - GITHUB_REPOSITORY=getattr(challenge, "github_repository"), - GITHUB_AUTH_TOKEN=getattr(challenge, "github_token"), - ) - if not github.is_repository(): - return +# Global set to track challenges currently being synced +_sync_in_progress = set() + +def github_challenge_sync(challenge_id): + """ + Simple sync from EvalAI to GitHub + This is the core function that keeps GitHub in sync with EvalAI + """ + # Prevent multiple simultaneous syncs for the same challenge + if challenge_id in _sync_in_progress: + logger.info(f"Challenge {challenge_id} sync already in progress, skipping") + return False + try: - # Challenge non-file field update - challenge_config_str = github.get_data_from_path( - "challenge_config.yaml" + # Mark this 
challenge as being synced + _sync_in_progress.add(challenge_id) + + challenge = Challenge.objects.get(id=challenge_id) + + if not challenge.github_repository or not challenge.github_token: + logger.warning(f"Challenge {challenge_id} missing GitHub configuration") + return False + + # Initialize GitHub interface + github_interface = GithubInterface( + challenge.github_repository, + challenge.github_branch or 'challenge', # Default to 'challenge' branch + challenge.github_token ) - challenge_config_yaml = yaml.safe_load(challenge_config_str) - update_challenge_config = False - for field in challenge_non_file_fields: - # Ignoring commits when no update in field value - if challenge_config_yaml.get( - field - ) is not None and challenge_config_yaml[field] == getattr( - challenge, field - ): - continue - update_challenge_config = True - challenge_config_yaml[field] = getattr(challenge, field) - if update_challenge_config: - content_str = yaml.dump(challenge_config_yaml, sort_keys=False) - github.update_data_from_path("challenge_config.yaml", content_str) - - # Challenge file fields update - for field in challenge_file_fields: - if challenge_config_yaml.get(field) is None: - continue - field_path = challenge_config_yaml[field] - field_str = github.get_data_from_path(field_path) - if field_str is None or field_str == getattr(challenge, field): - continue - github.update_data_from_path(field_path, getattr(challenge, field)) + + # Update challenge config in GitHub + success = github_interface.update_challenge_config(challenge) + + if success: + logger.info(f"Successfully synced challenge {challenge_id} to GitHub") + return True + else: + logger.error(f"Failed to sync challenge {challenge_id} to GitHub") + return False + + except Challenge.DoesNotExist: + logger.error(f"Challenge {challenge_id} not found") + return False except Exception as e: - logger.error("Github Sync unsuccessful due to {}".format(e)) + logger.error(f"Error syncing challenge {challenge_id} to GitHub: 
{str(e)}") + return False + finally: + # Always remove from in-progress set + _sync_in_progress.discard(challenge_id) -@app.task -def github_challenge_phase_sync(challenge_phase): - challenge_phase = deserialize_object(challenge_phase) - challenge = challenge_phase.challenge - github = GithubInterface( - GITHUB_REPOSITORY=getattr(challenge, "github_repository"), - GITHUB_AUTH_TOKEN=getattr(challenge, "github_token"), - ) - if not github.is_repository(): - return +def github_challenge_phase_sync(challenge_phase_id): + """ + Sync challenge phase from EvalAI to GitHub + """ try: - # Challenge phase non-file field update - challenge_phase_unique = "codename" - challenge_config_str = github.get_data_from_path( - "challenge_config.yaml" + challenge_phase = ChallengePhase.objects.get(id=challenge_phase_id) + challenge = challenge_phase.challenge + + if not challenge.github_repository or not challenge.github_token: + logger.warning(f"Challenge {challenge.id} missing GitHub configuration") + return False + + # Initialize GitHub interface + github_interface = GithubInterface( + challenge.github_repository, + challenge.github_branch or 'challenge', # Default to 'challenge' branch + challenge.github_token ) - challenge_config_yaml = yaml.safe_load(challenge_config_str) - update_challenge_config = False - - for phase in challenge_config_yaml["challenge_phases"]: - if phase.get(challenge_phase_unique) != getattr( - challenge_phase, challenge_phase_unique - ): - continue - for field in challenge_phase_non_file_fields: - # Ignoring commits when no update in field value - if phase.get(field) is not None and phase[field] == getattr( - challenge_phase, field - ): - continue - update_challenge_config = True - phase[field] = getattr(challenge_phase, field) - break - if update_challenge_config: - content_str = yaml.dump(challenge_config_yaml, sort_keys=False) - github.update_data_from_path("challenge_config.yaml", content_str) - - # Challenge phase file fields update - for phase in 
challenge_config_yaml["challenge_phases"]: - if phase.get(challenge_phase_unique) != getattr( - challenge_phase, challenge_phase_unique - ): - continue - for field in challenge_phase_file_fields: - if phase.get(field) is None: - continue - field_path = phase[field] - field_str = github.get_data_from_path(field_path) - if field_str is None or field_str == getattr( - challenge_phase, field - ): - continue - github.update_data_from_path( - field_path, getattr(challenge_phase, field) - ) - break + + # Update challenge phase config in GitHub + success = github_interface.update_challenge_phase_config(challenge_phase) + + if success: + logger.info(f"Successfully synced challenge phase {challenge_phase_id} to GitHub") + return True + else: + logger.error(f"Failed to sync challenge phase {challenge_phase_id} to GitHub") + return False + + except ChallengePhase.DoesNotExist: + logger.error(f"Challenge phase {challenge_phase_id} not found") + return False except Exception as e: - logger.error( - "Github Sync Challenge Phase unsuccessful due to {}".format(e) - ) \ No newline at end of file + logger.error(f"Error syncing challenge phase {challenge_phase_id} to GitHub: {str(e)}") + return False \ No newline at end of file diff --git a/apps/challenges/migrations/0115_add_last_github_sync_field.py b/apps/challenges/migrations/0115_add_last_github_sync_field.py new file mode 100644 index 0000000000..926ce6496b --- /dev/null +++ b/apps/challenges/migrations/0115_add_last_github_sync_field.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.20 on 2025-08-23 19:02 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('challenges', '0114_add_github_token_field'), + ] + + operations = [ + migrations.AddField( + model_name='challenge', + name='last_github_sync', + field=models.DateTimeField(blank=True, null=True), + ), + ] diff --git a/apps/challenges/models.py b/apps/challenges/models.py index ca832240fa..4f1da96880 100644 --- 
a/apps/challenges/models.py +++ b/apps/challenges/models.py @@ -197,6 +197,8 @@ def __init__(self, *args, **kwargs): github_token = models.CharField( max_length=200, null=True, blank=True, default="" ) + # Simple sync tracking + last_github_sync = models.DateTimeField(null=True, blank=True) # The number of vCPU for a Fargate worker for the challenge. Default value # is 0.25 vCPU. worker_cpu_cores = models.IntegerField(null=True, blank=True, default=512) @@ -315,22 +317,41 @@ def update_sqs_retention_period_for_challenge( if not created and is_model_field_changed(instance, field_name): serialized_obj = serializers.serialize("json", [instance]) aws.update_sqs_retention_period_task.delay(serialized_obj) - # Update challenge + # Update challenge - but prevent signal recursion curr = getattr(instance, "{}".format(field_name)) challenge = instance challenge._original_sqs_retention_period = curr - challenge.save() + # Use update() to avoid triggering signals again + Challenge.objects.filter(id=challenge.id).update(_original_sqs_retention_period=curr) @receiver(signals.post_save, sender="challenges.Challenge") def challenge_details_sync(sender, instance, created, **kwargs): """Sync challenge details to GitHub when challenge is updated""" + # Prevent recursive calls by checking if this is a signal-triggered save + if hasattr(instance, '_signal_save_in_progress'): + logger.info(f"Skipping GitHub sync for challenge {instance.id} - signal save already in progress") + return + + logger.info(f"Challenge signal triggered: created={created}, id={instance.id}, github_repo={instance.github_repository}, github_token={'YES' if instance.github_token else 'NO'}") if not created and instance.github_token and instance.github_repository: try: - from challenges.github_utils import sync_challenge_to_github - sync_challenge_to_github(instance) + # Mark that we're doing a signal-triggered operation + instance._signal_save_in_progress = True + + from challenges.github_utils import 
github_challenge_sync + logger.info(f"Starting GitHub sync for challenge {instance.id}") + github_challenge_sync(instance.id) + + # Clear the flag + delattr(instance, '_signal_save_in_progress') except Exception as e: logger.error(f"Error in challenge_details_sync: {str(e)}") + # Clear the flag even on error + if hasattr(instance, '_signal_save_in_progress'): + delattr(instance, '_signal_save_in_progress') + else: + logger.info(f"Skipping GitHub sync: created={created}, has_token={bool(instance.github_token)}, has_repo={bool(instance.github_repository)}") class DatasetSplit(TimeStampedModel): @@ -462,12 +483,16 @@ def save(self, *args, **kwargs): @receiver(signals.post_save, sender="challenges.ChallengePhase") def challenge_phase_details_sync(sender, instance, created, **kwargs): """Sync challenge phase details to GitHub when challenge phase is updated""" + logger.info(f"ChallengePhase signal triggered: created={created}, id={instance.id}, challenge_id={instance.challenge.id}") if not created and instance.challenge.github_token and instance.challenge.github_repository: try: - from challenges.github_utils import sync_challenge_phase_to_github - sync_challenge_phase_to_github(instance) + from challenges.github_utils import github_challenge_phase_sync + logger.info(f"Starting GitHub sync for challenge phase {instance.id}") + github_challenge_phase_sync(instance.id) except Exception as e: logger.error(f"Error in challenge_phase_details_sync: {str(e)}") + else: + logger.info(f"Skipping GitHub sync: created={created}, challenge_has_token={bool(instance.challenge.github_token)}, challenge_has_repo={bool(instance.challenge.github_repository)}") def post_save_connect(field_name, sender): diff --git a/apps/challenges/urls.py b/apps/challenges/urls.py index 3139d5381c..f375bcbbe0 100644 --- a/apps/challenges/urls.py +++ b/apps/challenges/urls.py @@ -14,7 +14,7 @@ name="get_challenge_detail", ), url( - r"^(?P<challenge_pk>[0-9]+)/participant_team/team_detail$", +
r"^challenge/(?P<challenge_pk>[0-9]+)/participant_team/team_detail$", views.participant_team_detail_for_challenge, name="participant_team_detail_for_challenge", ), @@ -38,7 +38,6 @@ views.challenge_phase_detail, name="get_challenge_phase_detail", ), - # `A-Za-z` because it accepts either of `all, future, past or present` in either case url( r"^challenge/(?P<challenge_time>[A-Za-z]+)/(?P<challenge_approved>[A-Za-z]+)/(?P<challenge_published>[A-Za-z]+)$", views.get_all_challenges, diff --git a/apps/challenges/views.py b/apps/challenges/views.py index dfef103b85..70d87c07d0 100644 --- a/apps/challenges/views.py +++ b/apps/challenges/views.py @@ -159,6 +159,7 @@ send_subscription_plans_email, ) + logger = logging.getLogger(__name__) try: @@ -211,6 +212,7 @@ def challenge_list(request, challenge_host_team_pk): if serializer.is_valid(): serializer.save() challenge = get_challenge_model(serializer.instance.pk) + serializer = ChallengeSerializer(challenge) response_data = serializer.data return Response(response_data, status=status.HTTP_201_CREATED) @@ -333,6 +335,7 @@ def challenge_detail(request, challenge_host_team_pk, challenge_pk): if serializer.is_valid(): serializer.save() challenge = get_challenge_model(serializer.instance.pk) + serializer = ChallengeSerializer(challenge) response_data = serializer.data return Response(response_data, status=status.HTTP_200_OK) @@ -1094,6 +1097,7 @@ def challenge_phase_detail(request, challenge_pk, pk): if serializer.is_valid(): serializer.save() challenge_phase = get_challenge_phase_model(serializer.instance.pk) + serializer = ChallengePhaseSerializer(challenge_phase) response_data = serializer.data return Response(response_data, status=status.HTTP_200_OK) From 6f59a0d6a73289c54c7426a1cb5ead178f464ab1 Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Tue, 26 Aug 2025 02:46:12 +0530 Subject: [PATCH 06/20] Implement middleware logic --- apps/challenges/github_interface.py | 417 +++++++++++----------------- apps/challenges/github_utils.py | 60 ++-- apps/challenges/models.py | 186 +++++++++++--
settings/common.py | 1 + 4 files changed, 374 insertions(+), 290 deletions(-) diff --git a/apps/challenges/github_interface.py b/apps/challenges/github_interface.py index 0ba06bb169..1181302095 100644 --- a/apps/challenges/github_interface.py +++ b/apps/challenges/github_interface.py @@ -1,6 +1,7 @@ import requests import base64 import logging +import yaml logger = logging.getLogger(__name__) @@ -11,23 +12,8 @@ class GithubInterface: def __init__(self, GITHUB_REPOSITORY, GITHUB_BRANCH, GITHUB_AUTH_TOKEN): self.GITHUB_AUTH_TOKEN = GITHUB_AUTH_TOKEN self.GITHUB_REPOSITORY = GITHUB_REPOSITORY - self.BRANCH = GITHUB_BRANCH + self.BRANCH = GITHUB_BRANCH or "challenge" self.COMMIT_PREFIX = "evalai_bot: Update {}" - - def _serialize_field_value(self, field, value): - """ - Serialize field values appropriately, handling datetime fields to preserve naive format - """ - if value is None: - return None - - # Handle datetime fields to preserve naive format without timezone info - if field in ['start_date', 'end_date'] and hasattr(value, 'strftime'): - return value.strftime('%Y-%m-%d %H:%M:%S') - - return value - - def get_request_headers(self): headers = {"Authorization": "token {}".format(self.GITHUB_AUTH_TOKEN)} @@ -67,20 +53,6 @@ def get_content_from_path(self, path): url = URLS.get("contents").format(self.GITHUB_REPOSITORY, path) params = {"ref": self.BRANCH} response = self.make_request(url, "GET", params) - - # If branch doesn't exist, try to create it or use default branch - if response is None or (isinstance(response, dict) and response.get('message') == 'No commit found for the ref ' + self.BRANCH): - logger.warning(f"Branch '{self.BRANCH}' not found, trying to create it or use default branch") - # Try to get content from default branch (usually 'main' or 'master') - for default_branch in ['main', 'master']: - if default_branch != self.BRANCH: - logger.info(f"Trying default branch '{default_branch}'") - params = {"ref": default_branch} - response = 
self.make_request(url, "GET", params) - if response and not (isinstance(response, dict) and 'No commit found' in response.get('message', '')): - logger.info(f"Found content in default branch '{default_branch}'") - break - return response def get_data_from_path(self, path): @@ -92,11 +64,11 @@ def get_data_from_path(self, path): string_data = None if content_response and content_response.get("content"): string_data = base64.b64decode(content_response["content"]).decode( - "utf-8" + "utf-8", errors="ignore" ) return string_data - def update_content_from_path(self, path, content): + def update_content_from_path(self, path, content, changed_field=None): """ Updates the file content, creates a commit in the repository at particular path Ref: https://docs.github.com/en/rest/reference/repos#create-or-update-file-contents @@ -106,10 +78,16 @@ def update_content_from_path(self, path, content): # Get existing content to get SHA (required for updates) existing_content = self.get_content_from_path(path) + # Create specific commit message + if changed_field: + commit_message = f"evalai_bot: Update {path} - changed field: {changed_field}" + else: + commit_message = self.COMMIT_PREFIX.format(path) + if existing_content and existing_content.get("sha"): # File exists, update it data = { - "message": self.COMMIT_PREFIX.format(path), + "message": commit_message, "branch": self.BRANCH, "sha": existing_content.get("sha"), "content": content, @@ -117,7 +95,7 @@ def update_content_from_path(self, path, content): else: # File doesn't exist, create it data = { - "message": self.COMMIT_PREFIX.format(path), + "message": commit_message, "branch": self.BRANCH, "content": content, } @@ -125,30 +103,49 @@ def update_content_from_path(self, path, content): response = self.make_request(url, "PUT", data=data) return response - def update_data_from_path(self, path, data): + def update_data_from_path(self, path, data, changed_field=None): """ Updates the file data to the data(string) provided, at 
particular path Call update_content_from_path with decoded base64 content """ content = base64.b64encode(bytes(data, "utf-8")).decode("utf-8") - return self.update_content_from_path(path, content) + return self.update_content_from_path(path, content, changed_field) def is_repository(self): url = URLS.get("repos").format(self.GITHUB_REPOSITORY) repo_response = self.make_request(url, "GET") return True if repo_response else False - def update_challenge_config(self, challenge): + def _read_text_from_file_field(self, value): + """Best-effort read of text from a Django FileField-like value.""" + if value is None: + return None + try: + # Django FieldFile has open/read + if hasattr(value, "open"): + value.open("rb") + data = value.read() + value.close() + elif hasattr(value, "read"): + data = value.read() + else: + data = str(value) + if isinstance(data, bytes): + try: + return data.decode("utf-8") + except Exception: + return data.decode("latin-1", errors="ignore") + return str(data) + except Exception: + return None + + def update_challenge_config(self, challenge, changed_field): """ Update challenge configuration in GitHub repository - Preserves existing structure and custom configuration while updating only EvalAI-managed fields + Only updates the specific field that changed """ try: - import yaml - from collections import OrderedDict - from challenges.github_sync_config import challenge_non_file_fields, challenge_file_fields - - # Get existing challenge config to preserve structure and custom fields + # Get existing challenge config to preserve structure existing_config = self.get_data_from_path("challenge_config.yaml") if existing_config: try: @@ -161,200 +158,57 @@ def update_challenge_config(self, challenge): else: config_data = {} - # Create ordered config with exact field order as specified - ordered_config = OrderedDict() - - # Pre-extract all field values once to avoid multiple hasattr/getattr calls - field_values = {} - challenge_fields = ['title', 
'short_description', 'description', 'evaluation_details', 'terms_and_conditions', 'image', 'submission_guidelines', 'leaderboard_description', 'evaluation_script', 'remote_evaluation', 'start_date', 'end_date', 'published', 'tags'] - - for field in challenge_fields: - if field in config_data: - field_values[field] = config_data[field] - elif hasattr(challenge, field): - field_values[field] = getattr(challenge, field) - - # Add fields in the exact order specified with efficient processing - for field in challenge_fields: - if field in field_values: - value = field_values[field] - if value is None: - continue - - # Process field value based on type - if field in ['start_date', 'end_date'] and hasattr(value, 'strftime'): - ordered_config[field] = value.strftime('%Y-%m-%d %H:%M:%S') - elif field in ['description', 'evaluation_details', 'terms_and_conditions', 'submission_guidelines']: - # Extract content from HTML fields - if hasattr(value, 'read'): - try: - value.seek(0) - content = value.read().decode('utf-8') - ordered_config[field] = content - except: - ordered_config[field] = str(value) - else: - ordered_config[field] = str(value) - elif field in ['image', 'evaluation_script']: - # Handle file fields - extract filename/path - if hasattr(value, 'name'): - ordered_config[field] = value.name - else: - ordered_config[field] = str(value) - else: - # Handle other field types - if hasattr(value, 'pk'): # Django model instance - ordered_config[field] = value.pk - elif hasattr(value, 'id'): # Django model instance - ordered_config[field] = value.id - elif isinstance(value, (list, tuple)): # Handle lists/tuples - clean_list = [] - for item in value: - if hasattr(item, 'pk'): - clean_list.append(item.pk) - elif hasattr(item, 'id'): - clean_list.append(item.id) - else: - clean_list.append(item) - ordered_config[field] = clean_list - else: - ordered_config[field] = value - - # Sync additional sections in the correct order - try: - # Sync leaderboard information if available - 
if hasattr(challenge, 'leaderboard') and challenge.leaderboard: - ordered_config['leaderboard'] = challenge.leaderboard - elif 'leaderboard' in config_data: - ordered_config['leaderboard'] = config_data['leaderboard'] - - # Sync challenge phases if available - if hasattr(challenge, 'challenge_phases') and challenge.challenge_phases.exists(): - challenge_phases = [] - for phase in challenge.challenge_phases.all(): - phase_data = OrderedDict() - # Add phase fields in the exact order specified - phase_fields = ['id', 'name', 'description', 'leaderboard_public', 'is_public', 'challenge', 'is_active', 'max_concurrent_submissions_allowed', 'allowed_email_ids', 'disable_logs', 'is_submission_public', 'start_date', 'end_date', 'test_annotation_file', 'codename', 'max_submissions_per_day', 'max_submissions_per_month', 'max_submissions', 'default_submission_meta_attributes', 'submission_meta_attributes', 'is_restricted_to_select_one_submission', 'is_partial_submission_evaluation_enabled', 'allowed_submission_file_types'] - - for field in phase_fields: - if hasattr(phase, field): - value = getattr(phase, field) - if field in ['start_date', 'end_date'] and hasattr(value, 'strftime'): - phase_data[field] = value.strftime('%Y-%m-%d %H:%M:%S') - elif field == 'description' and value: - # Extract the actual content from HTML fields - if hasattr(value, 'read'): - # It's a file-like object, read the content - try: - value.seek(0) - content = value.read().decode('utf-8') - phase_data[field] = content - except: - phase_data[field] = str(value) - else: - phase_data[field] = str(value) - elif value is not None: - # Handle Django model fields and related objects - if hasattr(value, 'pk'): # Django model instance - phase_data[field] = value.pk - elif hasattr(value, 'id'): # Django model instance - phase_data[field] = value.id - elif isinstance(value, (list, tuple)): # Handle lists/tuples - # Convert to list of simple values - clean_list = [] - for item in value: - if hasattr(item, 'pk'): 
- clean_list.append(item.pk) - elif hasattr(item, 'id'): - clean_list.append(item.id) - else: - clean_list.append(item) - phase_data[field] = clean_list - else: - phase_data[field] = value - - challenge_phases.append(phase_data) - ordered_config['challenge_phases'] = challenge_phases - elif 'challenge_phases' in config_data: - ordered_config['challenge_phases'] = config_data['challenge_phases'] - - # Sync dataset splits if available - if hasattr(challenge, 'dataset_splits') and challenge.dataset_splits.exists(): - dataset_splits = [] - for split in challenge.dataset_splits.all(): - split_data = OrderedDict([ - ('id', split.id), - ('name', split.name), - ('codename', split.codename), - ]) - dataset_splits.append(split_data) - ordered_config['dataset_splits'] = dataset_splits - elif 'dataset_splits' in config_data: - ordered_config['dataset_splits'] = config_data['dataset_splits'] - - # Sync challenge phase splits if available - if hasattr(challenge, 'challenge_phases') and challenge.challenge_phases.exists(): - phase_splits = [] - for phase in challenge.challenge_phases.all(): - if hasattr(phase, 'challenge_phase_splits') and phase.challenge_phase_splits.exists(): - for split in phase.challenge_phase_splits.all(): - split_data = OrderedDict([ - ('challenge_phase_id', split.challenge_phase.id), - ('leaderboard_id', split.leaderboard.id if split.leaderboard else None), - ('dataset_split_id', split.dataset_split.id if split.dataset_split else None), - ('visibility', split.visibility), - ('leaderboard_decimal_precision', split.leaderboard_decimal_precision), - ('is_leaderboard_order_descending', split.is_leaderboard_order_descending), - ('show_execution_time', split.show_execution_time), - ('show_leaderboard_by_latest_submission', split.show_leaderboard_by_latest_submission), - ]) - phase_splits.append(split_data) - ordered_config['challenge_phase_splits'] = phase_splits - elif 'challenge_phase_splits' in config_data: - ordered_config['challenge_phase_splits'] = 
config_data['challenge_phase_splits'] - - except Exception as e: - logger.warning(f"Error syncing additional sections: {str(e)}") - # Continue without failing the entire sync - - # Use the ordered config for YAML generation - config_data = ordered_config + # File fields logic (update the referenced file content) + if changed_field in {"evaluation_script"}: + file_path = config_data.get(changed_field) + if not file_path: + logger.warning(f"No path for '{changed_field}' in challenge_config.yaml; skipping file update") + return False + current_text = self.get_data_from_path(file_path) + new_text = self._read_text_from_file_field(getattr(challenge, changed_field, None)) + if new_text is None or new_text == current_text: + logger.info(f"No content change for file field '{changed_field}'") + return True + return True if self.update_data_from_path(file_path, new_text, changed_field) else False - # Convert to YAML with custom representer to handle OrderedDict properly - def represent_ordereddict(dumper, data): - return dumper.represent_mapping('tag:yaml.org,2002:map', data.items()) + # Non-file field: update YAML key with processed value + if hasattr(challenge, changed_field): + current_value = getattr(challenge, changed_field) + processed_value = self._process_field_value(changed_field, current_value) + if processed_value is None: + logger.warning(f"Could not process changed field: {changed_field}") + return False + # Skip if value unchanged to avoid empty commit + if config_data.get(changed_field) == processed_value: + logger.info(f"No change detected for '{changed_field}', skipping commit") + return True + config_data[changed_field] = processed_value + else: + logger.error(f"Field {changed_field} not found on challenge model") + return False - yaml.add_representer(OrderedDict, represent_ordereddict) + # Convert back to YAML yaml_content = yaml.dump(config_data, default_flow_style=False, allow_unicode=True) - # Add the documentation header comment + # Add documentation 
header header_comment = "# If you are not sure what all these fields mean, please refer our documentation here:\n# https://evalai.readthedocs.io/en/latest/configuration.html\n" yaml_content = header_comment + yaml_content # Update the file in GitHub - success = self.update_data_from_path("challenge_config.yaml", yaml_content) - - if success: - logger.info(f"Successfully updated challenge config for challenge {challenge.id} while preserving existing structure") - return True - else: - logger.error(f"Failed to update challenge config for challenge {challenge.id}") - return False + success = self.update_data_from_path("challenge_config.yaml", yaml_content, changed_field) + return True if success else False except Exception as e: logger.error(f"Error updating challenge config: {str(e)}") return False - def update_challenge_phase_config(self, challenge_phase): + def update_challenge_phase_config(self, challenge_phase, changed_field): """ Update challenge phase configuration in GitHub repository - Preserves existing structure and custom configuration while updating only phase information + Only updates the specific field that changed """ try: - import yaml - from challenges.github_sync_config import challenge_phase_non_file_fields, challenge_phase_file_fields - - # Get existing challenge config to preserve structure and custom fields + # Get existing challenge config to preserve structure existing_config = self.get_data_from_path("challenge_config.yaml") if existing_config: try: @@ -371,46 +225,105 @@ def update_challenge_phase_config(self, challenge_phase): if 'challenge_phases' not in config_data: config_data['challenge_phases'] = [] - # Find existing phase or create new one - phase_data = {} - for field in challenge_phase_non_file_fields: - if hasattr(challenge_phase, field): - value = getattr(challenge_phase, field) - serialized_value = self._serialize_field_value(field, value) - if serialized_value is not None: - phase_data[field] = serialized_value - - # Add file 
fields - for field in challenge_phase_file_fields: - if hasattr(challenge_phase, field): - value = getattr(challenge_phase, field) - if value: - phase_data[field] = str(value) - - # Update or add phase - phase_found = False + # Locate the target phase by codename + target_index = None for i, phase in enumerate(config_data['challenge_phases']): - if phase.get('codename') == challenge_phase.codename: - config_data['challenge_phases'][i] = phase_data - phase_found = True + if phase.get('codename') == getattr(challenge_phase, 'codename', None): + target_index = i break + if target_index is None: + logger.error(f"Phase with codename {getattr(challenge_phase, 'codename', None)} not found") + return False + + # File field mapping in YAML + yaml_key_map = {"test_annotation": "test_annotation_file"} + yaml_key = yaml_key_map.get(changed_field, changed_field) - if not phase_found: - config_data['challenge_phases'].append(phase_data) + # File field for phase: update referenced file content + if changed_field in {"test_annotation"}: + file_path = config_data['challenge_phases'][target_index].get(yaml_key) + if not file_path: + logger.warning(f"No path for '{yaml_key}' in challenge_config.yaml; skipping file update") + return False + current_text = self.get_data_from_path(file_path) + new_text = self._read_text_from_file_field(getattr(challenge_phase, changed_field, None)) + if new_text is None or new_text == current_text: + logger.info(f"No content change for file field '{changed_field}' in phase") + return True + return True if self.update_data_from_path(file_path, new_text, changed_field) else False + + # Non-file field: update YAML entry for that phase + if hasattr(challenge_phase, changed_field): + value = getattr(challenge_phase, changed_field) + processed_value = self._process_field_value(changed_field, value) + if processed_value is None: + logger.warning(f"Could not process changed phase field: {changed_field}") + return False + # Skip if unchanged + if 
config_data['challenge_phases'][target_index].get(yaml_key) == processed_value: + logger.info(f"No change detected for phase '{yaml_key}', skipping commit") + return True + config_data['challenge_phases'][target_index][yaml_key] = processed_value + else: + logger.error(f"Field {changed_field} not found on challenge_phase model") + return False # Convert back to YAML yaml_content = yaml.dump(config_data, default_flow_style=False, allow_unicode=True) # Update the file in GitHub - success = self.update_data_from_path("challenge_config.yaml", yaml_content) - - if success: - logger.info(f"Successfully updated challenge phase config for phase {challenge_phase.id} while preserving existing structure") - return True - else: - logger.error(f"Failed to update challenge phase config for phase {challenge_phase.id}") - return False + success = self.update_data_from_path("challenge_config.yaml", yaml_content, changed_field) + return True if success else False except Exception as e: logger.error(f"Error updating challenge phase config: {str(e)}") - return False \ No newline at end of file + return False + + def _process_field_value(self, field, value): + """ + Process a field value for GitHub sync + Returns the processed value or None if processing failed + """ + if value is None: + return None + + try: + if field in ['start_date', 'end_date'] and hasattr(value, 'strftime'): + return value.strftime('%Y-%m-%d %H:%M:%S') + elif field in ['description', 'evaluation_details', 'terms_and_conditions', 'submission_guidelines'] and value: + # Extract the actual content from HTML fields + if hasattr(value, 'read'): + try: + value.seek(0) + content = value.read().decode('utf-8') + return content + except Exception: + return str(value) + else: + return str(value) + elif field in ['image', 'evaluation_script'] and value: + # For YAML, store filename/path if available + if hasattr(value, 'name'): + return value.name + else: + return str(value) + elif isinstance(value, (list, tuple)): + 
clean_list = [] + for item in value: + if hasattr(item, 'pk'): + clean_list.append(item.pk) + elif hasattr(item, 'id'): + clean_list.append(item.id) + else: + clean_list.append(item) + return clean_list + else: + if hasattr(value, 'pk'): + return value.pk + elif hasattr(value, 'id'): + return value.id + else: + return value + except Exception as e: + logger.error(f"Error processing field {field}: {str(e)}") + return None \ No newline at end of file diff --git a/apps/challenges/github_utils.py b/apps/challenges/github_utils.py index 3e8531e285..7033c808b3 100644 --- a/apps/challenges/github_utils.py +++ b/apps/challenges/github_utils.py @@ -1,28 +1,28 @@ import logging -import yaml -from django.utils import timezone +import threading from .models import Challenge, ChallengePhase from .github_interface import GithubInterface logger = logging.getLogger(__name__) +# Thread-local storage to prevent recursive GitHub sync calls +_github_sync_context = threading.local() -# Global set to track challenges currently being synced -_sync_in_progress = set() -def github_challenge_sync(challenge_id): +def github_challenge_sync(challenge_id, changed_field): """ Simple sync from EvalAI to GitHub This is the core function that keeps GitHub in sync with EvalAI """ - # Prevent multiple simultaneous syncs for the same challenge - if challenge_id in _sync_in_progress: - logger.info(f"Challenge {challenge_id} sync already in progress, skipping") - return False - try: - # Mark this challenge as being synced - _sync_in_progress.add(challenge_id) + # Set flag to prevent recursive calls + _github_sync_context.skip_github_sync = True + _github_sync_context.change_source = 'github' + + # Ensure changed_field is a string + if not isinstance(changed_field, str): + logger.error(f"Invalid changed_field type: {type(changed_field)}, expected string") + return False challenge = Challenge.objects.get(id=challenge_id) @@ -37,8 +37,8 @@ def github_challenge_sync(challenge_id): challenge.github_token ) - 
# Update challenge config in GitHub - success = github_interface.update_challenge_config(challenge) + # Update challenge config in GitHub with the specific changed field + success = github_interface.update_challenge_config(challenge, changed_field) if success: logger.info(f"Successfully synced challenge {challenge_id} to GitHub") @@ -54,15 +54,27 @@ def github_challenge_sync(challenge_id): logger.error(f"Error syncing challenge {challenge_id} to GitHub: {str(e)}") return False finally: - # Always remove from in-progress set - _sync_in_progress.discard(challenge_id) + # Always clean up the flags + if hasattr(_github_sync_context, 'skip_github_sync'): + delattr(_github_sync_context, 'skip_github_sync') + if hasattr(_github_sync_context, 'change_source'): + delattr(_github_sync_context, 'change_source') -def github_challenge_phase_sync(challenge_phase_id): +def github_challenge_phase_sync(challenge_phase_id, changed_field): """ Sync challenge phase from EvalAI to GitHub """ try: + # Set flag to prevent recursive calls + _github_sync_context.skip_github_sync = True + _github_sync_context.change_source = 'github' + + # Ensure changed_field is a string + if not isinstance(changed_field, str): + logger.error(f"Invalid changed_field type: {type(changed_field)}, expected string") + return False + challenge_phase = ChallengePhase.objects.get(id=challenge_phase_id) challenge = challenge_phase.challenge @@ -77,8 +89,8 @@ def github_challenge_phase_sync(challenge_phase_id): challenge.github_token ) - # Update challenge phase config in GitHub - success = github_interface.update_challenge_phase_config(challenge_phase) + # Update challenge phase config in GitHub with the specific changed field + success = github_interface.update_challenge_phase_config(challenge_phase, changed_field) if success: logger.info(f"Successfully synced challenge phase {challenge_phase_id} to GitHub") @@ -91,5 +103,11 @@ def github_challenge_phase_sync(challenge_phase_id): logger.error(f"Challenge phase 
{challenge_phase_id} not found") return False except Exception as e: - logger.error(f"Error syncing challenge phase {challenge_phase_id} to GitHub: {str(e)}") - return False \ No newline at end of file + logger.error(f"Error syncing challenge phase {challenge_phase_id} to GitHub") + return False + finally: + # Always clean up the flags + if hasattr(_github_sync_context, 'skip_github_sync'): + delattr(_github_sync_context, 'skip_github_sync') + if hasattr(_github_sync_context, 'change_source'): + delattr(_github_sync_context, 'change_source') \ No newline at end of file diff --git a/apps/challenges/models.py b/apps/challenges/models.py index 4f1da96880..f8f38fa119 100644 --- a/apps/challenges/models.py +++ b/apps/challenges/models.py @@ -1,5 +1,9 @@ from __future__ import unicode_literals import logging +import threading +import json +from django.core.handlers.wsgi import WSGIRequest +from django.utils.deprecation import MiddlewareMixin from base.models import TimeStampedModel, model_field_name from base.utils import RandomFileName, get_slug, is_model_field_changed @@ -16,6 +20,97 @@ logger = logging.getLogger(__name__) +# Thread-local storage to prevent recursive GitHub sync calls +_github_sync_context = threading.local() + +# Request-level context to track GitHub sync operations +class GitHubSyncContext: + def __init__(self): + self.is_syncing = False + self.synced_challenges = set() + self.synced_phases = set() + + def mark_syncing(self, challenge_id=None, phase_id=None): + """Mark that we're currently syncing""" + self.is_syncing = True + if challenge_id: + self.synced_challenges.add(challenge_id) + if phase_id: + self.synced_phases.add(phase_id) + + def is_already_synced(self, challenge_id=None, phase_id=None): + """Check if we've already synced this challenge/phase in this request""" + if challenge_id and challenge_id in self.synced_challenges: + return True + if phase_id and phase_id in self.synced_phases: + return True + return False + +# Global context for 
the current request +_github_request_context = GitHubSyncContext() + +# Thread-local store for current request payload keys +_github_request_local = threading.local() + + +def reset_github_sync_context(): + """Reset the GitHub sync context for a new request""" + global _github_request_context + _github_request_context = GitHubSyncContext() + # also clear payload keys + if hasattr(_github_request_local, 'payload_keys'): + delattr(_github_request_local, 'payload_keys') + logger.info("GitHub sync context reset for new request") + + +class GitHubSyncMiddleware(MiddlewareMixin): + """Middleware to reset GitHub sync context on each request and capture payload keys""" + + def process_request(self, request): + """Reset context at the start of each request; capture payload keys for inference""" + reset_github_sync_context() + try: + keys = [] + if request.method in ("PATCH", "PUT", "POST"): + # Try JSON body first + if getattr(request, 'body', None): + try: + payload = json.loads(request.body.decode('utf-8')) + if isinstance(payload, dict): + keys = list(payload.keys()) + except Exception: + keys = [] + # Fallback to POST dict + if not keys and hasattr(request, 'POST'): + try: + keys = list(request.POST.keys()) + except Exception: + keys = [] + _github_request_local.payload_keys = keys + except Exception: + _github_request_local.payload_keys = [] + return None + + +def _infer_changed_field_from_request(model_instance): + """Infer a single changed field from the current request payload keys. + Returns a field name string or None. 
+ """ + try: + keys = getattr(_github_request_local, 'payload_keys', []) or [] + if not keys: + return None + # Prefer keys that actually exist on the model + for key in keys: + # Ignore meta and non-model keys + if key in {"id", "pk", "challenge", "phase", "created_at", "modified_at"}: + continue + if hasattr(model_instance, key): + return key + return None + except Exception: + return None + @receiver(pre_save, sender="challenges.Challenge") def save_challenge_slug(sender, instance, **kwargs): @@ -317,39 +412,60 @@ def update_sqs_retention_period_for_challenge( if not created and is_model_field_changed(instance, field_name): serialized_obj = serializers.serialize("json", [instance]) aws.update_sqs_retention_period_task.delay(serialized_obj) - # Update challenge - but prevent signal recursion + # Update challenge curr = getattr(instance, "{}".format(field_name)) challenge = instance challenge._original_sqs_retention_period = curr - # Use update() to avoid triggering signals again - Challenge.objects.filter(id=challenge.id).update(_original_sqs_retention_period=curr) + challenge.save() @receiver(signals.post_save, sender="challenges.Challenge") def challenge_details_sync(sender, instance, created, **kwargs): """Sync challenge details to GitHub when challenge is updated""" - # Prevent recursive calls by checking if this is a signal-triggered save - if hasattr(instance, '_signal_save_in_progress'): - logger.info(f"Skipping GitHub sync for challenge {instance.id} - signal save already in progress") + # Skip if this is a bot-triggered save to prevent recursive calls + if hasattr(_github_sync_context, 'skip_github_sync') and _github_sync_context.skip_github_sync: + logger.info(f"Skipping GitHub sync for challenge {instance.id} - recursive call prevented") return + # Skip if this is a GitHub-sourced change (not from UI) + if hasattr(_github_sync_context, 'change_source') and _github_sync_context.change_source == 'github': + logger.info(f"Skipping GitHub sync for 
challenge {instance.id} - change sourced from GitHub") + return + + # Skip if we've already synced this challenge in this request + if _github_request_context.is_already_synced(challenge_id=instance.id): + logger.info(f"Skipping GitHub sync for challenge {instance.id} - already synced in this request") + return + + # By default, allow UI changes to trigger GitHub sync logger.info(f"Challenge signal triggered: created={created}, id={instance.id}, github_repo={instance.github_repository}, github_token={'YES' if instance.github_token else 'NO'}") if not created and instance.github_token and instance.github_repository: try: - # Mark that we're doing a signal-triggered operation - instance._signal_save_in_progress = True - from challenges.github_utils import github_challenge_sync logger.info(f"Starting GitHub sync for challenge {instance.id}") - github_challenge_sync(instance.id) + _github_request_context.mark_syncing(challenge_id=instance.id) + + # Get the changed field from update_fields if available + changed_field = None + if kwargs.get('update_fields'): + # Django provides update_fields when using .save(update_fields=['field_name']) + changed_field = list(kwargs['update_fields'])[0] if kwargs['update_fields'] else None + logger.info(f"Detected changed field: {changed_field}") + # Infer from request payload if not provided + if not changed_field: + inferred = _infer_changed_field_from_request(instance) + if inferred: + changed_field = inferred + logger.info(f"Inferred changed field from request: {changed_field}") - # Clear the flag - delattr(instance, '_signal_save_in_progress') + # Require a specific changed field to proceed (single-field commit intent) + if not isinstance(changed_field, str) or not changed_field: + logger.info("No specific changed field detected; skipping GitHub sync for challenge") + return + + github_challenge_sync(instance.id, changed_field=changed_field) except Exception as e: logger.error(f"Error in challenge_details_sync: {str(e)}") - # 
Clear the flag even on error - if hasattr(instance, '_signal_save_in_progress'): - delattr(instance, '_signal_save_in_progress') else: logger.info(f"Skipping GitHub sync: created={created}, has_token={bool(instance.github_token)}, has_repo={bool(instance.github_repository)}") @@ -483,16 +599,52 @@ def save(self, *args, **kwargs): @receiver(signals.post_save, sender="challenges.ChallengePhase") def challenge_phase_details_sync(sender, instance, created, **kwargs): """Sync challenge phase details to GitHub when challenge phase is updated""" + # Skip if this is a bot-triggered save to prevent recursive calls + if hasattr(_github_sync_context, 'skip_github_sync') and _github_sync_context.skip_github_sync: + logger.info(f"Skipping GitHub sync for challenge phase {instance.id} - recursive call prevented") + return + + # Skip if this is a GitHub-sourced change (not from UI) + if hasattr(_github_sync_context, 'change_source') and _github_sync_context.change_source == 'github': + logger.info(f"Skipping GitHub sync for challenge phase {instance.id} - change sourced from GitHub") + return + + # Skip if we've already synced this phase in this request + if _github_request_context.is_already_synced(phase_id=instance.id): + logger.info(f"Skipping GitHub sync for challenge phase {instance.id} - already synced in this request") + return + + # By default, allow UI changes to trigger GitHub sync logger.info(f"ChallengePhase signal triggered: created={created}, id={instance.id}, challenge_id={instance.challenge.id}") if not created and instance.challenge.github_token and instance.challenge.github_repository: try: from challenges.github_utils import github_challenge_phase_sync logger.info(f"Starting GitHub sync for challenge phase {instance.id}") - github_challenge_phase_sync(instance.id) + _github_request_context.mark_syncing(phase_id=instance.id) + + # Get the changed field from update_fields if available + changed_field = None + if kwargs.get('update_fields'): + # Django provides 
update_fields when using .save(update_fields=['field_name']) + changed_field = list(kwargs['update_fields'])[0] if kwargs['update_fields'] else None + logger.info(f"Detected changed field: {changed_field}") + # Infer from request payload if not provided + if not changed_field: + inferred = _infer_changed_field_from_request(instance) + if inferred: + changed_field = inferred + logger.info(f"Inferred changed field from request: {changed_field}") + + # Require a specific changed field to proceed (single-field commit intent) + if not isinstance(changed_field, str) or not changed_field: + logger.info("No specific changed field detected; skipping GitHub sync for challenge phase") + return + + github_challenge_phase_sync(instance.id, changed_field=changed_field) except Exception as e: logger.error(f"Error in challenge_phase_details_sync: {str(e)}") else: - logger.info(f"Skipping GitHub sync: created={created}, challenge_has_token={bool(instance.challenge.github_token)}, challenge_has_repo={bool(instance.challenge.github_repository)}") + logger.info(f"Skipping GitHub sync: created={created}, challenge_has_token={bool(instance.challenge.github_token)}, challenge_has_repo={bool(instance.github_repository)}") def post_save_connect(field_name, sender): diff --git a/settings/common.py b/settings/common.py index 3aa982e58d..8aef2ab95a 100755 --- a/settings/common.py +++ b/settings/common.py @@ -86,6 +86,7 @@ "django.middleware.security.SecurityMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", "django.middleware.common.CommonMiddleware", + "challenges.models.GitHubSyncMiddleware", "django.middleware.csrf.CsrfViewMiddleware", "django.contrib.auth.middleware.AuthenticationMiddleware", "django.contrib.messages.middleware.MessageMiddleware", From 79771c463533d28fdd956ccf346533971e270347 Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Tue, 26 Aug 2025 02:50:24 +0530 Subject: [PATCH 07/20] Fix field order --- apps/challenges/github_interface.py | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/challenges/github_interface.py b/apps/challenges/github_interface.py index 1181302095..f5fdc34261 100644 --- a/apps/challenges/github_interface.py +++ b/apps/challenges/github_interface.py @@ -188,7 +188,7 @@ def update_challenge_config(self, challenge, changed_field): return False # Convert back to YAML - yaml_content = yaml.dump(config_data, default_flow_style=False, allow_unicode=True) + yaml_content = yaml.dump(config_data, default_flow_style=False, allow_unicode=True, sort_keys=False) # Add documentation header header_comment = "# If you are not sure what all these fields mean, please refer our documentation here:\n# https://evalai.readthedocs.io/en/latest/configuration.html\n" @@ -269,7 +269,7 @@ def update_challenge_phase_config(self, challenge_phase, changed_field): return False # Convert back to YAML - yaml_content = yaml.dump(config_data, default_flow_style=False, allow_unicode=True) + yaml_content = yaml.dump(config_data, default_flow_style=False, allow_unicode=True, sort_keys=False) # Update the file in GitHub success = self.update_data_from_path("challenge_config.yaml", yaml_content, changed_field) From 3d97a038f39f75b92096d4a6c6ecf2248cca1a9d Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Tue, 26 Aug 2025 02:54:32 +0530 Subject: [PATCH 08/20] Omit debug statements --- apps/challenges/github_interface.py | 4 ---- apps/challenges/github_utils.py | 2 -- apps/challenges/models.py | 24 +++++++++++------------- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/apps/challenges/github_interface.py b/apps/challenges/github_interface.py index f5fdc34261..a5009129c3 100644 --- a/apps/challenges/github_interface.py +++ b/apps/challenges/github_interface.py @@ -167,7 +167,6 @@ def update_challenge_config(self, challenge, changed_field): current_text = self.get_data_from_path(file_path) new_text = self._read_text_from_file_field(getattr(challenge, changed_field, None)) if new_text is None or 
new_text == current_text: - logger.info(f"No content change for file field '{changed_field}'") return True return True if self.update_data_from_path(file_path, new_text, changed_field) else False @@ -180,7 +179,6 @@ def update_challenge_config(self, challenge, changed_field): return False # Skip if value unchanged to avoid empty commit if config_data.get(changed_field) == processed_value: - logger.info(f"No change detected for '{changed_field}', skipping commit") return True config_data[changed_field] = processed_value else: @@ -248,7 +246,6 @@ def update_challenge_phase_config(self, challenge_phase, changed_field): current_text = self.get_data_from_path(file_path) new_text = self._read_text_from_file_field(getattr(challenge_phase, changed_field, None)) if new_text is None or new_text == current_text: - logger.info(f"No content change for file field '{changed_field}' in phase") return True return True if self.update_data_from_path(file_path, new_text, changed_field) else False @@ -261,7 +258,6 @@ def update_challenge_phase_config(self, challenge_phase, changed_field): return False # Skip if unchanged if config_data['challenge_phases'][target_index].get(yaml_key) == processed_value: - logger.info(f"No change detected for phase '{yaml_key}', skipping commit") return True config_data['challenge_phases'][target_index][yaml_key] = processed_value else: diff --git a/apps/challenges/github_utils.py b/apps/challenges/github_utils.py index 7033c808b3..effdb7029f 100644 --- a/apps/challenges/github_utils.py +++ b/apps/challenges/github_utils.py @@ -41,7 +41,6 @@ def github_challenge_sync(challenge_id, changed_field): success = github_interface.update_challenge_config(challenge, changed_field) if success: - logger.info(f"Successfully synced challenge {challenge_id} to GitHub") return True else: logger.error(f"Failed to sync challenge {challenge_id} to GitHub") @@ -93,7 +92,6 @@ def github_challenge_phase_sync(challenge_phase_id, changed_field): success = 
github_interface.update_challenge_phase_config(challenge_phase, changed_field) if success: - logger.info(f"Successfully synced challenge phase {challenge_phase_id} to GitHub") return True else: logger.error(f"Failed to sync challenge phase {challenge_phase_id} to GitHub") diff --git a/apps/challenges/models.py b/apps/challenges/models.py index f8f38fa119..0d9e76570e 100644 --- a/apps/challenges/models.py +++ b/apps/challenges/models.py @@ -60,7 +60,7 @@ def reset_github_sync_context(): # also clear payload keys if hasattr(_github_request_local, 'payload_keys'): delattr(_github_request_local, 'payload_keys') - logger.info("GitHub sync context reset for new request") + # reset per-request sync context class GitHubSyncMiddleware(MiddlewareMixin): @@ -438,11 +438,10 @@ def challenge_details_sync(sender, instance, created, **kwargs): return # By default, allow UI changes to trigger GitHub sync - logger.info(f"Challenge signal triggered: created={created}, id={instance.id}, github_repo={instance.github_repository}, github_token={'YES' if instance.github_token else 'NO'}") + # proceed only for updates with github configured if not created and instance.github_token and instance.github_repository: try: from challenges.github_utils import github_challenge_sync - logger.info(f"Starting GitHub sync for challenge {instance.id}") _github_request_context.mark_syncing(challenge_id=instance.id) # Get the changed field from update_fields if available @@ -450,24 +449,24 @@ def challenge_details_sync(sender, instance, created, **kwargs): if kwargs.get('update_fields'): # Django provides update_fields when using .save(update_fields=['field_name']) changed_field = list(kwargs['update_fields'])[0] if kwargs['update_fields'] else None - logger.info(f"Detected changed field: {changed_field}") + pass # Infer from request payload if not provided if not changed_field: inferred = _infer_changed_field_from_request(instance) if inferred: changed_field = inferred - logger.info(f"Inferred changed 
field from request: {changed_field}") + pass # Require a specific changed field to proceed (single-field commit intent) if not isinstance(changed_field, str) or not changed_field: - logger.info("No specific changed field detected; skipping GitHub sync for challenge") + # skip if we cannot determine a single changed field return github_challenge_sync(instance.id, changed_field=changed_field) except Exception as e: logger.error(f"Error in challenge_details_sync: {str(e)}") else: - logger.info(f"Skipping GitHub sync: created={created}, has_token={bool(instance.github_token)}, has_repo={bool(instance.github_repository)}") + pass class DatasetSplit(TimeStampedModel): @@ -615,11 +614,10 @@ def challenge_phase_details_sync(sender, instance, created, **kwargs): return # By default, allow UI changes to trigger GitHub sync - logger.info(f"ChallengePhase signal triggered: created={created}, id={instance.id}, challenge_id={instance.challenge.id}") + # proceed only for updates with github configured if not created and instance.challenge.github_token and instance.challenge.github_repository: try: from challenges.github_utils import github_challenge_phase_sync - logger.info(f"Starting GitHub sync for challenge phase {instance.id}") _github_request_context.mark_syncing(phase_id=instance.id) # Get the changed field from update_fields if available @@ -627,24 +625,24 @@ def challenge_phase_details_sync(sender, instance, created, **kwargs): if kwargs.get('update_fields'): # Django provides update_fields when using .save(update_fields=['field_name']) changed_field = list(kwargs['update_fields'])[0] if kwargs['update_fields'] else None - logger.info(f"Detected changed field: {changed_field}") + pass # Infer from request payload if not provided if not changed_field: inferred = _infer_changed_field_from_request(instance) if inferred: changed_field = inferred - logger.info(f"Inferred changed field from request: {changed_field}") + pass # Require a specific changed field to proceed 
(single-field commit intent) if not isinstance(changed_field, str) or not changed_field: - logger.info("No specific changed field detected; skipping GitHub sync for challenge phase") + # skip if we cannot determine a single changed field return github_challenge_phase_sync(instance.id, changed_field=changed_field) except Exception as e: logger.error(f"Error in challenge_phase_details_sync: {str(e)}") else: - logger.info(f"Skipping GitHub sync: created={created}, challenge_has_token={bool(instance.challenge.github_token)}, challenge_has_repo={bool(instance.github_repository)}") + pass def post_save_connect(field_name, sender): From 9c2b699cb83e81d50c8e70b5885e9c6983d837dd Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Tue, 26 Aug 2025 12:39:57 +0530 Subject: [PATCH 09/20] Fix github_branch migration file --- ...d_github_branch_field_and_unique_constraint.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/apps/challenges/migrations/0113_add_github_branch_field_and_unique_constraint.py b/apps/challenges/migrations/0113_add_github_branch_field_and_unique_constraint.py index 54d121849c..278a489c51 100644 --- a/apps/challenges/migrations/0113_add_github_branch_field_and_unique_constraint.py +++ b/apps/challenges/migrations/0113_add_github_branch_field_and_unique_constraint.py @@ -1,6 +1,6 @@ # Generated by Django 2.2.20 on 2025-07-09 15:00 -from django.db import migrations +from django.db import migrations, models class Migration(migrations.Migration): @@ -42,4 +42,17 @@ class Migration(migrations.Migration): "challenge_github_repo_branch_partial_idx;" ), ), + # Record the field in Django state without changing the database again + migrations.SeparateDatabaseAndState( + database_operations=[], + state_operations=[ + migrations.AddField( + model_name="challenge", + name="github_branch", + field=models.CharField( + max_length=200, null=True, blank=True, default="" + ), + ), + ], + ), ] From 650d823bdf2350f21d751b4e6d0b2dcd449d8bc0 Mon Sep 
17 00:00:00 2001 From: Zahed-Riyaz Date: Tue, 26 Aug 2025 12:42:52 +0530 Subject: [PATCH 10/20] Pass linting checks --- apps/challenges/github_interface.py | 193 ++++++++++++------ apps/challenges/github_sync_config.py | 6 +- apps/challenges/github_utils.py | 97 +++++---- .../migrations/0114_add_github_token_field.py | 10 +- .../0115_add_last_github_sync_field.py | 6 +- apps/challenges/models.py | 132 ++++++++---- apps/challenges/views.py | 25 +-- tests/unit/challenges/test_github_utils.py | 48 +++-- 8 files changed, 337 insertions(+), 180 deletions(-) diff --git a/apps/challenges/github_interface.py b/apps/challenges/github_interface.py index a5009129c3..d8f720b05c 100644 --- a/apps/challenges/github_interface.py +++ b/apps/challenges/github_interface.py @@ -1,6 +1,7 @@ -import requests import base64 import logging + +import requests import yaml logger = logging.getLogger(__name__) @@ -74,16 +75,18 @@ def update_content_from_path(self, path, content, changed_field=None): Ref: https://docs.github.com/en/rest/reference/repos#create-or-update-file-contents """ url = URLS.get("contents").format(self.GITHUB_REPOSITORY, path) - + # Get existing content to get SHA (required for updates) existing_content = self.get_content_from_path(path) - + # Create specific commit message if changed_field: - commit_message = f"evalai_bot: Update {path} - changed field: {changed_field}" + commit_message = ( + f"evalai_bot: Update {path} - changed field: {changed_field}" + ) else: commit_message = self.COMMIT_PREFIX.format(path) - + if existing_content and existing_content.get("sha"): # File exists, update it data = { @@ -99,7 +102,7 @@ def update_content_from_path(self, path, content, changed_field=None): "branch": self.BRANCH, "content": content, } - + response = self.make_request(url, "PUT", data=data) return response @@ -153,49 +156,74 @@ def update_challenge_config(self, challenge, changed_field): if not isinstance(config_data, dict): config_data = {} except yaml.YAMLError: - 
logger.warning("Existing challenge_config.yaml is not valid YAML, starting fresh") + logger.warning( + "Existing challenge_config.yaml is not valid YAML, starting fresh" + ) config_data = {} else: config_data = {} - + # File fields logic (update the referenced file content) if changed_field in {"evaluation_script"}: file_path = config_data.get(changed_field) if not file_path: - logger.warning(f"No path for '{changed_field}' in challenge_config.yaml; skipping file update") + logger.warning( + f"No path for '{changed_field}' in challenge_config.yaml; skipping file update" + ) return False current_text = self.get_data_from_path(file_path) - new_text = self._read_text_from_file_field(getattr(challenge, changed_field, None)) + new_text = self._read_text_from_file_field( + getattr(challenge, changed_field, None) + ) if new_text is None or new_text == current_text: return True - return True if self.update_data_from_path(file_path, new_text, changed_field) else False - + return ( + True + if self.update_data_from_path( + file_path, new_text, changed_field + ) + else False + ) + # Non-file field: update YAML key with processed value if hasattr(challenge, changed_field): current_value = getattr(challenge, changed_field) - processed_value = self._process_field_value(changed_field, current_value) + processed_value = self._process_field_value( + changed_field, current_value + ) if processed_value is None: - logger.warning(f"Could not process changed field: {changed_field}") + logger.warning( + f"Could not process changed field: {changed_field}" + ) return False # Skip if value unchanged to avoid empty commit if config_data.get(changed_field) == processed_value: return True config_data[changed_field] = processed_value else: - logger.error(f"Field {changed_field} not found on challenge model") + logger.error( + f"Field {changed_field} not found on challenge model" + ) return False - + # Convert back to YAML - yaml_content = yaml.dump(config_data, default_flow_style=False, 
allow_unicode=True, sort_keys=False) - + yaml_content = yaml.dump( + config_data, + default_flow_style=False, + allow_unicode=True, + sort_keys=False, + ) + # Add documentation header header_comment = "# If you are not sure what all these fields mean, please refer our documentation here:\n# https://evalai.readthedocs.io/en/latest/configuration.html\n" yaml_content = header_comment + yaml_content - + # Update the file in GitHub - success = self.update_data_from_path("challenge_config.yaml", yaml_content, changed_field) + success = self.update_data_from_path( + "challenge_config.yaml", yaml_content, changed_field + ) return True if success else False - + except Exception as e: logger.error(f"Error updating challenge config: {str(e)}") return False @@ -214,63 +242,99 @@ def update_challenge_phase_config(self, challenge_phase, changed_field): if not isinstance(config_data, dict): config_data = {} except yaml.YAMLError: - logger.warning("Existing challenge_config.yaml is not valid YAML, starting fresh") + logger.warning( + "Existing challenge_config.yaml is not valid YAML, starting fresh" + ) config_data = {} else: config_data = {} - + # Initialize challenge_phases section if it doesn't exist - if 'challenge_phases' not in config_data: - config_data['challenge_phases'] = [] - + if "challenge_phases" not in config_data: + config_data["challenge_phases"] = [] + # Locate the target phase by codename target_index = None - for i, phase in enumerate(config_data['challenge_phases']): - if phase.get('codename') == getattr(challenge_phase, 'codename', None): + for i, phase in enumerate(config_data["challenge_phases"]): + if phase.get("codename") == getattr( + challenge_phase, "codename", None + ): target_index = i break if target_index is None: - logger.error(f"Phase with codename {getattr(challenge_phase, 'codename', None)} not found") + logger.error( + f"Phase with codename {getattr(challenge_phase, 'codename', None)} not found" + ) return False - + # File field mapping in 
YAML yaml_key_map = {"test_annotation": "test_annotation_file"} yaml_key = yaml_key_map.get(changed_field, changed_field) - + # File field for phase: update referenced file content if changed_field in {"test_annotation"}: - file_path = config_data['challenge_phases'][target_index].get(yaml_key) + file_path = config_data["challenge_phases"][target_index].get( + yaml_key + ) if not file_path: - logger.warning(f"No path for '{yaml_key}' in challenge_config.yaml; skipping file update") + logger.warning( + f"No path for '{yaml_key}' in challenge_config.yaml; skipping file update" + ) return False current_text = self.get_data_from_path(file_path) - new_text = self._read_text_from_file_field(getattr(challenge_phase, changed_field, None)) + new_text = self._read_text_from_file_field( + getattr(challenge_phase, changed_field, None) + ) if new_text is None or new_text == current_text: return True - return True if self.update_data_from_path(file_path, new_text, changed_field) else False - + return ( + True + if self.update_data_from_path( + file_path, new_text, changed_field + ) + else False + ) + # Non-file field: update YAML entry for that phase if hasattr(challenge_phase, changed_field): value = getattr(challenge_phase, changed_field) - processed_value = self._process_field_value(changed_field, value) + processed_value = self._process_field_value( + changed_field, value + ) if processed_value is None: - logger.warning(f"Could not process changed phase field: {changed_field}") + logger.warning( + f"Could not process changed phase field: {changed_field}" + ) return False # Skip if unchanged - if config_data['challenge_phases'][target_index].get(yaml_key) == processed_value: + if ( + config_data["challenge_phases"][target_index].get(yaml_key) + == processed_value + ): return True - config_data['challenge_phases'][target_index][yaml_key] = processed_value + config_data["challenge_phases"][target_index][ + yaml_key + ] = processed_value else: - logger.error(f"Field 
{changed_field} not found on challenge_phase model") + logger.error( + f"Field {changed_field} not found on challenge_phase model" + ) return False - + # Convert back to YAML - yaml_content = yaml.dump(config_data, default_flow_style=False, allow_unicode=True, sort_keys=False) - + yaml_content = yaml.dump( + config_data, + default_flow_style=False, + allow_unicode=True, + sort_keys=False, + ) + # Update the file in GitHub - success = self.update_data_from_path("challenge_config.yaml", yaml_content, changed_field) + success = self.update_data_from_path( + "challenge_config.yaml", yaml_content, changed_field + ) return True if success else False - + except Exception as e: logger.error(f"Error updating challenge phase config: {str(e)}") return False @@ -282,44 +346,55 @@ def _process_field_value(self, field, value): """ if value is None: return None - + try: - if field in ['start_date', 'end_date'] and hasattr(value, 'strftime'): - return value.strftime('%Y-%m-%d %H:%M:%S') - elif field in ['description', 'evaluation_details', 'terms_and_conditions', 'submission_guidelines'] and value: + if field in ["start_date", "end_date"] and hasattr( + value, "strftime" + ): + return value.strftime("%Y-%m-%d %H:%M:%S") + elif ( + field + in [ + "description", + "evaluation_details", + "terms_and_conditions", + "submission_guidelines", + ] + and value + ): # Extract the actual content from HTML fields - if hasattr(value, 'read'): + if hasattr(value, "read"): try: value.seek(0) - content = value.read().decode('utf-8') + content = value.read().decode("utf-8") return content except Exception: return str(value) else: return str(value) - elif field in ['image', 'evaluation_script'] and value: + elif field in ["image", "evaluation_script"] and value: # For YAML, store filename/path if available - if hasattr(value, 'name'): + if hasattr(value, "name"): return value.name else: return str(value) elif isinstance(value, (list, tuple)): clean_list = [] for item in value: - if hasattr(item, 
'pk'): + if hasattr(item, "pk"): clean_list.append(item.pk) - elif hasattr(item, 'id'): + elif hasattr(item, "id"): clean_list.append(item.id) else: clean_list.append(item) return clean_list else: - if hasattr(value, 'pk'): + if hasattr(value, "pk"): return value.pk - elif hasattr(value, 'id'): + elif hasattr(value, "id"): return value.id else: return value except Exception as e: logger.error(f"Error processing field {field}: {str(e)}") - return None \ No newline at end of file + return None diff --git a/apps/challenges/github_sync_config.py b/apps/challenges/github_sync_config.py index d1915c458e..8c8995833e 100644 --- a/apps/challenges/github_sync_config.py +++ b/apps/challenges/github_sync_config.py @@ -4,7 +4,7 @@ challenge_non_file_fields = [ "title", - "short_description", + "short_description", "leaderboard_description", "remote_evaluation", "is_docker_based", @@ -56,6 +56,6 @@ # Additional sections that should be synced challenge_additional_sections = [ "leaderboard", - "dataset_splits", + "dataset_splits", "challenge_phase_splits", -] \ No newline at end of file +] diff --git a/apps/challenges/github_utils.py b/apps/challenges/github_utils.py index effdb7029f..e84d2ca2b2 100644 --- a/apps/challenges/github_utils.py +++ b/apps/challenges/github_utils.py @@ -1,7 +1,8 @@ import logging import threading -from .models import Challenge, ChallengePhase + from .github_interface import GithubInterface +from .models import Challenge, ChallengePhase logger = logging.getLogger(__name__) @@ -17,47 +18,56 @@ def github_challenge_sync(challenge_id, changed_field): try: # Set flag to prevent recursive calls _github_sync_context.skip_github_sync = True - _github_sync_context.change_source = 'github' - + _github_sync_context.change_source = "github" + # Ensure changed_field is a string if not isinstance(changed_field, str): - logger.error(f"Invalid changed_field type: {type(changed_field)}, expected string") + logger.error( + f"Invalid changed_field type: 
{type(changed_field)}, expected string" + ) return False - + challenge = Challenge.objects.get(id=challenge_id) - + if not challenge.github_repository or not challenge.github_token: - logger.warning(f"Challenge {challenge_id} missing GitHub configuration") + logger.warning( + f"Challenge {challenge_id} missing GitHub configuration" + ) return False - + # Initialize GitHub interface github_interface = GithubInterface( challenge.github_repository, - challenge.github_branch or 'challenge', # Default to 'challenge' branch - challenge.github_token + challenge.github_branch + or "challenge", # Default to 'challenge' branch + challenge.github_token, ) - + # Update challenge config in GitHub with the specific changed field - success = github_interface.update_challenge_config(challenge, changed_field) - + success = github_interface.update_challenge_config( + challenge, changed_field + ) + if success: return True else: logger.error(f"Failed to sync challenge {challenge_id} to GitHub") return False - + except Challenge.DoesNotExist: logger.error(f"Challenge {challenge_id} not found") return False except Exception as e: - logger.error(f"Error syncing challenge {challenge_id} to GitHub: {str(e)}") + logger.error( + f"Error syncing challenge {challenge_id} to GitHub: {str(e)}" + ) return False finally: # Always clean up the flags - if hasattr(_github_sync_context, 'skip_github_sync'): - delattr(_github_sync_context, 'skip_github_sync') - if hasattr(_github_sync_context, 'change_source'): - delattr(_github_sync_context, 'change_source') + if hasattr(_github_sync_context, "skip_github_sync"): + delattr(_github_sync_context, "skip_github_sync") + if hasattr(_github_sync_context, "change_source"): + delattr(_github_sync_context, "change_source") def github_challenge_phase_sync(challenge_phase_id, changed_field): @@ -67,45 +77,56 @@ def github_challenge_phase_sync(challenge_phase_id, changed_field): try: # Set flag to prevent recursive calls _github_sync_context.skip_github_sync = 
True - _github_sync_context.change_source = 'github' - + _github_sync_context.change_source = "github" + # Ensure changed_field is a string if not isinstance(changed_field, str): - logger.error(f"Invalid changed_field type: {type(changed_field)}, expected string") + logger.error( + f"Invalid changed_field type: {type(changed_field)}, expected string" + ) return False - + challenge_phase = ChallengePhase.objects.get(id=challenge_phase_id) challenge = challenge_phase.challenge - + if not challenge.github_repository or not challenge.github_token: - logger.warning(f"Challenge {challenge.id} missing GitHub configuration") + logger.warning( + f"Challenge {challenge.id} missing GitHub configuration" + ) return False - + # Initialize GitHub interface github_interface = GithubInterface( challenge.github_repository, - challenge.github_branch or 'challenge', # Default to 'challenge' branch - challenge.github_token + challenge.github_branch + or "challenge", # Default to 'challenge' branch + challenge.github_token, ) - + # Update challenge phase config in GitHub with the specific changed field - success = github_interface.update_challenge_phase_config(challenge_phase, changed_field) - + success = github_interface.update_challenge_phase_config( + challenge_phase, changed_field + ) + if success: return True else: - logger.error(f"Failed to sync challenge phase {challenge_phase_id} to GitHub") + logger.error( + f"Failed to sync challenge phase {challenge_phase_id} to GitHub" + ) return False - + except ChallengePhase.DoesNotExist: logger.error(f"Challenge phase {challenge_phase_id} not found") return False except Exception as e: - logger.error(f"Error syncing challenge phase {challenge_phase_id} to GitHub") + logger.error( + f"Error syncing challenge phase {challenge_phase_id} to GitHub" + ) return False finally: # Always clean up the flags - if hasattr(_github_sync_context, 'skip_github_sync'): - delattr(_github_sync_context, 'skip_github_sync') - if 
hasattr(_github_sync_context, 'change_source'): - delattr(_github_sync_context, 'change_source') \ No newline at end of file + if hasattr(_github_sync_context, "skip_github_sync"): + delattr(_github_sync_context, "skip_github_sync") + if hasattr(_github_sync_context, "change_source"): + delattr(_github_sync_context, "change_source") diff --git a/apps/challenges/migrations/0114_add_github_token_field.py b/apps/challenges/migrations/0114_add_github_token_field.py index 8376f6d642..489724c232 100644 --- a/apps/challenges/migrations/0114_add_github_token_field.py +++ b/apps/challenges/migrations/0114_add_github_token_field.py @@ -6,13 +6,15 @@ class Migration(migrations.Migration): dependencies = [ - ('challenges', '0113_add_github_branch_field_and_unique_constraint'), + ("challenges", "0113_add_github_branch_field_and_unique_constraint"), ] operations = [ migrations.AddField( - model_name='challenge', - name='github_token', - field=models.CharField(blank=True, default='', max_length=200, null=True), + model_name="challenge", + name="github_token", + field=models.CharField( + blank=True, default="", max_length=200, null=True + ), ), ] diff --git a/apps/challenges/migrations/0115_add_last_github_sync_field.py b/apps/challenges/migrations/0115_add_last_github_sync_field.py index 926ce6496b..a831352b44 100644 --- a/apps/challenges/migrations/0115_add_last_github_sync_field.py +++ b/apps/challenges/migrations/0115_add_last_github_sync_field.py @@ -6,13 +6,13 @@ class Migration(migrations.Migration): dependencies = [ - ('challenges', '0114_add_github_token_field'), + ("challenges", "0114_add_github_token_field"), ] operations = [ migrations.AddField( - model_name='challenge', - name='last_github_sync', + model_name="challenge", + name="last_github_sync", field=models.DateTimeField(blank=True, null=True), ), ] diff --git a/apps/challenges/models.py b/apps/challenges/models.py index 0d9e76570e..d353f72c07 100644 --- a/apps/challenges/models.py +++ b/apps/challenges/models.py 
@@ -1,20 +1,21 @@ from __future__ import unicode_literals + +import json import logging import threading -import json -from django.core.handlers.wsgi import WSGIRequest -from django.utils.deprecation import MiddlewareMixin from base.models import TimeStampedModel, model_field_name from base.utils import RandomFileName, get_slug, is_model_field_changed from django.contrib.auth.models import User from django.contrib.postgres.fields import ArrayField, JSONField from django.core import serializers +from django.core.handlers.wsgi import WSGIRequest from django.db import models from django.db.models import signals from django.db.models.signals import pre_save from django.dispatch import receiver from django.utils import timezone +from django.utils.deprecation import MiddlewareMixin from hosts.models import ChallengeHost from participants.models import ParticipantTeam @@ -23,13 +24,14 @@ # Thread-local storage to prevent recursive GitHub sync calls _github_sync_context = threading.local() + # Request-level context to track GitHub sync operations class GitHubSyncContext: def __init__(self): self.is_syncing = False self.synced_challenges = set() self.synced_phases = set() - + def mark_syncing(self, challenge_id=None, phase_id=None): """Mark that we're currently syncing""" self.is_syncing = True @@ -37,7 +39,7 @@ def mark_syncing(self, challenge_id=None, phase_id=None): self.synced_challenges.add(challenge_id) if phase_id: self.synced_phases.add(phase_id) - + def is_already_synced(self, challenge_id=None, phase_id=None): """Check if we've already synced this challenge/phase in this request""" if challenge_id and challenge_id in self.synced_challenges: @@ -46,6 +48,7 @@ def is_already_synced(self, challenge_id=None, phase_id=None): return True return False + # Global context for the current request _github_request_context = GitHubSyncContext() @@ -58,14 +61,14 @@ def reset_github_sync_context(): global _github_request_context _github_request_context = GitHubSyncContext() # 
also clear payload keys - if hasattr(_github_request_local, 'payload_keys'): - delattr(_github_request_local, 'payload_keys') + if hasattr(_github_request_local, "payload_keys"): + delattr(_github_request_local, "payload_keys") # reset per-request sync context class GitHubSyncMiddleware(MiddlewareMixin): """Middleware to reset GitHub sync context on each request and capture payload keys""" - + def process_request(self, request): """Reset context at the start of each request; capture payload keys for inference""" reset_github_sync_context() @@ -73,15 +76,15 @@ def process_request(self, request): keys = [] if request.method in ("PATCH", "PUT", "POST"): # Try JSON body first - if getattr(request, 'body', None): + if getattr(request, "body", None): try: - payload = json.loads(request.body.decode('utf-8')) + payload = json.loads(request.body.decode("utf-8")) if isinstance(payload, dict): keys = list(payload.keys()) except Exception: keys = [] # Fallback to POST dict - if not keys and hasattr(request, 'POST'): + if not keys and hasattr(request, "POST"): try: keys = list(request.POST.keys()) except Exception: @@ -97,13 +100,20 @@ def _infer_changed_field_from_request(model_instance): Returns a field name string or None. 
""" try: - keys = getattr(_github_request_local, 'payload_keys', []) or [] + keys = getattr(_github_request_local, "payload_keys", []) or [] if not keys: return None # Prefer keys that actually exist on the model for key in keys: # Ignore meta and non-model keys - if key in {"id", "pk", "challenge", "phase", "created_at", "modified_at"}: + if key in { + "id", + "pk", + "challenge", + "phase", + "created_at", + "modified_at", + }: continue if hasattr(model_instance, key): return key @@ -423,32 +433,49 @@ def update_sqs_retention_period_for_challenge( def challenge_details_sync(sender, instance, created, **kwargs): """Sync challenge details to GitHub when challenge is updated""" # Skip if this is a bot-triggered save to prevent recursive calls - if hasattr(_github_sync_context, 'skip_github_sync') and _github_sync_context.skip_github_sync: - logger.info(f"Skipping GitHub sync for challenge {instance.id} - recursive call prevented") + if ( + hasattr(_github_sync_context, "skip_github_sync") + and _github_sync_context.skip_github_sync + ): + logger.info( + f"Skipping GitHub sync for challenge {instance.id} - recursive call prevented" + ) return - + # Skip if this is a GitHub-sourced change (not from UI) - if hasattr(_github_sync_context, 'change_source') and _github_sync_context.change_source == 'github': - logger.info(f"Skipping GitHub sync for challenge {instance.id} - change sourced from GitHub") + if ( + hasattr(_github_sync_context, "change_source") + and _github_sync_context.change_source == "github" + ): + logger.info( + f"Skipping GitHub sync for challenge {instance.id} - change sourced from GitHub" + ) return - + # Skip if we've already synced this challenge in this request if _github_request_context.is_already_synced(challenge_id=instance.id): - logger.info(f"Skipping GitHub sync for challenge {instance.id} - already synced in this request") + logger.info( + f"Skipping GitHub sync for challenge {instance.id} - already synced in this request" + ) return - + # 
By default, allow UI changes to trigger GitHub sync # proceed only for updates with github configured if not created and instance.github_token and instance.github_repository: try: from challenges.github_utils import github_challenge_sync + _github_request_context.mark_syncing(challenge_id=instance.id) - + # Get the changed field from update_fields if available changed_field = None - if kwargs.get('update_fields'): + if kwargs.get("update_fields"): # Django provides update_fields when using .save(update_fields=['field_name']) - changed_field = list(kwargs['update_fields'])[0] if kwargs['update_fields'] else None + changed_field = ( + list(kwargs["update_fields"])[0] + if kwargs["update_fields"] + else None + ) pass # Infer from request payload if not provided if not changed_field: @@ -456,12 +483,12 @@ def challenge_details_sync(sender, instance, created, **kwargs): if inferred: changed_field = inferred pass - + # Require a specific changed field to proceed (single-field commit intent) if not isinstance(changed_field, str) or not changed_field: # skip if we cannot determine a single changed field return - + github_challenge_sync(instance.id, changed_field=changed_field) except Exception as e: logger.error(f"Error in challenge_details_sync: {str(e)}") @@ -599,32 +626,53 @@ def save(self, *args, **kwargs): def challenge_phase_details_sync(sender, instance, created, **kwargs): """Sync challenge phase details to GitHub when challenge phase is updated""" # Skip if this is a bot-triggered save to prevent recursive calls - if hasattr(_github_sync_context, 'skip_github_sync') and _github_sync_context.skip_github_sync: - logger.info(f"Skipping GitHub sync for challenge phase {instance.id} - recursive call prevented") + if ( + hasattr(_github_sync_context, "skip_github_sync") + and _github_sync_context.skip_github_sync + ): + logger.info( + f"Skipping GitHub sync for challenge phase {instance.id} - recursive call prevented" + ) return - + # Skip if this is a GitHub-sourced 
change (not from UI) - if hasattr(_github_sync_context, 'change_source') and _github_sync_context.change_source == 'github': - logger.info(f"Skipping GitHub sync for challenge phase {instance.id} - change sourced from GitHub") + if ( + hasattr(_github_sync_context, "change_source") + and _github_sync_context.change_source == "github" + ): + logger.info( + f"Skipping GitHub sync for challenge phase {instance.id} - change sourced from GitHub" + ) return - + # Skip if we've already synced this phase in this request if _github_request_context.is_already_synced(phase_id=instance.id): - logger.info(f"Skipping GitHub sync for challenge phase {instance.id} - already synced in this request") + logger.info( + f"Skipping GitHub sync for challenge phase {instance.id} - already synced in this request" + ) return - + # By default, allow UI changes to trigger GitHub sync # proceed only for updates with github configured - if not created and instance.challenge.github_token and instance.challenge.github_repository: + if ( + not created + and instance.challenge.github_token + and instance.challenge.github_repository + ): try: from challenges.github_utils import github_challenge_phase_sync + _github_request_context.mark_syncing(phase_id=instance.id) - + # Get the changed field from update_fields if available changed_field = None - if kwargs.get('update_fields'): + if kwargs.get("update_fields"): # Django provides update_fields when using .save(update_fields=['field_name']) - changed_field = list(kwargs['update_fields'])[0] if kwargs['update_fields'] else None + changed_field = ( + list(kwargs["update_fields"])[0] + if kwargs["update_fields"] + else None + ) pass # Infer from request payload if not provided if not changed_field: @@ -632,13 +680,15 @@ def challenge_phase_details_sync(sender, instance, created, **kwargs): if inferred: changed_field = inferred pass - + # Require a specific changed field to proceed (single-field commit intent) if not isinstance(changed_field, str) or not 
changed_field: # skip if we cannot determine a single changed field return - - github_challenge_phase_sync(instance.id, changed_field=changed_field) + + github_challenge_phase_sync( + instance.id, changed_field=changed_field + ) except Exception as e: logger.error(f"Error in challenge_phase_details_sync: {str(e)}") else: diff --git a/apps/challenges/views.py b/apps/challenges/views.py index 70d87c07d0..77933f2c0d 100644 --- a/apps/challenges/views.py +++ b/apps/challenges/views.py @@ -159,7 +159,6 @@ send_subscription_plans_email, ) - logger = logging.getLogger(__name__) try: @@ -212,7 +211,7 @@ def challenge_list(request, challenge_host_team_pk): if serializer.is_valid(): serializer.save() challenge = get_challenge_model(serializer.instance.pk) - + serializer = ChallengeSerializer(challenge) response_data = serializer.data return Response(response_data, status=status.HTTP_201_CREATED) @@ -325,17 +324,17 @@ def challenge_detail(request, challenge_host_team_pk, challenge_pk): else: serializer = ZipChallengeSerializer( challenge, - data=request.data, - context={ - "challenge_host_team": challenge_host_team, - "request": request, - "github_token": request.data.get("GITHUB_AUTH_TOKEN"), - }, - ) + data=request.data, + context={ + "challenge_host_team": challenge_host_team, + "request": request, + "github_token": request.data.get("GITHUB_AUTH_TOKEN"), + }, + ) if serializer.is_valid(): serializer.save() challenge = get_challenge_model(serializer.instance.pk) - + serializer = ChallengeSerializer(challenge) response_data = serializer.data return Response(response_data, status=status.HTTP_200_OK) @@ -1097,7 +1096,7 @@ def challenge_phase_detail(request, challenge_pk, pk): if serializer.is_valid(): serializer.save() challenge_phase = get_challenge_phase_model(serializer.instance.pk) - + serializer = ChallengePhaseSerializer(challenge_phase) response_data = serializer.data return Response(response_data, status=status.HTTP_200_OK) @@ -3992,7 +3991,9 @@ def 
create_or_update_github_challenge(request, challenge_host_team_pk): "github_repository": request.data[ "GITHUB_REPOSITORY" ], - "github_token": request.data.get("GITHUB_AUTH_TOKEN"), + "github_token": request.data.get( + "GITHUB_AUTH_TOKEN" + ), "worker_image_url": worker_image_url, }, ) diff --git a/tests/unit/challenges/test_github_utils.py b/tests/unit/challenges/test_github_utils.py index 4c0f9a3821..f23074fbb9 100644 --- a/tests/unit/challenges/test_github_utils.py +++ b/tests/unit/challenges/test_github_utils.py @@ -2,15 +2,14 @@ import logging from unittest.mock import Mock, patch -from django.test import TestCase -from django.utils import timezone - -from challenges.models import Challenge, ChallengePhase from challenges.github_utils import ( GithubInterface, - sync_challenge_to_github, sync_challenge_phase_to_github, + sync_challenge_to_github, ) +from challenges.models import Challenge, ChallengePhase +from django.test import TestCase +from django.utils import timezone class TestGithubInterface(TestCase): @@ -47,7 +46,7 @@ def test_get_file_contents_success(self, mock_get): mock_get.return_value = mock_response result = self.github.get_file_contents("test.json") - + self.assertEqual(result, {"content": "test content"}) mock_get.assert_called_once() @@ -59,7 +58,7 @@ def test_get_file_contents_not_found(self, mock_get): mock_get.return_value = mock_response result = self.github.get_file_contents("test.json") - + self.assertIsNone(result) @patch("challenges.github_utils.requests.put") @@ -70,8 +69,10 @@ def test_update_text_file_success(self, mock_put): mock_response.json.return_value = {"sha": "test_sha"} mock_put.return_value = mock_response - result = self.github.update_text_file("test.json", "content", "message", "sha") - + result = self.github.update_text_file( + "test.json", "content", "message", "sha" + ) + self.assertEqual(result, {"sha": "test_sha"}) mock_put.assert_called_once() @@ -82,17 +83,24 @@ def test_update_text_file_failure(self, 
mock_put): mock_response.status_code = 400 mock_put.return_value = mock_response - result = self.github.update_text_file("test.json", "content", "message", "sha") - + result = self.github.update_text_file( + "test.json", "content", "message", "sha" + ) + self.assertIsNone(result) @patch("challenges.github_utils.GithubInterface.get_file_contents") @patch("challenges.github_utils.GithubInterface.update_text_file") - def test_update_json_if_changed_existing_changed(self, mock_update_text, mock_get): + def test_update_json_if_changed_existing_changed( + self, mock_update_text, mock_get + ): """When existing JSON differs, it should commit with updated content""" old_data = {"a": 1} old_text = json.dumps(old_data, sort_keys=True) - mock_get.return_value = {"sha": "test_sha", "content": old_text.encode("utf-8").decode("utf-8")} # content will be base64-decoded internally; provide as raw for simplicity + mock_get.return_value = { + "sha": "test_sha", + "content": old_text.encode("utf-8").decode("utf-8"), + } # content will be base64-decoded internally; provide as raw for simplicity mock_update_text.return_value = {"sha": "new_sha"} new_data = {"a": 2} @@ -157,14 +165,14 @@ def test_sync_challenge_to_github_success(self, mock_github_class): def test_sync_challenge_to_github_no_token(self): """Test challenge sync when no GitHub token is configured""" self.challenge.github_token = "" - + with self.assertLogs(level=logging.WARNING): sync_challenge_to_github(self.challenge) def test_sync_challenge_to_github_no_repo(self): """Test challenge sync when no GitHub repository is configured""" self.challenge.github_repository = "" - + with self.assertLogs(level=logging.WARNING): sync_challenge_to_github(self.challenge) @@ -185,14 +193,14 @@ def test_sync_challenge_phase_to_github_success(self, mock_github_class): def test_sync_challenge_phase_to_github_no_token(self): """Test challenge phase sync when no GitHub token is configured""" self.challenge.github_token = "" - + with 
self.assertLogs(level=logging.WARNING): sync_challenge_phase_to_github(self.challenge_phase) def test_sync_challenge_phase_to_github_no_repo(self): """Test challenge phase sync when no GitHub repository is configured""" self.challenge.github_repository = "" - + with self.assertLogs(level=logging.WARNING): sync_challenge_phase_to_github(self.challenge_phase) @@ -208,7 +216,7 @@ def test_sync_challenge_data_structure(self, mock_github_class): call_args = mock_github.update_json_if_changed.call_args self.assertEqual(call_args[0][0], "challenge.json") # path challenge_data = call_args[0][1] # data - + # Check that key fields are present self.assertEqual(challenge_data["title"], "Test Challenge") self.assertEqual(challenge_data["description"], "Test Description") @@ -227,8 +235,8 @@ def test_sync_challenge_phase_data_structure(self, mock_github_class): call_args = mock_github.update_json_if_changed.call_args self.assertEqual(call_args[0][0], "phases/test_phase.json") # path phase_data = call_args[0][1] # data - + # Check that key fields are present self.assertEqual(phase_data["name"], "Test Phase") self.assertEqual(phase_data["description"], "Test Phase Description") - self.assertEqual(phase_data["codename"], "test_phase") \ No newline at end of file + self.assertEqual(phase_data["codename"], "test_phase") From d581b4b79dcd230b4d69f67dd2406163119c1ca3 Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Tue, 26 Aug 2025 13:01:39 +0530 Subject: [PATCH 11/20] Add tests --- celerybeat.pid | 1 - .../challenges/test_github_sync_signals.py | 164 ++++++++++++++++++ 2 files changed, 164 insertions(+), 1 deletion(-) delete mode 100644 celerybeat.pid create mode 100644 tests/unit/challenges/test_github_sync_signals.py diff --git a/celerybeat.pid b/celerybeat.pid deleted file mode 100644 index ec635144f6..0000000000 --- a/celerybeat.pid +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/tests/unit/challenges/test_github_sync_signals.py b/tests/unit/challenges/test_github_sync_signals.py new 
file mode 100644 index 0000000000..73e5444342 --- /dev/null +++ b/tests/unit/challenges/test_github_sync_signals.py @@ -0,0 +1,164 @@ +from unittest.mock import patch + +from django.test import TestCase +from django.utils import timezone +from django.contrib.auth.models import User + +from hosts.models import ChallengeHostTeam + +from challenges.models import ( + Challenge, + ChallengePhase, + GitHubSyncMiddleware, + reset_github_sync_context, +) + +from challenges import models as challenge_models + + +class TestGithubSyncSignals(TestCase): + + def setUp(self): + # minimal creator for Challenge + self.user = User.objects.create(username="owner", email="o@example.com") + self.host_team = ChallengeHostTeam.objects.create( + team_name="team", created_by=self.user + ) + self.challenge = Challenge.objects.create( + title="Initial Title", + description="Desc", + github_token="test_token", + github_repository="org/repo", + github_branch="main", + creator=self.host_team, + start_date=timezone.now(), + end_date=timezone.now() + timezone.timedelta(days=5), + ) + self.phase = ChallengePhase.objects.create( + name="Initial Phase", + description="Phase Desc", + challenge=self.challenge, + codename="phase_code", + ) + + @patch("challenges.aws_utils.challenge_approval_callback") + @patch("challenges.github_utils.github_challenge_sync") + def test_challenge_post_save_calls_sync_with_update_fields( + self, mock_sync, _mock_approval_cb + ): + self.challenge.title = "Updated Title" + # Pass update_fields so receiver can read changed field directly + self.challenge.save(update_fields=["title"]) + + mock_sync.assert_called_once() + args, kwargs = mock_sync.call_args + self.assertEqual(args[0], self.challenge.id) + self.assertEqual(kwargs.get("changed_field"), "title") + + @patch("challenges.github_utils.github_challenge_phase_sync") + def test_phase_post_save_calls_sync_with_update_fields(self, mock_phase_sync): + self.phase.name = "Updated Phase" + 
self.phase.save(update_fields=["name"]) + + mock_phase_sync.assert_called_once() + args, kwargs = mock_phase_sync.call_args + self.assertEqual(args[0], self.phase.id) + self.assertEqual(kwargs.get("changed_field"), "name") + + @patch("challenges.aws_utils.challenge_approval_callback") + @patch("challenges.github_utils.github_challenge_sync") + def test_middleware_infers_changed_field_and_triggers_sync( + self, mock_sync, _mock_approval_cb + ): + # Simulate a PATCH request payload captured by middleware + class _Req: + method = "PATCH" + body = b"{\n \"title\": \"MW Title\"\n}" + + mw = GitHubSyncMiddleware() + mw.process_request(_Req()) + + # Save without update_fields; receiver should infer from payload keys + self.challenge.title = "MW Title" + self.challenge.save() + + mock_sync.assert_called_once() + _args, kwargs = mock_sync.call_args + self.assertEqual(kwargs.get("changed_field"), "title") + + @patch("challenges.github_utils.github_challenge_sync") + def test_challenge_create_does_not_sync(self, mock_sync): + with patch("challenges.aws_utils.challenge_approval_callback"): + Challenge.objects.create( + title="New Challenge", + description="Desc", + github_token="test_token", + github_repository="org/repo", + github_branch="feature/test", # avoid unique (repo, branch) conflict + creator=self.host_team, + start_date=timezone.now(), + end_date=timezone.now() + timezone.timedelta(days=5), + ) + + mock_sync.assert_not_called() + + @patch("challenges.aws_utils.challenge_approval_callback") + @patch("challenges.github_utils.github_challenge_sync") + def test_no_sync_without_github_config(self, mock_sync, _mock_approval_cb): + self.challenge.github_token = "" + self.challenge.save(update_fields=["github_token"]) # change without config + mock_sync.assert_not_called() + + @patch("challenges.aws_utils.challenge_approval_callback") + @patch("challenges.github_utils.github_challenge_sync") + def test_dedupe_within_single_request(self, mock_sync, _mock_approval_cb): + # 
Start request, middleware captures keys + class _Req: + method = "PATCH" + body = b"{\n \"title\": \"One\"\n}" + + mw = GitHubSyncMiddleware() + mw.process_request(_Req()) + + # Two saves in same request + self.challenge.title = "One" + self.challenge.save() + self.challenge.description = "Changed" + self.challenge.save() + + # Only the first should sync for this challenge in this request + mock_sync.assert_called_once() + + @patch("challenges.aws_utils.challenge_approval_callback") + @patch("challenges.github_utils.github_challenge_sync") + def test_skip_when_change_source_is_github(self, mock_sync, _mock_approval_cb): + # Simulate a GitHub-sourced change via models' sync context + challenge_models._github_sync_context.change_source = "github" + try: + self.challenge.title = "Ignored" + self.challenge.save(update_fields=["title"]) + mock_sync.assert_not_called() + finally: + if hasattr(challenge_models._github_sync_context, "change_source"): + delattr(challenge_models._github_sync_context, "change_source") + + @patch("challenges.aws_utils.challenge_approval_callback") + @patch("challenges.github_utils.github_challenge_sync") + def test_no_changed_field_inference_means_no_sync(self, mock_sync, _mock_approval_cb): + # Ensure no payload keys available to infer + reset_github_sync_context() + self.challenge.title = "Still Updated" + self.challenge.save() # no update_fields, no middleware keys + mock_sync.assert_not_called() + + @patch("challenges.aws_utils.challenge_approval_callback") + @patch("challenges.github_utils.github_challenge_sync") + def test_multiple_update_fields_prefers_first(self, mock_sync, _mock_approval_cb): + self.challenge.title = "A" + self.challenge.description = "B" + self.challenge.save(update_fields=["title", "description"]) + _args, kwargs = mock_sync.call_args + # Order of update_fields is non-deterministic (Django converts to set), accept either + assert kwargs.get("changed_field") in {"title", "description"} + + From 
10ee70261d50e0eec3d47437f0084d83c77c309e Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Tue, 26 Aug 2025 13:06:14 +0530 Subject: [PATCH 12/20] Add tests for github_utils --- tests/unit/challenges/test_github_utils.py | 198 +++++++++------------ 1 file changed, 80 insertions(+), 118 deletions(-) diff --git a/tests/unit/challenges/test_github_utils.py b/tests/unit/challenges/test_github_utils.py index f23074fbb9..655623d364 100644 --- a/tests/unit/challenges/test_github_utils.py +++ b/tests/unit/challenges/test_github_utils.py @@ -2,14 +2,16 @@ import logging from unittest.mock import Mock, patch +from challenges.github_interface import GithubInterface from challenges.github_utils import ( - GithubInterface, - sync_challenge_phase_to_github, - sync_challenge_to_github, + github_challenge_phase_sync, + github_challenge_sync, ) from challenges.models import Challenge, ChallengePhase from django.test import TestCase from django.utils import timezone +from django.contrib.auth.models import User +from hosts.models import ChallengeHostTeam class TestGithubInterface(TestCase): @@ -19,16 +21,15 @@ def setUp(self): self.token = "test_token" self.repo = "test/repo" self.branch = "master" - self.github = GithubInterface(self.token, self.repo, self.branch) + self.github = GithubInterface(self.repo, self.branch, self.token) def test_init(self): """Test GithubInterface initialization""" - self.assertEqual(self.github.token, self.token) - self.assertEqual(self.github.repo, self.repo) - self.assertEqual(self.github.branch, self.branch) - self.assertEqual(self.github.base_url, "https://api.github.com") - self.assertIn("Authorization", self.github.headers) - self.assertIn("Accept", self.github.headers) + self.assertEqual(self.github.GITHUB_AUTH_TOKEN, self.token) + self.assertEqual(self.github.GITHUB_REPOSITORY, self.repo) + self.assertEqual(self.github.BRANCH, self.branch) + headers = self.github.get_request_headers() + self.assertIn("Authorization", headers) def 
test_get_github_url(self): """Test get_github_url method""" @@ -37,92 +38,75 @@ def test_get_github_url(self): result = self.github.get_github_url(url) self.assertEqual(result, expected) - @patch("challenges.github_utils.requests.get") - def test_get_file_contents_success(self, mock_get): - """Test get_file_contents with successful response""" + @patch("challenges.github_interface.requests.request") + def test_get_content_from_path_success(self, mock_request): + """Test get_content_from_path with successful response""" mock_response = Mock() mock_response.status_code = 200 mock_response.json.return_value = {"content": "test content"} - mock_get.return_value = mock_response + mock_response.raise_for_status.return_value = None + mock_request.return_value = mock_response - result = self.github.get_file_contents("test.json") + result = self.github.get_content_from_path("test.json") self.assertEqual(result, {"content": "test content"}) - mock_get.assert_called_once() + mock_request.assert_called_once() + + @patch("challenges.github_interface.requests.request") + def test_get_content_from_path_not_found(self, mock_request): + """Test get_content_from_path with error response returns None""" + from requests.exceptions import RequestException - @patch("challenges.github_utils.requests.get") - def test_get_file_contents_not_found(self, mock_get): - """Test get_file_contents with 404 response""" mock_response = Mock() - mock_response.status_code = 404 - mock_get.return_value = mock_response + mock_response.raise_for_status.side_effect = RequestException() + mock_request.return_value = mock_response - result = self.github.get_file_contents("test.json") + result = self.github.get_content_from_path("test.json") self.assertIsNone(result) - @patch("challenges.github_utils.requests.put") - def test_update_text_file_success(self, mock_put): - """Test update_text_file with successful response""" + @patch("challenges.github_interface.GithubInterface.get_content_from_path") + 
@patch("challenges.github_interface.requests.request") + def test_update_content_from_path_success(self, mock_request, mock_get): + """Test update_content_from_path with successful response""" + # Simulate existing file with sha so update path is used + mock_get.return_value = {"sha": "old_sha"} mock_response = Mock() - mock_response.status_code = 200 - mock_response.json.return_value = {"sha": "test_sha"} - mock_put.return_value = mock_response + mock_response.json.return_value = {"sha": "new_sha"} + mock_response.raise_for_status.return_value = None + mock_request.return_value = mock_response - result = self.github.update_text_file( - "test.json", "content", "message", "sha" + result = self.github.update_content_from_path( + "test.json", "Y29udGVudA==", changed_field="title" ) - self.assertEqual(result, {"sha": "test_sha"}) - mock_put.assert_called_once() + self.assertEqual(result, {"sha": "new_sha"}) + mock_request.assert_called_once() + + @patch("challenges.github_interface.requests.request") + def test_update_content_from_path_failure(self, mock_request): + """Test update_content_from_path with failed response returns None""" + from requests.exceptions import RequestException - @patch("challenges.github_utils.requests.put") - def test_update_text_file_failure(self, mock_put): - """Test update_text_file with failure response""" mock_response = Mock() - mock_response.status_code = 400 - mock_put.return_value = mock_response + mock_response.raise_for_status.side_effect = RequestException() + mock_request.return_value = mock_response - result = self.github.update_text_file( - "test.json", "content", "message", "sha" + result = self.github.update_content_from_path( + "test.json", "Y29udGVudA==", changed_field="title" ) self.assertIsNone(result) - @patch("challenges.github_utils.GithubInterface.get_file_contents") - @patch("challenges.github_utils.GithubInterface.update_text_file") - def test_update_json_if_changed_existing_changed( - self, mock_update_text, 
mock_get - ): - """When existing JSON differs, it should commit with updated content""" - old_data = {"a": 1} - old_text = json.dumps(old_data, sort_keys=True) - mock_get.return_value = { - "sha": "test_sha", - "content": old_text.encode("utf-8").decode("utf-8"), - } # content will be base64-decoded internally; provide as raw for simplicity - mock_update_text.return_value = {"sha": "new_sha"} - - new_data = {"a": 2} - result = self.github.update_json_if_changed("test.json", new_data) - - self.assertEqual(result, {"sha": "new_sha"}) - mock_get.assert_called_once_with("test.json") - mock_update_text.assert_called_once() - - @patch("challenges.github_utils.GithubInterface.get_file_contents") - @patch("challenges.github_utils.GithubInterface.update_text_file") - def test_update_json_if_changed_new_file(self, mock_update_text, mock_get): - """When file doesn't exist, it should create it""" - mock_get.return_value = None - mock_update_text.return_value = {"sha": "new_sha"} - - data = {"test": "data"} - result = self.github.update_json_if_changed("test.json", data) + @patch("challenges.github_interface.GithubInterface.update_content_from_path") + def test_update_data_from_path_encodes_and_calls_update(self, mock_update): + """update_data_from_path should base64-encode and call update_content_from_path""" + mock_update.return_value = {"sha": "new_sha"} + text = "hello" + result = self.github.update_data_from_path("test.json", text, changed_field="title") self.assertEqual(result, {"sha": "new_sha"}) - mock_get.assert_called_once_with("test.json") - mock_update_text.assert_called_once() + mock_update.assert_called_once() class TestGithubSync(TestCase): @@ -130,12 +114,17 @@ class TestGithubSync(TestCase): def setUp(self): # Create a test challenge with GitHub configuration + self.user = User.objects.create(username="owner", email="o@example.com") + self.host_team = ChallengeHostTeam.objects.create( + team_name="team", created_by=self.user + ) self.challenge = 
Challenge.objects.create( title="Test Challenge", description="Test Description", github_token="test_token", github_repository="test/repo", github_branch="master", + creator=self.host_team, start_date=timezone.now(), end_date=timezone.now() + timezone.timedelta(days=30), ) @@ -153,90 +142,63 @@ def test_sync_challenge_to_github_success(self, mock_github_class): """Test successful challenge sync to GitHub""" mock_github = Mock() mock_github_class.return_value = mock_github - mock_github.update_json_if_changed.return_value = {"sha": "test_sha"} + mock_github.update_challenge_config.return_value = True - sync_challenge_to_github(self.challenge) + github_challenge_sync(self.challenge.id, changed_field="title") - mock_github_class.assert_called_once_with( - "test_token", "test/repo", "master" - ) - mock_github.update_json_if_changed.assert_called_once() + mock_github_class.assert_called_once_with("test/repo", "master", "test_token") + mock_github.update_challenge_config.assert_called_once() def test_sync_challenge_to_github_no_token(self): """Test challenge sync when no GitHub token is configured""" self.challenge.github_token = "" - with self.assertLogs(level=logging.WARNING): - sync_challenge_to_github(self.challenge) + github_challenge_sync(self.challenge.id, changed_field="title") def test_sync_challenge_to_github_no_repo(self): """Test challenge sync when no GitHub repository is configured""" self.challenge.github_repository = "" - with self.assertLogs(level=logging.WARNING): - sync_challenge_to_github(self.challenge) + github_challenge_sync(self.challenge.id, changed_field="title") @patch("challenges.github_utils.GithubInterface") def test_sync_challenge_phase_to_github_success(self, mock_github_class): """Test successful challenge phase sync to GitHub""" mock_github = Mock() mock_github_class.return_value = mock_github - mock_github.update_json_if_changed.return_value = {"sha": "test_sha"} + mock_github.update_challenge_phase_config.return_value = True - 
sync_challenge_phase_to_github(self.challenge_phase) + github_challenge_phase_sync(self.challenge_phase.id, changed_field="name") - mock_github_class.assert_called_once_with( - "test_token", "test/repo", "master" - ) - mock_github.update_json_if_changed.assert_called_once() + mock_github_class.assert_called_once_with("test/repo", "master", "test_token") + mock_github.update_challenge_phase_config.assert_called_once() def test_sync_challenge_phase_to_github_no_token(self): """Test challenge phase sync when no GitHub token is configured""" self.challenge.github_token = "" - with self.assertLogs(level=logging.WARNING): - sync_challenge_phase_to_github(self.challenge_phase) + github_challenge_phase_sync(self.challenge_phase.id, changed_field="name") def test_sync_challenge_phase_to_github_no_repo(self): """Test challenge phase sync when no GitHub repository is configured""" self.challenge.github_repository = "" - with self.assertLogs(level=logging.WARNING): - sync_challenge_phase_to_github(self.challenge_phase) + github_challenge_phase_sync(self.challenge_phase.id, changed_field="name") @patch("challenges.github_utils.GithubInterface") - def test_sync_challenge_data_structure(self, mock_github_class): - """Test that challenge data is properly structured for GitHub sync""" + def test_sync_challenge_calls_update(self, mock_github_class): + """Basic check that challenge sync invokes update method""" mock_github = Mock() mock_github_class.return_value = mock_github - sync_challenge_to_github(self.challenge) - - # Verify that update_data_from_path was called with challenge data - call_args = mock_github.update_json_if_changed.call_args - self.assertEqual(call_args[0][0], "challenge.json") # path - challenge_data = call_args[0][1] # data - - # Check that key fields are present - self.assertEqual(challenge_data["title"], "Test Challenge") - self.assertEqual(challenge_data["description"], "Test Description") - self.assertIn("start_date", challenge_data) - 
self.assertIn("end_date", challenge_data) + github_challenge_sync(self.challenge.id, changed_field="title") + mock_github.update_challenge_config.assert_called_once() @patch("challenges.github_utils.GithubInterface") - def test_sync_challenge_phase_data_structure(self, mock_github_class): - """Test that challenge phase data is properly structured for GitHub sync""" + def test_sync_challenge_phase_calls_update(self, mock_github_class): + """Basic check that challenge phase sync invokes update method""" mock_github = Mock() mock_github_class.return_value = mock_github - sync_challenge_phase_to_github(self.challenge_phase) - - # Verify that update_data_from_path was called with phase data - call_args = mock_github.update_json_if_changed.call_args - self.assertEqual(call_args[0][0], "phases/test_phase.json") # path - phase_data = call_args[0][1] # data - - # Check that key fields are present - self.assertEqual(phase_data["name"], "Test Phase") - self.assertEqual(phase_data["description"], "Test Phase Description") - self.assertEqual(phase_data["codename"], "test_phase") + github_challenge_phase_sync(self.challenge_phase.id, changed_field="name") + mock_github.update_challenge_phase_config.assert_called_once() From 2eaa4657c3e9c465f661e03308a33269afab35de Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Tue, 26 Aug 2025 13:07:11 +0530 Subject: [PATCH 13/20] Pass code quality checks --- .../challenges/test_github_sync_signals.py | 43 +++++++++++-------- tests/unit/challenges/test_github_utils.py | 38 +++++++++++----- 2 files changed, 53 insertions(+), 28 deletions(-) diff --git a/tests/unit/challenges/test_github_sync_signals.py b/tests/unit/challenges/test_github_sync_signals.py index 73e5444342..b83897b113 100644 --- a/tests/unit/challenges/test_github_sync_signals.py +++ b/tests/unit/challenges/test_github_sync_signals.py @@ -1,26 +1,25 @@ from unittest.mock import patch -from django.test import TestCase -from django.utils import timezone -from 
django.contrib.auth.models import User - -from hosts.models import ChallengeHostTeam - +from challenges import models as challenge_models from challenges.models import ( Challenge, ChallengePhase, GitHubSyncMiddleware, reset_github_sync_context, ) - -from challenges import models as challenge_models +from django.contrib.auth.models import User +from django.test import TestCase +from django.utils import timezone +from hosts.models import ChallengeHostTeam class TestGithubSyncSignals(TestCase): def setUp(self): # minimal creator for Challenge - self.user = User.objects.create(username="owner", email="o@example.com") + self.user = User.objects.create( + username="owner", email="o@example.com" + ) self.host_team = ChallengeHostTeam.objects.create( team_name="team", created_by=self.user ) @@ -56,7 +55,9 @@ def test_challenge_post_save_calls_sync_with_update_fields( self.assertEqual(kwargs.get("changed_field"), "title") @patch("challenges.github_utils.github_challenge_phase_sync") - def test_phase_post_save_calls_sync_with_update_fields(self, mock_phase_sync): + def test_phase_post_save_calls_sync_with_update_fields( + self, mock_phase_sync + ): self.phase.name = "Updated Phase" self.phase.save(update_fields=["name"]) @@ -73,7 +74,7 @@ def test_middleware_infers_changed_field_and_triggers_sync( # Simulate a PATCH request payload captured by middleware class _Req: method = "PATCH" - body = b"{\n \"title\": \"MW Title\"\n}" + body = b'{\n "title": "MW Title"\n}' mw = GitHubSyncMiddleware() mw.process_request(_Req()) @@ -106,7 +107,9 @@ def test_challenge_create_does_not_sync(self, mock_sync): @patch("challenges.github_utils.github_challenge_sync") def test_no_sync_without_github_config(self, mock_sync, _mock_approval_cb): self.challenge.github_token = "" - self.challenge.save(update_fields=["github_token"]) # change without config + self.challenge.save( + update_fields=["github_token"] + ) # change without config mock_sync.assert_not_called() 
@patch("challenges.aws_utils.challenge_approval_callback") @@ -115,7 +118,7 @@ def test_dedupe_within_single_request(self, mock_sync, _mock_approval_cb): # Start request, middleware captures keys class _Req: method = "PATCH" - body = b"{\n \"title\": \"One\"\n}" + body = b'{\n "title": "One"\n}' mw = GitHubSyncMiddleware() mw.process_request(_Req()) @@ -131,7 +134,9 @@ class _Req: @patch("challenges.aws_utils.challenge_approval_callback") @patch("challenges.github_utils.github_challenge_sync") - def test_skip_when_change_source_is_github(self, mock_sync, _mock_approval_cb): + def test_skip_when_change_source_is_github( + self, mock_sync, _mock_approval_cb + ): # Simulate a GitHub-sourced change via models' sync context challenge_models._github_sync_context.change_source = "github" try: @@ -144,7 +149,9 @@ def test_skip_when_change_source_is_github(self, mock_sync, _mock_approval_cb): @patch("challenges.aws_utils.challenge_approval_callback") @patch("challenges.github_utils.github_challenge_sync") - def test_no_changed_field_inference_means_no_sync(self, mock_sync, _mock_approval_cb): + def test_no_changed_field_inference_means_no_sync( + self, mock_sync, _mock_approval_cb + ): # Ensure no payload keys available to infer reset_github_sync_context() self.challenge.title = "Still Updated" @@ -153,12 +160,12 @@ def test_no_changed_field_inference_means_no_sync(self, mock_sync, _mock_approva @patch("challenges.aws_utils.challenge_approval_callback") @patch("challenges.github_utils.github_challenge_sync") - def test_multiple_update_fields_prefers_first(self, mock_sync, _mock_approval_cb): + def test_multiple_update_fields_prefers_first( + self, mock_sync, _mock_approval_cb + ): self.challenge.title = "A" self.challenge.description = "B" self.challenge.save(update_fields=["title", "description"]) _args, kwargs = mock_sync.call_args # Order of update_fields is non-deterministic (Django converts to set), accept either assert kwargs.get("changed_field") in {"title", 
"description"} - - diff --git a/tests/unit/challenges/test_github_utils.py b/tests/unit/challenges/test_github_utils.py index 655623d364..b9615fc333 100644 --- a/tests/unit/challenges/test_github_utils.py +++ b/tests/unit/challenges/test_github_utils.py @@ -8,9 +8,9 @@ github_challenge_sync, ) from challenges.models import Challenge, ChallengePhase +from django.contrib.auth.models import User from django.test import TestCase from django.utils import timezone -from django.contrib.auth.models import User from hosts.models import ChallengeHostTeam @@ -98,12 +98,16 @@ def test_update_content_from_path_failure(self, mock_request): self.assertIsNone(result) - @patch("challenges.github_interface.GithubInterface.update_content_from_path") + @patch( + "challenges.github_interface.GithubInterface.update_content_from_path" + ) def test_update_data_from_path_encodes_and_calls_update(self, mock_update): """update_data_from_path should base64-encode and call update_content_from_path""" mock_update.return_value = {"sha": "new_sha"} text = "hello" - result = self.github.update_data_from_path("test.json", text, changed_field="title") + result = self.github.update_data_from_path( + "test.json", text, changed_field="title" + ) self.assertEqual(result, {"sha": "new_sha"}) mock_update.assert_called_once() @@ -114,7 +118,9 @@ class TestGithubSync(TestCase): def setUp(self): # Create a test challenge with GitHub configuration - self.user = User.objects.create(username="owner", email="o@example.com") + self.user = User.objects.create( + username="owner", email="o@example.com" + ) self.host_team = ChallengeHostTeam.objects.create( team_name="team", created_by=self.user ) @@ -146,7 +152,9 @@ def test_sync_challenge_to_github_success(self, mock_github_class): github_challenge_sync(self.challenge.id, changed_field="title") - mock_github_class.assert_called_once_with("test/repo", "master", "test_token") + mock_github_class.assert_called_once_with( + "test/repo", "master", "test_token" + ) 
mock_github.update_challenge_config.assert_called_once() def test_sync_challenge_to_github_no_token(self): @@ -168,22 +176,30 @@ def test_sync_challenge_phase_to_github_success(self, mock_github_class): mock_github_class.return_value = mock_github mock_github.update_challenge_phase_config.return_value = True - github_challenge_phase_sync(self.challenge_phase.id, changed_field="name") + github_challenge_phase_sync( + self.challenge_phase.id, changed_field="name" + ) - mock_github_class.assert_called_once_with("test/repo", "master", "test_token") + mock_github_class.assert_called_once_with( + "test/repo", "master", "test_token" + ) mock_github.update_challenge_phase_config.assert_called_once() def test_sync_challenge_phase_to_github_no_token(self): """Test challenge phase sync when no GitHub token is configured""" self.challenge.github_token = "" with self.assertLogs(level=logging.WARNING): - github_challenge_phase_sync(self.challenge_phase.id, changed_field="name") + github_challenge_phase_sync( + self.challenge_phase.id, changed_field="name" + ) def test_sync_challenge_phase_to_github_no_repo(self): """Test challenge phase sync when no GitHub repository is configured""" self.challenge.github_repository = "" with self.assertLogs(level=logging.WARNING): - github_challenge_phase_sync(self.challenge_phase.id, changed_field="name") + github_challenge_phase_sync( + self.challenge_phase.id, changed_field="name" + ) @patch("challenges.github_utils.GithubInterface") def test_sync_challenge_calls_update(self, mock_github_class): @@ -200,5 +216,7 @@ def test_sync_challenge_phase_calls_update(self, mock_github_class): mock_github = Mock() mock_github_class.return_value = mock_github - github_challenge_phase_sync(self.challenge_phase.id, changed_field="name") + github_challenge_phase_sync( + self.challenge_phase.id, changed_field="name" + ) mock_github.update_challenge_phase_config.assert_called_once() From f13a78d73931f7618673ffee3a58461f616ea856 Mon Sep 17 00:00:00 2001 From: 
Zahed-Riyaz Date: Tue, 26 Aug 2025 16:15:59 +0530 Subject: [PATCH 14/20] Modify tests --- tests/unit/challenges/test_urls.py | 2 +- tests/unit/challenges/test_views.py | 19 +++++++++++++++++++ tests/unit/participants/test_views.py | 2 ++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/tests/unit/challenges/test_urls.py b/tests/unit/challenges/test_urls.py index 96c89f214b..ed2ef2e628 100644 --- a/tests/unit/challenges/test_urls.py +++ b/tests/unit/challenges/test_urls.py @@ -124,7 +124,7 @@ def test_challenges_urls(self): ) self.assertEqual( url, - "/api/challenges/" + "/api/challenges/challenge/" + str(self.challenge.pk) + "/participant_team/team_detail", ) diff --git a/tests/unit/challenges/test_views.py b/tests/unit/challenges/test_views.py index ba543c7ae2..7392122768 100644 --- a/tests/unit/challenges/test_views.py +++ b/tests/unit/challenges/test_views.py @@ -206,6 +206,7 @@ def test_get_challenge(self): "sqs_retention_period": self.challenge.sqs_retention_period, "github_repository": self.challenge.github_repository, "github_branch": self.challenge.github_branch, + "github_token": self.challenge.github_token, } ] @@ -581,6 +582,7 @@ def test_get_particular_challenge(self): "sqs_retention_period": self.challenge.sqs_retention_period, "github_repository": self.challenge.github_repository, "github_branch": self.challenge.github_branch, + "github_token": self.challenge.github_token, } response = self.client.get(self.url, {}) self.assertEqual(response.data, expected) @@ -685,6 +687,7 @@ def test_update_challenge_when_user_is_its_creator(self): "sqs_retention_period": self.challenge.sqs_retention_period, "github_repository": self.challenge.github_repository, "github_branch": self.challenge.github_branch, + "github_token": self.challenge.github_token, } response = self.client.put( self.url, {"title": new_title, "description": new_description} @@ -815,6 +818,7 @@ def test_particular_challenge_partial_update(self): "sqs_retention_period": 
self.challenge.sqs_retention_period, "github_repository": self.challenge.github_repository, "github_branch": self.challenge.github_branch, + "github_token": self.challenge.github_token, } response = self.client.patch(self.url, self.partial_update_data) self.assertEqual(response.data, expected) @@ -894,6 +898,7 @@ def test_particular_challenge_update(self): "sqs_retention_period": self.challenge.sqs_retention_period, "github_repository": self.challenge.github_repository, "github_branch": self.challenge.github_branch, + "github_token": self.challenge.github_token, } response = self.client.put(self.url, self.data) self.assertEqual(response.data, expected) @@ -1492,6 +1497,7 @@ def test_get_past_challenges(self): "sqs_retention_period": self.challenge3.sqs_retention_period, "github_repository": self.challenge3.github_repository, "github_branch": self.challenge3.github_branch, + "github_token": self.challenge3.github_token, } ] response = self.client.get(self.url, {}, format="json") @@ -1577,6 +1583,7 @@ def test_get_present_challenges(self): "sqs_retention_period": self.challenge2.sqs_retention_period, "github_repository": self.challenge2.github_repository, "github_branch": self.challenge2.github_branch, + "github_token": self.challenge2.github_token, } ] response = self.client.get(self.url, {}, format="json") @@ -1662,6 +1669,7 @@ def test_get_future_challenges(self): "sqs_retention_period": self.challenge4.sqs_retention_period, "github_repository": self.challenge4.github_repository, "github_branch": self.challenge4.github_branch, + "github_token": self.challenge4.github_token, } ] response = self.client.get(self.url, {}, format="json") @@ -1747,6 +1755,7 @@ def test_get_all_challenges(self): "sqs_retention_period": self.challenge4.sqs_retention_period, "github_repository": self.challenge4.github_repository, "github_branch": self.challenge4.github_branch, + "github_token": self.challenge4.github_token, }, { "id": self.challenge3.pk, @@ -1816,6 +1825,7 @@ def 
test_get_all_challenges(self): "sqs_retention_period": self.challenge3.sqs_retention_period, "github_repository": self.challenge3.github_repository, "github_branch": self.challenge3.github_branch, + "github_token": self.challenge3.github_token, }, { "id": self.challenge2.pk, @@ -1885,6 +1895,7 @@ def test_get_all_challenges(self): "sqs_retention_period": self.challenge2.sqs_retention_period, "github_repository": self.challenge2.github_repository, "github_branch": self.challenge2.github_branch, + "github_token": self.challenge2.github_token, }, ] response = self.client.get(self.url, {}, format="json") @@ -2026,6 +2037,7 @@ def test_get_featured_challenges(self): "sqs_retention_period": self.challenge3.sqs_retention_period, "github_repository": self.challenge3.github_repository, "github_branch": self.challenge3.github_branch, + "github_token": self.challenge3.github_token, } ] response = self.client.get(self.url, {}, format="json") @@ -2192,6 +2204,7 @@ def test_get_challenge_by_pk_when_user_is_challenge_host(self): "sqs_retention_period": self.challenge3.sqs_retention_period, "github_repository": self.challenge3.github_repository, "github_branch": self.challenge3.github_branch, + "github_token": self.challenge3.github_token, } response = self.client.get(self.url, {}) @@ -2285,6 +2298,7 @@ def test_get_challenge_by_pk_when_user_is_participant(self): "sqs_retention_period": self.challenge4.sqs_retention_period, "github_repository": self.challenge4.github_repository, "github_branch": self.challenge4.github_branch, + "github_token": self.challenge4.github_token, } self.client.force_authenticate(user=self.user1) @@ -2440,6 +2454,7 @@ def test_get_challenge_when_host_team_is_given(self): "sqs_retention_period": self.challenge2.sqs_retention_period, "github_repository": self.challenge2.github_repository, "github_branch": self.challenge2.github_branch, + "github_token": self.challenge2.github_token, } ] @@ -2521,6 +2536,7 @@ def 
test_get_challenge_when_participant_team_is_given(self): "sqs_retention_period": self.challenge2.sqs_retention_period, "github_repository": self.challenge2.github_repository, "github_branch": self.challenge2.github_branch, + "github_token": self.challenge2.github_token, } ] @@ -2602,6 +2618,7 @@ def test_get_challenge_when_mode_is_participant(self): "sqs_retention_period": self.challenge2.sqs_retention_period, "github_repository": self.challenge2.github_repository, "github_branch": self.challenge2.github_branch, + "github_token": self.challenge2.github_token, } ] @@ -2681,6 +2698,7 @@ def test_get_challenge_when_mode_is_host(self): "sqs_retention_period": self.challenge.sqs_retention_period, "github_repository": self.challenge.github_repository, "github_branch": self.challenge.github_branch, + "github_token": self.challenge.github_token, }, { "id": self.challenge2.pk, @@ -2750,6 +2768,7 @@ def test_get_challenge_when_mode_is_host(self): "sqs_retention_period": self.challenge2.sqs_retention_period, "github_repository": self.challenge2.github_repository, "github_branch": self.challenge2.github_branch, + "github_token": self.challenge2.github_token, }, ] diff --git a/tests/unit/participants/test_views.py b/tests/unit/participants/test_views.py index eaa3327fd9..a731b97543 100644 --- a/tests/unit/participants/test_views.py +++ b/tests/unit/participants/test_views.py @@ -885,6 +885,7 @@ def test_get_teams_and_corresponding_challenges_for_a_participant(self): "sqs_retention_period": self.challenge1.sqs_retention_period, "github_repository": self.challenge1.github_repository, "github_branch": self.challenge1.github_branch, + "github_token": self.challenge1.github_token, }, "participant_team": { "id": self.participant_team.id, @@ -983,6 +984,7 @@ def test_get_participant_team_challenge_list(self): "sqs_retention_period": self.challenge1.sqs_retention_period, "github_repository": self.challenge1.github_repository, "github_branch": self.challenge1.github_branch, + 
"github_token": self.challenge1.github_token, } ] From dff14ee504c3fdbf56a3725e285688d507762191 Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Tue, 26 Aug 2025 16:37:40 +0530 Subject: [PATCH 15/20] Omit non essential model fields --- .../0115_add_last_github_sync_field.py | 18 ------------------ apps/challenges/models.py | 2 -- 2 files changed, 20 deletions(-) delete mode 100644 apps/challenges/migrations/0115_add_last_github_sync_field.py diff --git a/apps/challenges/migrations/0115_add_last_github_sync_field.py b/apps/challenges/migrations/0115_add_last_github_sync_field.py deleted file mode 100644 index a831352b44..0000000000 --- a/apps/challenges/migrations/0115_add_last_github_sync_field.py +++ /dev/null @@ -1,18 +0,0 @@ -# Generated by Django 2.2.20 on 2025-08-23 19:02 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("challenges", "0114_add_github_token_field"), - ] - - operations = [ - migrations.AddField( - model_name="challenge", - name="last_github_sync", - field=models.DateTimeField(blank=True, null=True), - ), - ] diff --git a/apps/challenges/models.py b/apps/challenges/models.py index d353f72c07..3011470388 100644 --- a/apps/challenges/models.py +++ b/apps/challenges/models.py @@ -302,8 +302,6 @@ def __init__(self, *args, **kwargs): github_token = models.CharField( max_length=200, null=True, blank=True, default="" ) - # Simple sync tracking - last_github_sync = models.DateTimeField(null=True, blank=True) # The number of vCPU for a Fargate worker for the challenge. Default value # is 0.25 vCPU. 
worker_cpu_cores = models.IntegerField(null=True, blank=True, default=512) From d277d88f06c275a64e6f5da1591b0884e392a55c Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Mon, 1 Sep 2025 23:42:23 +0530 Subject: [PATCH 16/20] Pass code quality checks --- apps/base/utils.py | 1 + apps/challenges/github_utils.py | 10 +++++----- apps/challenges/models.py | 1 - tests/unit/challenges/test_github_utils.py | 1 - 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/apps/base/utils.py b/apps/base/utils.py index c970ae20b9..8c8e93f788 100644 --- a/apps/base/utils.py +++ b/apps/base/utils.py @@ -11,6 +11,7 @@ import requests import sendgrid from django.conf import settings +from django.core import serializers from django.utils.deconstruct import deconstructible from rest_framework.exceptions import NotFound from rest_framework.pagination import PageNumberPagination diff --git a/apps/challenges/github_utils.py b/apps/challenges/github_utils.py index e84d2ca2b2..fe86761650 100644 --- a/apps/challenges/github_utils.py +++ b/apps/challenges/github_utils.py @@ -57,9 +57,9 @@ def github_challenge_sync(challenge_id, changed_field): except Challenge.DoesNotExist: logger.error(f"Challenge {challenge_id} not found") return False - except Exception as e: - logger.error( - f"Error syncing challenge {challenge_id} to GitHub: {str(e)}" + except Exception: + logger.exception( + f"Error syncing challenge {challenge_id} to GitHub" ) return False finally: @@ -119,8 +119,8 @@ def github_challenge_phase_sync(challenge_phase_id, changed_field): except ChallengePhase.DoesNotExist: logger.error(f"Challenge phase {challenge_phase_id} not found") return False - except Exception as e: - logger.error( + except Exception: + logger.exception( f"Error syncing challenge phase {challenge_phase_id} to GitHub" ) return False diff --git a/apps/challenges/models.py b/apps/challenges/models.py index 3011470388..126e608dad 100644 --- a/apps/challenges/models.py +++ b/apps/challenges/models.py @@ -9,7 +9,6 
@@ from django.contrib.auth.models import User from django.contrib.postgres.fields import ArrayField, JSONField from django.core import serializers -from django.core.handlers.wsgi import WSGIRequest from django.db import models from django.db.models import signals from django.db.models.signals import pre_save diff --git a/tests/unit/challenges/test_github_utils.py b/tests/unit/challenges/test_github_utils.py index b9615fc333..2793ae9143 100644 --- a/tests/unit/challenges/test_github_utils.py +++ b/tests/unit/challenges/test_github_utils.py @@ -1,4 +1,3 @@ -import json import logging from unittest.mock import Mock, patch From 4a05fa5ba38a40a737f1342552475532309dde2f Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Tue, 2 Sep 2025 00:48:28 +0530 Subject: [PATCH 17/20] Pass code quality checks --- apps/challenges/github_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/apps/challenges/github_utils.py b/apps/challenges/github_utils.py index fe86761650..a0c42686ce 100644 --- a/apps/challenges/github_utils.py +++ b/apps/challenges/github_utils.py @@ -58,9 +58,7 @@ def github_challenge_sync(challenge_id, changed_field): logger.error(f"Challenge {challenge_id} not found") return False except Exception: - logger.exception( - f"Error syncing challenge {challenge_id} to GitHub" - ) + logger.exception(f"Error syncing challenge {challenge_id} to GitHub") return False finally: # Always clean up the flags From 9cd82fc117c4f581b9ac89c1825608cc31a2eb6b Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Tue, 2 Sep 2025 23:03:57 +0530 Subject: [PATCH 18/20] Add coverage for missing lines --- tests/unit/base/test_utils.py | 17 + .../challenges/test_github_sync_signals.py | 38 ++ tests/unit/challenges/test_github_utils.py | 379 ++++++++++++++++++ tests/unit/challenges/test_models.py | 41 ++ tests/unit/challenges/test_serializers.py | 172 +++++++- 5 files changed, 646 insertions(+), 1 deletion(-) diff --git a/tests/unit/base/test_utils.py 
b/tests/unit/base/test_utils.py index 3701157fc0..fd77712535 100644 --- a/tests/unit/base/test_utils.py +++ b/tests/unit/base/test_utils.py @@ -1,3 +1,6 @@ +import json + + import os import unittest from datetime import timedelta @@ -403,3 +406,17 @@ def test_encode_data_empty_list(self): def test_decode_data_empty_list(self): data = [] self.assertEqual(decode_data(data), []) + + +class TestDeserializeObject(TestCase): + def test_deserialize_object_returns_model_instance(self): + from django.contrib.auth.models import User + from django.core import serializers as dj_serializers + from apps.base.utils import deserialize_object + + user = User.objects.create(username="alice", email="alice@example.com") + serialized = dj_serializers.serialize("json", [user]) + obj = deserialize_object(serialized) + self.assertIsNotNone(obj) + self.assertIsInstance(obj, User) + self.assertEqual(obj.pk, user.pk) diff --git a/tests/unit/challenges/test_github_sync_signals.py b/tests/unit/challenges/test_github_sync_signals.py index b83897b113..71a5ae4af0 100644 --- a/tests/unit/challenges/test_github_sync_signals.py +++ b/tests/unit/challenges/test_github_sync_signals.py @@ -147,6 +147,25 @@ def test_skip_when_change_source_is_github( if hasattr(challenge_models._github_sync_context, "change_source"): delattr(challenge_models._github_sync_context, "change_source") + @patch("challenges.aws_utils.challenge_approval_callback") + @patch("challenges.github_utils.github_challenge_sync") + def test_skip_when_skip_github_sync_flag_set( + self, mock_sync, _mock_approval_cb + ): + # When internal skip flag is set, no sync should happen + challenge_models._github_sync_context.skip_github_sync = True + try: + self.challenge.title = "Ignored by flag" + self.challenge.save(update_fields=["title"]) + mock_sync.assert_not_called() + finally: + if hasattr( + challenge_models._github_sync_context, "skip_github_sync" + ): + delattr( + challenge_models._github_sync_context, "skip_github_sync" + ) + 
@patch("challenges.aws_utils.challenge_approval_callback") @patch("challenges.github_utils.github_challenge_sync") def test_no_changed_field_inference_means_no_sync( @@ -169,3 +188,22 @@ def test_multiple_update_fields_prefers_first( _args, kwargs = mock_sync.call_args # Order of update_fields is non-deterministic (Django converts to set), accept either assert kwargs.get("changed_field") in {"title", "description"} + + @patch("challenges.github_utils.github_challenge_phase_sync") + def test_phase_middleware_infers_changed_field_and_triggers_sync( + self, mock_phase_sync + ): + # Simulate request payload for phase update + class _Req: + method = "PATCH" + body = b'{\n "name": "New Phase Name"\n}' + + mw = challenge_models.GitHubSyncMiddleware() + mw.process_request(_Req()) + + self.phase.name = "New Phase Name" + self.phase.save() + + mock_phase_sync.assert_called_once() + _args, kwargs = mock_phase_sync.call_args + self.assertEqual(kwargs.get("changed_field"), "name") diff --git a/tests/unit/challenges/test_github_utils.py b/tests/unit/challenges/test_github_utils.py index 2793ae9143..0ecfbb10b1 100644 --- a/tests/unit/challenges/test_github_utils.py +++ b/tests/unit/challenges/test_github_utils.py @@ -1,6 +1,8 @@ +import datetime as dt import logging from unittest.mock import Mock, patch +from challenges import github_utils as gu from challenges.github_interface import GithubInterface from challenges.github_utils import ( github_challenge_phase_sync, @@ -111,6 +113,334 @@ def test_update_data_from_path_encodes_and_calls_update(self, mock_update): self.assertEqual(result, {"sha": "new_sha"}) mock_update.assert_called_once() + @patch("challenges.github_interface.GithubInterface.get_content_from_path") + @patch("challenges.github_interface.requests.request") + def test_update_content_from_path_creates_when_missing( + self, mock_request, mock_get + ): + """When file is missing, create flow is used and no sha is sent""" + mock_get.return_value = None + mock_response = 
Mock() + mock_response.json.return_value = {"sha": "new_sha"} + mock_response.raise_for_status.return_value = None + mock_request.return_value = mock_response + + result = self.github.update_content_from_path( + "new.yaml", "Y29udGVudA==" + ) + + self.assertEqual(result, {"sha": "new_sha"}) + # Ensure PUT happened once + mock_request.assert_called_once() + + def test_process_field_value_formats_and_serializes(self): + # date formatting + date = dt.datetime(2023, 1, 2, 3, 4, 5) + self.assertEqual( + self.github._process_field_value("start_date", date), + "2023-01-02 03:04:05", + ) + + # list of objects -> list of ids + class Obj: + def __init__(self, pk): + self.pk = pk + + self.assertEqual( + self.github._process_field_value("list", [Obj(1), Obj(2)]), [1, 2] + ) + + # file-like/path-like + class Dummy: + name = "path/to/file.txt" + + self.assertEqual( + self.github._process_field_value("evaluation_script", Dummy()), + "path/to/file.txt", + ) + + def test_read_text_from_file_field_variants(self): + # value with open/read returning bytes + class FieldFileLike: + def __init__(self, data=b"hello"): + self._data = data + + def open(self, *_args, **_kwargs): + return None + + def read(self): + return self._data + + def close(self): + return None + + self.assertEqual( + self.github._read_text_from_file_field(FieldFileLike(b"hi")), "hi" + ) + + # value with only read + class ReadOnly: + def __init__(self, data=b"bye"): + self._data = data + + def read(self): + return self._data + + self.assertEqual( + self.github._read_text_from_file_field(ReadOnly(b"bye")), "bye" + ) + + # value without read/open + class Other: + def __str__(self): + return "stringified" + + self.assertEqual( + self.github._read_text_from_file_field(Other()), "stringified" + ) + + def test_get_data_from_path_none_when_no_content(self): + with patch.object( + self.github, "get_content_from_path", return_value={} + ): + assert self.github.get_data_from_path("x") is None + + def 
test_is_repository_true_false(self): + with patch.object(self.github, "make_request", return_value={"id": 1}): + assert self.github.is_repository() is True + with patch.object(self.github, "make_request", return_value=None): + assert self.github.is_repository() is False + + def test_update_challenge_config_non_file_changes(self): + # Existing YAML without key, then update data + with patch.object( + self.github, "get_data_from_path", return_value="title: Old\n" + ), patch.object( + self.github, "update_data_from_path", return_value={"ok": True} + ) as mock_update: + + class ChallengeObj: + title = "New" + start_date = None + end_date = None + + ok = self.github.update_challenge_config(ChallengeObj(), "title") + self.assertTrue(ok) + mock_update.assert_called_once() + + def test_update_challenge_config_skips_if_unchanged(self): + with patch.object( + self.github, "get_data_from_path", return_value="title: Same\n" + ): + + class ChallengeObj: + title = "Same" + + ok = self.github.update_challenge_config(ChallengeObj(), "title") + self.assertTrue(ok) # returns True but no update + + def test_update_challenge_config_file_field_missing_path(self): + # evaluation_script path missing in YAML + with patch.object( + self.github, "get_data_from_path", return_value="{}\n" + ): + + class ChallengeObj: + evaluation_script = object() + + ok = self.github.update_challenge_config( + ChallengeObj(), "evaluation_script" + ) + self.assertFalse(ok) + + def test_update_challenge_config_file_field_updates_when_changed(self): + # YAML with path, and new content different + with patch.object( + self.github, + "get_data_from_path", + side_effect=[ + "evaluation_script: path/file.txt\n", + "old-text", + ], + ), patch.object( + self.github, "update_data_from_path", return_value={"ok": True} + ) as mock_update: + + class FileLike: + def __init__(self, data): + self._data = data + + def read(self): + return self._data + + class ChallengeObj: + evaluation_script = FileLike(b"new-text") + + ok = 
self.github.update_challenge_config( + ChallengeObj(), "evaluation_script" + ) + self.assertTrue(ok) + mock_update.assert_called_once() + + def test_update_challenge_config_returns_false_on_process_failure(self): + with patch.object( + self.github, "get_data_from_path", return_value="{}\n" + ): + + class ChallengeObj: + # make _process_field_value return None by passing None + title = None + + ok = self.github.update_challenge_config(ChallengeObj(), "title") + self.assertFalse(ok) + + def test_update_challenge_phase_config_not_found_by_codename(self): + with patch.object( + self.github, + "get_data_from_path", + return_value="challenge_phases: []\n", + ): + + class PhaseObj: + codename = "missing" + challenge = object() + + ok = self.github.update_challenge_phase_config(PhaseObj(), "name") + self.assertFalse(ok) + + def test_update_challenge_phase_config_file_field_missing_path(self): + yaml_text = """ +challenge_phases: + - codename: C1 + name: N +""" + with patch.object( + self.github, "get_data_from_path", return_value=yaml_text + ): + + class PhaseObj: + codename = "C1" + test_annotation = object() + challenge = object() + + ok = self.github.update_challenge_phase_config( + PhaseObj(), "test_annotation" + ) + self.assertFalse(ok) + + def test_update_challenge_phase_config_non_file_updates(self): + yaml_text = """ +challenge_phases: + - codename: C1 + name: Old +""" + with patch.object( + self.github, "get_data_from_path", return_value=yaml_text + ), patch.object( + self.github, "update_data_from_path", return_value={"ok": True} + ) as mock_update: + + class PhaseObj: + codename = "C1" + name = "New" + challenge = object() + + ok = self.github.update_challenge_phase_config(PhaseObj(), "name") + self.assertTrue(ok) + mock_update.assert_called_once() + + def test_update_challenge_phase_config_skips_if_unchanged(self): + yaml_text = """ +challenge_phases: + - codename: C1 + name: Same +""" + with patch.object( + self.github, "get_data_from_path", 
return_value=yaml_text + ): + + class PhaseObj: + codename = "C1" + name = "Same" + challenge = object() + + ok = self.github.update_challenge_phase_config(PhaseObj(), "name") + self.assertTrue(ok) + + def test_update_challenge_phase_config_file_field_updates_when_changed( + self, + ): + yaml_text = """ +challenge_phases: + - codename: C1 + name: N + test_annotation_file: path/ann.txt +""" + with patch.object( + self.github, + "get_data_from_path", + side_effect=[ + yaml_text, + "old", + ], + ), patch.object( + self.github, "update_data_from_path", return_value={"ok": True} + ) as mock_update: + + class FileLike: + def __init__(self, data): + self._data = data + + def read(self): + return self._data + + class PhaseObj: + codename = "C1" + test_annotation = FileLike(b"new") + challenge = object() + + ok = self.github.update_challenge_phase_config( + PhaseObj(), "test_annotation" + ) + self.assertTrue(ok) + mock_update.assert_called_once() + + +# Lightweight checks for sync config constants to ensure availability and shape +def test_github_sync_config_expected_keys(): + from challenges import github_sync_config as cfg + + assert isinstance(cfg.challenge_non_file_fields, list) + for key in [ + "title", + "published", + "image", + "evaluation_script", + "start_date", + "end_date", + ]: + assert key in cfg.challenge_non_file_fields + + assert isinstance(cfg.challenge_file_fields, list) + for key in [ + "description", + "evaluation_details", + "terms_and_conditions", + "submission_guidelines", + ]: + assert key in cfg.challenge_file_fields + + assert isinstance(cfg.challenge_phase_non_file_fields, list) + assert "name" in cfg.challenge_phase_non_file_fields + assert "codename" in cfg.challenge_phase_non_file_fields + + assert isinstance(cfg.challenge_phase_file_fields, list) + assert "description" in cfg.challenge_phase_file_fields + + assert isinstance(cfg.challenge_additional_sections, list) + for key in ["leaderboard", "dataset_splits", "challenge_phase_splits"]: + 
assert key in cfg.challenge_additional_sections + class TestGithubSync(TestCase): """Test cases for GitHub sync functionality""" @@ -156,6 +486,17 @@ def test_sync_challenge_to_github_success(self, mock_github_class): ) mock_github.update_challenge_config.assert_called_once() + @patch("challenges.github_utils.GithubInterface") + def test_sync_challenge_returns_false_on_update_failure( + self, mock_github_class + ): + mock_github = Mock() + mock_github_class.return_value = mock_github + mock_github.update_challenge_config.return_value = False + + ok = gu.github_challenge_sync(self.challenge.id, changed_field="title") + self.assertFalse(ok) + def test_sync_challenge_to_github_no_token(self): """Test challenge sync when no GitHub token is configured""" self.challenge.github_token = "" @@ -192,6 +533,44 @@ def test_sync_challenge_phase_to_github_no_token(self): self.challenge_phase.id, changed_field="name" ) + def test_github_sync_invalid_changed_field_type_and_not_found(self): + # invalid changed_field should return False and cleanup flags + ok = gu.github_challenge_sync(self.challenge.id, changed_field=123) + self.assertFalse(ok) + # flags are cleaned + self.assertFalse(hasattr(gu._github_sync_context, "skip_github_sync")) + self.assertFalse(hasattr(gu._github_sync_context, "change_source")) + + # not found + ok2 = gu.github_challenge_sync(999999, changed_field="title") + self.assertFalse(ok2) + + @patch("challenges.github_utils.GithubInterface") + def test_sync_challenge_phase_returns_false_on_update_failure( + self, mock_github_class + ): + mock_github = Mock() + mock_github_class.return_value = mock_github + mock_github.update_challenge_phase_config.return_value = False + + ok = gu.github_challenge_phase_sync( + self.challenge_phase.id, changed_field="name" + ) + self.assertFalse(ok) + + def test_github_phase_sync_invalid_changed_field_type_and_not_found(self): + # invalid changed_field type + ok = gu.github_challenge_phase_sync( + self.challenge_phase.id, 
changed_field=[] + ) + self.assertFalse(ok) + self.assertFalse(hasattr(gu._github_sync_context, "skip_github_sync")) + self.assertFalse(hasattr(gu._github_sync_context, "change_source")) + + # not found + ok2 = gu.github_challenge_phase_sync(999999, changed_field="name") + self.assertFalse(ok2) + def test_sync_challenge_phase_to_github_no_repo(self): """Test challenge phase sync when no GitHub repository is configured""" self.challenge.github_repository = "" diff --git a/tests/unit/challenges/test_models.py b/tests/unit/challenges/test_models.py index fef437059f..73ddae843a 100644 --- a/tests/unit/challenges/test_models.py +++ b/tests/unit/challenges/test_models.py @@ -8,8 +8,11 @@ ChallengePhase, ChallengePhaseSplit, DatasetSplit, + GitHubSyncMiddleware, Leaderboard, LeaderboardData, + _infer_changed_field_from_request, + reset_github_sync_context, ) from django.contrib.auth.models import User from django.core.files.uploadedfile import SimpleUploadedFile @@ -261,3 +264,41 @@ def test__str__(self): "{0} : {1}".format(self.challenge_phase_split, self.submission), self.leaderboard_data.__str__(), ) + + +class GitHubSyncMiddlewareTests(BaseTestCase): + def test_middleware_captures_post_keys_and_infer_from_request(self): + reset_github_sync_context() + + class _Req: + method = "POST" + body = b"" + POST = {"title": "T", "created_at": "ignore"} + + mw = GitHubSyncMiddleware() + mw.process_request(_Req()) + + # An object that has attribute 'title' should be inferred as changed + class Obj: # minimal stub with attribute present + title = "T" + + inferred = _infer_changed_field_from_request(Obj()) + self.assertEqual(inferred, "title") + + def test_reset_github_sync_context_clears_payload_keys(self): + # seed payload keys via middleware + class _Req: + method = "PATCH" + body = b'{\n "name": "X"\n}' + + mw = GitHubSyncMiddleware() + mw.process_request(_Req()) + + # now reset and confirm inference returns None + reset_github_sync_context() + + class Obj: + name = "X" + + 
inferred = _infer_changed_field_from_request(Obj()) + self.assertIsNone(inferred) diff --git a/tests/unit/challenges/test_serializers.py b/tests/unit/challenges/test_serializers.py index fb83b93972..ae6c32bf3b 100644 --- a/tests/unit/challenges/test_serializers.py +++ b/tests/unit/challenges/test_serializers.py @@ -6,16 +6,30 @@ import pytest from allauth.account.models import EmailAddress -from challenges.models import Challenge, ChallengePhase +from challenges.models import ( + Challenge, + ChallengePhase, + DatasetSplit, + Leaderboard, +) from challenges.serializers import ( + ChallengeConfigSerializer, ChallengePhaseCreateSerializer, ChallengePhaseSerializer, + ChallengeSerializer, + DatasetSplitSerializer, + LeaderboardDataSerializer, + LeaderboardSerializer, PWCChallengeLeaderboardSerializer, + StarChallengeSerializer, UserInvitationSerializer, + ZipChallengePhaseSplitSerializer, + ZipChallengeSerializer, ) from challenges.utils import add_sponsors_to_challenge from django.contrib.auth.models import User from django.core.files.uploadedfile import SimpleUploadedFile +from django.test import TestCase as DjangoTestCase from django.utils import timezone from hosts.models import ChallengeHost, ChallengeHostTeam from participants.models import ParticipantTeam @@ -204,6 +218,13 @@ def setUp(self): instance=self.challenge_phase ) + def test_phase_create_serializer_exclude_fields(self): + ser = ChallengePhaseCreateSerializer( + instance=self.challenge_phase, + context={"exclude_fields": ["slug", "nonexistent"]}, + ) + self.assertNotIn("slug", ser.fields) + def test_challenge_phase_create_serializer(self): data = self.challenge_phase_create_serializer.data @@ -486,6 +507,155 @@ def test_get_leaderboard(self): self.assertEqual(result, ["accuracy", "loss", "f1_score"]) +class DatasetSplitSerializerInitTests(DjangoTestCase): + def test_dataset_split_serializer_sets_config_id(self): + ser = DatasetSplitSerializer(data={}, context={"config_id": 7}) + 
self.assertEqual(ser.initial_data.get("config_id"), 7) + + +class ChallengeConfigAndLeaderboardSerializerInitTests(DjangoTestCase): + def test_challenge_config_serializer_sets_user(self): + user = User.objects.create(username="u3") + + class Req: + pass + + req = Req() + req.user = user + + ser = ChallengeConfigSerializer(data={}, context={"request": req}) + self.assertEqual(ser.initial_data.get("user"), user.pk) + + def test_leaderboard_serializer_sets_config_id(self): + ser = LeaderboardSerializer(data={}, context={"config_id": 5}) + self.assertEqual(ser.initial_data.get("config_id"), 5) + + +class ZipSerializersInitTests(DjangoTestCase): + def test_zip_challenge_serializer_sets_optional_fields_from_context(self): + img = object() + eval_script = object() + + class Req: + method = "GET" + + ser = ZipChallengeSerializer( + data={}, + context={ + "request": Req(), + "image": img, + "evaluation_script": eval_script, + "github_repository": "org/repo", + "github_branch": "main", + "github_token": "tok", + }, + ) + self.assertIs(ser.initial_data.get("image"), img) + self.assertIs(ser.initial_data.get("evaluation_script"), eval_script) + self.assertEqual(ser.initial_data.get("github_repository"), "org/repo") + self.assertEqual(ser.initial_data.get("github_branch"), "main") + self.assertEqual(ser.initial_data.get("github_token"), "tok") + + def test_zip_challenge_phase_split_serializer_excludes_fields(self): + ser = ZipChallengePhaseSplitSerializer( + instance=None, + context={"exclude_fields": ["leaderboard", "nonexistent"]}, + ) + self.assertNotIn("leaderboard", ser.fields) + + +class StarChallengeSerializerInitTests(DjangoTestCase): + def test_star_challenge_serializer_sets_fields_from_context(self): + user = User.objects.create(username="u6") + team = ChallengeHostTeam.objects.create( + team_name="t6", created_by=user + ) + chal = Challenge.objects.create(title="T6", creator=team) + + class Req: + pass + + req = Req() + req.user = user + + ser = 
StarChallengeSerializer( + data={}, + context={ + "challenge": chal, + "request": req, + "is_starred": True, + }, + ) + self.assertEqual(ser.initial_data.get("challenge"), chal.pk) + self.assertEqual(ser.initial_data.get("user"), user.pk) + self.assertTrue(ser.initial_data.get("is_starred")) + + +class LeaderboardDataSerializerInitTests(DjangoTestCase): + def test_leaderboard_data_serializer_sets_foreign_keys_from_context(self): + user = User.objects.create(username="u7") + team = ChallengeHostTeam.objects.create( + team_name="t7", created_by=user + ) + chal = Challenge.objects.create(title="T7", creator=team) + phase = ChallengePhase.objects.create( + name="P", description="d", challenge=chal + ) + ds = DatasetSplit.objects.create(name="N", codename="C") + lb = Leaderboard.objects.create(schema={}) + from challenges.models import ChallengePhaseSplit + + cps = ChallengePhaseSplit.objects.create( + challenge_phase=phase, dataset_split=ds, leaderboard=lb + ) + + class Sub: + pk = 101 + + ser = LeaderboardDataSerializer( + data={}, + context={"challenge_phase_split": cps, "submission": Sub()}, + ) + self.assertEqual(ser.initial_data.get("challenge_phase_split"), cps.pk) + self.assertEqual(ser.initial_data.get("submission"), 101) + + +class ChallengeSerializerInitTests(DjangoTestCase): + def test_challenge_serializer_sets_creator_and_token_on_non_get(self): + user = User.objects.create(username="u") + team = ChallengeHostTeam.objects.create(team_name="t", created_by=user) + + class Req: + pass + + req = Req() + req.method = "POST" + req.user = user + + context = { + "request": req, + "challenge_host_team": team, + "github_token": "tok", + } + data = {"title": "T"} + ser = ChallengeSerializer(data=data, context=context) + self.assertEqual(ser.initial_data.get("creator"), team.pk) + self.assertEqual(ser.initial_data.get("github_token"), "tok") + + def test_challenge_serializer_exposes_creator_field_on_get(self): + user = User.objects.create(username="u2") + team = 
ChallengeHostTeam.objects.create( + team_name="t2", created_by=user + ) + chal = Challenge.objects.create(title="TT", creator=team) + + class Req: + method = "GET" + + ser = ChallengeSerializer(instance=chal, context={"request": Req()}) + self.assertIn("creator", ser.fields) + + @pytest.mark.django_db class UserInvitationSerializerTests(TestCase): def setUp(self): From 958429160bc2f4211c82e4cb842a5a8cf44fa420 Mon Sep 17 00:00:00 2001 From: Zahed-Riyaz Date: Wed, 3 Sep 2025 02:01:02 +0530 Subject: [PATCH 19/20] Fix TestDeserializeObject error --- tests/unit/base/test_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/base/test_utils.py b/tests/unit/base/test_utils.py index fd77712535..68bce30ed7 100644 --- a/tests/unit/base/test_utils.py +++ b/tests/unit/base/test_utils.py @@ -28,6 +28,7 @@ from django.contrib.auth.models import User from django.core.files.uploadedfile import SimpleUploadedFile from django.utils import timezone +from django.test import TestCase as DjangoTestCase from hosts.models import ChallengeHostTeam from jobs.models import Submission from participants.models import Participant, ParticipantTeam @@ -408,7 +409,7 @@ def test_decode_data_empty_list(self): self.assertEqual(decode_data(data), []) -class TestDeserializeObject(TestCase): +class TestDeserializeObject(DjangoTestCase): def test_deserialize_object_returns_model_instance(self): from django.contrib.auth.models import User from django.core import serializers as dj_serializers From 65cf97f927cebc4cef3654d5bd98817457943304 Mon Sep 17 00:00:00 2001 From: ZahedR_327 Date: Thu, 4 Sep 2025 02:05:52 +0530 Subject: [PATCH 20/20] Update .travis.yml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4ccad346db..33cd6f856a 100755 --- a/.travis.yml +++ b/.travis.yml @@ -93,4 +93,4 @@ notifications: email: on_success: change on_failure: always - slack: cloudcv:gy3CGQGNXLwXOqVyzXGZfdea \ No newline at end of 
file + slack: cloudcv:gy3CGQGNXLwXOqVyzXGZfdea