From ae2107db5aa21800d472df84c5630da85dde3a0a Mon Sep 17 00:00:00 2001 From: anderdc Date: Tue, 3 Feb 2026 21:28:46 -0600 Subject: [PATCH 1/2] fix duplicate miner storage --- gittensor/validator/evaluation/reward.py | 4 +-- gittensor/validator/storage/queries.py | 23 ++++++++++++++++ gittensor/validator/storage/repository.py | 24 +++++++++++++++++ gittensor/validator/utils/storage.py | 3 +++ neurons/validator.py | 32 +++++++++++++++++++---- 5 files changed, 79 insertions(+), 7 deletions(-) diff --git a/gittensor/validator/evaluation/reward.py b/gittensor/validator/evaluation/reward.py index 1ecd92e..69cf262 100644 --- a/gittensor/validator/evaluation/reward.py +++ b/gittensor/validator/evaluation/reward.py @@ -131,7 +131,7 @@ async def get_rewards( miner_evaluations[uid] = miner_evaluation # If evaluation of miner was successful, store to cache, if api failure, fallback to previous successful evaluation if any - self.store_or_use_cached_evaluation(miner_evaluations) + cached_uids = self.store_or_use_cached_evaluation(miner_evaluations) # Adjust scores for duplicate accounts detect_and_penalize_miners_sharing_github(miner_evaluations) @@ -149,6 +149,6 @@ async def get_rewards( final_rewards = apply_dynamic_emissions_using_network_contributions(normalized_rewards, miner_evaluations) # Store miner evaluations after calculating all scores - await self.bulk_store_evaluation(miner_evaluations) + await self.bulk_store_evaluation(miner_evaluations, skip_uids=cached_uids) return np.array([final_rewards.get(uid, 0.0) for uid in sorted(uids)]) diff --git a/gittensor/validator/storage/queries.py b/gittensor/validator/storage/queries.py index 6bf445f..db35840 100644 --- a/gittensor/validator/storage/queries.py +++ b/gittensor/validator/storage/queries.py @@ -1,5 +1,28 @@ # Storage Queries - Only SET/INSERT operations for writing data +# Cleanup Queries - Remove stale data when a miner re-registers on a new uid/hotkey +CLEANUP_STALE_MINER_EVALUATIONS = """ +DELETE FROM miner_evaluations +WHERE github_id = %s + AND github_id != '0' + AND (uid != %s OR hotkey != %s) + AND created_at <= %s +""" + +CLEANUP_STALE_MINER_TIER_STATS = """ +DELETE FROM miner_tier_stats +WHERE github_id = %s + AND github_id != '0' + AND (uid != %s OR hotkey != %s) +""" + +CLEANUP_STALE_MINERS = """ +DELETE FROM miners +WHERE github_id = %s + AND github_id != '0' + AND (uid != %s OR hotkey != %s) +""" + # Miner Queries SET_MINER = """ INSERT INTO miners (uid, hotkey, github_id) diff --git a/gittensor/validator/storage/repository.py b/gittensor/validator/storage/repository.py index 0216d23..799b9f7 100644 --- a/gittensor/validator/storage/repository.py +++ b/gittensor/validator/storage/repository.py @@ -21,6 +21,9 @@ BULK_UPSERT_MINER_EVALUATION, BULK_UPSERT_MINER_TIER_STATS, BULK_UPSERT_PULL_REQUESTS, + CLEANUP_STALE_MINER_EVALUATIONS, + CLEANUP_STALE_MINER_TIER_STATS, + CLEANUP_STALE_MINERS, SET_MINER, ) @@ -106,6 +109,27 @@ def set_miner(self, miner: Miner) -> bool: params = (miner.uid, miner.hotkey, miner.github_id) return self.set_entity(SET_MINER, params) + def cleanup_stale_miner_data(self, evaluation: MinerEvaluation) -> None: + """ + Remove stale evaluation data when a miner re-registers on a new uid/hotkey. + + Deletes miner_evaluations, miner_tier_stats, and miners rows for the same + github_id but under a different (uid, hotkey) pair, ensuring only one + evaluation per real github user exists in the database. + + Args: + evaluation: The current MinerEvaluation being stored + """ + if not evaluation.github_id or evaluation.github_id == '0': + return + + params = (evaluation.github_id, evaluation.uid, evaluation.hotkey) + eval_params = params + (evaluation.evaluation_timestamp,) + + self.execute_command(CLEANUP_STALE_MINER_EVALUATIONS, eval_params) + self.execute_command(CLEANUP_STALE_MINER_TIER_STATS, params) + self.execute_command(CLEANUP_STALE_MINERS, params) + def store_pull_requests_bulk(self, pull_requests: List[PullRequest]) -> int: """ Bulk insert/update pull requests with efficient SQL conflict resolution diff --git a/gittensor/validator/utils/storage.py b/gittensor/validator/utils/storage.py index 362e873..f08c94e 100644 --- a/gittensor/validator/utils/storage.py +++ b/gittensor/validator/utils/storage.py @@ -62,6 +62,9 @@ def store_evaluation(self, miner_eval: MinerEvaluation) -> StorageResult: ) result.stored_counts['issues'] = self.repo.store_issues_bulk(miner_eval.get_all_issues()) result.stored_counts['file_changes'] = self.repo.store_file_changes_bulk(miner_eval.get_all_file_changes()) + # Clean up stale data if this github_id was previously registered under a different uid/hotkey + self.repo.cleanup_stale_miner_data(miner_eval) + result.stored_counts['evaluations'] = 1 if self.repo.set_miner_evaluation(miner_eval) else 0 result.stored_counts['tier_stats'] = 1 if self.repo.set_miner_tier_stats(miner_eval) else 0 diff --git a/neurons/validator.py b/neurons/validator.py index d812979..2192520 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -18,11 +18,11 @@ import threading import time -from typing import Dict, List +from typing import Dict, List, Set import bittensor as bt -import wandb +import wandb from gittensor.__init__ import __version__ from gittensor.classes import MinerEvaluation, MinerEvaluationCache from gittensor.validator.forward import forward @@ -84,15 +84,26 @@ def __init__(self, config=None): bt.logging.info('load_state()') self.load_state() - async def bulk_store_evaluation(self, miner_evals: Dict[int, MinerEvaluation]): - """Store all miner evaluations, log summary rather than per-UID.""" + async def bulk_store_evaluation(self, miner_evals: Dict[int, MinerEvaluation], skip_uids: Set[int] = None): + """Store all miner evaluations, log summary rather than per-UID. + + Args: + miner_evals: Dict of UID -> MinerEvaluation to store. + skip_uids: Set of UIDs to skip (e.g. cached evaluations that were already stored previously). + """ if self.db_storage is None: return + skip_uids = skip_uids or set() successful_count = 0 + skipped_count = 0 failed_uids: List[int] = [] for uid, evaluation in miner_evals.items(): + if uid in skip_uids: + skipped_count += 1 + continue + try: storage_result = self.db_storage.store_evaluation(evaluation) if storage_result.success: @@ -109,6 +120,8 @@ async def bulk_store_evaluation(self, miner_evals: Dict[int, MinerEvaluation]): # Summary logging if successful_count > 0: bt.logging.success(f'Stored validation results for {successful_count} UIDs to DB') + if skipped_count > 0: + bt.logging.info(f'Skipped {skipped_count} UIDs (cached evaluations)') if failed_uids: bt.logging.warning(f'Failed to store {len(failed_uids)} UIDs: {failed_uids}') @@ -131,12 +144,18 @@ async def store_evaluation(self, uid: int, miner_eval: MinerEvaluation): except Exception as e: bt.logging.error(f'Error when attempting to store miners evaluation for uid {uid}: {e}') - def store_or_use_cached_evaluation(self, miner_evaluations: Dict[int, MinerEvaluation]) -> None: + def store_or_use_cached_evaluation(self, miner_evaluations: Dict[int, MinerEvaluation]) -> Set[int]: """ Handle evaluation cache: store successful evals, fallback to cache for GitHub failures. Mutates the passed dict, replacing failed evaluations with cached ones if available. + + Returns: + Set of UIDs that were restored from cache (should be skipped during DB storage + since the cached data was already stored previously). """ + cached_uids: Set[int] = set() + for uid, miner_eval in miner_evaluations.items(): # Skip miners that failed validation (invalid PAT, etc.) if miner_eval.failed_reason is not None: @@ -156,6 +175,9 @@ def store_or_use_cached_evaluation(self, miner_evaluations: Dict[int, MinerEvalu f'closed={cached_eval.total_closed_prs})' ) miner_evaluations[uid] = cached_eval + cached_uids.add(uid) + + return cached_uids async def forward(self): """ From d7346e8192339528765b139297e92e63374278aa Mon Sep 17 00:00:00 2001 From: anderdc <61125407+anderdc@users.noreply.github.com> Date: Wed, 4 Feb 2026 03:36:12 +0000 Subject: [PATCH 2/2] style: auto-format with ruff --- gittensor/validator/evaluation/scoring.py | 4 +--- neurons/validator.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/gittensor/validator/evaluation/scoring.py b/gittensor/validator/evaluation/scoring.py index e56090e..af03ab4 100644 --- a/gittensor/validator/evaluation/scoring.py +++ b/gittensor/validator/evaluation/scoring.py @@ -344,9 +344,7 @@ def finalize_miner_scores(miner_evaluations: Dict[int, MinerEvaluation]) -> None ) # Calculate spam multiplier once per miner (same for all their merged PRs) - spam_multiplier = calculate_pr_spam_penalty_multiplier( - evaluation.total_open_prs, tier_stats - ) + spam_multiplier = calculate_pr_spam_penalty_multiplier(evaluation.total_open_prs, tier_stats) # Process merged PRs for pr in evaluation.merged_pull_requests: diff --git a/neurons/validator.py b/neurons/validator.py index 2192520..8bef671 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -21,8 +21,8 @@ from typing import Dict, List, Set import bittensor as bt - import wandb + from gittensor.__init__ import __version__ from gittensor.classes import MinerEvaluation, MinerEvaluationCache from gittensor.validator.forward import forward