Skip to content

Commit 0ca825d

Browse files
authored
Merge pull request #139 from backend-developers-ltd/manifest
reward manifest change
2 parents c959b4e + dc5c9e9 commit 0ca825d

File tree

13 files changed

+284
-45
lines changed

13 files changed

+284
-45
lines changed

compute_horde/compute_horde/mv_protocol/miner_requests.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ class ExecutorClassManifest(pydantic.BaseModel):
2929
class ExecutorManifest(pydantic.BaseModel):
3030
executor_classes: list[ExecutorClassManifest]
3131

32+
@property
33+
def total_count(self) -> int:
34+
return sum([x.count for x in self.executor_classes])
35+
3236

3337
class BaseMinerRequest(BaseRequest):
3438
message_type: RequestType

validator/app/src/compute_horde_validator/settings.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@
1313
from celery.schedules import crontab
1414
from compute_horde import base # noqa
1515

16-
# from celery.schedules import crontab
17-
18-
1916
root = environ.Path(__file__) - 2
2017

2118
env = environ.Env(DEBUG=(bool, False))
@@ -364,6 +361,9 @@ def wrapped(*args, **kwargs):
364361

365362
DEBUG_OVERRIDE_WEIGHTS_VERSION = env.int("DEBUG_OVERRIDE_WEIGHTS_VERSION", default=None)
366363

364+
MANIFEST_SCORE_MULTIPLIER = 1.05
365+
EXECUTOR_COUNT_INCREASE_THRESHOLD = 3
366+
367367

368368
def BITTENSOR_WALLET() -> bittensor.wallet:
369369
if not BITTENSOR_WALLET_NAME or not BITTENSOR_WALLET_HOTKEY_NAME:

validator/app/src/compute_horde_validator/validator/facilitator_client.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ async def miner_driver(self, job_request: JobRequest):
258258
miner_hotkey=job_request.miner_hotkey,
259259
my_hotkey=self.my_hotkey(),
260260
job_uuid=job_request.uuid,
261+
batch_id=None,
261262
keypair=self.keypair,
262263
)
263264
await execute_organic_job(

validator/app/src/compute_horde_validator/validator/management/commands/debug_run_synthetic_job_to_miner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ def handle(self, *args, **options):
5656
miner_hotkey=miner_hotkey,
5757
my_hotkey=key.ss58_address,
5858
job_uuid=None,
59+
batch_id=None,
5960
keypair=key,
6061
)
6162

validator/app/src/compute_horde_validator/validator/metagraph_client.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import asyncio
22
import datetime as dt
33
import logging
4+
import time
45

56
import bittensor
67
from asgiref.sync import sync_to_async
@@ -11,6 +12,32 @@
1112
logger = logging.getLogger(__name__)
1213

1314

15+
class WeightVersionHolder:
16+
def __init__(self):
17+
self._time_set = 0
18+
self.value = None
19+
20+
def get(self):
21+
if settings.DEBUG_OVERRIDE_WEIGHTS_VERSION is not None:
22+
return settings.DEBUG_OVERRIDE_WEIGHTS_VERSION
23+
24+
if time.time() - self._time_set > 300:
25+
subtensor = bittensor.subtensor(network=settings.BITTENSOR_NETWORK)
26+
hyperparameters = subtensor.get_subnet_hyperparameters(netuid=settings.BITTENSOR_NETUID)
27+
if hyperparameters is None:
28+
raise RuntimeError("Network hyperparameters are None")
29+
self.value = hyperparameters.weights_version
30+
self._time_set = time.time()
31+
return self.value
32+
33+
34+
weights_version_holder = WeightVersionHolder()
35+
36+
37+
def get_weights_version():
38+
return weights_version_holder.get()
39+
40+
1441
class AsyncMetagraphClient:
1542
def __init__(self, cache_time=dt.timedelta(minutes=5)):
1643
self.cache_time = cache_time
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Generated by Django 4.2.13 on 2024-06-27 11:12
2+
3+
import django.db.models.deletion
4+
from django.db import migrations, models
5+
6+
7+
class Migration(migrations.Migration):
8+
dependencies = [
9+
("validator", "0018_alter_systemevent_subtype_alter_systemevent_type"),
10+
]
11+
12+
operations = [
13+
migrations.CreateModel(
14+
name="MinerManifest",
15+
fields=[
16+
(
17+
"id",
18+
models.BigAutoField(
19+
auto_created=True,
20+
primary_key=True,
21+
serialize=False,
22+
verbose_name="ID",
23+
),
24+
),
25+
("created_at", models.DateTimeField(auto_now_add=True)),
26+
("executor_count", models.IntegerField(default=0)),
27+
(
28+
"batch",
29+
models.ForeignKey(
30+
on_delete=django.db.models.deletion.CASCADE,
31+
to="validator.syntheticjobbatch",
32+
),
33+
),
34+
(
35+
"miner",
36+
models.ForeignKey(
37+
on_delete=django.db.models.deletion.CASCADE,
38+
to="validator.miner",
39+
),
40+
),
41+
],
42+
),
43+
migrations.AddConstraint(
44+
model_name="minermanifest",
45+
constraint=models.UniqueConstraint(
46+
fields=("miner", "batch"), name="unique_miner_manifest"
47+
),
48+
),
49+
]

validator/app/src/compute_horde_validator/validator/models.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,19 +86,25 @@ class Meta:
8686
def __str__(self):
8787
return f"hotkey: {self.miner.hotkey}"
8888

89-
def set_manifest(self, manifest):
90-
self._manifest = manifest
91-
92-
def get_manifest(self):
93-
return getattr(self, "_manifest", None)
94-
9589

9690
class SyntheticJobBatch(models.Model):
9791
started_at = models.DateTimeField(auto_now_add=True)
9892
accepting_results_until = models.DateTimeField()
9993
scored = models.BooleanField(default=False)
10094

10195

96+
class MinerManifest(models.Model):
97+
miner = models.ForeignKey(Miner, on_delete=models.CASCADE)
98+
batch = models.ForeignKey(SyntheticJobBatch, on_delete=models.CASCADE)
99+
created_at = models.DateTimeField(auto_now_add=True)
100+
executor_count = models.IntegerField(default=0)
101+
102+
class Meta:
103+
constraints = [
104+
UniqueConstraint(fields=["miner", "batch"], name="unique_miner_manifest"),
105+
]
106+
107+
102108
class JobBase(models.Model):
103109
class Meta:
104110
abstract = True

validator/app/src/compute_horde_validator/validator/synthetic_jobs/generator/gpu_hashcat.py

Lines changed: 4 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
1-
import time
2-
3-
import bittensor
41
from asgiref.sync import sync_to_async
52
from compute_horde.mv_protocol.miner_requests import V0JobFinishedRequest
6-
from django.conf import settings
73

4+
from compute_horde_validator.validator.metagraph_client import get_weights_version
85
from compute_horde_validator.validator.synthetic_jobs.generator.base import (
96
AbstractSyntheticJobGenerator,
107
)
@@ -19,32 +16,10 @@
1916
MAX_SCORE = 2
2017

2118

22-
class WeightVersionHolder:
23-
def __init__(self):
24-
self._time_set = 0
25-
self.value = None
26-
27-
def get(self):
28-
if settings.DEBUG_OVERRIDE_WEIGHTS_VERSION is not None:
29-
return settings.DEBUG_OVERRIDE_WEIGHTS_VERSION
30-
31-
if time.time() - self._time_set > 300:
32-
subtensor = bittensor.subtensor(network=settings.BITTENSOR_NETWORK)
33-
hyperparameters = subtensor.get_subnet_hyperparameters(netuid=settings.BITTENSOR_NETUID)
34-
if hyperparameters is None:
35-
raise RuntimeError("Network hyperparameters are None")
36-
self.value = hyperparameters.weights_version
37-
self._time_set = time.time()
38-
return self.value
39-
40-
41-
weights_version_holder = WeightVersionHolder()
42-
43-
4419
class GPUHashcatSyntheticJobGenerator(AbstractSyntheticJobGenerator):
4520
def __init__(self):
4621
# set synthetic_jobs based on subnet weights_version
47-
self.weights_version = weights_version_holder.get()
22+
self.weights_version = get_weights_version()
4823
self.hash_job = None
4924
self.expected_answer = None
5025

@@ -59,7 +34,7 @@ def _get_hash_job(self):
5934
hash_job = V0SyntheticJob.generate(
6035
algorithm, HASHJOB_PARAMS[self.weights_version][algorithm]
6136
)
62-
elif self.weights_version == 1:
37+
elif self.weights_version in [1, 2]:
6338
algorithms = Algorithm.get_all_algorithms()
6439
params = [HASHJOB_PARAMS[self.weights_version][algorithm] for algorithm in algorithms]
6540
hash_job = V1SyntheticJob.generate(algorithms, params)
@@ -94,7 +69,7 @@ def volume_contents(self) -> str:
9469
def score(self, time_took: float) -> float:
9570
if self.weights_version == 0:
9671
return MAX_SCORE * (1 - (time_took / (2 * self.timeout_seconds())))
97-
elif self.weights_version == 1:
72+
elif self.weights_version in [1, 2]:
9873
return 1 / time_took
9974
else:
10075
raise RuntimeError(f"No score function for weights_version: {self.weights_version}")

validator/app/src/compute_horde_validator/validator/synthetic_jobs/utils.py

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,11 @@
4343
from django.conf import settings
4444
from django.utils.timezone import now
4545

46+
from compute_horde_validator.validator.metagraph_client import get_weights_version
4647
from compute_horde_validator.validator.models import (
4748
JobBase,
4849
Miner,
50+
MinerManifest,
4951
SyntheticJob,
5052
SyntheticJobBatch,
5153
SystemEvent,
@@ -84,7 +86,8 @@ def __init__(
8486
my_hotkey: str,
8587
miner_hotkey: str,
8688
miner_port: int,
87-
job_uuid: str,
89+
job_uuid: None | str,
90+
batch_id: None | int,
8891
keypair: bittensor.Keypair,
8992
):
9093
super().__init__(loop, f"{miner_hotkey}({miner_address}:{miner_port})")
@@ -95,6 +98,7 @@ def __init__(
9598
self.job_states = {}
9699
if job_uuid is not None:
97100
self.add_job(job_uuid)
101+
self.batch_id = batch_id
98102
self.keypair = keypair
99103
self._barrier = None
100104
self.miner_manifest = asyncio.Future()
@@ -132,6 +136,13 @@ async def handle_message(self, msg: BaseRequest):
132136
return
133137
if isinstance(msg, V0ExecutorManifestRequest):
134138
self.miner_manifest.set_result(msg.manifest)
139+
miner = await Miner.objects.aget(hotkey=self.miner_hotkey)
140+
if self.batch_id:
141+
await MinerManifest.objects.acreate(
142+
miner=miner,
143+
batch_id=self.batch_id,
144+
executor_count=msg.manifest.total_count,
145+
)
135146
return
136147
job_state = self.get_job_state(msg.job_uuid)
137148
if job_state is None:
@@ -202,7 +213,17 @@ def create_and_run_sythethic_job_batch(netuid, network):
202213
)
203214
}
204215
else:
205-
metagraph = bittensor.metagraph(netuid, network=network)
216+
try:
217+
metagraph = bittensor.metagraph(netuid, network=network)
218+
except Exception as e:
219+
msg = f"Failed to get metagraph - will not run synthetic jobs: {e}"
220+
logger.warning(msg)
221+
SystemEvent.objects.using(settings.DEFAULT_DB_ALIAS).create(
222+
type_=SystemEvent.EventType.MINER_SYNTHETIC_JOB_FAILURE,
223+
subtype=SystemEvent.EventSubType.SUBTENSOR_CONNECTIVITY_ERROR,
224+
long_description=msg,
225+
)
226+
return
206227
axons_by_key = {n.hotkey: n.axon_info for n in metagraph.neurons}
207228
miners = get_miners(metagraph)
208229
miners = [(miner.id, miner.hotkey) for miner in miners]
@@ -261,6 +282,7 @@ async def execute_miner_synthetic_jobs(batch_id, miner_id, miner_hotkey, axon_in
261282
miner_hotkey=miner_hotkey,
262283
my_hotkey=key.ss58_address,
263284
job_uuid=None,
285+
batch_id=batch_id,
264286
keypair=key,
265287
)
266288
data = {"miner_hotkey": miner_client.miner_hotkey}
@@ -325,6 +347,55 @@ async def execute_miner_synthetic_jobs(batch_id, miner_id, miner_hotkey, axon_in
325347
)
326348

327349

350+
async def apply_manifest_incentive(miner_hotkey: str, batch_id: int, score: float) -> float:
351+
weights_version = get_weights_version()
352+
if weights_version in [0, 1]:
353+
return score
354+
elif weights_version == 2:
355+
miner = await Miner.objects.aget(hotkey=miner_hotkey)
356+
357+
# get last 3 batches and manifests
358+
batches = [
359+
batch
360+
async for batch in SyntheticJobBatch.objects.filter(id__lte=batch_id).order_by("-id")[
361+
:3
362+
]
363+
]
364+
manifests = [
365+
manifest
366+
async for manifest in MinerManifest.objects.filter(
367+
miner=miner, batch__in=batches
368+
).order_by("-batch__id")
369+
]
370+
371+
if len(manifests) > 0 and manifests[0].executor_count <= 3:
372+
logger.debug(
373+
f"Applied manifest incentive for {miner_hotkey} - last manifest has 3 or less executors"
374+
)
375+
elif (
376+
len(manifests) == 3
377+
and manifests[0].executor_count - manifests[1].executor_count
378+
> settings.EXECUTOR_COUNT_INCREASE_THRESHOLD
379+
and manifests[0].executor_count - manifests[2].executor_count
380+
> settings.EXECUTOR_COUNT_INCREASE_THRESHOLD
381+
):
382+
logger.debug(
383+
f"Applied manifest incentive for {miner_hotkey} - miner has increased number of executors significantly"
384+
)
385+
elif len(manifests) < 3:
386+
logger.debug(
387+
f"Applied manifest incentive for {miner_hotkey} - validator has missed one of the previous 2 synthetic jobs windows"
388+
)
389+
else:
390+
# do not apply manifest incentive - return original score
391+
return score
392+
393+
# apply manifest incentive
394+
return score * settings.MANIFEST_SCORE_MULTIPLIER
395+
else:
396+
raise RuntimeError(f"Scoring undefined for {weights_version=}")
397+
398+
328399
async def _execute_synthetic_job(miner_client: MinerClient, job: SyntheticJob):
329400
data = {"job_uuid": str(job.job_uuid), "miner_hotkey": job.miner.hotkey}
330401
save_event = partial(save_job_execution_event, data=data)
@@ -474,13 +545,15 @@ async def _execute_synthetic_job(miner_client: MinerClient, job: SyntheticJob):
474545
)
475546

476547
# if job passed, save synthetic job score
477-
job.score = score
548+
job.score = await apply_manifest_incentive(
549+
miner_client.miner_hotkey, job.batch_id, score
550+
)
478551
await job.asave()
479552

480553
# Send receipt to miner
481554
try:
482555
receipt_message = miner_client.generate_receipt_message(
483-
job, full_job_sent, time_took, score
556+
job, full_job_sent, time_took, job.score
484557
)
485558
await miner_client.send_model(receipt_message)
486559
logger.info("Receipt message sent")

validator/app/src/compute_horde_validator/validator/tasks.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ async def run_admin_job_request(job_request_id: int, callback=None):
155155
miner_hotkey=miner.hotkey,
156156
my_hotkey=keypair.ss58_address,
157157
job_uuid=job.job_uuid,
158+
batch_id=None,
158159
keypair=keypair,
159160
)
160161

0 commit comments

Comments
 (0)