Skip to content

Commit

Permalink
Add optimal mixing ratio
Browse files Browse the repository at this point in the history
  • Loading branch information
dorian-K committed Jan 18, 2025
1 parent 623e647 commit 696eb3c
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions users/dorian_koch/datasets/MixingDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class MixingDataset(CachedDataset2):
# TODO: I overcomplicated some things in the design of this:
1. I hyper-optimized for memory usage, which makes the code very messy
2. Because of 1, this doesn't scale well at all inside a MultiProcDataset
3. This supports random access, but I had to hack some stuff together because apparently other Datasets don't support that?
"""

def __init__(
Expand Down Expand Up @@ -225,6 +226,7 @@ def _run_seq_idx(self, seq_idx):
# so just start loading them at the beginning again
if all(self.datasets_exhausted):
self.is_chooser_done = True
print(f"MixingDataset: optimal mixing ratio = {child_indices[1] / max(1, child_indices[0]+child_indices[1])}", file=log.v4)
break
# the modulo operator below will wrap around

Expand Down

0 comments on commit 696eb3c

Please sign in to comment.