Skip to content

Commit d44b481

Browse files
committed
Introduce seed_base
1 parent f347249 commit d44b481

File tree

5 files changed

+10
-10
lines changed

5 files changed

+10
-10
lines changed

distributed_training/averaging/avg_handler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ async def fetch_training_data(self, block):
8787
try:
8888
loader = DatasetLoader(
8989
tokenizer=self.tokenizer,
90-
uid=self.uid,
90+
seed_base=self.uid,
9191
current_block=block,
9292
)
9393

distributed_training/data/dataset_loader.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def __len__(self):
9696
class DatasetLoader(BatchLoader):
9797
def __init__(
9898
self,
99-
uid: int,
99+
seed_base: int,
100100
current_block: int = 0,
101101
tokenizer=None,
102102

@@ -117,7 +117,7 @@ def __init__(
117117
sequence_length=sequence_length
118118
)
119119

120-
self.uid = uid
120+
self.seed_base = seed_base
121121
self.current_block = current_block
122122
self.logger = bt.logging
123123
load_dotenv(find_dotenv())
@@ -167,13 +167,13 @@ def require_env(name: str) -> str:
167167
self.total_row_groups_loaded = 0
168168
self.total_rows_loaded = 0
169169

170-
self.debug and self.logger.debug(f"DatasetLoader initialized with UID={self.uid}, block={self.current_block}")
170+
self.debug and self.logger.debug(f"DatasetLoader initialized with seed_base={self.seed_base}, block={self.current_block}")
171171

172172
def generate_rng(self, context: str = "") -> random.Random:
173173
"""
174-
Returns a reproducible RNG based on the stored UID and current block.
174+
Returns a reproducible RNG based on the stored seed_base and current block.
175175
"""
176-
seed_str = f"{self.uid}-{context}-{self.current_block}"
176+
seed_str = f"{self.seed_base}-{context}-{self.current_block}"
177177
seed = int(hashlib.sha256(seed_str.encode()).hexdigest(), 16) % (2**32)
178178
return random.Random(seed)
179179

@@ -330,7 +330,7 @@ async def tokenize_texts(self, texts):
330330

331331
loader = DatasetLoader(
332332
tokenizer=tokenizer,
333-
uid=miner_uid,
333+
seed_base=miner_uid,
334334
current_block=current_block,
335335
max_configs=1,
336336
# max_rows_per_group=100,

distributed_training/validator/reward.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ async def fetch_training_data(
8888
try:
8989
loader = DatasetLoader(
9090
tokenizer=self.tokenizer,
91-
uid=uid + self.local_rank, # Assuming self.local_rank (1-4) is also what miner provided.
91+
seed_base=uid + self.local_rank, # Assuming self.local_rank (1-4) is also what miner provided.
9292
current_block=block,
9393
max_configs=1, # set similar to miner.py during debug
9494
)

eval/eval_loss.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ async def fetch_training_data(tokenizer):
258258
try:
259259
loader = DatasetLoader(
260260
tokenizer=tokenizer,
261-
uid=uid,
261+
seed_base=uid,
262262
current_block=current_block,
263263
)
264264

neurons/miner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,7 @@ async def fetch_training_data(self, block: int):
582582
try:
583583
loader = DatasetLoader(
584584
tokenizer=self.tokenizer,
585-
uid=self.uid + self.local_rank,
585+
seed_base=self.uid + self.local_rank,
586586
current_block=block,
587587
max_configs=1, # REMOVE BECAUSE JUST FOR DEBUGGING
588588
)

0 commit comments

Comments
 (0)