@@ -96,7 +96,7 @@ def __len__(self):
9696class DatasetLoader (BatchLoader ):
9797 def __init__ (
9898 self ,
99- uid : int ,
99+ seed_base : int ,
100100 current_block : int = 0 ,
101101 tokenizer = None ,
102102
@@ -117,7 +117,7 @@ def __init__(
117117 sequence_length = sequence_length
118118 )
119119
120- self .uid = uid
120+ self .seed_base = seed_base
121121 self .current_block = current_block
122122 self .logger = bt .logging
123123 load_dotenv (find_dotenv ())
@@ -167,13 +167,13 @@ def require_env(name: str) -> str:
167167 self .total_row_groups_loaded = 0
168168 self .total_rows_loaded = 0
169169
170- self .debug and self .logger .debug (f"DatasetLoader initialized with UID ={ self .uid } , block={ self .current_block } " )
170+ self .debug and self .logger .debug (f"DatasetLoader initialized with seed_base ={ self .seed_base } , block={ self .current_block } " )
171171
def generate_rng(self, context: str = "") -> random.Random:
    """
    Return a reproducible RNG derived from the stored seed_base and current block.

    The seed string is ``"{seed_base}-{context}-{current_block}"``. It is
    hashed with SHA-256 and reduced modulo 2**32, so the same
    (seed_base, context, current_block) triple always yields an identically
    seeded generator.

    Args:
        context: Optional label mixed into the seed so that different call
            sites can draw independent streams for the same seed_base/block.

    Returns:
        A new ``random.Random`` instance seeded with the derived 32-bit value.
    """
    seed_str = f"{self.seed_base}-{context}-{self.current_block}"
    digest_hex = hashlib.sha256(seed_str.encode()).hexdigest()
    # Keep only the low 32 bits of the digest, matching the established
    # derivation so previously generated sequences stay reproducible.
    seed = int(digest_hex, 16) % (2 ** 32)
    return random.Random(seed)
179179
@@ -330,7 +330,7 @@ async def tokenize_texts(self, texts):
330330
331331 loader = DatasetLoader (
332332 tokenizer = tokenizer ,
333- uid = miner_uid ,
333+ seed_base = miner_uid ,
334334 current_block = current_block ,
335335 max_configs = 1 ,
336336 # max_rows_per_group=100,
0 commit comments