make batch smaller

This commit is contained in:
Wing Lian
2024-12-18 16:23:50 -05:00
parent d584354ee4
commit e633a12dbe

View File

@@ -53,7 +53,11 @@ class TokenizedPromptDataset(Dataset):
map_kwargs["batched"] = True
map_kwargs["batch_size"] = 100
if self.prompt_tokenizer.filter_rows:
-dataset = dataset.filter(self.prompt_tokenizer.filter_rows)
+dataset = dataset.filter(
+self.prompt_tokenizer.filter_rows,
+num_proc=num_proc,
+desc="Filtering Rows",
+)
return dataset.map(
self.prompt_tokenizer.tokenize_prompt,
num_proc=num_proc,