make batch smaller
This commit is contained in:
@@ -53,7 +53,11 @@ class TokenizedPromptDataset(Dataset):
|
|||||||
map_kwargs["batched"] = True
|
map_kwargs["batched"] = True
|
||||||
map_kwargs["batch_size"] = 100
|
map_kwargs["batch_size"] = 100
|
||||||
if self.prompt_tokenizer.filter_rows:
|
if self.prompt_tokenizer.filter_rows:
|
||||||
dataset = dataset.filter(self.prompt_tokenizer.filter_rows)
|
dataset = dataset.filter(
|
||||||
|
self.prompt_tokenizer.filter_rows,
|
||||||
|
num_proc=num_proc,
|
||||||
|
desc="Filtering Rows",
|
||||||
|
)
|
||||||
return dataset.map(
|
return dataset.map(
|
||||||
self.prompt_tokenizer.tokenize_prompt,
|
self.prompt_tokenizer.tokenize_prompt,
|
||||||
num_proc=num_proc,
|
num_proc=num_proc,
|
||||||
|
|||||||
Reference in New Issue
Block a user