From 3d1e2dcef495228f41c8bc6fc2495680d4e72550 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Wed, 18 Dec 2024 16:23:50 -0500
Subject: [PATCH] make batch smaller

---
 src/axolotl/datasets.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/axolotl/datasets.py b/src/axolotl/datasets.py
index a8880a00f..460e8f1bd 100644
--- a/src/axolotl/datasets.py
+++ b/src/axolotl/datasets.py
@@ -53,7 +53,11 @@ class TokenizedPromptDataset(Dataset):
             map_kwargs["batched"] = True
             map_kwargs["batch_size"] = 100
         if self.prompt_tokenizer.filter_rows:
-            dataset = dataset.filter(self.prompt_tokenizer.filter_rows)
+            dataset = dataset.filter(
+                self.prompt_tokenizer.filter_rows,
+                num_proc=num_proc,
+                desc="Filtering Rows",
+            )
         return dataset.map(
             self.prompt_tokenizer.tokenize_prompt,
             num_proc=num_proc,