diff --git a/src/axolotl/datasets.py b/src/axolotl/datasets.py index a8880a00f..460e8f1bd 100644 --- a/src/axolotl/datasets.py +++ b/src/axolotl/datasets.py @@ -53,7 +53,11 @@ class TokenizedPromptDataset(Dataset): map_kwargs["batched"] = True map_kwargs["batch_size"] = 100 if self.prompt_tokenizer.filter_rows: - dataset = dataset.filter(self.prompt_tokenizer.filter_rows) + dataset = dataset.filter( + self.prompt_tokenizer.filter_rows, + num_proc=num_proc, + desc="Filtering Rows", + ) return dataset.map( self.prompt_tokenizer.tokenize_prompt, num_proc=num_proc,