From 69f356163ec4b757ccf2c1006f957bef21df4bed Mon Sep 17 00:00:00 2001 From: Dan Saunders Date: Fri, 22 Aug 2025 15:39:28 +0000 Subject: [PATCH] fix: skip remove_columns when IterableDataset.features is None --- src/axolotl/datasets.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/axolotl/datasets.py b/src/axolotl/datasets.py index 44de3c975..e6e00b760 100644 --- a/src/axolotl/datasets.py +++ b/src/axolotl/datasets.py @@ -95,9 +95,14 @@ def wrap_dataset_for_tokenized_prompt( map_kwargs["batched"] = True # Map the dataset and remove original columns + # For IterableDataset, features might be None until first iteration + remove_columns = None + if dataset.features is not None: + remove_columns = list(dataset.features.keys()) + return dataset.map( prompt_tokenizer.tokenize_prompt, - remove_columns=list(dataset.features.keys()), + remove_columns=remove_columns, **map_kwargs, ) return TokenizedPromptDataset(prompt_tokenizer, dataset, **kwargs)