This commit is contained in:
Dan Saunders
2025-08-22 15:39:28 +00:00
parent 53bbca2591
commit 69f356163e

View File

@@ -95,9 +95,14 @@ def wrap_dataset_for_tokenized_prompt(
map_kwargs["batched"] = True
# Map the dataset and remove original columns
# For IterableDataset, features might be None until first iteration
remove_columns = None
if dataset.features is not None:
remove_columns = list(dataset.features.keys())
return dataset.map(
prompt_tokenizer.tokenize_prompt,
remove_columns=list(dataset.features.keys()),
remove_columns=remove_columns,
**map_kwargs,
)
return TokenizedPromptDataset(prompt_tokenizer, dataset, **kwargs)