From 24f2887e871b63bfaea24df0c276b936f37366e3 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Fri, 27 Jun 2025 10:37:53 -0400 Subject: [PATCH] don't fail during preprocess for sampling from iterable dataset (#2825) [skip ci] --- src/axolotl/common/datasets.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/axolotl/common/datasets.py b/src/axolotl/common/datasets.py index 96af84c1e..a9b4c0f0f 100644 --- a/src/axolotl/common/datasets.py +++ b/src/axolotl/common/datasets.py @@ -75,13 +75,17 @@ def load_datasets( num_examples = cli_args.debug_num_examples if cli_args else 1 text_only = cli_args.debug_text_only if cli_args else False - train_samples = sample_dataset(train_dataset, num_examples) - check_dataset_labels( - train_samples, - tokenizer, - num_examples=num_examples, - text_only=text_only, - ) + try: + train_samples = sample_dataset(train_dataset, num_examples) + check_dataset_labels( + train_samples, + tokenizer, + num_examples=num_examples, + text_only=text_only, + ) + except AttributeError: + # can't sample iterable datasets + pass LOG.info("printing prompters...") for prompter in prompters: