diff --git a/src/axolotl/utils/data/sft.py b/src/axolotl/utils/data/sft.py index 69cbfb871..e008b542b 100644 --- a/src/axolotl/utils/data/sft.py +++ b/src/axolotl/utils/data/sft.py @@ -348,7 +348,9 @@ def _load_raw_datasets( dataset = handle_long_seq_in_dataset(dataset, cfg.eval_sequence_len, cfg) else: dataset = handle_long_seq_in_dataset(dataset, cfg.sequence_len, cfg) - if cfg.sample_packing: + if (split == "train" and cfg.sample_packing) or ( + split == "test" and cfg.eval_sample_packing + ): dataset, _ = process_datasets_for_packing(cfg, dataset, None) # Deduplicate before saving so the saved dataset is already de-duplicated