From f56efdb4abd6bac88c5d8f69a4649ce4495dbad8 Mon Sep 17 00:00:00 2001 From: VED <146507396+ved1beta@users.noreply.github.com> Date: Mon, 16 Mar 2026 07:41:23 +0530 Subject: [PATCH] fix: high eval loss w/ sample packing (#3478) [skip ci] * check if eval_sp * readable condition --- src/axolotl/utils/data/sft.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/axolotl/utils/data/sft.py b/src/axolotl/utils/data/sft.py index 69cbfb871..e008b542b 100644 --- a/src/axolotl/utils/data/sft.py +++ b/src/axolotl/utils/data/sft.py @@ -348,7 +348,9 @@ def _load_raw_datasets( dataset = handle_long_seq_in_dataset(dataset, cfg.eval_sequence_len, cfg) else: dataset = handle_long_seq_in_dataset(dataset, cfg.sequence_len, cfg) - if cfg.sample_packing: + if (split == "train" and cfg.sample_packing) or ( + split == "test" and cfg.eval_sample_packing + ): dataset, _ = process_datasets_for_packing(cfg, dataset, None) # Deduplicate before saving so the saved dataset is already de-duplicated