From af8d257aa22f9030b0f39d5bc7b150eed459eb9a Mon Sep 17 00:00:00 2001
From: Wing Lian <wing@axolotl.ai>
Date: Mon, 21 Jul 2025 11:40:56 -0400
Subject: [PATCH] make pad_to_sequence_len default to the same value as
 sample_packing (#2941) [skip ci]

* make pad_to_sequence_len default to the same value as sample_packing

* remove duplicate validation

* fix test

* update description meta

Co-authored-by: NanoCode012 <nano@axolotl.ai>

---------

Co-authored-by: NanoCode012 <nano@axolotl.ai>
---
 examples/archived/code-llama/13b/lora.yml     |  2 +-
 examples/archived/code-llama/13b/qlora.yml    |  2 +-
 examples/archived/code-llama/34b/lora.yml     |  2 +-
 examples/archived/code-llama/34b/qlora.yml    |  2 +-
 examples/archived/code-llama/7b/lora.yml      |  2 +-
 examples/archived/code-llama/7b/qlora.yml     |  2 +-
 .../deepcoder/deepcoder-14B-preview-lora.yml  |  2 +-
 examples/archived/gemma/qlora.yml             |  2 +-
 examples/archived/stablelm-2/1.6b/fft.yml     |  2 +-
 examples/archived/stablelm-2/1.6b/lora.yml    |  2 +-
 examples/archived/starcoder2/qlora.yml        |  2 +-
 examples/archived/tiny-llama/lora-mps.yml     |  2 +-
 examples/archived/tiny-llama/lora.yml         |  2 +-
 examples/archived/tiny-llama/qlora.yml        |  2 +-
 examples/cloud/modal.yaml                     |  2 --
 examples/cohere/command-r-7b-qlora.yml        |  2 +-
 .../cogito-v1-preview-llama-3B-lora.yml       |  2 +-
 .../cogito-v1-preview-qwen-14B-lora.yml       |  2 +-
 examples/deepseek-v2/fft-fsdp-16b.yaml        |  2 +-
 examples/deepseek-v2/qlora-fsdp-2_5.yaml      |  2 +-
 examples/devstral/devstral-small-qlora.yml    |  2 +-
 .../falcon-h1/falcon-h1-1b-deep-qlora.yaml    |  2 +-
 examples/falcon-h1/falcon-h1-1b-qlora.yaml    |  2 +-
 examples/falcon-h1/falcon-h1-34b-qlora.yaml   |  2 +-
 examples/falcon-h1/falcon-h1-3b-qlora.yaml    |  2 +-
 examples/falcon-h1/falcon-h1-500m-qlora.yaml  |  2 +-
 examples/falcon-h1/falcon-h1-7b-qlora.yaml    |  2 +-
 examples/gemma2/qlora.yml                     |  2 +-
 examples/gemma2/reward-model.yaml             |  2 +-
 examples/gemma3/gemma-3-1b-qlora.yml          |  2 +-
 examples/gemma3/gemma-3-4b-qlora.yml          |  2 +-
 examples/glm4/qlora-32b.yaml                  |  2 +-
 examples/jamba/qlora_fsdp_large.yaml          |  2 +-
 examples/lfm2/lfm2-350m-fft.yaml              |  2 +-
 examples/llama-2/fft_optimized.yml            |  2 +-
 examples/llama-2/lisa.yml                     |  2 +-
 examples/llama-2/loftq.yml                    |  2 +-
 examples/llama-2/lora.yml                     |  2 +-
 examples/llama-2/qlora-fsdp.yml               |  2 +-
 examples/llama-2/qlora.yml                    |  2 +-
 examples/llama-2/relora.yml                   |  2 +-
 examples/llama-3/3b-qat-fsdp2.yaml            |  2 +-
 examples/llama-3/fft-8b-liger-fsdp.yaml       |  2 +-
 examples/llama-3/fft-8b.yaml                  |  2 +-
 examples/llama-3/instruct-dpo-lora-8b.yml     |  2 +-
 examples/llama-3/instruct-lora-8b.yml         |  2 +-
 examples/llama-3/lora-1b-deduplicate-dpo.yml  |  2 +-
 examples/llama-3/lora-1b-deduplicate-sft.yml  |  2 +-
 examples/llama-3/lora-1b-kernels.yml          |  2 +-
 examples/llama-3/lora-1b-ray.yml              |  2 +-
 .../lora-1b-sample-packing-sequentially.yml   |  2 +-
 examples/llama-3/lora-1b.yml                  |  2 +-
 examples/llama-3/lora-8b.yml                  |  2 +-
 examples/llama-3/qlora-1b.yml                 |  2 +-
 examples/llama-3/qlora-fsdp-405b.yaml         |  2 +-
 examples/llama-3/qlora-fsdp-70b.yaml          |  2 +-
 examples/llama-3/qlora.yml                    |  2 +-
 examples/llama-3/sparse-finetuning.yaml       |  2 +-
 .../do-no-use-fa2/maverick-qlora-fsdp1.yaml   |  2 +-
 .../do-no-use-fa2/scout-qlora-fsdp1.yaml      |  2 +-
 .../scout-qlora-single-h100.yaml              |  2 +-
 .../llama-4/scout-qlora-flexattn-fsdp2.yaml   |  2 +-
 .../llama-4/scout-qlora-single-h100-flex.yaml |  2 +-
 .../magistral/magistral-small-fsdp-qlora.yaml |  2 +-
 examples/magistral/magistral-small-qlora.yaml |  2 +-
 examples/mistral/bigstral-ds-zero3.yaml       |  2 +-
 examples/mistral/config.yml                   |  2 +-
 examples/mistral/lora-mps.yml                 |  2 +-
 examples/mistral/lora.yml                     |  2 +-
 examples/mistral/mistral-dpo-qlora.yml        |  2 +-
 examples/mistral/mistral-qlora-orpo.yml       |  2 +-
 examples/mistral/mixtral.yml                  |  2 +-
 examples/mistral/mixtral_22.yml               |  2 +-
 examples/mistral/qlora.yml                    |  2 +-
 examples/orpheus/finetune.yml                 |  2 +-
 examples/phi/lora-3.5.yaml                    |  2 +-
 examples/phi/phi-ft.yml                       |  2 +-
 examples/phi/phi-qlora.yml                    |  2 +-
 examples/phi/phi2-ft.yml                      |  2 +-
 examples/phi/phi3-ft-fsdp.yml                 |  2 +-
 examples/phi/phi3-ft.yml                      |  2 +-
 examples/qwen2/dpo.yaml                       |  2 +-
 examples/qwen2/prm.yaml                       |  2 +-
 examples/qwen2/qlora-fsdp.yaml                |  2 +-
 examples/qwen2/reward-model.yaml              |  2 +-
 examples/qwen3/32b-qlora.yaml                 |  2 +-
 examples/qwen3/8b-qat-fsdp2.yml               |  2 +-
 examples/qwen3/qlora-fsdp.yaml                |  2 +-
 src/axolotl/utils/schemas/config.py           |  2 +-
 .../schemas/validation/test_default_values.py | 21 +++++++++++++++++++
 90 files changed, 109 insertions(+), 90 deletions(-)
 create mode 100644 tests/utils/schemas/validation/test_default_values.py

diff --git a/examples/archived/code-llama/13b/lora.yml b/examples/archived/code-llama/13b/lora.yml
index 0ed2382ba..98ef516ab 100644
--- a/examples/archived/code-llama/13b/lora.yml
+++ b/examples/archived/code-llama/13b/lora.yml
@@ -17,7 +17,7 @@ output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/archived/code-llama/13b/qlora.yml b/examples/archived/code-llama/13b/qlora.yml
index 22bd1691b..2385368ac 100644
--- a/examples/archived/code-llama/13b/qlora.yml
+++ b/examples/archived/code-llama/13b/qlora.yml
@@ -20,7 +20,7 @@ lora_model_dir:
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/archived/code-llama/34b/lora.yml b/examples/archived/code-llama/34b/lora.yml
index 25dc9f421..fb44997ff 100644
--- a/examples/archived/code-llama/34b/lora.yml
+++ b/examples/archived/code-llama/34b/lora.yml
@@ -17,7 +17,7 @@ output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/archived/code-llama/34b/qlora.yml b/examples/archived/code-llama/34b/qlora.yml
index 0e33e2a45..22f4cae3c 100644
--- a/examples/archived/code-llama/34b/qlora.yml
+++ b/examples/archived/code-llama/34b/qlora.yml
@@ -20,7 +20,7 @@ lora_model_dir:
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/archived/code-llama/7b/lora.yml b/examples/archived/code-llama/7b/lora.yml
index d288b9f65..0632bdfb7 100644
--- a/examples/archived/code-llama/7b/lora.yml
+++ b/examples/archived/code-llama/7b/lora.yml
@@ -17,7 +17,7 @@ output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/archived/code-llama/7b/qlora.yml b/examples/archived/code-llama/7b/qlora.yml
index de41c0123..0bd462aab 100644
--- a/examples/archived/code-llama/7b/qlora.yml
+++ b/examples/archived/code-llama/7b/qlora.yml
@@ -20,7 +20,7 @@ lora_model_dir:
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/archived/deepcoder/deepcoder-14B-preview-lora.yml b/examples/archived/deepcoder/deepcoder-14B-preview-lora.yml
index 9e92c0a07..a9511e9e3 100644
--- a/examples/archived/deepcoder/deepcoder-14B-preview-lora.yml
+++ b/examples/archived/deepcoder/deepcoder-14B-preview-lora.yml
@@ -21,7 +21,7 @@ output_dir: ./outputs/lora-out
 sequence_len: 4096
 sample_packing: true
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/archived/gemma/qlora.yml b/examples/archived/gemma/qlora.yml
index 2738112b4..80829b3c9 100644
--- a/examples/archived/gemma/qlora.yml
+++ b/examples/archived/gemma/qlora.yml
@@ -25,7 +25,7 @@ lora_target_linear: true
 sequence_len: 4096
 sample_packing: true
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/archived/stablelm-2/1.6b/fft.yml b/examples/archived/stablelm-2/1.6b/fft.yml
index 9b45b399f..3ae08c9de 100644
--- a/examples/archived/stablelm-2/1.6b/fft.yml
+++ b/examples/archived/stablelm-2/1.6b/fft.yml
@@ -16,7 +16,7 @@ output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter:
 lora_model_dir:
diff --git a/examples/archived/stablelm-2/1.6b/lora.yml b/examples/archived/stablelm-2/1.6b/lora.yml
index 31e5ad933..e5aa81423 100644
--- a/examples/archived/stablelm-2/1.6b/lora.yml
+++ b/examples/archived/stablelm-2/1.6b/lora.yml
@@ -19,7 +19,7 @@ output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/archived/starcoder2/qlora.yml b/examples/archived/starcoder2/qlora.yml
index 18d85f9c3..889d837e8 100644
--- a/examples/archived/starcoder2/qlora.yml
+++ b/examples/archived/starcoder2/qlora.yml
@@ -19,7 +19,7 @@ lora_model_dir:
 
 sequence_len: 8192
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/archived/tiny-llama/lora-mps.yml b/examples/archived/tiny-llama/lora-mps.yml
index 66cf7cfb3..aa3b7d851 100644
--- a/examples/archived/tiny-llama/lora-mps.yml
+++ b/examples/archived/tiny-llama/lora-mps.yml
@@ -17,7 +17,7 @@ output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 eval_sample_packing: false
 
 adapter: lora
diff --git a/examples/archived/tiny-llama/lora.yml b/examples/archived/tiny-llama/lora.yml
index 90998880f..a92f4bd67 100644
--- a/examples/archived/tiny-llama/lora.yml
+++ b/examples/archived/tiny-llama/lora.yml
@@ -17,7 +17,7 @@ output_dir: ./outputs/lora-out
 sequence_len: 4096
 sample_packing: true
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/archived/tiny-llama/qlora.yml b/examples/archived/tiny-llama/qlora.yml
index 8b2a4565a..4d422a5ee 100644
--- a/examples/archived/tiny-llama/qlora.yml
+++ b/examples/archived/tiny-llama/qlora.yml
@@ -21,7 +21,7 @@ lora_model_dir:
 sequence_len: 4096
 sample_packing: true
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/cloud/modal.yaml b/examples/cloud/modal.yaml
index bbe8785f1..195031494 100644
--- a/examples/cloud/modal.yaml
+++ b/examples/cloud/modal.yaml
@@ -26,5 +26,3 @@ timeout: 86400
 # Preprocess specific configurations
 memory_preprocess: 32
 timeout_preprocess: 14400
-
-# save_first_step: true  # uncomment this to validate checkpoint saving works with your config
diff --git a/examples/cohere/command-r-7b-qlora.yml b/examples/cohere/command-r-7b-qlora.yml
index da2777270..b4741636b 100644
--- a/examples/cohere/command-r-7b-qlora.yml
+++ b/examples/cohere/command-r-7b-qlora.yml
@@ -27,7 +27,7 @@ lora_target_linear: true
 sequence_len: 2048
 sample_packing: true
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/deepcogito/cogito-v1-preview-llama-3B-lora.yml b/examples/deepcogito/cogito-v1-preview-llama-3B-lora.yml
index 1a051b98b..6f0b505bd 100644
--- a/examples/deepcogito/cogito-v1-preview-llama-3B-lora.yml
+++ b/examples/deepcogito/cogito-v1-preview-llama-3B-lora.yml
@@ -21,7 +21,7 @@ output_dir: ./outputs/lora-out
 sequence_len: 4096
 sample_packing: true
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/deepcogito/cogito-v1-preview-qwen-14B-lora.yml b/examples/deepcogito/cogito-v1-preview-qwen-14B-lora.yml
index 807342641..fefcfadea 100644
--- a/examples/deepcogito/cogito-v1-preview-qwen-14B-lora.yml
+++ b/examples/deepcogito/cogito-v1-preview-qwen-14B-lora.yml
@@ -21,7 +21,7 @@ output_dir: ./outputs/lora-out
 sequence_len: 4096
 sample_packing: true
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/deepseek-v2/fft-fsdp-16b.yaml b/examples/deepseek-v2/fft-fsdp-16b.yaml
index 78bf6b179..d23c789aa 100644
--- a/examples/deepseek-v2/fft-fsdp-16b.yaml
+++ b/examples/deepseek-v2/fft-fsdp-16b.yaml
@@ -12,7 +12,7 @@ output_dir: ./outputs/out
 
 sequence_len: 2048
 sample_packing: true
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/deepseek-v2/qlora-fsdp-2_5.yaml b/examples/deepseek-v2/qlora-fsdp-2_5.yaml
index da1d9aefd..0536d1c10 100644
--- a/examples/deepseek-v2/qlora-fsdp-2_5.yaml
+++ b/examples/deepseek-v2/qlora-fsdp-2_5.yaml
@@ -30,7 +30,7 @@ output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/devstral/devstral-small-qlora.yml b/examples/devstral/devstral-small-qlora.yml
index 9d92e8662..7fe4dd433 100644
--- a/examples/devstral/devstral-small-qlora.yml
+++ b/examples/devstral/devstral-small-qlora.yml
@@ -25,7 +25,7 @@ lora_model_dir:
 
 sequence_len: 2048
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/falcon-h1/falcon-h1-1b-deep-qlora.yaml b/examples/falcon-h1/falcon-h1-1b-deep-qlora.yaml
index 484c31fec..2473179f0 100644
--- a/examples/falcon-h1/falcon-h1-1b-deep-qlora.yaml
+++ b/examples/falcon-h1/falcon-h1-1b-deep-qlora.yaml
@@ -38,7 +38,7 @@ lora_target_modules:
 sequence_len: 2048
 sample_packing: false
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/falcon-h1/falcon-h1-1b-qlora.yaml b/examples/falcon-h1/falcon-h1-1b-qlora.yaml
index dea2a6e6d..bfb7836ef 100644
--- a/examples/falcon-h1/falcon-h1-1b-qlora.yaml
+++ b/examples/falcon-h1/falcon-h1-1b-qlora.yaml
@@ -38,7 +38,7 @@ lora_target_modules:
 sequence_len: 2048
 sample_packing: false
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/falcon-h1/falcon-h1-34b-qlora.yaml b/examples/falcon-h1/falcon-h1-34b-qlora.yaml
index b187efbf6..80a9d45b5 100644
--- a/examples/falcon-h1/falcon-h1-34b-qlora.yaml
+++ b/examples/falcon-h1/falcon-h1-34b-qlora.yaml
@@ -38,7 +38,7 @@ lora_target_modules:
 sequence_len: 2048
 sample_packing: false
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/falcon-h1/falcon-h1-3b-qlora.yaml b/examples/falcon-h1/falcon-h1-3b-qlora.yaml
index 4d981ad95..02be8ac5d 100644
--- a/examples/falcon-h1/falcon-h1-3b-qlora.yaml
+++ b/examples/falcon-h1/falcon-h1-3b-qlora.yaml
@@ -38,7 +38,7 @@ lora_target_modules:
 sequence_len: 2048
 sample_packing: false
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/falcon-h1/falcon-h1-500m-qlora.yaml b/examples/falcon-h1/falcon-h1-500m-qlora.yaml
index 5ee13facd..b112d5d85 100644
--- a/examples/falcon-h1/falcon-h1-500m-qlora.yaml
+++ b/examples/falcon-h1/falcon-h1-500m-qlora.yaml
@@ -38,7 +38,7 @@ lora_target_modules:
 sequence_len: 2048
 sample_packing: false
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/falcon-h1/falcon-h1-7b-qlora.yaml b/examples/falcon-h1/falcon-h1-7b-qlora.yaml
index 4b665c3cd..c5505873d 100644
--- a/examples/falcon-h1/falcon-h1-7b-qlora.yaml
+++ b/examples/falcon-h1/falcon-h1-7b-qlora.yaml
@@ -38,7 +38,7 @@ lora_target_modules:
 sequence_len: 2048
 sample_packing: false
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/gemma2/qlora.yml b/examples/gemma2/qlora.yml
index 68d213fad..8a295a1f8 100644
--- a/examples/gemma2/qlora.yml
+++ b/examples/gemma2/qlora.yml
@@ -31,7 +31,7 @@ lora_target_linear: true
 sequence_len: 2048
 sample_packing: true
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/gemma2/reward-model.yaml b/examples/gemma2/reward-model.yaml
index 624ebdcd2..67b1228b2 100644
--- a/examples/gemma2/reward-model.yaml
+++ b/examples/gemma2/reward-model.yaml
@@ -18,7 +18,7 @@ remove_unused_columns: false
 sequence_len: 2048
 sample_packing: false
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/gemma3/gemma-3-1b-qlora.yml b/examples/gemma3/gemma-3-1b-qlora.yml
index 99921770d..115717db7 100644
--- a/examples/gemma3/gemma-3-1b-qlora.yml
+++ b/examples/gemma3/gemma-3-1b-qlora.yml
@@ -35,7 +35,7 @@ lora_target_linear: true
 sequence_len: 2048
 sample_packing: true
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/gemma3/gemma-3-4b-qlora.yml b/examples/gemma3/gemma-3-4b-qlora.yml
index 025cb9240..44ba9c879 100644
--- a/examples/gemma3/gemma-3-4b-qlora.yml
+++ b/examples/gemma3/gemma-3-4b-qlora.yml
@@ -25,7 +25,7 @@ lora_model_dir:
 
 sequence_len: 2048
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/glm4/qlora-32b.yaml b/examples/glm4/qlora-32b.yaml
index 8973cedd4..b3656e3ae 100644
--- a/examples/glm4/qlora-32b.yaml
+++ b/examples/glm4/qlora-32b.yaml
@@ -17,7 +17,7 @@ lora_model_dir:
 sequence_len: 2048
 sample_packing: true
 eval_sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 16
 lora_alpha: 32
diff --git a/examples/jamba/qlora_fsdp_large.yaml b/examples/jamba/qlora_fsdp_large.yaml
index fda30e2d2..344f73e63 100644
--- a/examples/jamba/qlora_fsdp_large.yaml
+++ b/examples/jamba/qlora_fsdp_large.yaml
@@ -23,7 +23,7 @@ save_safetensors: true
 adapter: qlora
 sequence_len: 2048
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 16
 lora_alpha: 16
diff --git a/examples/lfm2/lfm2-350m-fft.yaml b/examples/lfm2/lfm2-350m-fft.yaml
index 74c90c1e1..16a0a028e 100644
--- a/examples/lfm2/lfm2-350m-fft.yaml
+++ b/examples/lfm2/lfm2-350m-fft.yaml
@@ -18,7 +18,7 @@ output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/llama-2/fft_optimized.yml b/examples/llama-2/fft_optimized.yml
index c44cd2230..a23778b96 100644
--- a/examples/llama-2/fft_optimized.yml
+++ b/examples/llama-2/fft_optimized.yml
@@ -14,7 +14,7 @@ output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter:
 lora_model_dir:
diff --git a/examples/llama-2/lisa.yml b/examples/llama-2/lisa.yml
index a44e261be..25adcad5d 100644
--- a/examples/llama-2/lisa.yml
+++ b/examples/llama-2/lisa.yml
@@ -14,7 +14,7 @@ output_dir: ./outputs/lisa-out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter:
 lora_model_dir:
diff --git a/examples/llama-2/loftq.yml b/examples/llama-2/loftq.yml
index 085627f63..606bbc735 100644
--- a/examples/llama-2/loftq.yml
+++ b/examples/llama-2/loftq.yml
@@ -14,7 +14,7 @@ output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml
index 759fce044..0781e0d1b 100644
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -17,7 +17,7 @@ output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/llama-2/qlora-fsdp.yml b/examples/llama-2/qlora-fsdp.yml
index 3bf30120b..ceb3ce5d1 100644
--- a/examples/llama-2/qlora-fsdp.yml
+++ b/examples/llama-2/qlora-fsdp.yml
@@ -20,7 +20,7 @@ lora_model_dir:
 
 sequence_len: 512
 sample_packing: false
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml
index 09596c71e..1515872e6 100644
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -20,7 +20,7 @@ lora_model_dir:
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/llama-2/relora.yml b/examples/llama-2/relora.yml
index ca8b14a1c..6c9e83223 100644
--- a/examples/llama-2/relora.yml
+++ b/examples/llama-2/relora.yml
@@ -18,7 +18,7 @@ lora_model_dir:
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 8
 lora_alpha: 16
diff --git a/examples/llama-3/3b-qat-fsdp2.yaml b/examples/llama-3/3b-qat-fsdp2.yaml
index 08d8ee5c1..d9b96fb96 100644
--- a/examples/llama-3/3b-qat-fsdp2.yaml
+++ b/examples/llama-3/3b-qat-fsdp2.yaml
@@ -22,7 +22,7 @@ datasets:
 output_dir: ./outputs/qat_out/
 
 sample_packing: true
-pad_to_sequence_len: true
+
 sequence_len: 512
 
 flex_attention: true
diff --git a/examples/llama-3/fft-8b-liger-fsdp.yaml b/examples/llama-3/fft-8b-liger-fsdp.yaml
index e2808935f..b3d990a8b 100644
--- a/examples/llama-3/fft-8b-liger-fsdp.yaml
+++ b/examples/llama-3/fft-8b-liger-fsdp.yaml
@@ -26,7 +26,7 @@ output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/llama-3/fft-8b.yaml b/examples/llama-3/fft-8b.yaml
index 2dfe6d492..e067212b7 100644
--- a/examples/llama-3/fft-8b.yaml
+++ b/examples/llama-3/fft-8b.yaml
@@ -11,7 +11,7 @@ output_dir: ./outputs/out
 
 sequence_len: 8192
 sample_packing: true
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/llama-3/instruct-dpo-lora-8b.yml b/examples/llama-3/instruct-dpo-lora-8b.yml
index 10ab2a320..99de56ad3 100644
--- a/examples/llama-3/instruct-dpo-lora-8b.yml
+++ b/examples/llama-3/instruct-dpo-lora-8b.yml
@@ -37,7 +37,7 @@ output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: false
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/llama-3/instruct-lora-8b.yml b/examples/llama-3/instruct-lora-8b.yml
index 83b7f9a37..b8baa5b0a 100644
--- a/examples/llama-3/instruct-lora-8b.yml
+++ b/examples/llama-3/instruct-lora-8b.yml
@@ -28,7 +28,7 @@ output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: false
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/llama-3/lora-1b-deduplicate-dpo.yml b/examples/llama-3/lora-1b-deduplicate-dpo.yml
index b20dbad84..288e8fd19 100644
--- a/examples/llama-3/lora-1b-deduplicate-dpo.yml
+++ b/examples/llama-3/lora-1b-deduplicate-dpo.yml
@@ -49,7 +49,7 @@ output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: false
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/llama-3/lora-1b-deduplicate-sft.yml b/examples/llama-3/lora-1b-deduplicate-sft.yml
index 67e518184..6ce504a0d 100644
--- a/examples/llama-3/lora-1b-deduplicate-sft.yml
+++ b/examples/llama-3/lora-1b-deduplicate-sft.yml
@@ -22,7 +22,7 @@ dataset_exact_deduplication: true
 sequence_len: 4096
 sample_packing: true
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/llama-3/lora-1b-kernels.yml b/examples/llama-3/lora-1b-kernels.yml
index 92a948c2e..71e569ae0 100644
--- a/examples/llama-3/lora-1b-kernels.yml
+++ b/examples/llama-3/lora-1b-kernels.yml
@@ -14,7 +14,7 @@ lora_model_dir:
 
 sequence_len: 2048
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 16
 lora_alpha: 32
diff --git a/examples/llama-3/lora-1b-ray.yml b/examples/llama-3/lora-1b-ray.yml
index 178a1fb89..7b9d15741 100644
--- a/examples/llama-3/lora-1b-ray.yml
+++ b/examples/llama-3/lora-1b-ray.yml
@@ -15,7 +15,7 @@ lora_model_dir:
 sequence_len: 2048
 sample_packing: true
 eval_sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 16
 lora_alpha: 32
diff --git a/examples/llama-3/lora-1b-sample-packing-sequentially.yml b/examples/llama-3/lora-1b-sample-packing-sequentially.yml
index c4ce3eb0f..9f764e131 100644
--- a/examples/llama-3/lora-1b-sample-packing-sequentially.yml
+++ b/examples/llama-3/lora-1b-sample-packing-sequentially.yml
@@ -24,7 +24,7 @@ sample_packing: true
 sample_packing_sequentially: true
 curriculum_sampling: true
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/llama-3/lora-1b.yml b/examples/llama-3/lora-1b.yml
index 82085483f..34d540eb7 100644
--- a/examples/llama-3/lora-1b.yml
+++ b/examples/llama-3/lora-1b.yml
@@ -15,7 +15,7 @@ lora_model_dir:
 sequence_len: 2048
 sample_packing: true
 eval_sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 16
 lora_alpha: 32
diff --git a/examples/llama-3/lora-8b.yml b/examples/llama-3/lora-8b.yml
index c39389755..ca6cd9e97 100644
--- a/examples/llama-3/lora-8b.yml
+++ b/examples/llama-3/lora-8b.yml
@@ -18,7 +18,7 @@ output_dir: ./outputs/lora-out
 sequence_len: 4096
 sample_packing: true
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/llama-3/qlora-1b.yml b/examples/llama-3/qlora-1b.yml
index 6b76ea8d9..288b7dc6c 100644
--- a/examples/llama-3/qlora-1b.yml
+++ b/examples/llama-3/qlora-1b.yml
@@ -18,7 +18,7 @@ lora_model_dir:
 sequence_len: 2048
 sample_packing: true
 eval_sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/llama-3/qlora-fsdp-405b.yaml b/examples/llama-3/qlora-fsdp-405b.yaml
index 1ee922b59..0f31b5bdc 100644
--- a/examples/llama-3/qlora-fsdp-405b.yaml
+++ b/examples/llama-3/qlora-fsdp-405b.yaml
@@ -18,7 +18,7 @@ adapter: qlora
 
 sequence_len: 2048
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 16
 lora_alpha: 16
diff --git a/examples/llama-3/qlora-fsdp-70b.yaml b/examples/llama-3/qlora-fsdp-70b.yaml
index 5edd8353a..28387ba1b 100644
--- a/examples/llama-3/qlora-fsdp-70b.yaml
+++ b/examples/llama-3/qlora-fsdp-70b.yaml
@@ -20,7 +20,7 @@ lora_model_dir:
 
 sequence_len: 512
 sample_packing: false
-pad_to_sequence_len: true
+
 
 lora_r: 8
 lora_alpha: 16
diff --git a/examples/llama-3/qlora.yml b/examples/llama-3/qlora.yml
index a674eca27..ffb00dace 100644
--- a/examples/llama-3/qlora.yml
+++ b/examples/llama-3/qlora.yml
@@ -20,7 +20,7 @@ lora_model_dir:
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/llama-3/sparse-finetuning.yaml b/examples/llama-3/sparse-finetuning.yaml
index 8577a19d2..ecf5df955 100644
--- a/examples/llama-3/sparse-finetuning.yaml
+++ b/examples/llama-3/sparse-finetuning.yaml
@@ -16,7 +16,7 @@ output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 eval_sample_packing: false
 
 wandb_project:
diff --git a/examples/llama-4/do-no-use-fa2/maverick-qlora-fsdp1.yaml b/examples/llama-4/do-no-use-fa2/maverick-qlora-fsdp1.yaml
index d4a038e11..3bd05b5ba 100644
--- a/examples/llama-4/do-no-use-fa2/maverick-qlora-fsdp1.yaml
+++ b/examples/llama-4/do-no-use-fa2/maverick-qlora-fsdp1.yaml
@@ -47,7 +47,7 @@ output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 gradient_accumulation_steps: 1
 micro_batch_size: 1
diff --git a/examples/llama-4/do-no-use-fa2/scout-qlora-fsdp1.yaml b/examples/llama-4/do-no-use-fa2/scout-qlora-fsdp1.yaml
index bea10d979..1c6ba1410 100644
--- a/examples/llama-4/do-no-use-fa2/scout-qlora-fsdp1.yaml
+++ b/examples/llama-4/do-no-use-fa2/scout-qlora-fsdp1.yaml
@@ -48,7 +48,7 @@ output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/llama-4/do-no-use-fa2/scout-qlora-single-h100.yaml b/examples/llama-4/do-no-use-fa2/scout-qlora-single-h100.yaml
index 737d93812..081089555 100644
--- a/examples/llama-4/do-no-use-fa2/scout-qlora-single-h100.yaml
+++ b/examples/llama-4/do-no-use-fa2/scout-qlora-single-h100.yaml
@@ -51,7 +51,7 @@ output_dir: ./outputs/out
 
 sequence_len: 4096  # up to 8k will work on a single H100
 sample_packing: true
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/llama-4/scout-qlora-flexattn-fsdp2.yaml b/examples/llama-4/scout-qlora-flexattn-fsdp2.yaml
index b3e8c328c..6193e4ed5 100644
--- a/examples/llama-4/scout-qlora-flexattn-fsdp2.yaml
+++ b/examples/llama-4/scout-qlora-flexattn-fsdp2.yaml
@@ -46,7 +46,7 @@ output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 gradient_accumulation_steps: 1
 micro_batch_size: 2
diff --git a/examples/llama-4/scout-qlora-single-h100-flex.yaml b/examples/llama-4/scout-qlora-single-h100-flex.yaml
index 6be3988ef..c3bbfe56a 100644
--- a/examples/llama-4/scout-qlora-single-h100-flex.yaml
+++ b/examples/llama-4/scout-qlora-single-h100-flex.yaml
@@ -51,7 +51,7 @@ output_dir: ./outputs/out
 
 sequence_len: 4096  # up to 8k will work on a single H100
 sample_packing: true
-pad_to_sequence_len: true
+
 
 gradient_accumulation_steps: 1
 micro_batch_size: 1
diff --git a/examples/magistral/magistral-small-fsdp-qlora.yaml b/examples/magistral/magistral-small-fsdp-qlora.yaml
index b23d2309a..4a769510a 100644
--- a/examples/magistral/magistral-small-fsdp-qlora.yaml
+++ b/examples/magistral/magistral-small-fsdp-qlora.yaml
@@ -23,7 +23,7 @@ lora_model_dir:
 sequence_len: 2048
 sample_packing: true
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/magistral/magistral-small-qlora.yaml b/examples/magistral/magistral-small-qlora.yaml
index f0fce014f..bb2e0ccf0 100644
--- a/examples/magistral/magistral-small-qlora.yaml
+++ b/examples/magistral/magistral-small-qlora.yaml
@@ -22,7 +22,7 @@ lora_model_dir:
 
 sequence_len: 2048
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/mistral/bigstral-ds-zero3.yaml b/examples/mistral/bigstral-ds-zero3.yaml
index e9bcbb7d6..a8dc36216 100644
--- a/examples/mistral/bigstral-ds-zero3.yaml
+++ b/examples/mistral/bigstral-ds-zero3.yaml
@@ -27,7 +27,7 @@ output_dir: ./outputs/out
 
 sequence_len: 2048
 sample_packing: true
-pad_to_sequence_len: true
+
 
 gradient_accumulation_steps: 1
 micro_batch_size: 1
diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml
index 8c4d80f79..455c3c224 100644
--- a/examples/mistral/config.yml
+++ b/examples/mistral/config.yml
@@ -14,7 +14,7 @@ output_dir: ./outputs/out
 
 sequence_len: 8192
 sample_packing: true
-pad_to_sequence_len: true
+
 eval_sample_packing: false
 
 wandb_project:
diff --git a/examples/mistral/lora-mps.yml b/examples/mistral/lora-mps.yml
index d54c3e30b..c18d10aee 100644
--- a/examples/mistral/lora-mps.yml
+++ b/examples/mistral/lora-mps.yml
@@ -18,7 +18,7 @@ lora_model_dir:
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/mistral/lora.yml b/examples/mistral/lora.yml
index 161255468..77a87a1da 100644
--- a/examples/mistral/lora.yml
+++ b/examples/mistral/lora.yml
@@ -20,7 +20,7 @@ lora_model_dir:
 
 sequence_len: 8192
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/mistral/mistral-dpo-qlora.yml b/examples/mistral/mistral-dpo-qlora.yml
index 8d0378690..49f5e4ede 100644
--- a/examples/mistral/mistral-dpo-qlora.yml
+++ b/examples/mistral/mistral-dpo-qlora.yml
@@ -31,7 +31,7 @@ output_dir: ./outputs/dpo-qlora
 
 sequence_len: 2048
 sample_packing: false
-pad_to_sequence_len: true
+
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/mistral/mistral-qlora-orpo.yml b/examples/mistral/mistral-qlora-orpo.yml
index f37dc09fa..ea3e112b9 100644
--- a/examples/mistral/mistral-qlora-orpo.yml
+++ b/examples/mistral/mistral-qlora-orpo.yml
@@ -25,7 +25,7 @@ lora_model_dir:
 
 sequence_len: 4096
 sample_packing: false
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/mistral/mixtral.yml b/examples/mistral/mixtral.yml
index 5be9b4db8..933275484 100644
--- a/examples/mistral/mixtral.yml
+++ b/examples/mistral/mixtral.yml
@@ -34,7 +34,7 @@ lora_model_dir:
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/mistral/mixtral_22.yml b/examples/mistral/mixtral_22.yml
index 100e4464f..0b606b7d7 100644
--- a/examples/mistral/mixtral_22.yml
+++ b/examples/mistral/mixtral_22.yml
@@ -25,7 +25,7 @@ output_dir: ./outputs/out
 
 sequence_len: 8000
 sample_packing: true
-pad_to_sequence_len: true
+
 
 gradient_accumulation_steps: 1
 micro_batch_size: 1
diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml
index 08df36e15..a5e8b65fb 100644
--- a/examples/mistral/qlora.yml
+++ b/examples/mistral/qlora.yml
@@ -20,7 +20,7 @@ lora_model_dir:
 
 sequence_len: 8192
 sample_packing: true
-pad_to_sequence_len: true
+
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/orpheus/finetune.yml b/examples/orpheus/finetune.yml
index 57f65d966..9dcb8a43e 100644
--- a/examples/orpheus/finetune.yml
+++ b/examples/orpheus/finetune.yml
@@ -18,7 +18,7 @@ output_dir: ./outputs/out
 
 sequence_len: 8192
 sample_packing: true
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/phi/lora-3.5.yaml b/examples/phi/lora-3.5.yaml
index 9f3bbdf53..b7f902d63 100644
--- a/examples/phi/lora-3.5.yaml
+++ b/examples/phi/lora-3.5.yaml
@@ -28,7 +28,7 @@ output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: false
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/phi/phi-ft.yml b/examples/phi/phi-ft.yml
index fc6d649d7..4adb62d3a 100644
--- a/examples/phi/phi-ft.yml
+++ b/examples/phi/phi-ft.yml
@@ -15,7 +15,7 @@ output_dir: ./outputs/phi-sft-out
 
 sequence_len: 2048
 sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter:
 lora_model_dir:
diff --git a/examples/phi/phi-qlora.yml b/examples/phi/phi-qlora.yml
index ccd92c817..11c08bfe6 100644
--- a/examples/phi/phi-qlora.yml
+++ b/examples/phi/phi-qlora.yml
@@ -18,7 +18,7 @@ output_dir: ./outputs/phi-sft-out
 
 sequence_len: 2048
 sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/phi/phi2-ft.yml b/examples/phi/phi2-ft.yml
index 853250ccb..102c7ba03 100644
--- a/examples/phi/phi2-ft.yml
+++ b/examples/phi/phi2-ft.yml
@@ -15,7 +15,7 @@ output_dir: ./outputs/phi-sft-out
 
 sequence_len: 2048
 sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter:
 lora_model_dir:
diff --git a/examples/phi/phi3-ft-fsdp.yml b/examples/phi/phi3-ft-fsdp.yml
index 130298bc0..e8290ea1f 100644
--- a/examples/phi/phi3-ft-fsdp.yml
+++ b/examples/phi/phi3-ft-fsdp.yml
@@ -15,7 +15,7 @@ output_dir: ./phi-sft-out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 trust_remote_code: true
 
 adapter:
diff --git a/examples/phi/phi3-ft.yml b/examples/phi/phi3-ft.yml
index 42b87e8d0..0b204963c 100644
--- a/examples/phi/phi3-ft.yml
+++ b/examples/phi/phi3-ft.yml
@@ -18,7 +18,7 @@ output_dir: ./out
 
 sequence_len: 4096
 sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/qwen2/dpo.yaml b/examples/qwen2/dpo.yaml
index 69a74ae4a..3b1f817e5 100644
--- a/examples/qwen2/dpo.yaml
+++ b/examples/qwen2/dpo.yaml
@@ -27,7 +27,7 @@ output_dir: ./outputs/dpo-out
 
 sequence_len: 2048
 sample_packing: false
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/qwen2/prm.yaml b/examples/qwen2/prm.yaml
index af188f75d..a709a598d 100644
--- a/examples/qwen2/prm.yaml
+++ b/examples/qwen2/prm.yaml
@@ -22,7 +22,7 @@ remove_unused_columns: false
 sequence_len: 2048
 sample_packing: false
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/qwen2/qlora-fsdp.yaml b/examples/qwen2/qlora-fsdp.yaml
index 861ce5517..ca435b2bb 100644
--- a/examples/qwen2/qlora-fsdp.yaml
+++ b/examples/qwen2/qlora-fsdp.yaml
@@ -17,7 +17,7 @@ output_dir: ./outputs/out
 sequence_len: 2048
 sample_packing: true
 eval_sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/qwen2/reward-model.yaml b/examples/qwen2/reward-model.yaml
index 1854b8216..08b8b4552 100644
--- a/examples/qwen2/reward-model.yaml
+++ b/examples/qwen2/reward-model.yaml
@@ -18,7 +18,7 @@ remove_unused_columns: false
 sequence_len: 2048
 sample_packing: false
 eval_sample_packing: false
-pad_to_sequence_len: true
+
 
 wandb_project:
 wandb_entity:
diff --git a/examples/qwen3/32b-qlora.yaml b/examples/qwen3/32b-qlora.yaml
index 1f148ece5..87609c42f 100644
--- a/examples/qwen3/32b-qlora.yaml
+++ b/examples/qwen3/32b-qlora.yaml
@@ -22,7 +22,7 @@ dataset_prepared_path: last_run_prepared
 sequence_len: 2048
 sample_packing: true
 eval_sample_packing: true
-pad_to_sequence_len: true
+
 
 load_in_4bit: true
 adapter: qlora
diff --git a/examples/qwen3/8b-qat-fsdp2.yml b/examples/qwen3/8b-qat-fsdp2.yml
index e4d0ed4fb..395812a56 100644
--- a/examples/qwen3/8b-qat-fsdp2.yml
+++ b/examples/qwen3/8b-qat-fsdp2.yml
@@ -24,7 +24,7 @@ output_dir: ./outputs/qat_out/
 sequence_len: 2048
 sample_packing: true
 flex_attention: true
-pad_to_sequence_len: true
+
 
 flex_attn_compile_kwargs:
   dynamic: false
diff --git a/examples/qwen3/qlora-fsdp.yaml b/examples/qwen3/qlora-fsdp.yaml
index 762f9648d..6af3cfbc6 100644
--- a/examples/qwen3/qlora-fsdp.yaml
+++ b/examples/qwen3/qlora-fsdp.yaml
@@ -16,7 +16,7 @@ output_dir: ./outputs/out
 sequence_len: 2048
 sample_packing: true
 eval_sample_packing: true
-pad_to_sequence_len: true
+
 
 adapter: qlora
 lora_model_dir:
diff --git a/src/axolotl/utils/schemas/config.py b/src/axolotl/utils/schemas/config.py
index 96e3a8a3e..de928d11c 100644
--- a/src/axolotl/utils/schemas/config.py
+++ b/src/axolotl/utils/schemas/config.py
@@ -435,7 +435,7 @@ class AxolotlInputConfig(
     pad_to_sequence_len: bool | None = Field(
         default=None,
         json_schema_extra={
-            "description": "Pad inputs so each step uses constant sized buffers. This will reduce memory fragmentation and may prevent OOMs, by re-using memory more efficiently"
+            "description": "Pad inputs so each step uses constant sized buffers. This will reduce memory fragmentation and may prevent OOMs, by re-using memory more efficiently. Defaults to True if `sample_packing` is enabled"
         },
     )
     curriculum_sampling: bool | None = Field(
diff --git a/tests/utils/schemas/validation/test_default_values.py b/tests/utils/schemas/validation/test_default_values.py
new file mode 100644
index 000000000..332dfe77f
--- /dev/null
+++ b/tests/utils/schemas/validation/test_default_values.py
@@ -0,0 +1,21 @@
+"""Tests for default values for configurations"""
+
+from axolotl.utils.config import validate_config
+from axolotl.utils.dict import DictDefault
+
+
+class TestDefaultConfigValues:
+    """Checks for config values that validation fills in when left unset."""
+
+    def test_pad_to_sequence_len(self, min_base_cfg):
+        """Enabling sample_packing alone must yield pad_to_sequence_len=True.
+
+        Only sample_packing is set explicitly here; min_base_cfg is assumed
+        not to set pad_to_sequence_len itself (verify against the fixture).
+        """
+        # Merge the single override with the shared minimal base config.
+        packing_only = DictDefault(sample_packing=True)
+        raw_cfg = packing_only | min_base_cfg
+        validated = validate_config(raw_cfg)
+
+        assert validated.pad_to_sequence_len is True