make pad_to_sequence_len default to the same value as sample_packing (#2941) [skip ci]

* make pad_to_sequence_len default to the same value as sample_packing

* remove duplicate validation

* fix test

* update description meta

Co-authored-by: NanoCode012 <nano@axolotl.ai>

---------

Co-authored-by: NanoCode012 <nano@axolotl.ai>
Wing Lian authored on 2025-07-21 11:40:56 -04:00; committed by GitHub
parent db5f6f4693, commit af8d257aa2
90 changed files with 109 additions and 90 deletions
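With this change, example configs that enable sample_packing no longer need to set pad_to_sequence_len explicitly: when the field is left unset, config validation resolves it from sample_packing. A minimal sketch of the resulting behavior, mirroring the new test at the bottom of this diff (min_base_cfg stands in for a minimal valid base config, as in the test's fixture):

from axolotl.utils.config import validate_config
from axolotl.utils.dict import DictDefault

# pad_to_sequence_len is deliberately left unset here;
# sample_packing now drives its default during validation.
cfg = DictDefault(sample_packing=True) | min_base_cfg
cfg = validate_config(cfg)
assert cfg.pad_to_sequence_len is True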

@@ -17,7 +17,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -20,7 +20,7 @@ lora_model_dir:
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -17,7 +17,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -20,7 +20,7 @@ lora_model_dir:
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -17,7 +17,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -20,7 +20,7 @@ lora_model_dir:
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -21,7 +21,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -25,7 +25,7 @@ lora_target_linear: true
sequence_len: 4096
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -16,7 +16,7 @@ output_dir: ./outputs/out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
adapter:
lora_model_dir:

@@ -19,7 +19,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -19,7 +19,7 @@ lora_model_dir:
sequence_len: 8192
sample_packing: true
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -17,7 +17,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
eval_sample_packing: false
adapter: lora

@@ -17,7 +17,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -21,7 +21,7 @@ lora_model_dir:
sequence_len: 4096
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -26,5 +26,3 @@ timeout: 86400
# Preprocess specific configurations
memory_preprocess: 32
timeout_preprocess: 14400
# save_first_step: true # uncomment this to validate checkpoint saving works with your config

@@ -27,7 +27,7 @@ lora_target_linear: true
sequence_len: 2048
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -21,7 +21,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -21,7 +21,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -12,7 +12,7 @@ output_dir: ./outputs/out
sequence_len: 2048
sample_packing: true
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -30,7 +30,7 @@ output_dir: ./outputs/out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -25,7 +25,7 @@ lora_model_dir:
sequence_len: 2048
sample_packing: true
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -38,7 +38,7 @@ lora_target_modules:
sequence_len: 2048
sample_packing: false
eval_sample_packing: false
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -38,7 +38,7 @@ lora_target_modules:
sequence_len: 2048
sample_packing: false
eval_sample_packing: false
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -38,7 +38,7 @@ lora_target_modules:
sequence_len: 2048
sample_packing: false
eval_sample_packing: false
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -38,7 +38,7 @@ lora_target_modules:
sequence_len: 2048
sample_packing: false
eval_sample_packing: false
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -38,7 +38,7 @@ lora_target_modules:
sequence_len: 2048
sample_packing: false
eval_sample_packing: false
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -38,7 +38,7 @@ lora_target_modules:
sequence_len: 2048
sample_packing: false
eval_sample_packing: false
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -31,7 +31,7 @@ lora_target_linear: true
sequence_len: 2048
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -18,7 +18,7 @@ remove_unused_columns: false
sequence_len: 2048
sample_packing: false
eval_sample_packing: false
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -35,7 +35,7 @@ lora_target_linear: true
sequence_len: 2048
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -25,7 +25,7 @@ lora_model_dir:
sequence_len: 2048
sample_packing: true
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -17,7 +17,7 @@ lora_model_dir:
sequence_len: 2048
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true
lora_r: 16
lora_alpha: 32

@@ -23,7 +23,7 @@ save_safetensors: true
adapter: qlora
sequence_len: 2048
sample_packing: true
pad_to_sequence_len: true
lora_r: 16
lora_alpha: 16

@@ -18,7 +18,7 @@ output_dir: ./outputs/out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -14,7 +14,7 @@ output_dir: ./outputs/out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
adapter:
lora_model_dir:

@@ -14,7 +14,7 @@ output_dir: ./outputs/lisa-out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
adapter:
lora_model_dir:

@@ -14,7 +14,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -17,7 +17,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -20,7 +20,7 @@ lora_model_dir:
sequence_len: 512
sample_packing: false
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -20,7 +20,7 @@ lora_model_dir:
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -18,7 +18,7 @@ lora_model_dir:
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
lora_r: 8
lora_alpha: 16

@@ -22,7 +22,7 @@ datasets:
output_dir: ./outputs/qat_out/
sample_packing: true
pad_to_sequence_len: true
sequence_len: 512
flex_attention: true

@@ -26,7 +26,7 @@ output_dir: ./outputs/out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -11,7 +11,7 @@ output_dir: ./outputs/out
sequence_len: 8192
sample_packing: true
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -37,7 +37,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: false
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -28,7 +28,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: false
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -49,7 +49,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: false
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -22,7 +22,7 @@ dataset_exact_deduplication: true
sequence_len: 4096
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -14,7 +14,7 @@ lora_model_dir:
sequence_len: 2048
sample_packing: true
pad_to_sequence_len: true
lora_r: 16
lora_alpha: 32

@@ -15,7 +15,7 @@ lora_model_dir:
sequence_len: 2048
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true
lora_r: 16
lora_alpha: 32

@@ -24,7 +24,7 @@ sample_packing: true
sample_packing_sequentially: true
curriculum_sampling: true
eval_sample_packing: false
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -15,7 +15,7 @@ lora_model_dir:
sequence_len: 2048
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true
lora_r: 16
lora_alpha: 32

@@ -18,7 +18,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -18,7 +18,7 @@ lora_model_dir:
sequence_len: 2048
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -18,7 +18,7 @@ adapter: qlora
sequence_len: 2048
sample_packing: true
pad_to_sequence_len: true
lora_r: 16
lora_alpha: 16

@@ -20,7 +20,7 @@ lora_model_dir:
sequence_len: 512
sample_packing: false
pad_to_sequence_len: true
lora_r: 8
lora_alpha: 16

@@ -20,7 +20,7 @@ lora_model_dir:
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -16,7 +16,7 @@ output_dir: ./outputs/out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
eval_sample_packing: false
wandb_project:

@@ -47,7 +47,7 @@ output_dir: ./outputs/out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
gradient_accumulation_steps: 1
micro_batch_size: 1

@@ -48,7 +48,7 @@ output_dir: ./outputs/out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -51,7 +51,7 @@ output_dir: ./outputs/out
sequence_len: 4096 # up to 8k will work on a single H100
sample_packing: true
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -46,7 +46,7 @@ output_dir: ./outputs/out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
gradient_accumulation_steps: 1
micro_batch_size: 2

@@ -51,7 +51,7 @@ output_dir: ./outputs/out
sequence_len: 4096 # up to 8k will work on a single H100
sample_packing: true
pad_to_sequence_len: true
gradient_accumulation_steps: 1
micro_batch_size: 1

@@ -23,7 +23,7 @@ lora_model_dir:
sequence_len: 2048
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -22,7 +22,7 @@ lora_model_dir:
sequence_len: 2048
sample_packing: true
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -27,7 +27,7 @@ output_dir: ./outputs/out
sequence_len: 2048
sample_packing: true
pad_to_sequence_len: true
gradient_accumulation_steps: 1
micro_batch_size: 1

@@ -14,7 +14,7 @@ output_dir: ./outputs/out
sequence_len: 8192
sample_packing: true
pad_to_sequence_len: true
eval_sample_packing: false
wandb_project:

@@ -18,7 +18,7 @@ lora_model_dir:
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -20,7 +20,7 @@ lora_model_dir:
sequence_len: 8192
sample_packing: true
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -31,7 +31,7 @@ output_dir: ./outputs/dpo-qlora
sequence_len: 2048
sample_packing: false
pad_to_sequence_len: true
adapter: qlora
lora_model_dir:

@@ -25,7 +25,7 @@ lora_model_dir:
sequence_len: 4096
sample_packing: false
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -34,7 +34,7 @@ lora_model_dir:
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -25,7 +25,7 @@ output_dir: ./outputs/out
sequence_len: 8000
sample_packing: true
pad_to_sequence_len: true
gradient_accumulation_steps: 1
micro_batch_size: 1

@@ -20,7 +20,7 @@ lora_model_dir:
sequence_len: 8192
sample_packing: true
pad_to_sequence_len: true
lora_r: 32
lora_alpha: 16

@@ -18,7 +18,7 @@ output_dir: ./outputs/out
sequence_len: 8192
sample_packing: true
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -28,7 +28,7 @@ output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: false
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -15,7 +15,7 @@ output_dir: ./outputs/phi-sft-out
sequence_len: 2048
sample_packing: true
pad_to_sequence_len: true
adapter:
lora_model_dir:

@@ -18,7 +18,7 @@ output_dir: ./outputs/phi-sft-out
sequence_len: 2048
sample_packing: true
pad_to_sequence_len: true
adapter: qlora
lora_model_dir:

@@ -15,7 +15,7 @@ output_dir: ./outputs/phi-sft-out
sequence_len: 2048
sample_packing: true
pad_to_sequence_len: true
adapter:
lora_model_dir:

@@ -15,7 +15,7 @@ output_dir: ./phi-sft-out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
trust_remote_code: true
adapter:

@@ -18,7 +18,7 @@ output_dir: ./out
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
adapter: lora
lora_model_dir:

@@ -27,7 +27,7 @@ output_dir: ./outputs/dpo-out
sequence_len: 2048
sample_packing: false
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -22,7 +22,7 @@ remove_unused_columns: false
sequence_len: 2048
sample_packing: false
eval_sample_packing: false
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -17,7 +17,7 @@ output_dir: ./outputs/out
sequence_len: 2048
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true
adapter: qlora
lora_model_dir:

@@ -18,7 +18,7 @@ remove_unused_columns: false
sequence_len: 2048
sample_packing: false
eval_sample_packing: false
pad_to_sequence_len: true
wandb_project:
wandb_entity:

@@ -22,7 +22,7 @@ dataset_prepared_path: last_run_prepared
sequence_len: 2048
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true
load_in_4bit: true
adapter: qlora

@@ -24,7 +24,7 @@ output_dir: ./outputs/qat_out/
sequence_len: 2048
sample_packing: true
flex_attention: true
pad_to_sequence_len: true
flex_attn_compile_kwargs:
dynamic: false

@@ -16,7 +16,7 @@ output_dir: ./outputs/out
sequence_len: 2048
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true
adapter: qlora
lora_model_dir:

@@ -435,7 +435,7 @@ class AxolotlInputConfig(
     pad_to_sequence_len: bool | None = Field(
         default=None,
         json_schema_extra={
-            "description": "Pad inputs so each step uses constant sized buffers. This will reduce memory fragmentation and may prevent OOMs, by re-using memory more efficiently"
+            "description": "Pad inputs so each step uses constant sized buffers. This will reduce memory fragmentation and may prevent OOMs, by re-using memory more efficiently. Defaults to True if `sample_packing` enabled"
         },
     )
     curriculum_sampling: bool | None = Field(
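Note that the schema keeps default=None: the packing-aware default is applied later, during config validation, so the schema can still distinguish "unset" from an explicit user value. A hypothetical sketch of that resolution rule (illustrative only, not the commit's literal code):

def resolve_pad_to_sequence_len(cfg):
    # Hypothetical helper: an unset pad_to_sequence_len inherits the
    # effective sample_packing value; explicit user settings win.
    if cfg.pad_to_sequence_len is None:
        cfg.pad_to_sequence_len = bool(cfg.sample_packing)
    return cfg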

@@ -0,0 +1,21 @@
"""Tests for default values for configurations"""
from axolotl.utils.config import validate_config
from axolotl.utils.dict import DictDefault
class TestDefaultConfigValues:
"""Tests for default values for configurations"""
def test_pad_to_sequence_len(self, min_base_cfg):
"""Tests that sample packing automatically sets pad_to_sequence_len to True"""
cfg = (
DictDefault(
sample_packing=True,
)
| min_base_cfg
)
cfg = validate_config(cfg)
assert cfg.pad_to_sequence_len is True
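The new test pins only the packing-enabled path. A complementary case one might add (hypothetical; not part of this commit) to pin the non-packing default as well:

    def test_pad_to_sequence_len_no_packing(self, min_base_cfg):
        """Without sample packing, pad_to_sequence_len should stay falsy."""
        cfg = DictDefault(sample_packing=False) | min_base_cfg
        cfg = validate_config(cfg)
        # With the new default, pad_to_sequence_len mirrors sample_packing.
        assert not cfg.pad_to_sequence_len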