diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml index 637c05143..e4384a893 100644 --- a/examples/code-llama/13b/lora.yml +++ b/examples/code-llama/13b/lora.yml @@ -17,6 +17,7 @@ output_dir: ./lora-out sequence_len: 100000 sample_packing: true +pad_to_sequence_len: true adapter: lora lora_model_dir: diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml index ae78f5bf2..8e482a22e 100644 --- a/examples/code-llama/13b/qlora.yml +++ b/examples/code-llama/13b/qlora.yml @@ -20,6 +20,7 @@ lora_model_dir: sequence_len: 100000 sample_packing: true +pad_to_sequence_len: true lora_r: 32 lora_alpha: 16 diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml index 9c4cfee10..8a5c457f6 100644 --- a/examples/code-llama/34b/lora.yml +++ b/examples/code-llama/34b/lora.yml @@ -17,6 +17,7 @@ output_dir: ./lora-out sequence_len: 100000 sample_packing: true +pad_to_sequence_len: true adapter: lora lora_model_dir: diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml index 9f5ce50f9..b0d91fae9 100644 --- a/examples/code-llama/34b/qlora.yml +++ b/examples/code-llama/34b/qlora.yml @@ -20,6 +20,7 @@ lora_model_dir: sequence_len: 100000 sample_packing: true +pad_to_sequence_len: true lora_r: 32 lora_alpha: 16 diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml index dfa3f2f7a..1e09555f7 100644 --- a/examples/code-llama/7b/lora.yml +++ b/examples/code-llama/7b/lora.yml @@ -17,6 +17,7 @@ output_dir: ./lora-out sequence_len: 100000 sample_packing: true +pad_to_sequence_len: true adapter: lora lora_model_dir: diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml index 704f058c3..fc9a5eb53 100644 --- a/examples/code-llama/7b/qlora.yml +++ b/examples/code-llama/7b/qlora.yml @@ -20,6 +20,7 @@ lora_model_dir: sequence_len: 100000 sample_packing: true +pad_to_sequence_len: true lora_r: 32 lora_alpha: 16 diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml index 2a0af130b..a54799b40 100644 --- a/examples/llama-2/lora.yml +++ b/examples/llama-2/lora.yml @@ -17,6 +17,7 @@ output_dir: ./lora-out sequence_len: 4096 sample_packing: true +pad_to_sequence_len: true adapter: lora lora_model_dir: diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml index 3ad2a7e4f..dd029859e 100644 --- a/examples/llama-2/qlora.yml +++ b/examples/llama-2/qlora.yml @@ -20,6 +20,7 @@ lora_model_dir: sequence_len: 4096 sample_packing: true +pad_to_sequence_len: true lora_r: 32 lora_alpha: 16 diff --git a/examples/llama-2/relora.yml b/examples/llama-2/relora.yml index 66515dabc..b59a7da04 100644 --- a/examples/llama-2/relora.yml +++ b/examples/llama-2/relora.yml @@ -20,6 +20,7 @@ lora_model_dir: sequence_len: 4096 sample_packing: true +pad_to_sequence_len: true lora_r: 8 lora_alpha: 16 diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py index 0fbccd205..7fc6e1232 100644 --- a/src/axolotl/utils/config.py +++ b/src/axolotl/utils/config.py @@ -97,6 +97,11 @@ def validate_config(cfg): ) ) + if cfg.sample_packing and not cfg.pad_to_sequence_len: + LOG.warning( + "`pad_to_sequence_len: true` is recommended when using sample_packing" + ) + if cfg.gradient_accumulation_steps and cfg.batch_size: raise ValueError( "please set only one of gradient_accumulation_steps or batch_size" diff --git a/tests/test_validation.py b/tests/test_validation.py index 48b122f9a..f250e5cb4 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -328,6 +328,20 @@ class ValidationTest(unittest.TestCase): for record in self._caplog.records ) + cfg = DictDefault( + { + "sample_packing": True, + "pad_to_sequence_len": None, + } + ) + with self._caplog.at_level(logging.WARNING): + validate_config(cfg) + assert any( + "`pad_to_sequence_len: true` is recommended when using sample_packing" + in record.message + for record in self._caplog.records + ) + cfg = DictDefault( { "max_packed_sequence_len": 2048,