From 343714972bdb7ffacf5ddfc84f50918766dacb3a Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Wed, 6 Sep 2023 17:00:21 -0400 Subject: [PATCH] recommend padding when using sample packing (#531) --- examples/code-llama/13b/lora.yml | 1 + examples/code-llama/13b/qlora.yml | 1 + examples/code-llama/34b/lora.yml | 1 + examples/code-llama/34b/qlora.yml | 1 + examples/code-llama/7b/lora.yml | 1 + examples/code-llama/7b/qlora.yml | 1 + examples/llama-2/lora.yml | 1 + examples/llama-2/qlora.yml | 1 + examples/llama-2/relora.yml | 1 + src/axolotl/utils/config.py | 5 +++++ tests/test_validation.py | 14 ++++++++++++++ 11 files changed, 28 insertions(+) diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml index 637c05143..e4384a893 100644 --- a/examples/code-llama/13b/lora.yml +++ b/examples/code-llama/13b/lora.yml @@ -17,6 +17,7 @@ output_dir: ./lora-out sequence_len: 100000 sample_packing: true +pad_to_sequence_len: true adapter: lora lora_model_dir: diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml index ae78f5bf2..8e482a22e 100644 --- a/examples/code-llama/13b/qlora.yml +++ b/examples/code-llama/13b/qlora.yml @@ -20,6 +20,7 @@ lora_model_dir: sequence_len: 100000 sample_packing: true +pad_to_sequence_len: true lora_r: 32 lora_alpha: 16 diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml index 9c4cfee10..8a5c457f6 100644 --- a/examples/code-llama/34b/lora.yml +++ b/examples/code-llama/34b/lora.yml @@ -17,6 +17,7 @@ output_dir: ./lora-out sequence_len: 100000 sample_packing: true +pad_to_sequence_len: true adapter: lora lora_model_dir: diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml index 9f5ce50f9..b0d91fae9 100644 --- a/examples/code-llama/34b/qlora.yml +++ b/examples/code-llama/34b/qlora.yml @@ -20,6 +20,7 @@ lora_model_dir: sequence_len: 100000 sample_packing: true +pad_to_sequence_len: true lora_r: 32 lora_alpha: 16 diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml index dfa3f2f7a..1e09555f7 100644 --- a/examples/code-llama/7b/lora.yml +++ b/examples/code-llama/7b/lora.yml @@ -17,6 +17,7 @@ output_dir: ./lora-out sequence_len: 100000 sample_packing: true +pad_to_sequence_len: true adapter: lora lora_model_dir: diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml index 704f058c3..fc9a5eb53 100644 --- a/examples/code-llama/7b/qlora.yml +++ b/examples/code-llama/7b/qlora.yml @@ -20,6 +20,7 @@ lora_model_dir: sequence_len: 100000 sample_packing: true +pad_to_sequence_len: true lora_r: 32 lora_alpha: 16 diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml index 2a0af130b..a54799b40 100644 --- a/examples/llama-2/lora.yml +++ b/examples/llama-2/lora.yml @@ -17,6 +17,7 @@ output_dir: ./lora-out sequence_len: 4096 sample_packing: true +pad_to_sequence_len: true adapter: lora lora_model_dir: diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml index 3ad2a7e4f..dd029859e 100644 --- a/examples/llama-2/qlora.yml +++ b/examples/llama-2/qlora.yml @@ -20,6 +20,7 @@ lora_model_dir: sequence_len: 4096 sample_packing: true +pad_to_sequence_len: true lora_r: 32 lora_alpha: 16 diff --git a/examples/llama-2/relora.yml b/examples/llama-2/relora.yml index 66515dabc..b59a7da04 100644 --- a/examples/llama-2/relora.yml +++ b/examples/llama-2/relora.yml @@ -20,6 +20,7 @@ lora_model_dir: sequence_len: 4096 sample_packing: true +pad_to_sequence_len: true lora_r: 8 lora_alpha: 16 diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py index 0fbccd205..7fc6e1232 100644 --- a/src/axolotl/utils/config.py +++ b/src/axolotl/utils/config.py @@ -97,6 +97,11 @@ def validate_config(cfg): ) ) + if cfg.sample_packing and not cfg.pad_to_sequence_len: + LOG.warning( + "`pad_to_sequence_len: true` is recommended when using sample_packing" + ) + if cfg.gradient_accumulation_steps and cfg.batch_size: raise ValueError( "please set only one of gradient_accumulation_steps or batch_size" diff --git a/tests/test_validation.py b/tests/test_validation.py index 48b122f9a..f250e5cb4 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -328,6 +328,20 @@ class ValidationTest(unittest.TestCase): for record in self._caplog.records ) + cfg = DictDefault( + { + "sample_packing": True, + "pad_to_sequence_len": None, + } + ) + with self._caplog.at_level(logging.WARNING): + validate_config(cfg) + assert any( + "`pad_to_sequence_len: true` is recommended when using sample_packing" + in record.message + for record in self._caplog.records + ) + cfg = DictDefault( { "max_packed_sequence_len": 2048,