From 343714972bdb7ffacf5ddfc84f50918766dacb3a Mon Sep 17 00:00:00 2001
From: Wing Lian <wing.lian@gmail.com>
Date: Wed, 6 Sep 2023 17:00:21 -0400
Subject: [PATCH] recommend padding when using sample packing (#531)

---
 examples/code-llama/13b/lora.yml  |  1 +
 examples/code-llama/13b/qlora.yml |  1 +
 examples/code-llama/34b/lora.yml  |  1 +
 examples/code-llama/34b/qlora.yml |  1 +
 examples/code-llama/7b/lora.yml   |  1 +
 examples/code-llama/7b/qlora.yml  |  1 +
 examples/llama-2/lora.yml         |  1 +
 examples/llama-2/qlora.yml        |  1 +
 examples/llama-2/relora.yml       |  1 +
 src/axolotl/utils/config.py       |  5 +++++
 tests/test_validation.py          | 14 ++++++++++++++
 11 files changed, 28 insertions(+)

diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml
index 637c05143..e4384a893 100644
--- a/examples/code-llama/13b/lora.yml
+++ b/examples/code-llama/13b/lora.yml
@@ -17,6 +17,7 @@ output_dir: ./lora-out
 
 sequence_len: 100000
 sample_packing: true
+pad_to_sequence_len: true
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml
index ae78f5bf2..8e482a22e 100644
--- a/examples/code-llama/13b/qlora.yml
+++ b/examples/code-llama/13b/qlora.yml
@@ -20,6 +20,7 @@ lora_model_dir:
 
 sequence_len: 100000
 sample_packing: true
+pad_to_sequence_len: true
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml
index 9c4cfee10..8a5c457f6 100644
--- a/examples/code-llama/34b/lora.yml
+++ b/examples/code-llama/34b/lora.yml
@@ -17,6 +17,7 @@ output_dir: ./lora-out
 
 sequence_len: 100000
 sample_packing: true
+pad_to_sequence_len: true
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml
index 9f5ce50f9..b0d91fae9 100644
--- a/examples/code-llama/34b/qlora.yml
+++ b/examples/code-llama/34b/qlora.yml
@@ -20,6 +20,7 @@ lora_model_dir:
 
 sequence_len: 100000
 sample_packing: true
+pad_to_sequence_len: true
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml
index dfa3f2f7a..1e09555f7 100644
--- a/examples/code-llama/7b/lora.yml
+++ b/examples/code-llama/7b/lora.yml
@@ -17,6 +17,7 @@ output_dir: ./lora-out
 
 sequence_len: 100000
 sample_packing: true
+pad_to_sequence_len: true
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml
index 704f058c3..fc9a5eb53 100644
--- a/examples/code-llama/7b/qlora.yml
+++ b/examples/code-llama/7b/qlora.yml
@@ -20,6 +20,7 @@ lora_model_dir:
 
 sequence_len: 100000
 sample_packing: true
+pad_to_sequence_len: true
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml
index 2a0af130b..a54799b40 100644
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -17,6 +17,7 @@ output_dir: ./lora-out
 
 sequence_len: 4096
 sample_packing: true
+pad_to_sequence_len: true
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml
index 3ad2a7e4f..dd029859e 100644
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -20,6 +20,7 @@ lora_model_dir:
 
 sequence_len: 4096
 sample_packing: true
+pad_to_sequence_len: true
 
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/llama-2/relora.yml b/examples/llama-2/relora.yml
index 66515dabc..b59a7da04 100644
--- a/examples/llama-2/relora.yml
+++ b/examples/llama-2/relora.yml
@@ -20,6 +20,7 @@ lora_model_dir:
 
 sequence_len: 4096
 sample_packing: true
+pad_to_sequence_len: true
 
 lora_r: 8
 lora_alpha: 16
diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py
index 0fbccd205..7fc6e1232 100644
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -97,6 +97,11 @@ def validate_config(cfg):
             )
         )
 
+    if cfg.sample_packing and not cfg.pad_to_sequence_len:
+        LOG.warning(
+            "`pad_to_sequence_len: true` is recommended when using sample_packing"
+        )
+
     if cfg.gradient_accumulation_steps and cfg.batch_size:
         raise ValueError(
             "please set only one of gradient_accumulation_steps or batch_size"
diff --git a/tests/test_validation.py b/tests/test_validation.py
index 48b122f9a..f250e5cb4 100644
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -328,6 +328,20 @@ class ValidationTest(unittest.TestCase):
                 for record in self._caplog.records
             )
 
+        cfg = DictDefault(
+            {
+                "sample_packing": True,
+                "pad_to_sequence_len": None,
+            }
+        )
+        with self._caplog.at_level(logging.WARNING):
+            validate_config(cfg)
+            assert any(
+                "`pad_to_sequence_len: true` is recommended when using sample_packing"
+                in record.message
+                for record in self._caplog.records
+            )
+
         cfg = DictDefault(
             {
                 "max_packed_sequence_len": 2048,