From 2dac1edf7225cc75bd781d78a7d0cca33bd8560f Mon Sep 17 00:00:00 2001 From: Chiwan Park Date: Tue, 27 Aug 2024 01:56:12 +0900 Subject: [PATCH] Fix `drop_long_seq` bug due to truncation in prompt tokenization strategies when using `chat_template` (#1867) --- src/axolotl/prompt_strategies/chat_template.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/axolotl/prompt_strategies/chat_template.py b/src/axolotl/prompt_strategies/chat_template.py index 8ae668d7e..19e36531a 100644 --- a/src/axolotl/prompt_strategies/chat_template.py +++ b/src/axolotl/prompt_strategies/chat_template.py @@ -350,7 +350,8 @@ def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None): ), "roles": ds_cfg.get("roles"), "drop_system_message": ds_cfg.get("drop_system_message", False), - "max_length": cfg.sequence_len, + # we need to add one for detecting sequences exceeding the `sequence_len` limit. + "max_length": cfg.sequence_len + 1, } strategy_params = {