From 9d629d8bff1c69a8d7753b18ab6029a7aaea2090 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 20 Aug 2023 09:18:18 -0400 Subject: [PATCH] gracefully handle empty input (#442) --- src/axolotl/prompt_tokenizers.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/axolotl/prompt_tokenizers.py b/src/axolotl/prompt_tokenizers.py index 7612f4967..9bdd5644a 100644 --- a/src/axolotl/prompt_tokenizers.py +++ b/src/axolotl/prompt_tokenizers.py @@ -85,7 +85,11 @@ class PromptTokenizingStrategy(abc.ABC): result["input_ids"].append(self.tokenizer.eos_token_id) result["attention_mask"].append(1) - if result["input_ids"][0] == self.tokenizer.bos_token_id and strip_bos_token: + if ( + len(result["input_ids"]) > 0 + and result["input_ids"][0] == self.tokenizer.bos_token_id + and strip_bos_token + ): result["input_ids"] = result["input_ids"][1:] result["attention_mask"] = result["attention_mask"][1:]