fix(config): Set eos/bos to tokenizer if different (#801)

* fix(config): Set eos/bos to tokenizer if different
* chore: fix lint
2023-10-29 21:32:37 +09:00
parent 827ec3d274
commit 637ed095a0
1 changed file with 14 additions and 0 deletions
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -386,6 +386,20 @@ def load_model(
        )
        model.config.max_position_embeddings = cfg.sequence_len

+    if (
+        hasattr(model.config, "bos_token_id")
+        and model.config.bos_token_id is not None  # 0 is a valid token id
+        and model.config.bos_token_id != tokenizer.bos_token_id
+    ):
+        model.config.bos_token_id = tokenizer.bos_token_id  # tokenizer wins
+
+    if (
+        hasattr(model.config, "eos_token_id")
+        and model.config.eos_token_id is not None  # 0 is a valid token id
+        and model.config.eos_token_id != tokenizer.eos_token_id
+    ):
+        model.config.eos_token_id = tokenizer.eos_token_id  # tokenizer wins
+
    if model.device.type == "cuda":
        log_gpu_memory_usage(LOG, "after model load", model.device)