Feat: Warns to add to modules_to_save when adding tokens or switching special_tokens (#787)
* Feat: Auto add to modules_to_save when adding tokens
* fix: swap to error instead of warning
* feat: add check when special_tokens differ and add test
This commit is contained in:
@@ -3,6 +3,8 @@ Test cases for the tokenizer loading
|
||||
"""
|
||||
import unittest
|
||||
|
||||
import pytest
|
||||
|
||||
from axolotl.utils.dict import DictDefault
|
||||
from axolotl.utils.models import load_tokenizer
|
||||
|
||||
@@ -31,6 +33,40 @@ class TestTokenizers(unittest.TestCase):
|
||||
tokenizer = load_tokenizer(cfg)
|
||||
assert "Fast" not in tokenizer.__class__.__name__
|
||||
|
||||
def test_special_tokens_modules_to_save(self):
    """Check when load_tokenizer demands lora_modules_to_save.

    Replacing a special token while an adapter is configured must raise a
    ValueError that tells the user to set lora_modules_to_save. Setting a
    special token to its unchanged default, or replacing one without an
    adapter, must load without raising.
    """
    # Adapter configured and bos_token replaced with a new token: must raise.
    cfg = DictDefault(
        {
            "tokenizer_config": "huggyllama/llama-7b",
            "adapter": "lora",
            "special_tokens": {"bos_token": "[INST]"},
        }
    )
    # pytest.raises applies `match` via re.search, so no leading ".*" is
    # needed. The pattern spells the option name in full; a trailing "*"
    # would only quantify the final "e" and accept a truncated name.
    with pytest.raises(
        ValueError,
        match=r"Please set lora_modules_to_save",
    ):
        load_tokenizer(cfg)

    # Adapter configured but bos_token left at its default: must not raise.
    cfg = DictDefault(
        {
            "tokenizer_config": "huggyllama/llama-7b",
            "adapter": "lora",
            "special_tokens": {"bos_token": "<s>"},
        }
    )
    load_tokenizer(cfg)

    # No adapter configured: replacing bos_token must not raise.
    cfg = DictDefault(
        {
            "tokenizer_config": "huggyllama/llama-7b",
            "special_tokens": {"bos_token": "[INST]"},
        }
    )
    load_tokenizer(cfg)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user