fix(dataset): normalize tokenizer config and change hash from tokenizer class to tokenizer path (#1298)

* fix(dataset): normalize the tokenizer config and compute the dataset hash from the tokenizer path instead of the tokenizer class

* fix: normalize config
This commit is contained in:
NanoCode012
2024-03-25 15:34:54 +09:00
committed by GitHub
parent 324d59ea0d
commit ff939d8a64
4 changed files with 13 additions and 4 deletions

View File

@@ -1,16 +1,18 @@
"""
unit tests for axolotl.core.trainer_builder
"""
import pytest
from axolotl.core.trainer_builder import HFDPOTrainerBuilder
from axolotl.utils.config import normalize_config
from axolotl.utils.dict import DictDefault
from axolotl.utils.models import load_model, load_tokenizer
@pytest.fixture(name="cfg")
def fixture_cfg():
return DictDefault(
cfg = DictDefault(
{
"base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6",
"model_type": "AutoModelForCausalLM",
@@ -34,6 +36,10 @@ def fixture_cfg():
}
)
normalize_config(cfg)
return cfg
@pytest.fixture(name="tokenizer")
def fixture_tokenizer(cfg):