set tokenizer_config in fixture

This commit is contained in:
Wing Lian
2025-07-06 12:24:21 -04:00
parent c40da3b5eb
commit 07ab737a55

View File

@@ -33,6 +33,7 @@ def download_model():
def sft_base_cfg():
cfg = DictDefault(
base_model="HuggingFaceTB/SmolLM2-135M",
tokenizer_config="HuggingFaceTB/SmolLM2-135M", # this has to be manually set since we haven't done validation
sequence_len=2048,
special_tokens={
"pad_token": "<|endoftext|>",