set tokenizer_config in fixture
This commit is contained in:
@@ -33,6 +33,7 @@ def download_model():
|
|||||||
def sft_base_cfg():
|
def sft_base_cfg():
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
base_model="HuggingFaceTB/SmolLM2-135M",
|
base_model="HuggingFaceTB/SmolLM2-135M",
|
||||||
|
tokenizer_config="HuggingFaceTB/SmolLM2-135M", # this has to be manually set since we haven't done validation
|
||||||
sequence_len=2048,
|
sequence_len=2048,
|
||||||
special_tokens={
|
special_tokens={
|
||||||
"pad_token": "<|endoftext|>",
|
"pad_token": "<|endoftext|>",
|
||||||
|
|||||||
Reference in New Issue
Block a user