Replace references to the random 68M model with the 135M SmolLM2 (#2570) [skip ci]

* Replace references to the random 68M model with the 135M SmolLM2

* use AutoTokenizer for smollm2
This commit is contained in:
Wing Lian
2025-04-28 10:08:07 -04:00
committed by GitHub
parent f1df73a798
commit 40f4ea23ab
13 changed files with 95 additions and 87 deletions

View File

@@ -32,7 +32,7 @@ class TestFusedLlama(unittest.TestCase):
# pylint: disable=duplicate-code
cfg = DictDefault(
{
- "base_model": "JackFram/llama-68m",
+ "base_model": "HuggingFaceTB/SmolLM2-135M",
"flash_attention": True,
"pad_to_sequence_len": True,
"flash_attn_fuse_qkv": True,
@@ -41,9 +41,7 @@ class TestFusedLlama(unittest.TestCase):
"sequence_len": 1024,
"val_set_size": 0.02,
"special_tokens": {
- "unk_token": "<unk>",
- "bos_token": "<s>",
- "eos_token": "</s>",
+ "pad_token": "<|endoftext|>",
},
"datasets": [
{