From 10d18e6c977b0e53fbf19dc7fe1fef59bf0e3b57 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Fri, 28 Feb 2025 16:40:49 +0700
Subject: [PATCH] fix(test): replace jackfram llama with smollm

---
 tests/conftest.py                            |  4 ++--
 tests/e2e/patched/test_4d_multipack_llama.py |  4 ++--
 tests/e2e/patched/test_fused_llama.py        |  2 +-
 tests/e2e/patched/test_llama_s2_attention.py |  6 ++----
 .../e2e/patched/test_lora_llama_multipack.py |  4 ++--
 tests/e2e/test_dpo.py                        | 21 +++++++------------
 tests/e2e/test_llama.py                      |  3 +--
 tests/e2e/test_load_model.py                 |  5 ++---
 tests/e2e/test_lora_llama.py                 |  4 ++--
 tests/e2e/test_optimizers.py                 |  6 ++----
 tests/test_normalize_config.py               |  5 ++---
 tests/utils/test_models.py                   |  3 +--
 12 files changed, 26 insertions(+), 41 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 85e276722..607dd2116 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -47,9 +47,9 @@ def download_smollm2_135m_model():
 
 
 @pytest.fixture(scope="session", autouse=True)
-def download_llama_68m_random_model():
+def download_smollm2_135m_instruct_model():
     # download the model
-    snapshot_download_w_retry("JackFram/llama-68m")
+    snapshot_download_w_retry("HuggingFaceTB/SmolLM2-135M-Instruct")
 
 
 @pytest.fixture(scope="session", autouse=True)
diff --git a/tests/e2e/patched/test_4d_multipack_llama.py b/tests/e2e/patched/test_4d_multipack_llama.py
index 7beb71145..d4415972f 100644
--- a/tests/e2e/patched/test_4d_multipack_llama.py
+++ b/tests/e2e/patched/test_4d_multipack_llama.py
@@ -28,7 +28,7 @@ class Test4dMultipackLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "flash_attention": False,
                 "sdp_attention": True,
                 "sample_packing": True,
@@ -72,7 +72,7 @@ class Test4dMultipackLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "flash_attention": False,
                 "sdp_attention": False,
                 "sample_packing": True,
diff --git a/tests/e2e/patched/test_fused_llama.py b/tests/e2e/patched/test_fused_llama.py
index f8f245514..6b9daac33 100644
--- a/tests/e2e/patched/test_fused_llama.py
+++ b/tests/e2e/patched/test_fused_llama.py
@@ -32,7 +32,7 @@ class TestFusedLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "flash_attention": True,
                 "pad_to_sequence_len": True,
                 "flash_attn_fuse_qkv": True,
diff --git a/tests/e2e/patched/test_llama_s2_attention.py b/tests/e2e/patched/test_llama_s2_attention.py
index cfa70fd73..ab3ebae41 100644
--- a/tests/e2e/patched/test_llama_s2_attention.py
+++ b/tests/e2e/patched/test_llama_s2_attention.py
@@ -31,8 +31,7 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 16384,
                 "sample_packing": False,
                 "flash_attention": True,
@@ -77,8 +76,7 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 16384,
                 "sample_packing": False,
                 "flash_attention": True,
diff --git a/tests/e2e/patched/test_lora_llama_multipack.py b/tests/e2e/patched/test_lora_llama_multipack.py
index e544eb4fd..3c564a8a9 100644
--- a/tests/e2e/patched/test_lora_llama_multipack.py
+++ b/tests/e2e/patched/test_lora_llama_multipack.py
@@ -31,8 +31,7 @@ class TestLoraLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "sample_packing": True,
                 "flash_attention": True,
@@ -43,6 +42,7 @@ class TestLoraLlama(unittest.TestCase):
                 "lora_dropout": 0.05,
                 "lora_target_linear": True,
                 "val_set_size": 0.2,
+                "lora_modules_to_save": ["lm_head", "embed_tokens"],
                 "special_tokens": {
                     "unk_token": "<unk>",
                     "bos_token": "<s>",
diff --git a/tests/e2e/test_dpo.py b/tests/e2e/test_dpo.py
index cf7335805..ffb295079 100644
--- a/tests/e2e/test_dpo.py
+++ b/tests/e2e/test_dpo.py
@@ -31,8 +31,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -77,8 +76,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -124,8 +122,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -172,8 +169,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -218,8 +214,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -264,8 +259,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -314,8 +308,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
diff --git a/tests/e2e/test_llama.py b/tests/e2e/test_llama.py
index 77e70d8c2..8a6fbd75f 100644
--- a/tests/e2e/test_llama.py
+++ b/tests/e2e/test_llama.py
@@ -26,8 +26,7 @@ class TestLlama:
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "trust_remote_code": True,
                 "sequence_len": 512,
                 "val_set_size": 0.1,
diff --git a/tests/e2e/test_load_model.py b/tests/e2e/test_load_model.py
index 255b096b0..d906b8618 100644
--- a/tests/e2e/test_load_model.py
+++ b/tests/e2e/test_load_model.py
@@ -26,9 +26,8 @@ class TestLoadModelUtils:
         # load config
         self.cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
-                "tokenizer_config": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
+                "tokenizer_config": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": False,
                 "adapter": "lora",
diff --git a/tests/e2e/test_lora_llama.py b/tests/e2e/test_lora_llama.py
index d314fb197..f7c1b506a 100644
--- a/tests/e2e/test_lora_llama.py
+++ b/tests/e2e/test_lora_llama.py
@@ -28,8 +28,7 @@ class TestLoraLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -37,6 +36,7 @@ class TestLoraLlama(unittest.TestCase):
                 "lora_alpha": 16,
                 "lora_dropout": 0.05,
                 "lora_target_linear": True,
+                "lora_modules_to_save": ["lm_head", "embed_tokens"],
                 "val_set_size": 0.1,
                 "special_tokens": {
                     "unk_token": "<unk>",
diff --git a/tests/e2e/test_optimizers.py b/tests/e2e/test_optimizers.py
index 4b0ad1142..0847664ad 100644
--- a/tests/e2e/test_optimizers.py
+++ b/tests/e2e/test_optimizers.py
@@ -28,8 +28,7 @@ class TestCustomOptimizers(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -74,8 +73,7 @@ class TestCustomOptimizers(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
diff --git a/tests/test_normalize_config.py b/tests/test_normalize_config.py
index 0d663183d..90a3ed0f0 100644
--- a/tests/test_normalize_config.py
+++ b/tests/test_normalize_config.py
@@ -16,9 +16,8 @@ class NormalizeConfigTestCase(unittest.TestCase):
     def _get_base_cfg(self):
         return DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "base_model_config": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
+                "base_model_config": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "num_epochs": 1,
                 "micro_batch_size": 1,
                 "gradient_accumulation_steps": 1,
diff --git a/tests/utils/test_models.py b/tests/utils/test_models.py
index e78cdb5d7..c1297c3cd 100644
--- a/tests/utils/test_models.py
+++ b/tests/utils/test_models.py
@@ -18,9 +18,8 @@ class TestModelsUtils:
         # load config
         self.cfg = DictDefault(  # pylint: disable=attribute-defined-outside-init
             {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M-Instruct",
                 "model_type": "LlamaForCausalLM",
-                "tokenizer_type": "LlamaTokenizer",
                 "load_in_8bit": True,
                 "load_in_4bit": False,
                 "adapter": "lora",