diff --git a/tests/conftest.py b/tests/conftest.py
index 3f3cc2732..82e4e911b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -79,9 +79,9 @@ def download_smollm2_135m_model():


 @pytest.fixture(scope="session", autouse=True)
-def download_llama_68m_random_model():
+def download_smollm2_135m_gptq_model():
     # download the model
-    snapshot_download_w_retry("JackFram/llama-68m", repo_type="model")
+    snapshot_download_w_retry("lilmeaty/SmolLM2-135M-Instruct-GPTQ", repo_type="model")


 @pytest.fixture(scope="session", autouse=True)
diff --git a/tests/e2e/patched/test_4d_multipack_llama.py b/tests/e2e/patched/test_4d_multipack_llama.py
index 33ba47abd..270956883 100644
--- a/tests/e2e/patched/test_4d_multipack_llama.py
+++ b/tests/e2e/patched/test_4d_multipack_llama.py
@@ -28,7 +28,7 @@ class Test4dMultipackLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "flash_attention": False,
                 "sdp_attention": True,
                 "sample_packing": True,
@@ -41,6 +41,9 @@ class Test4dMultipackLlama(unittest.TestCase):
                 "lora_target_linear": True,
                 "sequence_len": 1024,
                 "val_set_size": 0.02,
+                "special_tokens": {
+                    "pad_token": "<|endoftext|>",
+                },
                 "datasets": [
                     {
                         "path": "mhenrichsen/alpaca_2k_test",
@@ -73,7 +76,7 @@ class Test4dMultipackLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "flash_attention": False,
                 "sdp_attention": False,
                 "sample_packing": True,
@@ -86,6 +89,9 @@ class Test4dMultipackLlama(unittest.TestCase):
                 "lora_dropout": 0.05,
                 "lora_target_linear": True,
                 "val_set_size": 0.02,
+                "special_tokens": {
+                    "pad_token": "<|endoftext|>",
+                },
                 "datasets": [
                     {
                         "path": "mhenrichsen/alpaca_2k_test",
diff --git a/tests/e2e/patched/test_fused_llama.py b/tests/e2e/patched/test_fused_llama.py
index 51dfec5f4..7725e095d 100644
--- a/tests/e2e/patched/test_fused_llama.py
+++ b/tests/e2e/patched/test_fused_llama.py
@@ -32,7 +32,7 @@ class TestFusedLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "flash_attention": True,
                 "pad_to_sequence_len": True,
                 "flash_attn_fuse_qkv": True,
@@ -41,9 +41,7 @@ class TestFusedLlama(unittest.TestCase):
                 "sequence_len": 1024,
                 "val_set_size": 0.02,
                 "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
+                    "pad_token": "<|endoftext|>",
                 },
                 "datasets": [
                     {
diff --git a/tests/e2e/patched/test_llama_s2_attention.py b/tests/e2e/patched/test_llama_s2_attention.py
index 3aa36772a..3cf43ba9d 100644
--- a/tests/e2e/patched/test_llama_s2_attention.py
+++ b/tests/e2e/patched/test_llama_s2_attention.py
@@ -31,8 +31,8 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 16384,
                 "sample_packing": False,
                 "flash_attention": True,
@@ -44,7 +44,9 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
                 "lora_dropout": 0.05,
                 "lora_target_linear": True,
                 "val_set_size": 0.02,
-                "special_tokens": {},
+                "special_tokens": {
+                    "pad_token": "<|endoftext|>",
+                },
                 "datasets": [
                     {
                         "path": "Yukang/LongAlpaca-12k",
@@ -78,14 +80,16 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 16384,
                 "sample_packing": False,
                 "flash_attention": True,
                 "s2_attention": True,
                 "val_set_size": 0.02,
-                "special_tokens": {},
+                "special_tokens": {
+                    "pad_token": "<|endoftext|>",
+                },
                 "datasets": [
                     {
                         "path": "Yukang/LongAlpaca-12k",
diff --git a/tests/e2e/patched/test_lora_llama_multipack.py b/tests/e2e/patched/test_lora_llama_multipack.py
index ab6e87e2a..ca989f241 100644
--- a/tests/e2e/patched/test_lora_llama_multipack.py
+++ b/tests/e2e/patched/test_lora_llama_multipack.py
@@ -31,8 +31,8 @@ class TestLoraLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 1024,
                 "sample_packing": True,
                 "flash_attention": True,
@@ -44,9 +44,7 @@ class TestLoraLlama(unittest.TestCase):
                 "lora_target_linear": True,
                 "val_set_size": 0.2,
                 "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
+                    "pad_token": "<|endoftext|>",
                 },
                 "datasets": [
                     {
@@ -84,9 +82,9 @@ class TestLoraLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "TheBlokeAI/jackfram_llama-68m-GPTQ",
+                "base_model": "lilmeaty/SmolLM2-135M-Instruct-GPTQ",
                 "model_type": "AutoModelForCausalLM",
-                "tokenizer_type": "LlamaTokenizer",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 1024,
                 "sample_packing": True,
                 "flash_attention": True,
@@ -100,9 +98,7 @@ class TestLoraLlama(unittest.TestCase):
                 "lora_target_linear": True,
                 "val_set_size": 0.02,
                 "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
+                    "pad_token": "<|endoftext|>",
                 },
                 "datasets": [
                     {
diff --git a/tests/e2e/test_dpo.py b/tests/e2e/test_dpo.py
index cf7335805..84d723ec0 100644
--- a/tests/e2e/test_dpo.py
+++ b/tests/e2e/test_dpo.py
@@ -31,8 +31,8 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -40,7 +40,9 @@ class TestDPOLlamaLora(unittest.TestCase):
                 "lora_alpha": 32,
                 "lora_dropout": 0.1,
                 "lora_target_linear": True,
-                "special_tokens": {},
+                "special_tokens": {
+                    "pad_token": "<|endoftext|>",
+                },
                 "rl": "dpo",
                 "datasets": [
                     {
@@ -77,8 +79,8 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -86,7 +88,9 @@ class TestDPOLlamaLora(unittest.TestCase):
                 "lora_alpha": 32,
                 "lora_dropout": 0.1,
                 "lora_target_linear": True,
-                "special_tokens": {},
+                "special_tokens": {
+                    "pad_token": "<|endoftext|>",
+                },
                 "rl": "dpo",
                 "rpo_alpha": 0.5,
                 "datasets": [
@@ -124,8 +128,8 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -133,7 +137,9 @@ class TestDPOLlamaLora(unittest.TestCase):
                 "lora_alpha": 32,
                 "lora_dropout": 0.1,
                 "lora_target_linear": True,
-                "special_tokens": {},
+                "special_tokens": {
+                    "pad_token": "<|endoftext|>",
+                },
                 "rl": "dpo",
                 "dpo_use_weighting": True,
                 "datasets": [
@@ -172,8 +178,8 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -181,7 +187,9 @@ class TestDPOLlamaLora(unittest.TestCase):
                 "lora_alpha": 32,
                 "lora_dropout": 0.1,
                 "lora_target_linear": True,
-                "special_tokens": {},
+                "special_tokens": {
+                    "pad_token": "<|endoftext|>",
+                },
                 "rl": "kto_pair",
                 "datasets": [
                     {
@@ -218,8 +226,8 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -227,7 +235,9 @@ class TestDPOLlamaLora(unittest.TestCase):
                 "lora_alpha": 32,
                 "lora_dropout": 0.1,
                 "lora_target_linear": True,
-                "special_tokens": {},
+                "special_tokens": {
+                    "pad_token": "<|endoftext|>",
+                },
                 "rl": "ipo",
                 "datasets": [
                     {
@@ -264,8 +274,8 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -273,7 +283,9 @@ class TestDPOLlamaLora(unittest.TestCase):
                 "lora_alpha": 32,
                 "lora_dropout": 0.1,
                 "lora_target_linear": True,
-                "special_tokens": {},
+                "special_tokens": {
+                    "pad_token": "<|endoftext|>",
+                },
                 "rl": "orpo",
                 "orpo_alpha": 0.1,
                 "remove_unused_columns": False,
@@ -314,7 +326,7 @@ class TestDPOLlamaLora(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "tokenizer_type": "LlamaTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
@@ -323,7 +335,9 @@ class TestDPOLlamaLora(unittest.TestCase):
                 "lora_alpha": 32,
                 "lora_dropout": 0.1,
                 "lora_target_linear": True,
-                "special_tokens": {},
+                "special_tokens": {
+                    "pad_token": "<|endoftext|>",
+                },
                 "rl": "kto",
                 "rl_beta": 0.5,
                 "kto_desirable_weight": 1.0,
diff --git a/tests/e2e/test_llama.py b/tests/e2e/test_llama.py
index b84e432b5..d3e37fb3f 100644
--- a/tests/e2e/test_llama.py
+++ b/tests/e2e/test_llama.py
@@ -26,15 +26,13 @@ class TestLlama:
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "tokenizer_type": "AutoTokenizer",
                 "trust_remote_code": True,
                 "sequence_len": 512,
                 "val_set_size": 0.02,
                 "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
+                    "pad_token": "<|endoftext|>",
                 },
                 "datasets": [
                     {
diff --git a/tests/e2e/test_load_model.py b/tests/e2e/test_load_model.py
index 2128dbef2..96745c040 100644
--- a/tests/e2e/test_load_model.py
+++ b/tests/e2e/test_load_model.py
@@ -26,9 +26,9 @@ class TestLoadModelUtils:
         # load config
         self.cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
-                "tokenizer_config": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "tokenizer_type": "AutoTokenizer",
+                "tokenizer_config": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 1024,
                 "load_in_8bit": False,
                 "adapter": "lora",
@@ -38,9 +38,7 @@ class TestLoadModelUtils:
                 "lora_target_linear": True,
                 "val_set_size": 0.02,
                 "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
+                    "pad_token": "<|endoftext|>",
                 },
                 "datasets": [
                     {
diff --git a/tests/e2e/test_lora_llama.py b/tests/e2e/test_lora_llama.py
index 8328d5b90..e5a734b33 100644
--- a/tests/e2e/test_lora_llama.py
+++ b/tests/e2e/test_lora_llama.py
@@ -28,8 +28,8 @@ class TestLoraLlama(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -39,9 +39,7 @@ class TestLoraLlama(unittest.TestCase):
                 "lora_target_linear": True,
                 "val_set_size": 0.02,
                 "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
+                    "pad_token": "<|endoftext|>",
                 },
                 "datasets": [
                     {
diff --git a/tests/e2e/test_optimizers.py b/tests/e2e/test_optimizers.py
index 8a82e3469..d3ff27ca5 100644
--- a/tests/e2e/test_optimizers.py
+++ b/tests/e2e/test_optimizers.py
@@ -28,8 +28,9 @@ class TestCustomOptimizers(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "model_type": "AutoModelForCausalLM",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -39,9 +40,7 @@ class TestCustomOptimizers(unittest.TestCase):
                 "lora_target_linear": True,
                 "val_set_size": 0.02,
                 "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
+                    "pad_token": "<|endoftext|>",
                 },
                 "datasets": [
                     {
@@ -75,8 +74,9 @@ class TestCustomOptimizers(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "model_type": "AutoModelForCausalLM",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -86,9 +86,7 @@ class TestCustomOptimizers(unittest.TestCase):
                 "lora_target_linear": True,
                 "val_set_size": 0.02,
                 "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
+                    "pad_token": "<|endoftext|>",
                 },
                 "datasets": [
                     {
@@ -122,8 +120,9 @@ class TestCustomOptimizers(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "model_type": "AutoModelForCausalLM",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -133,9 +132,7 @@ class TestCustomOptimizers(unittest.TestCase):
                 "lora_target_linear": True,
                 "val_set_size": 0.02,
                 "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
+                    "pad_token": "<|endoftext|>",
                 },
                 "datasets": [
                     {
@@ -170,6 +167,7 @@ class TestCustomOptimizers(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "model_type": "AutoModelForCausalLM",
                 "sequence_len": 1024,
                 "val_set_size": 0.01,
                 "special_tokens": {
diff --git a/tests/e2e/test_schedulers.py b/tests/e2e/test_schedulers.py
index c20cebf4e..694bb21e8 100644
--- a/tests/e2e/test_schedulers.py
+++ b/tests/e2e/test_schedulers.py
@@ -28,8 +28,8 @@ class TestCustomSchedulers(unittest.TestCase):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "tokenizer_type": "AutoTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
                 "adapter": "lora",
@@ -39,9 +39,7 @@ class TestCustomSchedulers(unittest.TestCase):
                 "lora_target_linear": True,
                 "val_set_size": 0.02,
                 "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
+                    "pad_token": "<|endoftext|>",
                 },
                 "datasets": [
                     {
diff --git a/tests/test_normalize_config.py b/tests/test_normalize_config.py
index c8ca3e550..ea98bf97d 100644
--- a/tests/test_normalize_config.py
+++ b/tests/test_normalize_config.py
@@ -17,9 +17,9 @@ class NormalizeConfigTestCase(unittest.TestCase):
     def _get_base_cfg(self):
         return DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
-                "base_model_config": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "base_model_config": "HuggingFaceTB/SmolLM2-135M",
+                "tokenizer_type": "AutoTokenizer",
                 "num_epochs": 1,
                 "micro_batch_size": 1,
                 "gradient_accumulation_steps": 1,
diff --git a/tests/utils/test_models.py b/tests/utils/test_models.py
index 83678430a..bcc1ba5d1 100644
--- a/tests/utils/test_models.py
+++ b/tests/utils/test_models.py
@@ -18,9 +18,9 @@ class TestModelsUtils:
         # load config
         self.cfg = DictDefault(  # pylint: disable=attribute-defined-outside-init
            {
-                "base_model": "JackFram/llama-68m",
-                "model_type": "LlamaForCausalLM",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "model_type": "AutoModelForCausalLM",
+                "tokenizer_type": "AutoTokenizer",
                 "load_in_8bit": True,
                 "load_in_4bit": False,
                 "adapter": "lora",
@@ -65,7 +65,7 @@ class TestModelsUtils:
                 "s2_attention": True,
                 "sample_packing": True,
                 "base_model": "",
-                "model_type": "LlamaForCausalLM",
+                "model_type": "AutoModelForCausalLM",
             }
         )