Optimize e2e tests to run a bit faster (#2069) [skip ci]
* Optimize e2e tests to run a bit faster * Run prequant without lora_modules_to_save * Use SmolLM2
This commit is contained in:
@@ -5,6 +5,25 @@ import shutil
|
|||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from huggingface_hub import snapshot_download
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
|
def download_smollm2_135m_model():
|
||||||
|
# download the model
|
||||||
|
snapshot_download("HuggingFaceTB/SmolLM2-135M")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
|
def download_tatsu_lab_alpaca_dataset():
|
||||||
|
# download the dataset
|
||||||
|
snapshot_download("tatsu-lab/alpaca", repo_type="dataset")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
|
def download_mhenrichsen_alpaca_2k_dataset():
|
||||||
|
# download the dataset
|
||||||
|
snapshot_download("mhenrichsen/alpaca_2k_test", repo_type="dataset")
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ AXOLOTL_ROOT = Path(__file__).parent.parent.parent.parent
|
|||||||
@pytest.fixture(scope="session", autouse=True)
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
def download_model():
|
def download_model():
|
||||||
# download the model
|
# download the model
|
||||||
snapshot_download("TinyLlama/TinyLlama_v1.1")
|
snapshot_download("HuggingFaceTB/SmolLM2-135M")
|
||||||
|
|
||||||
|
|
||||||
class TestMultiGPULlama:
|
class TestMultiGPULlama:
|
||||||
@@ -37,7 +37,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM-135M",
|
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"adapter": "lora",
|
"adapter": "lora",
|
||||||
"lora_r": 8,
|
"lora_r": 8,
|
||||||
@@ -93,7 +93,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM-135M",
|
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"eval_sample_packing": False,
|
"eval_sample_packing": False,
|
||||||
@@ -149,8 +149,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "TinyLlama/TinyLlama_v1.1",
|
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||||
"tokenizer_type": "LlamaTokenizer",
|
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"sample_packing": False,
|
"sample_packing": False,
|
||||||
"eval_sample_packing": False,
|
"eval_sample_packing": False,
|
||||||
@@ -163,12 +162,10 @@ class TestMultiGPULlama:
|
|||||||
"lora_target_linear": True,
|
"lora_target_linear": True,
|
||||||
"val_set_size": 0.05,
|
"val_set_size": 0.05,
|
||||||
"special_tokens": {
|
"special_tokens": {
|
||||||
"unk_token": "<unk>",
|
"pad_token": "<|endoftext|>",
|
||||||
"bos_token": "<s>",
|
|
||||||
"eos_token": "</s>",
|
|
||||||
},
|
},
|
||||||
"rl": "dpo",
|
"rl": "dpo",
|
||||||
"chat_template": "llama3",
|
"chat_template": "chatml",
|
||||||
"datasets": [
|
"datasets": [
|
||||||
{
|
{
|
||||||
"path": "fozziethebeat/alpaca_messages_2k_dpo_test",
|
"path": "fozziethebeat/alpaca_messages_2k_dpo_test",
|
||||||
@@ -221,7 +218,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM-135M",
|
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"sample_packing": False,
|
"sample_packing": False,
|
||||||
"eval_sample_packing": False,
|
"eval_sample_packing": False,
|
||||||
@@ -294,7 +291,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM-135M",
|
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"val_set_size": 0.01,
|
"val_set_size": 0.01,
|
||||||
"special_tokens": {
|
"special_tokens": {
|
||||||
@@ -359,7 +356,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM-135M",
|
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
@@ -422,8 +419,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "axolotl-ai-co/TinyLlama_v1.1-bnb-nf4-bf16",
|
"base_model": "axolotl-ai-co/SmolLM2-135M-bnb-nf4-bf16",
|
||||||
"tokenizer_type": "AutoTokenizer",
|
|
||||||
"adapter": "qlora",
|
"adapter": "qlora",
|
||||||
"mean_resizing_embeddings": True,
|
"mean_resizing_embeddings": True,
|
||||||
"load_in_4bit": True,
|
"load_in_4bit": True,
|
||||||
@@ -431,17 +427,17 @@ class TestMultiGPULlama:
|
|||||||
"lora_alpha": 16,
|
"lora_alpha": 16,
|
||||||
"lora_dropout": 0.05,
|
"lora_dropout": 0.05,
|
||||||
"lora_target_linear": True,
|
"lora_target_linear": True,
|
||||||
"lora_modules_to_save": [
|
# "lora_modules_to_save": [
|
||||||
"embed_tokens",
|
# "embed_tokens",
|
||||||
"lm_head",
|
# "lm_head",
|
||||||
],
|
# ],
|
||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"eval_sample_packing": False,
|
"eval_sample_packing": False,
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"val_set_size": 0.05,
|
"val_set_size": 0.05,
|
||||||
"special_tokens": {
|
"special_tokens": {
|
||||||
"pad_token": "</s>",
|
"pad_token": "<|endoftext|>",
|
||||||
},
|
},
|
||||||
"datasets": [
|
"datasets": [
|
||||||
{
|
{
|
||||||
@@ -503,7 +499,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM-135M",
|
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
@@ -553,7 +549,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM-135M",
|
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||||
"load_in_4bit": True,
|
"load_in_4bit": True,
|
||||||
"adapter": "qlora",
|
"adapter": "qlora",
|
||||||
"lora_r": 8,
|
"lora_r": 8,
|
||||||
|
|||||||
@@ -66,6 +66,8 @@ class TestFAXentropyLlama(unittest.TestCase):
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
"num_epochs": 1,
|
"num_epochs": 1,
|
||||||
|
"max_steps": 10,
|
||||||
|
"save_steps": 10,
|
||||||
"micro_batch_size": 8,
|
"micro_batch_size": 8,
|
||||||
"gradient_accumulation_steps": 1,
|
"gradient_accumulation_steps": 1,
|
||||||
"output_dir": temp_dir,
|
"output_dir": temp_dir,
|
||||||
|
|||||||
@@ -56,6 +56,8 @@ class TestLoraLlama(unittest.TestCase):
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
"num_epochs": 2,
|
"num_epochs": 2,
|
||||||
|
"max_steps": 20,
|
||||||
|
"save_steps": 10,
|
||||||
"micro_batch_size": 8,
|
"micro_batch_size": 8,
|
||||||
"gradient_accumulation_steps": 1,
|
"gradient_accumulation_steps": 1,
|
||||||
"output_dir": temp_dir,
|
"output_dir": temp_dir,
|
||||||
@@ -109,6 +111,7 @@ class TestLoraLlama(unittest.TestCase):
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
"num_epochs": 2,
|
"num_epochs": 2,
|
||||||
|
"max_steps": 20,
|
||||||
"save_steps": 0.5,
|
"save_steps": 0.5,
|
||||||
"micro_batch_size": 8,
|
"micro_batch_size": 8,
|
||||||
"gradient_accumulation_steps": 1,
|
"gradient_accumulation_steps": 1,
|
||||||
|
|||||||
@@ -113,7 +113,7 @@ class TestCustomOptimizers(unittest.TestCase):
|
|||||||
def test_fft_schedule_free_adamw(self, temp_dir):
|
def test_fft_schedule_free_adamw(self, temp_dir):
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM-135M",
|
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||||
"sequence_len": 1024,
|
"sequence_len": 1024,
|
||||||
"val_set_size": 0.1,
|
"val_set_size": 0.1,
|
||||||
"special_tokens": {
|
"special_tokens": {
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ class TestPackedLlama(unittest.TestCase):
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM-135M",
|
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
||||||
"sequence_len": 1024,
|
"sequence_len": 1024,
|
||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"flash_attention": True,
|
"flash_attention": True,
|
||||||
|
|||||||
Reference in New Issue
Block a user