Compare commits

...

4 Commits

Author     SHA1        Message                                        Date
Wing Lian  a0670abc94  add output for train loss in assertian err    2025-04-18 08:11:11 -07:00
Wing Lian  08f287b57f  swap llama tests for 7m param model           2025-04-17 09:52:35 -07:00
Wing Lian  b4c7d9c29d  fix perplexity scores                         2025-04-17 07:58:53 -07:00
Wing Lian  d2637fb01d  first pass at modifying tests to use llama-7m 2025-04-16 21:14:04 -07:00
14 changed files with 45 additions and 39 deletions

View File

@@ -496,6 +496,12 @@ def dataset_fozziethebeat_alpaca_messages_2k_dpo_test_rev_ea82cff(
     return datasets.load_from_disk(ds_path)["train"]
 
 
+@pytest.fixture(scope="session", autouse=True)
+def download_tiny_llama_7m_model():
+    # download the model
+    return snapshot_download_w_retry("axolotl-ai-internal/llama-7m", repo_type="model")
+
+
 # # pylint: disable=redefined-outer-name,unused-argument
 # def test_load_fixtures(
 #     download_smollm2_135m_model,
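The snapshot_download_w_retry helper used by the new fixture is defined elsewhere in the test suite and is not part of this diff. As a rough sketch, assuming it simply wraps huggingface_hub.snapshot_download with basic retry/backoff (the retries and delay parameters below are hypothetical), it might look like:

import time

from huggingface_hub import snapshot_download


def snapshot_download_w_retry(repo_id, *, repo_type="model", retries=3, delay=5.0, **kwargs):
    # Hypothetical sketch: retry a Hub download with linear backoff so a
    # transient network error does not fail the session-scoped fixture.
    for attempt in range(retries):
        try:
            return snapshot_download(repo_id, repo_type=repo_type, **kwargs)
        except Exception:  # the real helper likely catches narrower Hub errors
            if attempt == retries - 1:
                raise
            time.sleep(delay * (attempt + 1))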

View File

@@ -90,7 +90,7 @@ class TestKnowledgeDistillation:
         train(cfg=cfg, dataset_meta=dataset_meta)
         assert (Path(temp_dir) / "model.safetensors").exists()
         check_tensorboard(
-            temp_dir + "/runs", "train/loss", 1.0, "Train Loss is too high"
+            temp_dir + "/runs", "train/loss", 1.0, "Train loss (%s) is too high"
         )
 
     @pytest.mark.parametrize(
@@ -121,5 +121,5 @@ class TestKnowledgeDistillation:
         train(cfg=cfg, dataset_meta=dataset_meta)
         assert (Path(temp_dir) / "adapter_model.safetensors").exists()
         check_tensorboard(
-            temp_dir + "/runs", "train/loss", 1.0, "Train Loss is too high"
+            temp_dir + "/runs", "train/loss", 1.0, "Train loss (%s) is too high"
         )
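The only behavioral change in these assertion messages, here and in the files below, is the %s placeholder: per commit a0670abc94, the helper can now format the observed loss into the AssertionError, so a failing run reports the actual value rather than just "Train Loss is too high". check_tensorboard itself is not shown in this diff; a minimal sketch of what such a helper might do, assuming it reads the logged scalar through TensorBoard's EventAccumulator (only the call signature is taken from the tests, the internals below are assumptions):

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator


def check_tensorboard(log_dir, tag, threshold, assertion_message):
    # Hypothetical sketch: read the scalars logged under `tag` and assert
    # the most recent value is below the threshold.
    acc = EventAccumulator(log_dir)
    acc.Reload()
    value = acc.Scalars(tag)[-1].value
    # A "%s" in the message is replaced with the observed loss, which is
    # the point of this change: the assertion error shows the real number.
    message = assertion_message % value if "%s" in assertion_message else assertion_message
    assert value < threshold, message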

View File

@@ -89,5 +89,5 @@ class TestPackedFlex:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high"
         )

View File

@@ -96,5 +96,5 @@ class TestMultiGPUGemma3:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 1.8, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 1.8, "Train loss (%s) is too high"
         )

View File

@@ -43,7 +43,7 @@ class TestMultiGPULlama:
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "base_model": "axolotl-ai-internal/llama-7m",
                 "sequence_len": 2048,
                 "adapter": "lora",
                 "lora_r": 8,
@@ -94,7 +94,7 @@ class TestMultiGPULlama:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
         )
 
     @pytest.mark.parametrize(
@@ -105,7 +105,7 @@ class TestMultiGPULlama:
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "base_model": "axolotl-ai-internal/llama-7m",
                 "sequence_len": 2048,
                 "sample_packing": True,
                 "eval_sample_packing": False,
@@ -159,14 +159,14 @@ class TestMultiGPULlama:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
         )
 
     def test_dpo_lora_ddp(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "base_model": "axolotl-ai-internal/llama-7m",
                 "sequence_len": 2048,
                 "sample_packing": False,
                 "eval_sample_packing": False,
@@ -244,7 +244,7 @@ class TestMultiGPULlama:
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "base_model": "axolotl-ai-internal/llama-7m",
                 "sequence_len": 2048,
                 "sample_packing": False,
                 "eval_sample_packing": False,
@@ -326,7 +326,7 @@ class TestMultiGPULlama:
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "base_model": "axolotl-ai-internal/llama-7m",
                 "sequence_len": 2048,
                 "val_set_size": 0.01,
                 "special_tokens": {
@@ -385,7 +385,7 @@ class TestMultiGPULlama:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
         )
 
     @pytest.mark.parametrize(
@@ -396,7 +396,7 @@ class TestMultiGPULlama:
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "base_model": "axolotl-ai-internal/llama-7m",
                 "sample_packing": True,
                 "pad_to_sequence_len": True,
                 "sequence_len": 1024,
@@ -457,7 +457,7 @@ class TestMultiGPULlama:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
         )
 
     @require_torch_2_6_0
@@ -475,7 +475,7 @@ class TestMultiGPULlama:
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "base_model": "axolotl-ai-internal/llama-7m",
                 "sample_packing": True,
                 "pad_to_sequence_len": True,
                 "sequence_len": 2048,
@@ -538,7 +538,7 @@ class TestMultiGPULlama:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.1, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.1, "Train loss (%s) is too high"
         )
 
     def test_fsdp_qlora_prequant_packed(self, temp_dir):
@@ -618,7 +618,7 @@ class TestMultiGPULlama:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
         )
 
     @pytest.mark.parametrize(
@@ -654,7 +654,7 @@ class TestMultiGPULlama:
             adapter = {}
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "base_model": "axolotl-ai-internal/llama-7m",
                 "sample_packing": True,
                 "pad_to_sequence_len": True,
                 "sequence_len": 1024,
@@ -702,7 +702,7 @@ class TestMultiGPULlama:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
         )
 
     @pytest.mark.parametrize(
@@ -728,7 +728,7 @@ class TestMultiGPULlama:
             adapter = {}
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "base_model": "axolotl-ai-internal/llama-7m",
                 "sample_packing": True,
                 "pad_to_sequence_len": True,
                 "sequence_len": 1024,
@@ -776,7 +776,7 @@ class TestMultiGPULlama:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
         )
 
     @pytest.mark.parametrize(
@@ -802,7 +802,7 @@ class TestMultiGPULlama:
             adapter = {}
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "base_model": "axolotl-ai-internal/llama-7m",
                 "sample_packing": True,
                 "pad_to_sequence_len": True,
                 "sequence_len": 1024,
@@ -850,7 +850,7 @@ class TestMultiGPULlama:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
         )
 
     @pytest.mark.skip(
@@ -860,7 +860,7 @@ class TestMultiGPULlama:
         # pylint: disable=duplicate-code
        cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM2-135M",
+                "base_model": "axolotl-ai-internal/llama-7m",
                 "fix_untrained_tokens": True,
                 "sequence_len": 512,
                 "val_set_size": 0.0,
@@ -917,5 +917,5 @@ class TestMultiGPULlama:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 4.0, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 4.0, "Train loss (%s) is too high"
         )

View File

@@ -80,7 +80,7 @@ class TestMultiGPURay:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
         )
 
     @require_torch_lt_2_6_0
@@ -138,5 +138,5 @@ class TestMultiGPURay:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
         )

View File

@@ -93,7 +93,7 @@ class TestSequenceParallelism:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.6, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.6, "Train loss (%s) is too high"
         )
 
     @pytest.mark.parametrize(

View File

@@ -86,5 +86,5 @@ class TestFAXentropyLlama:
         check_model_output_exists(temp_dir, cfg)
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 1.5, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 1.5, "Train loss (%s) is too high"
         )

View File

@@ -80,7 +80,7 @@ class TestUnslothQLoRA:
         check_model_output_exists(temp_dir, cfg)
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high"
         )
 
     def test_unsloth_llama_qlora_unpacked(self, temp_dir):
@@ -130,7 +130,7 @@ class TestUnslothQLoRA:
         check_model_output_exists(temp_dir, cfg)
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high"
         )
 
     @pytest.mark.parametrize(
@@ -185,5 +185,5 @@ class TestUnslothQLoRA:
         check_model_output_exists(temp_dir, cfg)
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high"
        )

View File

@@ -69,5 +69,5 @@ class TestPackedFlex(unittest.TestCase):
         train(cfg=cfg, dataset_meta=dataset_meta)
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high"
         )

View File

@@ -84,5 +84,5 @@ class TestPretrainLlama:
             temp_dir + "/runs",
             "train/train_loss",
             loss_threshold,
-            "Train Loss is too high",
+            "Train Loss (%s) is too high",
         )

View File

@@ -68,5 +68,5 @@ class TestPackedLlama(unittest.TestCase):
         train(cfg=cfg, dataset_meta=dataset_meta)
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high"
         )

View File

@@ -73,6 +73,6 @@ class TestRewardModelLoraSmolLM2(unittest.TestCase):
         train(cfg=cfg, dataset_meta=dataset_meta)
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.5, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.5, "Train loss (%s) is too high"
         )
         check_model_output_exists(temp_dir, cfg)

View File

@@ -8,7 +8,7 @@ from transformers.models.auto.tokenization_auto import AutoTokenizer
 
 from axolotl.utils.callbacks.perplexity import Perplexity
 
-MODEL_NAME = "HuggingFaceTB/SmolLM2-135M"
+MODEL_NAME = "axolotl-ai-internal/llama-7m"
 
 
 @fixture()
@@ -36,7 +36,7 @@ One day, a little fish named Fin was swimming near the shore. He saw a big crab
     """
     result = metric.compute(model, [sample_text])
     ppl = result["score"]
-    assert round(ppl, 2) == 7.41
+    assert round(ppl, 2) == 75.14
 
 
 def test_perplexity_short(model, metric):
@@ -44,4 +44,4 @@ def test_perplexity_short(model, metric):
     sample_text = "Once upon a time, there was a little car named Beep. Beep loved to go fast and play in the sun."
     result = metric.compute(model, [sample_text])
     ppl = result["score"]
-    assert round(ppl, 2) == 10.33
+    assert round(ppl, 2) == 70.54
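Both expected scores change because the fixture model changed, not the metric: the 7M-parameter llama-7m predicts these samples far worse than SmolLM2-135M, so perplexity rises from 7.41 to 75.14 and from 10.33 to 70.54 (commit b4c7d9c29d, "fix perplexity scores"). For reference, perplexity here is exp of the mean token-level cross-entropy; below is a minimal sketch of that computation with plain transformers, which may differ in detail (batching, striding) from axolotl's Perplexity callback:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def compute_perplexity(model_name: str, text: str) -> float:
    # Sketch: perplexity = exp(mean next-token cross-entropy) over the text.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    enc = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        # Passing labels=input_ids makes the model return the mean LM loss.
        loss = model(**enc, labels=enc["input_ids"]).loss
    return torch.exp(loss).item()


# e.g. compute_perplexity("axolotl-ai-internal/llama-7m", sample_text)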