From a0670abc94bd70927ad80ef7f1e8a3b74f3f14f5 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Fri, 18 Apr 2025 08:11:11 -0700 Subject: [PATCH] add output for train loss in assertian err --- tests/e2e/integrations/test_kd.py | 4 ++-- tests/e2e/multigpu/solo/test_flex.py | 2 +- tests/e2e/multigpu/test_gemma3.py | 2 +- tests/e2e/multigpu/test_llama.py | 20 ++++++++++---------- tests/e2e/multigpu/test_ray.py | 4 ++-- tests/e2e/multigpu/test_sp.py | 2 +- tests/e2e/patched/test_fa_xentropy.py | 2 +- tests/e2e/patched/test_unsloth_qlora.py | 6 +++--- tests/e2e/solo/test_flex.py | 2 +- tests/e2e/test_llama_pretrain.py | 2 +- tests/e2e/test_packing_loss.py | 2 +- tests/e2e/test_reward_model_smollm2.py | 2 +- 12 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tests/e2e/integrations/test_kd.py b/tests/e2e/integrations/test_kd.py index 9bfe5aaef..681674f1c 100644 --- a/tests/e2e/integrations/test_kd.py +++ b/tests/e2e/integrations/test_kd.py @@ -90,7 +90,7 @@ class TestKnowledgeDistillation: train(cfg=cfg, dataset_meta=dataset_meta) assert (Path(temp_dir) / "model.safetensors").exists() check_tensorboard( - temp_dir + "/runs", "train/loss", 1.0, "Train Loss is too high" + temp_dir + "/runs", "train/loss", 1.0, "Train loss (%s) is too high" ) @pytest.mark.parametrize( @@ -121,5 +121,5 @@ class TestKnowledgeDistillation: train(cfg=cfg, dataset_meta=dataset_meta) assert (Path(temp_dir) / "adapter_model.safetensors").exists() check_tensorboard( - temp_dir + "/runs", "train/loss", 1.0, "Train Loss is too high" + temp_dir + "/runs", "train/loss", 1.0, "Train loss (%s) is too high" ) diff --git a/tests/e2e/multigpu/solo/test_flex.py b/tests/e2e/multigpu/solo/test_flex.py index cbe3794b3..785788451 100644 --- a/tests/e2e/multigpu/solo/test_flex.py +++ b/tests/e2e/multigpu/solo/test_flex.py @@ -89,5 +89,5 @@ class TestPackedFlex: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.0, 
"Train loss (%s) is too high" ) diff --git a/tests/e2e/multigpu/test_gemma3.py b/tests/e2e/multigpu/test_gemma3.py index 9de3ed82f..7de594718 100644 --- a/tests/e2e/multigpu/test_gemma3.py +++ b/tests/e2e/multigpu/test_gemma3.py @@ -96,5 +96,5 @@ class TestMultiGPUGemma3: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 1.8, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 1.8, "Train loss (%s) is too high" ) diff --git a/tests/e2e/multigpu/test_llama.py b/tests/e2e/multigpu/test_llama.py index 6f875c4a7..28183d3f0 100644 --- a/tests/e2e/multigpu/test_llama.py +++ b/tests/e2e/multigpu/test_llama.py @@ -94,7 +94,7 @@ class TestMultiGPULlama: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high" ) @pytest.mark.parametrize( @@ -159,7 +159,7 @@ class TestMultiGPULlama: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high" ) def test_dpo_lora_ddp(self, temp_dir): @@ -385,7 +385,7 @@ class TestMultiGPULlama: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high" ) @pytest.mark.parametrize( @@ -457,7 +457,7 @@ class TestMultiGPULlama: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high" ) @require_torch_2_6_0 @@ -538,7 +538,7 @@ class TestMultiGPULlama: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.1, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.1, "Train loss (%s) is too high" ) def test_fsdp_qlora_prequant_packed(self, temp_dir): @@ -618,7 +618,7 @@ class TestMultiGPULlama: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.3, 
"Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high" ) @pytest.mark.parametrize( @@ -702,7 +702,7 @@ class TestMultiGPULlama: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high" ) @pytest.mark.parametrize( @@ -776,7 +776,7 @@ class TestMultiGPULlama: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high" ) @pytest.mark.parametrize( @@ -850,7 +850,7 @@ class TestMultiGPULlama: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high" ) @pytest.mark.skip( @@ -917,5 +917,5 @@ class TestMultiGPULlama: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 4.0, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 4.0, "Train loss (%s) is too high" ) diff --git a/tests/e2e/multigpu/test_ray.py b/tests/e2e/multigpu/test_ray.py index 843adac91..5a816fb57 100644 --- a/tests/e2e/multigpu/test_ray.py +++ b/tests/e2e/multigpu/test_ray.py @@ -80,7 +80,7 @@ class TestMultiGPURay: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high" ) @require_torch_lt_2_6_0 @@ -138,5 +138,5 @@ class TestMultiGPURay: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high" ) diff --git a/tests/e2e/multigpu/test_sp.py b/tests/e2e/multigpu/test_sp.py index 72e5cb88c..dc1f7494b 100644 --- a/tests/e2e/multigpu/test_sp.py +++ b/tests/e2e/multigpu/test_sp.py @@ -93,7 +93,7 @@ class TestSequenceParallelism: ) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 
2.6, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.6, "Train loss (%s) is too high" ) @pytest.mark.parametrize( diff --git a/tests/e2e/patched/test_fa_xentropy.py b/tests/e2e/patched/test_fa_xentropy.py index f71e4fb4a..39bb38c69 100644 --- a/tests/e2e/patched/test_fa_xentropy.py +++ b/tests/e2e/patched/test_fa_xentropy.py @@ -86,5 +86,5 @@ class TestFAXentropyLlama: check_model_output_exists(temp_dir, cfg) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 1.5, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 1.5, "Train loss (%s) is too high" ) diff --git a/tests/e2e/patched/test_unsloth_qlora.py b/tests/e2e/patched/test_unsloth_qlora.py index 4cea0d26f..be31bf106 100644 --- a/tests/e2e/patched/test_unsloth_qlora.py +++ b/tests/e2e/patched/test_unsloth_qlora.py @@ -80,7 +80,7 @@ class TestUnslothQLoRA: check_model_output_exists(temp_dir, cfg) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high" ) def test_unsloth_llama_qlora_unpacked(self, temp_dir): @@ -130,7 +130,7 @@ class TestUnslothQLoRA: check_model_output_exists(temp_dir, cfg) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high" ) @pytest.mark.parametrize( @@ -185,5 +185,5 @@ class TestUnslothQLoRA: check_model_output_exists(temp_dir, cfg) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high" ) diff --git a/tests/e2e/solo/test_flex.py b/tests/e2e/solo/test_flex.py index 6de813e37..28c926bcd 100644 --- a/tests/e2e/solo/test_flex.py +++ b/tests/e2e/solo/test_flex.py @@ -69,5 +69,5 @@ class TestPackedFlex(unittest.TestCase): train(cfg=cfg, dataset_meta=dataset_meta) check_tensorboard( - temp_dir + "/runs", 
"train/train_loss", 2.0, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high" ) diff --git a/tests/e2e/test_llama_pretrain.py b/tests/e2e/test_llama_pretrain.py index 647285e46..a8bfb60da 100644 --- a/tests/e2e/test_llama_pretrain.py +++ b/tests/e2e/test_llama_pretrain.py @@ -84,5 +84,5 @@ class TestPretrainLlama: temp_dir + "/runs", "train/train_loss", loss_threshold, - "Train Loss is too high", + "Train loss (%s) is too high", ) diff --git a/tests/e2e/test_packing_loss.py b/tests/e2e/test_packing_loss.py index 4e8e70419..e4c448963 100644 --- a/tests/e2e/test_packing_loss.py +++ b/tests/e2e/test_packing_loss.py @@ -68,5 +68,5 @@ class TestPackedLlama(unittest.TestCase): train(cfg=cfg, dataset_meta=dataset_meta) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high" ) diff --git a/tests/e2e/test_reward_model_smollm2.py b/tests/e2e/test_reward_model_smollm2.py index 240c4b392..9411ba304 100644 --- a/tests/e2e/test_reward_model_smollm2.py +++ b/tests/e2e/test_reward_model_smollm2.py @@ -73,6 +73,6 @@ class TestRewardModelLoraSmolLM2(unittest.TestCase): train(cfg=cfg, dataset_meta=dataset_meta) check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.5, "Train Loss is too high" + temp_dir + "/runs", "train/train_loss", 2.5, "Train loss (%s) is too high" ) check_model_output_exists(temp_dir, cfg)