bump transformers==4.52.4 (#2800) [skip ci]

* bump transformers==4.52.4
* don't use hf offline for qwen tokenizer
* increase timeout
* don't use methodtype
* increase timeout
* better assertion logging
* upgrade deepspeed version too
2025-06-18 15:46:14 -04:00
parent 06a648263b
commit a85efffbef
18 changed files with 34 additions and 35 deletions
--- a/tests/e2e/patched/test_fa_xentropy.py
+++ b/tests/e2e/patched/test_fa_xentropy.py
@@ -78,5 +78,5 @@ class TestFAXentropyLlama:
        check_model_output_exists(temp_dir, cfg)

        check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 1.5, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 1.5, "Train Loss (%s) is too high"
        )
--- a/tests/e2e/patched/test_unsloth_qlora.py
+++ b/tests/e2e/patched/test_unsloth_qlora.py
@@ -73,7 +73,7 @@ class TestUnslothQLoRA:
        check_model_output_exists(temp_dir, cfg)

        check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss (%s) is too high"
        )

    def test_unsloth_llama_qlora_unpacked(self, temp_dir):
@@ -123,7 +123,7 @@ class TestUnslothQLoRA:
        check_model_output_exists(temp_dir, cfg)

        check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss (%s) is too high"
        )

    @pytest.mark.parametrize(
@@ -178,5 +178,5 @@ class TestUnslothQLoRA:
        check_model_output_exists(temp_dir, cfg)

        check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
+            temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss (%s) is too high"
        )