diff --git a/tests/e2e/multigpu/test_llama.py b/tests/e2e/multigpu/test_llama.py
index 3893a8ed3..635e08529 100644
--- a/tests/e2e/multigpu/test_llama.py
+++ b/tests/e2e/multigpu/test_llama.py
@@ -48,6 +48,7 @@ def sft_base_cfg():
         flash_attention=True,
         learning_rate=0.00001,
         optimizer="adamw_8bit",
+        seed=42,
         # these need to be set since we aren't running schema validation
         micro_batch_size=2,
         gradient_accumulation_steps=1,
@@ -431,7 +432,7 @@ class TestMultiGPULlama:
         )
 
         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss (%s) is too high"
+            temp_dir + "/runs", "train/train_loss", 2.4, "Train Loss (%s) is too high"
         )
 
     @pytest.mark.parametrize(