make e2e tests a bit faster by reducing test split size (#2522) [skip ci]

* [ci] make e2e tests a bit faster by reducing test split size
* use a 10% split of the alpaca dataset to speed up dataset loading/tokenization
* reduce gradient_accumulation_steps (gas) from 4 to 2 for most e2e tests
* increase val set size for packing
2025-04-12 07:24:43 -07:00
parent 51267ded04
commit de8a625dd7
24 changed files with 54 additions and 44 deletions
--- a/tests/e2e/test_gemma2.py
+++ b/tests/e2e/test_gemma2.py
@@ -62,7 +62,7 @@ class TestGemma2:
                "chat_template": "gemma",  # gemma2's template is same as gemma
                "num_epochs": 1,
                "micro_batch_size": 1,
-                "gradient_accumulation_steps": 4,
+                "gradient_accumulation_steps": 2,
                "output_dir": temp_dir,
                "learning_rate": 0.00001,
                "optimizer": "adamw_bnb_8bit",
@@ -114,7 +114,7 @@ class TestGemma2:
                },
                "num_epochs": 1,
                "micro_batch_size": 1,
-                "gradient_accumulation_steps": 4,
+                "gradient_accumulation_steps": 2,
                "output_dir": temp_dir,
                "learning_rate": 0.00001,
                "optimizer": "adamw_bnb_8bit",