Update dependencies and show slow tests in CI (#2492)

* use latest torchao, gradio, schedule-free

* get info on slow tests

* speed up tests by avoiding gradient checkpointing and reducing eval size
This commit is contained in:
Wing Lian
2025-04-05 17:41:31 -04:00
committed by GitHub
parent 949471039f
commit e7e0cd97ce
4 changed files with 24 additions and 24 deletions

View File

@@ -2,5 +2,5 @@
set -e
# only run one test at a time so as not to OOM the GPU
pytest -v -n2 /workspace/axolotl/tests/e2e/multigpu/ --ignore=/workspace/axolotl/tests/e2e/multigpu/solo/
pytest -v -n1 /workspace/axolotl/tests/e2e/multigpu/solo/
pytest -v --durations=10 -n2 /workspace/axolotl/tests/e2e/multigpu/ --ignore=/workspace/axolotl/tests/e2e/multigpu/solo/
pytest -v --durations=10 -n1 /workspace/axolotl/tests/e2e/multigpu/solo/

View File

@@ -22,7 +22,7 @@ trl==0.16.0
optimum==1.16.2
hf_transfer
sentencepiece
gradio==3.50.2
gradio==5.23.3
modal==0.70.5
pydantic==2.10.6
@@ -59,8 +59,8 @@ langdetect==1.0.9
immutabledict==4.2.0
antlr4-python3-runtime==4.13.2
torchao==0.7.0
schedulefree==1.3.0
torchao==0.9.0
schedulefree==1.4.1
axolotl-contribs-lgpl==0.0.6
axolotl-contribs-mit==0.0.3

View File

@@ -44,7 +44,7 @@ class TestMultiGPULlama:
"lora_alpha": 16,
"lora_dropout": 0.05,
"lora_target_linear": True,
"val_set_size": 0.05,
"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",
},
@@ -58,7 +58,7 @@ class TestMultiGPULlama:
"max_steps": 2,
"micro_batch_size": 4,
"gradient_accumulation_steps": 4,
"gradient_checkpointing": True,
# "gradient_checkpointing": True,
"output_dir": temp_dir,
"learning_rate": 0.00001,
"optimizer": "adamw_8bit",
@@ -108,7 +108,7 @@ class TestMultiGPULlama:
"lora_alpha": 16,
"lora_dropout": 0.05,
"lora_target_linear": True,
"val_set_size": 0.05,
"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",
},
@@ -122,7 +122,7 @@ class TestMultiGPULlama:
"max_steps": 2,
"micro_batch_size": 1,
"gradient_accumulation_steps": gradient_accumulation_steps,
"gradient_checkpointing": True,
# "gradient_checkpointing": True,
"output_dir": temp_dir,
"learning_rate": 0.00001,
"optimizer": "adamw_8bit",
@@ -169,7 +169,7 @@ class TestMultiGPULlama:
"lora_alpha": 16,
"lora_dropout": 0.05,
"lora_target_linear": True,
"val_set_size": 0.05,
"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",
},
@@ -195,7 +195,7 @@ class TestMultiGPULlama:
"max_steps": 2,
"micro_batch_size": 4,
"gradient_accumulation_steps": 4,
"gradient_checkpointing": True,
# "gradient_checkpointing": True,
"output_dir": temp_dir,
"warmup_steps": 0,
"learning_rate": 0.00001,
@@ -247,7 +247,7 @@ class TestMultiGPULlama:
"lora_alpha": 16,
"lora_dropout": 0.05,
"lora_target_linear": True,
"val_set_size": 0.05,
"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",
},
@@ -273,7 +273,7 @@ class TestMultiGPULlama:
"max_steps": 2,
"micro_batch_size": 2,
"gradient_accumulation_steps": 4,
"gradient_checkpointing": True,
# "gradient_checkpointing": True,
"output_dir": temp_dir,
"warmup_steps": 0,
"learning_rate": 0.00001,
@@ -334,7 +334,7 @@ class TestMultiGPULlama:
"max_steps": 2,
"micro_batch_size": 2,
"gradient_accumulation_steps": gradient_accumulation_steps,
"gradient_checkpointing": True,
# "gradient_checkpointing": True,
"output_dir": temp_dir,
"learning_rate": 0.00001,
"optimizer": "adamw_torch_fused",
@@ -391,7 +391,7 @@ class TestMultiGPULlama:
"sample_packing": True,
"pad_to_sequence_len": True,
"sequence_len": 2048,
"val_set_size": 0.05,
"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",
},
@@ -405,7 +405,7 @@ class TestMultiGPULlama:
"max_steps": 2,
"micro_batch_size": 4,
"gradient_accumulation_steps": 2,
"gradient_checkpointing": True,
# "gradient_checkpointing": True,
"output_dir": temp_dir,
"learning_rate": 0.00001,
"optimizer": "adamw_torch_fused",
@@ -470,7 +470,7 @@ class TestMultiGPULlama:
"eval_sample_packing": False,
"pad_to_sequence_len": True,
"sequence_len": 2048,
"val_set_size": 0.05,
"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",
},
@@ -485,7 +485,7 @@ class TestMultiGPULlama:
"max_steps": 2,
"micro_batch_size": 4,
"gradient_accumulation_steps": 2,
"gradient_checkpointing": True,
# "gradient_checkpointing": True,
"output_dir": temp_dir,
"learning_rate": 0.00001,
"optimizer": "adamw_torch_fused",
@@ -567,7 +567,7 @@ class TestMultiGPULlama:
"sample_packing": True,
"pad_to_sequence_len": True,
"sequence_len": 2048,
"val_set_size": 0.05,
"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",
},
@@ -640,7 +640,7 @@ class TestMultiGPULlama:
"sample_packing": True,
"pad_to_sequence_len": True,
"sequence_len": 2048,
"val_set_size": 0.05,
"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",
},
@@ -713,7 +713,7 @@ class TestMultiGPULlama:
"sample_packing": True,
"pad_to_sequence_len": True,
"sequence_len": 2048,
"val_set_size": 0.05,
"val_set_size": 0.01,
"special_tokens": {
"pad_token": "<|endoftext|>",
},
@@ -788,7 +788,7 @@ class TestMultiGPULlama:
"max_steps": 2,
"micro_batch_size": 1,
"gradient_accumulation_steps": 1,
"gradient_checkpointing": True,
# "gradient_checkpointing": True,
"output_dir": temp_dir,
"learning_rate": 0.00001,
"optimizer": "adamw_torch_fused",

View File

@@ -37,7 +37,7 @@ class TestMultiGPUQwen2:
"lora_alpha": 16,
"lora_dropout": 0.05,
"lora_target_linear": True,
"val_set_size": 0.05,
"val_set_size": 0.01,
"datasets": [
{
"path": "Intel/orca_dpo_pairs",
@@ -57,7 +57,7 @@ class TestMultiGPUQwen2:
"flash_attention": True,
"bf16": "auto",
"tf32": True,
"gradient_checkpointing": True,
# "gradient_checkpointing": True,
"gradient_checkpointing_kwargs": {
"use_reentrant": False,
},