From 54392ac8a662f0746f4128e68d1088edb58a3711 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 9 Aug 2024 11:50:13 -0400
Subject: [PATCH] Attempt to run multigpu in PR CI for now to ensure it works
 (#1815) [skip ci]

* Attempt to run multigpu in PR CI for now to ensure it works
* fix yaml file
* forgot to include multigpu tests
* fix call to cicd.multigpu
* dump dictdefault to dict for yaml conversion
* use to_dict instead of casting
* 16bit-lora w flash attention, 8bit lora seems problematic
* add llama fsdp test
* more tests
* Add test for qlora + fsdp with prequant
* limit accelerate to 2 processes and disable broken qlora+fsdp+bnb test
* move multigpu tests to biweekly
---
 .github/workflows/multi-gpu-e2e.yml |  44 ++++
 cicd/cicd.sh                        |   2 +-
 cicd/multigpu.py                    |  77 +++++++
 cicd/multigpu.sh                    |   5 +
 cicd/tests.py                       |   8 +-
 tests/e2e/multigpu/__init__.py      |   0
 tests/e2e/multigpu/test_llama.py    | 341 ++++++++++++++++++++++++++++
 7 files changed, 473 insertions(+), 4 deletions(-)
 create mode 100644 .github/workflows/multi-gpu-e2e.yml
 create mode 100644 cicd/multigpu.py
 create mode 100755 cicd/multigpu.sh
 create mode 100644 tests/e2e/multigpu/__init__.py
 create mode 100644 tests/e2e/multigpu/test_llama.py

diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml
new file mode 100644
index 000000000..c854af9ab
--- /dev/null
+++ b/.github/workflows/multi-gpu-e2e.yml
@@ -0,0 +1,44 @@
+name: docker-multigpu-tests-biweekly
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '0 0 * * 1,4'  # Runs at 00:00 UTC every Monday & Thursday
+
+jobs:
+  test-axolotl-multigpu:
+    if: ${{ ! contains(github.event.commits[0].message, '[skip docker]') && github.repository_owner == 'axolotl-ai-cloud' }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - cuda: 121
+            cuda_version: 12.1.1
+            python_version: "3.11"
+            pytorch: 2.3.1
+            axolotl_extras:
+            num_gpus: 2
+    runs-on: [self-hosted, modal]
+    timeout-minutes: 120
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Install Modal
+        run: |
+          python -m pip install --upgrade pip
+          pip install modal==0.63.64 jinja2
+      - name: Update env vars
+        run: |
+          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
+          echo "PYTORCH_VERSION=${{ matrix.pytorch }}" >> $GITHUB_ENV
+          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args }}" >> $GITHUB_ENV
+          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras }}" >> $GITHUB_ENV
+          echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
+          echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
+      - name: Run tests job on Modal
+        run: |
+          modal run cicd.multigpu
diff --git a/cicd/cicd.sh b/cicd/cicd.sh
index 180150ea2..eceda9b37 100755
--- a/cicd/cicd.sh
+++ b/cicd/cicd.sh
@@ -3,4 +3,4 @@ set -e
 
 pytest --ignore=tests/e2e/ /workspace/axolotl/tests/
 pytest -n1 --dist loadfile -v /workspace/axolotl/tests/e2e/patched/
-pytest --ignore=tests/e2e/patched/ /workspace/axolotl/tests/e2e/
+pytest --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ /workspace/axolotl/tests/e2e/
diff --git a/cicd/multigpu.py b/cicd/multigpu.py
new file mode 100644
index 000000000..be10fbc73
--- /dev/null
+++ b/cicd/multigpu.py
@@ -0,0 +1,77 @@
+"""
+modal application to run axolotl gpu tests in Modal
+"""
+# pylint: disable=duplicate-code
+
+import os
+import pathlib
+import tempfile
+
+import jinja2
+import modal
+from jinja2 import select_autoescape
+from modal import Image, Stub
+
+cicd_path = pathlib.Path(__file__).parent.resolve()
+
+template_loader = jinja2.FileSystemLoader(searchpath=cicd_path)
+template_env = jinja2.Environment(
+    loader=template_loader, autoescape=select_autoescape()
+)
+df_template = template_env.get_template("Dockerfile.jinja")
+
+df_args = {
+    "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
+    "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
+    "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.3.1"),
+    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu121-2.3.1"),
+    "CUDA": os.environ.get("CUDA", "121"),
+    "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
+    "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
+}
+
+dockerfile_contents = df_template.render(**df_args)
+
+temp_dir = tempfile.mkdtemp()
+with open(pathlib.Path(temp_dir) / "Dockerfile", "w", encoding="utf-8") as f:
+    f.write(dockerfile_contents)
+
+cicd_image = (
+    Image.from_dockerfile(
+        pathlib.Path(temp_dir) / "Dockerfile",
+        force_build=True,
+        gpu="A10G",
+    )
+    .env(df_args)
+    .pip_install("fastapi==0.110.0", "pydantic==2.6.3")
+)
+
+stub = Stub("Axolotl CI/CD", secrets=[])
+
+
+N_GPUS = int(os.environ.get("N_GPUS", 2))
+GPU_CONFIG = modal.gpu.H100(count=N_GPUS)
+
+
+def run_cmd(cmd: str, run_folder: str):
+    import subprocess  # nosec
+
+    # Propagate errors from subprocess.
+    if exit_code := subprocess.call(cmd.split(), cwd=run_folder):  # nosec
+        exit(exit_code)  # pylint: disable=consider-using-sys-exit
+
+
+@stub.function(
+    image=cicd_image,
+    gpu=GPU_CONFIG,
+    timeout=45 * 60,
+    cpu=8.0,
+    memory=131072 * N_GPUS,
+)
+def cicd_pytest():
+    run_cmd("./cicd/multigpu.sh", "/workspace/axolotl")
+
+
+@stub.local_entrypoint()
+def main():
+    cicd_pytest.remote()
diff --git a/cicd/multigpu.sh b/cicd/multigpu.sh
new file mode 100755
index 000000000..ff7f9f19a
--- /dev/null
+++ b/cicd/multigpu.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+set -e
+
+# only run one test at a time so as not to OOM the GPU
+pytest -n1 /workspace/axolotl/tests/e2e/multigpu/
diff --git a/cicd/tests.py b/cicd/tests.py
index bfbdb7b90..c21467637 100644
--- a/cicd/tests.py
+++ b/cicd/tests.py
@@ -1,6 +1,8 @@
 """
 modal application to run axolotl gpu tests in Modal
 """
+# pylint: disable=duplicate-code
+
 import os
 import pathlib
 import tempfile
@@ -21,9 +23,9 @@ df_template = template_env.get_template("Dockerfile.jinja")
 df_args = {
     "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
     "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
-    "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.0.1"),
-    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.10-cu118-2.0.1"),
-    "CUDA": os.environ.get("CUDA", "118"),
+    "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.3.1"),
+    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu121-2.3.1"),
+    "CUDA": os.environ.get("CUDA", "121"),
     "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
     "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
 }
diff --git a/tests/e2e/multigpu/__init__.py b/tests/e2e/multigpu/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/e2e/multigpu/test_llama.py b/tests/e2e/multigpu/test_llama.py
new file mode 100644
index 000000000..344c57fb8
--- /dev/null
+++ b/tests/e2e/multigpu/test_llama.py
@@ -0,0 +1,341 @@
+"""
+E2E tests for multigpu lora tinyllama
+"""
+
+import logging
+import os
+import unittest
+from pathlib import Path
+
+import pytest
+import yaml
+from accelerate.test_utils import execute_subprocess_async
+
+from axolotl.utils.dict import DictDefault
+
+from ..utils import with_temp_dir
+
+LOG = logging.getLogger("axolotl.tests.e2e.multigpu")
+os.environ["WANDB_DISABLED"] = "true"
+
+
+class TestMultiGPULlama(unittest.TestCase):
+    """
+    Test case for Llama models using LoRA
+    """
+
+    @with_temp_dir
+    def test_lora_ddp(self, temp_dir):
+        # pylint: disable=duplicate-code
+        cfg = DictDefault(
+            {
+                "base_model": "TinyLlama/TinyLlama_v1.1",
+                "tokenizer_type": "LlamaTokenizer",
+                "sequence_len": 2048,
+                "adapter": "lora",
+                "lora_r": 8,
+                "lora_alpha": 16,
+                "lora_dropout": 0.05,
+                "lora_target_linear": True,
+                "val_set_size": 0.05,
+                "special_tokens": {
+                    "unk_token": "<unk>",
+                    "bos_token": "<s>",
+                    "eos_token": "</s>",
+                },
+                "datasets": [
+                    {
+                        "path": "tatsu-lab/alpaca",
+                        "type": "alpaca",
+                    },
+                ],
+                "num_epochs": 1,
+                "max_steps": 100,
+                "micro_batch_size": 4,
+                "gradient_accumulation_steps": 4,
+                "output_dir": temp_dir,
+                "learning_rate": 0.00001,
+                "optimizer": "adamw_8bit",
+                "lr_scheduler": "cosine",
+                "flash_attention": True,
+            }
+        )
+
+        # write cfg to yaml file
+        Path(temp_dir).mkdir(parents=True, exist_ok=True)
+        with open(Path(temp_dir) / "config.yaml", "w", encoding="utf-8") as fout:
+            fout.write(yaml.dump(cfg.to_dict(), Dumper=yaml.Dumper))
+
+        execute_subprocess_async(
+            [
+                "accelerate",
+                "launch",
+                "--num-processes",
+                "2",
+                "-m",
+                "axolotl.cli.train",
+                str(Path(temp_dir) / "config.yaml"),
+            ]
+        )
+
+    @with_temp_dir
+    def test_lora_ddp_packed(self, temp_dir):
+        # pylint: disable=duplicate-code
+        cfg = DictDefault(
+            {
+                "base_model": "TinyLlama/TinyLlama_v1.1",
+                "tokenizer_type": "LlamaTokenizer",
+                "sequence_len": 2048,
+                "sample_packing": True,
+                "eval_sample_packing": False,
+                "pad_to_sequence_len": True,
+                "adapter": "lora",
+                "lora_r": 8,
+                "lora_alpha": 16,
+                "lora_dropout": 0.05,
+                "lora_target_linear": True,
+                "val_set_size": 0.05,
+                "special_tokens": {
+                    "unk_token": "<unk>",
+                    "bos_token": "<s>",
+                    "eos_token": "</s>",
+                },
+                "datasets": [
+                    {
+                        "path": "tatsu-lab/alpaca",
+                        "type": "alpaca",
+                    },
+                ],
+                "num_epochs": 1,
+                "max_steps": 50,
+                "micro_batch_size": 4,
+                "gradient_accumulation_steps": 4,
+                "output_dir": temp_dir,
+                "learning_rate": 0.00001,
+                "optimizer": "adamw_8bit",
+                "lr_scheduler": "cosine",
+                "flash_attention": True,
+            }
+        )
+
+        # write cfg to yaml file
+        Path(temp_dir).mkdir(parents=True, exist_ok=True)
+        with open(Path(temp_dir) / "config.yaml", "w", encoding="utf-8") as fout:
+            fout.write(yaml.dump(cfg.to_dict(), Dumper=yaml.Dumper))
+
+        execute_subprocess_async(
+            [
+                "accelerate",
+                "launch",
+                "--num-processes",
+                "2",
+                "-m",
+                "axolotl.cli.train",
+                str(Path(temp_dir) / "config.yaml"),
+            ]
+        )
+
+    @with_temp_dir
+    def test_fsdp(self, temp_dir):
+        # pylint: disable=duplicate-code
+        cfg = DictDefault(
+            {
+                "base_model": "TinyLlama/TinyLlama_v1.1",
+                "tokenizer_type": "LlamaTokenizer",
+                "sequence_len": 2048,
+                "val_set_size": 0.05,
+                "special_tokens": {
+                    "unk_token": "<unk>",
+                    "bos_token": "<s>",
+                    "eos_token": "</s>",
+                },
+                "datasets": [
+                    {
+                        "path": "tatsu-lab/alpaca",
+                        "type": "alpaca",
+                    },
+                ],
+                "num_epochs": 1,
+                "max_steps": 100,
+                "micro_batch_size": 4,
+                "gradient_accumulation_steps": 4,
+                "output_dir": temp_dir,
+                "learning_rate": 0.00001,
+                "optimizer": "adamw_torch",
+                "lr_scheduler": "cosine",
+                "flash_attention": True,
+                "fsdp": [
+                    "full_shard",
+                    "auto_wrap",
+                ],
+                "fsdp_config": {
+                    "fsdp_limit_all_gathers": True,
+                    "fsdp_offload_params": False,
+                    "fsdp_sync_module_states": True,
+                    "fsdp_use_orig_params": False,
+                    "fsdp_cpu_ram_efficient_loading": False,
+                    "fsdp_transformer_layer_cls_to_wrap": "LlamaDecoderLayer",
"LlamaDecoderLayer", + "fsdp_state_dict_type": "SHARDED_STATE_DICT", + "fsdp_auto_wrap_policy": "TRANSFORMER_BASED_WRAP", + }, + } + ) + + # write cfg to yaml file + Path(temp_dir).mkdir(parents=True, exist_ok=True) + with open(Path(temp_dir) / "config.yaml", "w", encoding="utf-8") as fout: + fout.write(yaml.dump(cfg.to_dict(), Dumper=yaml.Dumper)) + + execute_subprocess_async( + [ + "accelerate", + "launch", + "--num-processes", + "2", + "-m", + "axolotl.cli.train", + str(Path(temp_dir) / "config.yaml"), + ] + ) + + @with_temp_dir + def test_fsdp_packed(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "TinyLlama/TinyLlama_v1.1", + "tokenizer_type": "LlamaTokenizer", + "sample_packing": True, + "eval_sample_packing": False, + "pad_to_sequence_len": True, + "sequence_len": 2048, + "val_set_size": 0.05, + "special_tokens": { + "unk_token": "", + "bos_token": "", + "eos_token": "", + }, + "datasets": [ + { + "path": "tatsu-lab/alpaca", + "type": "alpaca", + }, + ], + "num_epochs": 1, + "max_steps": 100, + "micro_batch_size": 4, + "gradient_accumulation_steps": 4, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "flash_attention": True, + "fsdp": [ + "full_shard", + "auto_wrap", + ], + "fsdp_config": { + "fsdp_limit_all_gathers": True, + "fsdp_offload_params": False, + "fsdp_sync_module_states": True, + "fsdp_use_orig_params": False, + "fsdp_cpu_ram_efficient_loading": False, + "fsdp_transformer_layer_cls_to_wrap": "LlamaDecoderLayer", + "fsdp_state_dict_type": "SHARDED_STATE_DICT", + "fsdp_auto_wrap_policy": "TRANSFORMER_BASED_WRAP", + }, + } + ) + + # write cfg to yaml file + Path(temp_dir).mkdir(parents=True, exist_ok=True) + with open(Path(temp_dir) / "config.yaml", "w", encoding="utf-8") as fout: + fout.write(yaml.dump(cfg.to_dict(), Dumper=yaml.Dumper)) + + execute_subprocess_async( + [ + "accelerate", + "launch", + "--num-processes", + "2", + "-m", + "axolotl.cli.train", + str(Path(temp_dir) / "config.yaml"), + ] + ) + + @pytest.mark.skip("disabled due to upstream issue") + @with_temp_dir + def test_fsdp_qlora_prequant_packed(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "axolotl-ai-co/TinyLlama_v1.1-bnb-nf4-bf16", + "tokenizer_type": "AutoTokenizer", + "adapter": "qlora", + "load_in_4bit": True, + "lora_r": 8, + "lora_alpha": 16, + "lora_dropout": 0.05, + "lora_target_linear": True, + "lora_modules_to_save": [ + "embed_tokens", + "lm_head", + ], + "sample_packing": True, + "eval_sample_packing": False, + "pad_to_sequence_len": True, + "sequence_len": 2048, + "val_set_size": 0.05, + "special_tokens": { + "pad_token": "<|end_of_text|>", + }, + "datasets": [ + { + "path": "tatsu-lab/alpaca", + "type": "alpaca", + "split": "train[:25%]", + }, + ], + "num_epochs": 1, + "max_steps": 100, + "micro_batch_size": 4, + "gradient_accumulation_steps": 4, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "flash_attention": True, + "fsdp": [ + "full_shard", + "auto_wrap", + ], + "fsdp_config": { + "fsdp_limit_all_gathers": True, + "fsdp_offload_params": False, + "fsdp_sync_module_states": True, + "fsdp_use_orig_params": False, + "fsdp_cpu_ram_efficient_loading": True, + "fsdp_transformer_layer_cls_to_wrap": "LlamaDecoderLayer", + "fsdp_state_dict_type": "SHARDED_STATE_DICT", + "fsdp_auto_wrap_policy": "TRANSFORMER_BASED_WRAP", + }, + } + ) + + # write cfg to yaml file + 
+        Path(temp_dir).mkdir(parents=True, exist_ok=True)
+        with open(Path(temp_dir) / "config.yaml", "w", encoding="utf-8") as fout:
+            fout.write(yaml.dump(cfg.to_dict(), Dumper=yaml.Dumper))
+
+        execute_subprocess_async(
+            [
+                "accelerate",
+                "launch",
+                "--num-processes",
+                "2",
+                "-m",
+                "axolotl.cli.train",
+                str(Path(temp_dir) / "config.yaml"),
+            ]
+        )