From c9880977be751aad882d20db3aa38064d5f8ae78 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Tue, 29 Apr 2025 08:35:06 -0400 Subject: [PATCH] split llmcompressor from vllm checks --- .github/workflows/tests.yml | 6 ++++++ cicd/Dockerfile.jinja | 4 ++-- docker/Dockerfile | 4 ++-- tests/e2e/integrations/test_llm_compressor.py | 7 ++++++- tests/e2e/utils.py | 18 ++++++++++++++++++ 5 files changed, 34 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b14925437..6a8c3bea4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -258,6 +258,12 @@ jobs: fail-fast: false matrix: include: + - cuda: 124 + cuda_version: 12.4.1 + python_version: "3.11" + pytorch: 2.6.0 + num_gpus: 1 + axolotl_extras: llmcompressor - cuda: 124 cuda_version: 12.4.1 python_version: "3.11" diff --git a/cicd/Dockerfile.jinja b/cicd/Dockerfile.jinja index 9cad43f40..6988e092b 100644 --- a/cicd/Dockerfile.jinja +++ b/cicd/Dockerfile.jinja @@ -33,9 +33,9 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \ RUN pip install packaging==23.2 setuptools==75.8.0 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ - pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,llmcompressor,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ + pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ else \ - pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,llmcompressor] $AXOLOTL_ARGS; \ + pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \ fi RUN python scripts/unsloth_install.py | sh diff --git a/docker/Dockerfile b/docker/Dockerfile index bac02c057..e23a729d4 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -20,9 +20,9 @@ WORKDIR /workspace/axolotl # If AXOLOTL_EXTRAS is set, append it in brackets RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ - pip install 
--no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,llmcompressor,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ + pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ else \ - pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,llmcompressor] $AXOLOTL_ARGS; \ + pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \ fi RUN python scripts/unsloth_install.py | sh diff --git a/tests/e2e/integrations/test_llm_compressor.py b/tests/e2e/integrations/test_llm_compressor.py index a3e62c009..f4317559d 100644 --- a/tests/e2e/integrations/test_llm_compressor.py +++ b/tests/e2e/integrations/test_llm_compressor.py @@ -12,7 +12,11 @@ from axolotl.train import train from axolotl.utils.config import normalize_config, prepare_plugins, validate_config from axolotl.utils.dict import DictDefault -from tests.e2e.utils import check_model_output_exists, require_torch_2_4_1 +from tests.e2e.utils import ( + check_model_output_exists, + require_llmcompressor, + require_torch_2_4_1, +) MODELS = [ "nm-testing/llama2.c-stories42M-pruned2.4-compressed", @@ -26,6 +30,7 @@ MODELS = [ @pytest.mark.parametrize( "save_compressed", [True, False], ids=["save_compressed", "save_uncompressed"] ) +@require_llmcompressor class TestLLMCompressorIntegration: """ e2e tests for axolotl.integrations.llm_compressor.LLMCompressorPlugin diff --git a/tests/e2e/utils.py b/tests/e2e/utils.py index 2fbf333c4..0228957c1 100644 --- a/tests/e2e/utils.py +++ b/tests/e2e/utils.py @@ -109,6 +109,24 @@ def require_vllm(test_case): )(test_case) +def require_llmcompressor(test_case): + """ + Decorator marking a test that requires a llmcompressor to be installed + """ + + def is_llmcompressor_installed(): + try: + import llmcompressor # pylint: disable=unused-import # noqa: F401 + + return True + except ImportError: + return False + + return 
unittest.skipUnless( is_llmcompressor_installed(), "test requires llmcompressor to be installed" )(test_case) + + def is_hopper(): compute_capability = torch.cuda.get_device_capability() return compute_capability == (9, 0)