From c9880977be751aad882d20db3aa38064d5f8ae78 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Tue, 29 Apr 2025 08:35:06 -0400 Subject: [PATCH] split llmcompressor from vllm checks --- .github/workflows/tests.yml | 6 ++++++ cicd/Dockerfile.jinja | 4 ++-- docker/Dockerfile | 4 ++-- tests/e2e/integrations/test_llm_compressor.py | 7 ++++++- tests/e2e/utils.py | 18 ++++++++++++++++++ 5 files changed, 34 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b14925437..6a8c3bea4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -258,6 +258,12 @@ jobs: fail-fast: false matrix: include: + - cuda: 124 + cuda_version: 12.4.1 + python_version: "3.11" + pytorch: 2.6.0 + num_gpus: 1 + axolotl_extras: llmcompressor - cuda: 124 cuda_version: 12.4.1 python_version: "3.11" diff --git a/cicd/Dockerfile.jinja b/cicd/Dockerfile.jinja index 9cad43f40..6988e092b 100644 --- a/cicd/Dockerfile.jinja +++ b/cicd/Dockerfile.jinja @@ -33,9 +33,9 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \ RUN pip install packaging==23.2 setuptools==75.8.0 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ - pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,llmcompressor,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ + pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ else \ - pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,llmcompressor] $AXOLOTL_ARGS; \ + pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \ fi RUN python scripts/unsloth_install.py | sh diff --git a/docker/Dockerfile b/docker/Dockerfile index bac02c057..e23a729d4 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -20,9 +20,9 @@ WORKDIR /workspace/axolotl # If AXOLOTL_EXTRAS is set, append it in brackets RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ - pip install 
--no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,llmcompressor,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ + pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ else \ - pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,llmcompressor] $AXOLOTL_ARGS; \ + pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \ fi RUN python scripts/unsloth_install.py | sh diff --git a/tests/e2e/integrations/test_llm_compressor.py b/tests/e2e/integrations/test_llm_compressor.py index a3e62c009..f4317559d 100644 --- a/tests/e2e/integrations/test_llm_compressor.py +++ b/tests/e2e/integrations/test_llm_compressor.py @@ -12,7 +12,11 @@ from axolotl.train import train from axolotl.utils.config import normalize_config, prepare_plugins, validate_config from axolotl.utils.dict import DictDefault -from tests.e2e.utils import check_model_output_exists, require_torch_2_4_1 +from tests.e2e.utils import ( + check_model_output_exists, + require_llmcompressor, + require_torch_2_4_1, +) MODELS = [ "nm-testing/llama2.c-stories42M-pruned2.4-compressed", @@ -26,6 +30,7 @@ MODELS = [ @pytest.mark.parametrize( "save_compressed", [True, False], ids=["save_compressed", "save_uncompressed"] ) +@require_llmcompressor class TestLLMCompressorIntegration: """ e2e tests for axolotl.integrations.llm_compressor.LLMCompressorPlugin diff --git a/tests/e2e/utils.py b/tests/e2e/utils.py index 2fbf333c4..0228957c1 100644 --- a/tests/e2e/utils.py +++ b/tests/e2e/utils.py @@ -109,6 +109,24 @@ def require_vllm(test_case): )(test_case) +def require_llmcompressor(test_case): + """ + Decorator marking a test that requires a llmcompressor to be installed + """ + + def is_llmcompressor_installed(): + try: + import llmcompressor # pylint: disable=unused-import # noqa: F401 + + return True + except ImportError: + return False + + return 
unittest.skipUnless( is_llmcompressor_installed(), "test requires llmcompressor to be installed" )(test_case) + + def is_hopper(): compute_capability = torch.cuda.get_device_capability() return compute_capability == (9, 0)