split llmcompressor from vllm checks

2025-04-29 08:35:06 -04:00
parent fd5c985038
commit 82b16bd040
5 changed files with 40 additions and 5 deletions
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -261,6 +261,18 @@ jobs:
      fail-fast: false
      matrix:
        include:
+          - cuda: 124
+            cuda_version: 12.4.1
+            python_version: "3.11"
+            pytorch: 2.6.0
+            num_gpus: 1
+            axolotl_extras: llmcompressor
+          - cuda: 124
+            cuda_version: 12.4.1
+            python_version: "3.11"
+            pytorch: 2.4.1
+            num_gpus: 1
+            axolotl_extras:
          - cuda: 124
            cuda_version: 12.4.1
            python_version: "3.11"
--- a/cicd/Dockerfile.jinja
+++ b/cicd/Dockerfile.jinja
@@ -33,9 +33,9 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \

 RUN pip install packaging==23.2 setuptools==75.8.0
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,llmcompressor,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
    else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,llmcompressor] $AXOLOTL_ARGS; \
+        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
    fi

 RUN python scripts/unsloth_install.py | sh
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -20,9 +20,9 @@ WORKDIR /workspace/axolotl

 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,llmcompressor,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
    else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,llmcompressor] $AXOLOTL_ARGS; \
+        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
    fi

 RUN python scripts/unsloth_install.py | sh
--- a/tests/e2e/integrations/test_llm_compressor.py
+++ b/tests/e2e/integrations/test_llm_compressor.py
@@ -12,7 +12,11 @@ from axolotl.train import train
 from axolotl.utils.config import normalize_config, prepare_plugins, validate_config
 from axolotl.utils.dict import DictDefault

-from tests.e2e.utils import check_model_output_exists, require_torch_2_4_1
+from tests.e2e.utils import (
+    check_model_output_exists,
+    require_llmcompressor,
+    require_torch_2_4_1,
+)

 MODELS = [
    "nm-testing/llama2.c-stories42M-pruned2.4-compressed",
@@ -26,6 +30,7 @@ MODELS = [
@pytest.mark.parametrize(
    "save_compressed", [True, False], ids=["save_compressed", "save_uncompressed"]
 )
+@require_llmcompressor
 class TestLLMCompressorIntegration:
    """
    e2e tests for axolotl.integrations.llm_compressor.LLMCompressorPlugin
--- a/tests/e2e/utils.py
+++ b/tests/e2e/utils.py
@@ -109,6 +109,24 @@ def require_vllm(test_case):
    )(test_case)


+def require_llmcompressor(test_case):
+    """
+    Decorator marking a test that requires llmcompressor to be installed
+    """
+
+    def is_llmcompressor_installed():
+        try:
+            import llmcompressor  # pylint: disable=unused-import  # noqa: F401
+
+            return True
+        except ImportError:
+            return False
+
+    return unittest.skipUnless(
+        is_llmcompressor_installed(), "test requires a llmcompressor to be installed"
+    )(test_case)
+
+
 def is_hopper():
    compute_capability = torch.cuda.get_device_capability()
    return compute_capability == (9, 0)