move import of llmcompressor to reset session inside test

make sure to reset the session after each test
move decorator to test method instead of class
2025-04-30 18:10:44 -04:00 · 2025-04-30 17:21:53 -04:00 · 2025-04-30 17:21:53 -04:00 · 2025-04-30 17:21:53 -04:00 · 2025-04-30 17:21:53 -04:00 · 2025-04-30 17:21:53 -04:00
18 changed files with 29 additions and 137 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -30,7 +30,7 @@ jobs:
            cuda_version: 12.6.3
            python_version: "3.11"
            pytorch: 2.7.0
-            axolotl_extras:
+            axolotl_extras: vllm
    runs-on: axolotl-gpu-runner
    steps:
      - name: Checkout
--- a/.runpod/tests.json
+++ b/.runpod/tests.json
@@ -1,90 +0,0 @@
 {
  "tests": [
    {
      "name": "quick_smoke_test_sft",
      "input": {
        "user_id": "user",
        "model_id": "llama-test",
        "run_id": "llama-test",
        "credentials": {
          "wandb_api_key": "",
          "hf_token": ""
        },
        "args": {
          "base_model": "HuggingFaceTB/SmolLM2-135M",
          "model_type": "AutoModelForCausalLM",
          "tokenizer_type": "AutoTokenizer",
          "load_in_4bit": true,
          "strict": false,
          "datasets": [
            {
              "path": "mhenrichsen/alpaca_2k_test",
              "type": "alpaca",
              "split": "train[:10%]"
            }
          ],
          "val_set_size": 0.02,
          "output_dir": "./outputs/lora-out",
          "sequence_len": 4096,
          "sample_packing": true,
          "eval_sample_packing": false,
          "pad_to_sequence_len": true,
          "adapter": "qlora",
          "lora_r": 32,
          "lora_alpha": 64,
          "lora_dropout": 0.05,
          "lora_target_linear": true,
          "lora_modules_to_save": [
            "embed_tokens",
            "lm_head"
          ],
          "gradient_accumulation_steps": 2,
          "micro_batch_size": 1,
          "num_epochs": 1,
          "optimizer": "adamw_torch_fused",
          "lr_scheduler": "cosine",
          "learning_rate": 0.0002,
          "train_on_inputs": false,
          "group_by_length": false,
          "bf16": "auto",
          "tf32": true,
          "gradient_checkpointing": true,
          "logging_steps": 1,
          "flash_attention": true,
          "warmup_steps": 1,
          "evals_per_epoch": 1,
          "eval_max_new_tokens": 128,
          "saves_per_epoch": 1,
          "weight_decay": 0.0,
          "special_tokens": {
            "pad_token": "<|endoftext|>"
          },
          "max_steps": 20
        }
      },
      "timeout": 100000
    }
  ],
  "config": {
    "gpuTypeId": "NVIDIA GeForce RTX 4090",
    "gpuCount": 1,
    "containerDiskInGb": 200,
    "env": [
      {
        "key": "TOKENIZER",
        "value": ""
      },
      {
        "key": "DISABLE_LOG_STATS",
        "value": "true"
      }
    ],
    "allowedCudaVersions": [
      "12.8",
      "12.7",
      "12.6",
      "12.5",
      "12.4"
    ]
  }
 }
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,7 +18,7 @@ accelerate==1.6.0
 datasets==3.5.0
 deepspeed>=0.15.4
 trl==0.17.0
-hf_xet==1.1.0
+hf_xet==1.0.0
 hqq==0.2.5
 optimum==1.16.2
--- a/src/axolotl/cli/init.py
+++ b/src/axolotl/cli/init.py
@@ -2,7 +2,4 @@
 import os
 from axolotl.logging_config import configure_logging
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 configure_logging()
--- a/src/axolotl/cli/checks.py
+++ b/src/axolotl/cli/checks.py
@@ -8,6 +8,9 @@ from accelerate.commands.config import config_args
 from huggingface_hub import HfApi
 from huggingface_hub.utils import LocalTokenNotFoundError
 from axolotl.logging_config import configure_logging
 configure_logging()
 LOG = logging.getLogger(__name__)
--- a/src/axolotl/cli/config.py
+++ b/src/axolotl/cli/config.py
@@ -5,7 +5,6 @@ import logging
 import os
 import tempfile
 from pathlib import Path
 from tempfile import NamedTemporaryFile
 from typing import Union
 from urllib.parse import urlparse
@@ -159,9 +158,7 @@ def plugin_set_cfg(cfg: DictDefault):
        plugin_manager.cfg = cfg
-def load_cfg(
+def load_cfg(config: Union[str, Path] = Path("examples/"), **kwargs) -> DictDefault:
    config: str | Path | DictDefault = Path("examples/"), **kwargs
 ) -> DictDefault:
    """
    Loads the `axolotl` configuration stored at `config`, validates it, and performs
    various setup.
@@ -173,24 +170,13 @@ def load_cfg(
    Returns:
        `DictDefault` mapping configuration keys to values.
    """
-    if isinstance(config, (str, Path)):
+    config = check_remote_config(config)
-        config = check_remote_config(config)
+    if Path(config).is_dir():
-        if Path(config).is_dir():
+        config = choose_config(Path(config))
            config = choose_config(Path(config))
-        # Load the config from the yaml file
+    # Load the config from the yaml file
-        with open(config, encoding="utf-8") as file:
+    with open(config, encoding="utf-8") as file:
-            cfg: DictDefault = DictDefault(yaml.safe_load(file))
+        cfg: DictDefault = DictDefault(yaml.safe_load(file))
        cfg.axolotl_config_path = config
    else:
        cfg = config
        with NamedTemporaryFile(
            mode="w", delete=False, suffix=".yml", prefix="axolotl_config_"
        ) as temp_file:
            temp_file.write(yaml.dump(config.to_dict()))
            temp_file.close()
        cfg.axolotl_config_path = temp_file.name
    # If there are any options passed in the cli, if it is something that seems valid
    # from the yaml, then overwrite the value
@@ -204,6 +190,8 @@ def load_cfg(
            else:
                cfg[k] = kwargs[k]
    cfg.axolotl_config_path = config
    try:
        device_props = torch.cuda.get_device_properties("cuda")
        gpu_version = "sm_" + str(device_props.major) + str(device_props.minor)
--- a/src/axolotl/cli/utils.py
+++ b/src/axolotl/cli/utils.py
@@ -20,9 +20,11 @@ from transformers import (
    ProcessorMixin,
 )
 from axolotl.logging_config import configure_logging
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.models import load_model, load_processor, load_tokenizer
 configure_logging()
 LOG = logging.getLogger(__name__)
--- a/src/axolotl/common/datasets.py
+++ b/src/axolotl/common/datasets.py
@@ -47,7 +47,7 @@ def sample_dataset(dataset: Dataset, num_samples: int) -> Dataset:
 def load_datasets(
    *,
    cfg: DictDefault,
-    cli_args: PreprocessCliArgs | TrainerCliArgs | None = None,
+    cli_args: Union[PreprocessCliArgs, TrainerCliArgs],
 ) -> TrainDatasetMeta:
    """
    Loads one or more training or evaluation datasets, calling
@@ -64,8 +64,7 @@ def load_datasets(
    tokenizer = load_tokenizer(cfg)
    processor = load_processor(cfg, tokenizer=tokenizer) if cfg.processor_type else None
    preprocess_iterable = (
-        cli_args
+        hasattr(cli_args, "iterable")
        and hasattr(cli_args, "iterable")
        and cli_args.iterable is not None
        and cli_args.iterable
    )
@@ -77,7 +76,7 @@ def load_datasets(
        preprocess_iterable=preprocess_iterable,
    )
-    if cli_args and (
+    if (
        cli_args.debug
        or cfg.debug
        or cli_args.debug_text_only
--- a/src/axolotl/core/trainer_builder.py
+++ b/src/axolotl/core/trainer_builder.py
@@ -488,7 +488,7 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
        # these are all the "standard" kwargs that are def used
        training_arguments_kwargs["max_steps"] = (
-            self.cfg.max_steps if self.cfg.max_steps else -1
+            total_num_steps if self.cfg.max_steps else -1
        )
        training_arguments_kwargs["max_seq_length"] = self.cfg.sequence_len
        training_arguments_kwargs["per_device_train_batch_size"] = (
--- a/src/axolotl/core/trainers/grpo/init.py
+++ b/src/axolotl/core/trainers/grpo/init.py
@@ -63,7 +63,6 @@ class GRPOStrategy:
        grpo_args_kwargs["max_completion_length"] = trl.max_completion_length
        grpo_args_kwargs["log_completions"] = trl.log_completions
        grpo_args_kwargs["num_completions_to_print"] = trl.num_completions_to_print
        if trl.reward_weights:
            grpo_args_kwargs["reward_weights"] = trl.reward_weights
--- a/src/axolotl/evaluate.py
+++ b/src/axolotl/evaluate.py
@@ -11,6 +11,7 @@ from accelerate.logging import get_logger
 from datasets import Dataset
 from transformers.trainer import Trainer
 from axolotl.logging_config import configure_logging
 from axolotl.train import (
    TrainDatasetMeta,
    setup_model_and_tokenizer,
@@ -23,6 +24,7 @@ project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 src_dir = os.path.join(project_root, "src")
 sys.path.insert(0, src_dir)
 configure_logging()
 LOG = get_logger(__name__)
--- a/src/axolotl/kernels/quantize.py
+++ b/src/axolotl/kernels/quantize.py
@@ -55,16 +55,13 @@ def dequantize(
    target_device = W.device
    # Extract quantization state
    nested = False
    if not isinstance(quant_state, list):
        # New style quant_state class
        absmax = quant_state.absmax.to(target_device)
        shape = quant_state.shape
        dtype = quant_state.dtype
        blocksize = quant_state.blocksize
-        if quant_state.nested:
+        offset = quant_state.offset.to(target_device)
            nested = True
            offset = quant_state.offset.to(target_device)
        state2 = quant_state.state2
        absmax2 = state2.absmax.to(target_device)
        code2 = state2.code.to(target_device)
@@ -118,8 +115,7 @@ def dequantize(
            ctypes.c_int(n_elements_absmax),
        )
-    if nested:
+    out_absmax += offset
        out_absmax += offset
    # Choose appropriate dequantization function
    fx = (
--- a/src/axolotl/monkeypatch/attention/ring_attn/patch.py
+++ b/src/axolotl/monkeypatch/attention/ring_attn/patch.py
@@ -12,8 +12,10 @@ import torch
 import torch.distributed as dist
 from accelerate.logging import get_logger
 from axolotl.logging_config import configure_logging
 from axolotl.monkeypatch.utils import get_cu_seqlens_from_pos_ids
 configure_logging()
 LOG = get_logger(__name__)
--- a/src/axolotl/monkeypatch/trainer/init.py
+++ b/src/axolotl/monkeypatch/trainer/init.py
--- a/src/axolotl/train.py
+++ b/src/axolotl/train.py
@@ -30,6 +30,7 @@ from axolotl.core.trainers.mixins.sequence_parallel import (
    SequenceParallelContextManager,
 )
 from axolotl.integrations.base import PluginManager
 from axolotl.logging_config import configure_logging
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.distributed import cleanup_distributed
 from axolotl.utils.freeze import freeze_layers_except
@@ -41,6 +42,7 @@ try:
 except ImportError:
    BetterTransformer = None
 configure_logging()
 LOG = get_logger(__name__)
--- a/src/axolotl/utils/config/init.py
+++ b/src/axolotl/utils/config/init.py
@@ -67,7 +67,7 @@ def resolve_dtype(cfg):
        else:
            LOG.debug("bf16 support not detected, disabling for this configuration.")
            cfg.bf16 = False
-            if cfg.fp16 is None and not cfg.float16:
+            if cfg.fp16 is None:
                cfg.fp16 = True
    if cfg.device == "mps":
--- a/src/axolotl/utils/schemas/trl.py
+++ b/src/axolotl/utils/schemas/trl.py
@@ -67,12 +67,6 @@ class TRLConfig(BaseModel):
        default=False,
        json_schema_extra={"description": "Whether to log completions"},
    )
    num_completions_to_print: int | None = Field(
        default=None,
        json_schema_extra={
            "description": "Number of completions to print. If `log_completions` is `True`, this will be the number of completions logged."
        },
    )
    sync_ref_model: bool | None = Field(
        default=False,
        json_schema_extra={
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -597,8 +597,6 @@ def prepare_optim_env(cfg):
        os.environ["ACCELERATE_MIXED_PRECISION"] = "bf16"
    elif cfg.fp16:
        os.environ["ACCELERATE_MIXED_PRECISION"] = "fp16"
    else:
        os.environ["ACCELERATE_MIXED_PRECISION"] = "no"
 def prepare_opinionated_env(cfg):
Author	SHA1	Message	Date
Wing Lian	6affbb1f85	move import of llmcompressor to reset session inside test	2025-04-30 18:10:44 -04:00
Wing Lian	0ed4b4c310	make sure to reset the session after each test	2025-04-30 17:21:53 -04:00
Wing Lian	f4a0f496a0	move decorator to test method instead of class	2025-04-30 17:21:53 -04:00
Wing Lian	82b16bd040	split llmcompressor from vllm checks	2025-04-30 17:21:53 -04:00
Wing Lian	fd5c985038	additional fixes for docker and saving compressed	2025-04-30 17:21:53 -04:00
Rahul Tuli	5246aebc04	Fix: Test Signed-off-by: Rahul Tuli <rtuli@redhat.com>	2025-04-30 17:21:53 -04:00
Rahul Tuli	f4bcc71c86	Apply patch from @winglian Signed-off-by: Rahul Tuli <rtuli@redhat.com>	2025-04-30 17:21:53 -04:00
Rahul Tuli	3a9e172272	Add: line about further optimizations using llmcompressor Signed-off-by: Rahul Tuli <rtuli@redhat.com>	2025-04-30 17:21:53 -04:00
Rahul Tuli	372f0e137b	Address Review Comments: * deleted redundant docs/llm_compressor.qmd * incorporated feedback in integration README.md * added llmcompressor integration to docs/custom_integrations.qmd Signed-off-by: Rahul Tuli <rtuli@redhat.com>	2025-04-30 17:21:52 -04:00
Rahul Tuli	17dffec71d	Add: .qmd file	2025-04-30 17:21:52 -04:00
Rahul Tuli	3a8b637598	Tests, Style, Updates	2025-04-30 17:21:52 -04:00
Rahul Tuli	12cd09e6f5	Rebase and updates!	2025-04-30 17:21:52 -04:00
Rahul Tuli	fe82f62248	Add: `llm_compressor` integration documentation	2025-04-30 17:21:52 -04:00
Rahul Tuli	db31d7ad22	Move: LLMCompressorPlugin into its own submodule	2025-04-30 17:21:52 -04:00
Rahul Tuli	eb7f2aa4b9	Update model config	2025-04-30 17:21:51 -04:00
Rahul Tuli	f80e36ddd2	Use: absolute import	2025-04-30 17:21:51 -04:00
Rahul Tuli	412d2ec6d0	Rename: sft.yaml to sparse-finetuning.yaml	2025-04-30 17:21:51 -04:00
Rahul Tuli	50fc5e6984	Add: llmcompressor installable	2025-04-30 17:21:51 -04:00
Rahul Tuli	83a88b745f	Address review comments from @markurtz	2025-04-30 17:21:51 -04:00
Rahul Tuli	8855bb115f	Apply suggestions from @markurtz Co-authored-by: Mark Kurtz <mark.j.kurtz@gmail.com>	2025-04-30 17:21:51 -04:00
Rahul Tuli	ef9543b371	Update llmcompressor version to latest	2025-04-30 17:21:51 -04:00
Rahul Tuli	25e701e885	Revert: TODO's	2025-04-30 17:21:50 -04:00
Rahul Tuli	891a21e599	Use: warning over warn	2025-04-30 17:21:50 -04:00
Rahul Tuli	8beb2f27ad	pre commit hooks	2025-04-30 17:21:50 -04:00
Rahul Tuli	56ba66b60f	Add: llmcompressor installable	2025-04-30 17:21:50 -04:00
Rahul Tuli	13d4b865d6	Update: review comments!	2025-04-30 17:21:50 -04:00
Rahul Tuli	3da866b2b9	Add: SFTPlugin with llmcompressor	2025-04-30 17:21:50 -04:00