From 17dffec71d2c8e8fc22af7cacf825be6b2fbfee3 Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Mon, 21 Apr 2025 20:40:49 -0400 Subject: [PATCH] Add: .qmd file --- docs/llm_compressor.qmd | 98 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 docs/llm_compressor.qmd diff --git a/docs/llm_compressor.qmd b/docs/llm_compressor.qmd new file mode 100644 index 000000000..60b685973 --- /dev/null +++ b/docs/llm_compressor.qmd @@ -0,0 +1,98 @@ +--- +title: "LLMCompressor Sparse Fine-tuning" +format: + html: + toc: true + toc-depth: 3 + number-sections: true +execute: + enabled: false +--- + +# LLMCompressor Integration + +Fine-tune sparsified models in Axolotl using [LLMCompressor](https://github.com/vllm-project/llm-compressor). + +This integration enables fine-tuning of models **already sparsified** using LLMCompressor. +It hooks into Axolotl’s training pipeline using the plugin system and maintains sparsity throughout the fine-tuning process. + +--- + +## Requirements + +- Install Axolotl with `llmcompressor` extras: + +```bash +pip install "axolotl[llmcompressor]" +``` + +- Requires `llmcompressor >= 0.5.1` + +This will install all required dependencies for sparse model fine-tuning. + +--- + +## Usage + +To enable sparse fine-tuning with this integration, configure your Axolotl YAML like so: + +```yaml +plugins: + - axolotl.integrations.llm_compressor.LLMCompressorPlugin + +llmcompressor: + recipe: + finetuning_stage: + finetuning_modifiers: + ConstantPruningModifier: + targets: [ + 're:.*q_proj.weight', + 're:.*k_proj.weight', + 're:.*v_proj.weight', + 're:.*o_proj.weight', + 're:.*gate_proj.weight', + 're:.*up_proj.weight', + 're:.*down_proj.weight', + ] + start: 0 +# ... (other Axolotl training arguments) +``` + +::: {.callout-note} +This plugin **does not prune or sparsify the model**. It is only meant for **fine-tuning models that are already sparsified**.
+::: + +--- + +## Pre-Sparsified Checkpoints + +You can use: + +- Your own LLMCompressor-sparsified model +- Or one from [Neural Magic's Hugging Face page](https://huggingface.co/neuralmagic) + +Refer to the [LLMCompressor README](https://github.com/vllm-project/llm-compressor/blob/main/README.md) to learn how to sparsify models or write custom recipes. + +--- + +## Example Config + +A full working example is provided at: + +```bash +examples/llama-3/sparse-finetuning.yaml +``` + +Run fine-tuning using: + +```bash +axolotl train examples/llama-3/sparse-finetuning.yaml +``` + +--- + +## Learn More + +Explore LLMCompressor capabilities, supported modifiers, and detailed examples: + +👉 [LLMCompressor GitHub](https://github.com/vllm-project/llm-compressor) \ No newline at end of file