* Add: SFTPlugin with llmcompressor * Update: review comments! * Add: llmcompressor installable * pre commit hooks * Use: warning over warn * Revert: TODOs * Update llmcompressor version to latest * Apply suggestions from @markurtz Co-authored-by: Mark Kurtz <mark.j.kurtz@gmail.com> * Address review comments from @markurtz * Add: llmcompressor installable * Rename: sft.yaml to sparse-finetuning.yaml * Use: absolute import * Update model config * Move: LLMCompressorPlugin into its own submodule * Add: `llm_compressor` integration documentation * Rebase and updates! * Tests, Style, Updates * Add: .qmd file * Address Review Comments: * deleted redundant docs/llm_compressor.qmd * incorporated feedback in integration README.md * added llmcompressor integration to docs/custom_integrations.qmd Signed-off-by: Rahul Tuli <rtuli@redhat.com> * Add: line about further optimizations using llmcompressor Signed-off-by: Rahul Tuli <rtuli@redhat.com> * Apply patch from @winglian Signed-off-by: Rahul Tuli <rtuli@redhat.com> * Fix: Test Signed-off-by: Rahul Tuli <rtuli@redhat.com> * additional fixes for docker and saving compressed * split llmcompressor from vllm checks * Reset session between tests Signed-off-by: Rahul Tuli <rtuli@redhat.com> * move decorator to test method instead of class * make sure to reset the session after each test * move import of llmcompressor to reset session inside test --------- Signed-off-by: Rahul Tuli <rtuli@redhat.com> Co-authored-by: Mark Kurtz <mark.j.kurtz@gmail.com> Co-authored-by: Wing Lian <wing@axolotl.ai>
41 lines
1.3 KiB
Python
41 lines
1.3 KiB
Python
"""Utilities for llmcompressor integration with axolotl."""
|
|
|
|
from typing import Union
|
|
|
|
from llmcompressor.transformers.sparsification.compressed_tensors_utils import (
|
|
modify_save_pretrained,
|
|
)
|
|
from transformers import PreTrainedModel, Trainer
|
|
|
|
|
|
def save_compressed_model(
    model: PreTrainedModel,
    output_dir: Union[str, bytes],
    trainer: Trainer,
    safe_serialization: bool = False,
    save_compressed: bool = False,
) -> None:
    """
    Save ``model`` to ``output_dir``, writing from the main process only.

    Every process first calls ``trainer.accelerator.wait_for_everyone()``.
    Non-main processes then return without saving. The main process runs
    ``modify_save_pretrained`` on the model and calls ``model.save_pretrained``.

    Args:
        model (PreTrainedModel): The model to save.
        output_dir (str or bytes): Destination passed to ``save_pretrained``.
        trainer (Trainer): Trainer whose ``accelerator`` is used to wait for
            all processes and to identify the main process.
        safe_serialization (bool): Forwarded unchanged to ``save_pretrained``.
        save_compressed (bool): Forwarded unchanged to ``save_pretrained``.
            Its negation is passed as ``skip_sparsity_compression_stats``.
    """
    accelerator = trainer.accelerator
    accelerator.wait_for_everyone()

    # Saving is restricted to the main process; all others fall through.
    if accelerator.is_main_process:
        # NOTE(review): modify_save_pretrained is presumably what makes
        # save_pretrained accept the compression keywords below — confirm
        # against the llmcompressor documentation.
        modify_save_pretrained(model)

        save_options = {
            "safe_serialization": safe_serialization,
            "save_compressed": save_compressed,
            "skip_sparsity_compression_stats": not save_compressed,
        }
        model.save_pretrained(output_dir, **save_options)
|