feat: add glm 4.7 flash

2026-02-10 18:57:20 +07:00
parent 2d44432e6c
commit 87e0fd6b52
2 changed files with 103 additions and 0 deletions
--- a/examples/glm4.7-flash/README.md
+++ b/examples/glm4.7-flash/README.md
@@ -0,0 +1,40 @@
 # Finetune Z.ai's GLM-4.7-Flash with Axolotl
 [GLM-4.7-Flash](https://huggingface.co/zai-org/GLM-4.7-Flash) is a 30B-A3B MoE model.
 This guide shows how to fine-tune it with Axolotl.
 ## Getting started
 1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html).
 2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage
 3. Run the finetuning example:
 ```bash
 axolotl train examples/glm4.7-flash/glm4.7-flash-qlora.yaml
 ```
 This config uses about X GiB VRAM.
 Let us know how it goes. Happy finetuning! 🚀
 ### TIPS
 - For inference, the official Z.ai team recommends `top_p: 0.95`, `temperature: 1.0`, and `max_new_tokens: 131072`.
 - You can run a full finetuning by removing the `adapter: qlora` and `load_in_4bit: true` from the config.
 - Read more on how to load your own dataset at [docs](https://docs.axolotl.ai/docs/dataset_loading.html).
 ## Optimization Guides
 Please check the [Optimizations doc](https://docs.axolotl.ai/docs/optimizations.html).
 ## Related Resources
 - [GLM-4.7-Flash on HuggingFace](https://huggingface.co/zai-org/GLM-4.7-Flash)
 - [GLM-4.7 Blog](https://z.ai/blog/glm-4.7)
 - [Axolotl Docs](https://docs.axolotl.ai)
 - [Axolotl Website](https://axolotl.ai)
 - [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl)
 - [Axolotl Discord](https://discord.gg/7m9sfhzaf3)
--- a/examples/glm4.7-flash/glm4.7-flash-qlora.yaml
+++ b/examples/glm4.7-flash/glm4.7-flash-qlora.yaml
@@ -0,0 +1,63 @@
 base_model: zai-org/GLM-4.7-Flash
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 plugins:
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
 load_in_4bit: true
 datasets:
  - path: fozziethebeat/alpaca_messages_2k_test
    type: chat_template
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
 output_dir: ./outputs/lora-out
 adapter: qlora
 lora_model_dir:
 sequence_len: 2048
 sample_packing: true
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
 lora_target_linear: true
 lora_target_modules:
  - gate_proj
  - down_proj
  - up_proj
  - q_proj
  - v_proj
  - k_proj
  - o_proj
 wandb_project: glm-4.7-flash
 wandb_entity:
 wandb_watch:
 wandb_name: qlora
 wandb_log_model:
 gradient_accumulation_steps: 4
 micro_batch_size: 2
 num_epochs: 1
 optimizer: adamw_bnb_8bit
 lr_scheduler: cosine
 learning_rate: 0.0002
 bf16: auto
 tf32: false
 gradient_checkpointing: true
 resume_from_checkpoint:
 logging_steps: 1
 flash_attention: true
 warmup_ratio: 0.1
 evals_per_epoch: 1
 saves_per_epoch: 1
 # save_first_step: true  # uncomment this to validate checkpoint saving works with your config