diff --git a/examples/glm4.7-flash/README.md b/examples/glm4.7-flash/README.md new file mode 100644 index 000000000..acf0b7fb9 --- /dev/null +++ b/examples/glm4.7-flash/README.md @@ -0,0 +1,40 @@ +# Finetune Z.ai's GLM-4.7-Flash with Axolotl + +[GLM-4.7-Flash](https://huggingface.co/zai-org/GLM-4.7-Flash) is a 30B-A3B MoE model. + +This guide shows how to fine-tune it with Axolotl. + +## Getting started + +1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html). + +2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage. + +3. Run the finetuning example: + +```bash +axolotl train examples/glm4.7-flash/glm4.7-flash-qlora.yaml +``` + +This config uses about X GiB VRAM. <!-- TODO: replace X with the measured VRAM usage before merging --> + +Let us know how it goes. Happy finetuning! 🚀 + +### TIPS + +- For inference, the official Z.ai team recommends `top_p: 0.95`, `temperature: 1.0`, and `max_new_tokens: 131072`. +- You can run a full finetuning by removing `adapter: qlora` and `load_in_4bit: true` from the config. +- Read more on how to load your own dataset in the [dataset loading docs](https://docs.axolotl.ai/docs/dataset_loading.html). + +## Optimization Guides + +Please check the [Optimizations doc](https://docs.axolotl.ai/docs/optimizations.html).
+ +## Related Resources + +- [GLM-4.7-Flash on HuggingFace](https://huggingface.co/zai-org/GLM-4.7-Flash) +- [GLM-4.7 Blog](https://z.ai/blog/glm-4.7) +- [Axolotl Docs](https://docs.axolotl.ai) +- [Axolotl Website](https://axolotl.ai) +- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl) +- [Axolotl Discord](https://discord.gg/7m9sfhzaf3) diff --git a/examples/glm4.7-flash/glm4.7-flash-qlora.yaml b/examples/glm4.7-flash/glm4.7-flash-qlora.yaml new file mode 100644 index 000000000..af39acf27 --- /dev/null +++ b/examples/glm4.7-flash/glm4.7-flash-qlora.yaml @@ -0,0 +1,63 @@ +base_model: zai-org/GLM-4.7-Flash + +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + +plugins: + - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin + +load_in_4bit: true + +datasets: + - path: fozziethebeat/alpaca_messages_2k_test + type: chat_template + +dataset_prepared_path: last_run_prepared +val_set_size: 0.1 +output_dir: ./outputs/lora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 2048 +sample_packing: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_target_modules: + - gate_proj + - down_proj + - up_proj + - q_proj + - v_proj + - k_proj + - o_proj + +wandb_project: glm-4.7-flash +wandb_entity: +wandb_watch: +wandb_name: qlora +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 1 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +bf16: auto +tf32: false + +gradient_checkpointing: true +resume_from_checkpoint: +logging_steps: 1 +flash_attention: true + +warmup_ratio: 0.1 +evals_per_epoch: 1 +saves_per_epoch: 1 + +# save_first_step: true # uncomment this to validate checkpoint saving works with your config