feat: add readme and better examples

2025-08-13 13:57:15 +07:00
parent 4b16f363bc
commit a28eb600e9
3 changed files with 69 additions and 16 deletions
--- a/examples/glm45/README.md
+++ b/examples/glm45/README.md
@@ -0,0 +1,48 @@
 # Finetune GLM4.5 with Axolotl
 [UNSTABLE]
 ```bash
 # LoRA SFT (4xH200 @ 84GB/GPU)
 axolotl train examples/glm45/glm4.5-lora-fsdp2.yaml
 # FFT SFT (4xH200)
 # Checkpointing error on backward pass
 # Without checkpointing => OOM
 axolotl train examples/glm45/glm4.5-fft-fsdp2.yaml
 ```
 ## Dataset
 In addition to the normal OpenAI Messages format, GLM4.5 supports an extra parameter for thinking in the assistant message.
 ```json
 {
    "role": "assistant",
    "reasoning_content": "...",  // or put <think>...</think> in `content`
    "content": "...",
 }
 ```
 Note:
 - The role name for tools in this template is `tool`.
 - You will see the following Axolotl WARNING. This is expected, as the template does not use EOS.
 ```bash
 EOS token '<|endoftext|>' not found in chat_template. Please check if your template/EOS token is correct.
 ```
 - Make sure you set the extra attributes below if needed:
 ```yaml
 datasets:
  - path: ...
    type: chat_template
    message_property_mappings:
      role: role
      content: content
    #   tool_calls: tool_calls  # uncomment if using tools
    #   reasoning_content: reasoning_content  # uncomment if your data has reasoning
 # Uncomment if training on tool role (you would rarely if ever need this)
 # eot_tokens:
 #   - <|observation|>
 ```
--- a/examples/glm45/glm4.5-fft-fsdp2-offload.yaml
+++ b/examples/glm45/glm4.5-fft-fsdp2-offload.yaml
@@ -50,12 +50,10 @@ special_tokens:
 fsdp_version: 2
 fsdp_config:
-  offload_params: true
+  offload_params: false
  cpu_ram_efficient_loading: true
  auto_wrap_policy: TRANSFORMER_BASED_WRAP
  transformer_layer_cls_to_wrap: Glm4MoeDecoderLayer
-  state_dict_type: FULL_STATE_DICT
+  state_dict_type: SHARDED_STATE_DICT
  reshard_after_forward: true
  activation_checkpointing: true
 # save_first_step: true  # uncomment this to validate checkpoint saving works with your config
--- a/examples/glm45/glm4.5-lora-fsdp2.yaml
+++ b/examples/glm45/glm4.5-lora-fsdp2.yaml
@@ -5,7 +5,7 @@ base_model: zai-org/GLM-4.5-Air
 plugins:
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
-load_in_4bit: true
+experimental_skip_move_to_device: true  # prevent OOM by NOT putting model to GPU before sharding
 datasets:
  - path: winglian/pirate-ultrachat-10k
@@ -14,14 +14,9 @@ dataset_prepared_path: last_run_prepared
 val_set_size: 0
 output_dir: ./outputs/qlora-out
-adapter: qlora
+adapter: lora
 lora_model_dir:
 sequence_len: 2048
 sample_packing: true
 eval_sample_packing: true
 lora_r: 16
 lora_alpha: 32
 lora_dropout: 0.05
@@ -34,23 +29,27 @@ lora_target_modules:
  - k_proj
  - o_proj
 sequence_len: 2048
 sample_packing: true
 eval_sample_packing: true
 wandb_project:
 wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-gradient_accumulation_steps: 2
+gradient_accumulation_steps: 1
-micro_batch_size: 2
+micro_batch_size: 1
 num_epochs: 1
-optimizer: adamw_torch_8bit
+optimizer: adamw_torch_4bit
 lr_scheduler: cosine
 learning_rate: 0.0002
 bf16: auto
 tf32: false
-gradient_checkpointing: true
+# gradient_checkpointing: true
 resume_from_checkpoint:
 logging_steps: 1
 flash_attention: true
@@ -64,4 +63,12 @@ saves_per_epoch: 1
 weight_decay: 0.0
 special_tokens:
-# save_first_step: true  # uncomment this to validate checkpoint saving works with your config
+fsdp_version: 2
 fsdp_config:
  offload_params: false
  cpu_ram_efficient_loading: true
  auto_wrap_policy: TRANSFORMER_BASED_WRAP
  transformer_layer_cls_to_wrap: Glm4MoeDecoderLayer
  state_dict_type: SHARDED_STATE_DICT
  reshard_after_forward: true
  # activation_checkpointing: false