# axolotl/examples/swanlab/dpo-swanlab-completions.yml

# SwanLab DPO Training Example with Completion Logging
#
# This example demonstrates DPO (Direct Preference Optimization) training
# with SwanLab integration for experiment tracking and completion table logging.
#
# Features enabled:
# - SwanLab experiment tracking
# - RLHF completion table logging (prompts, chosen/rejected responses, rewards)
# - Lark (Feishu) team notifications (optional)
#
# To run:
#   export SWANLAB_API_KEY=your-api-key
#   accelerate launch -m axolotl.cli.train examples/swanlab/dpo-swanlab-completions.yml

# Model: base checkpoint, tokenizer class, architecture class
base_model: 'meta-llama/Meta-Llama-3-8B-Instruct'
tokenizer_type: AutoTokenizer
model_type: LlamaForCausalLM

special_tokens:
  # Meta-Llama-3 (unlike Llama 3.1) does not ship a <|finetune_right_pad_id|>
  # token. Pad with the existing <|end_of_text|> token so no new token is
  # added to the vocabulary and no embedding resize is needed for this LoRA run.
  pad_token: '<|end_of_text|>'
  # End-of-turn token emitted by the llama3 chat template.
  eos_token: '<|eot_id|>'

# Quantization: 8-bit loading on, 4-bit loading off
load_in_4bit: false
load_in_8bit: true

# LoRA adapter settings
adapter: lora
lora_target_linear: true
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05

# DPO setup: RL method, chat template, and the preference dataset
rl: dpo
chat_template: llama3

datasets:
  - type: chat_template.default
    path: fozziethebeat/alpaca_messages_2k_dpo_test
    # Dataset columns holding the conversation and the chosen/rejected replies
    field_messages: conversation
    field_chosen: chosen
    field_rejected: rejected
    message_property_mappings: {role: role, content: content}
    # Each template role lists the dataset role names that map onto it
    roles:
      system: [system]
      user: [user]
      assistant: [assistant]

# Dataset and Output
# Explicit null instead of a bare `key:`; no prepared-dataset path is set here.
dataset_prepared_path: null
# Fraction of the dataset held out for evaluation.
val_set_size: 0.05
output_dir: ./outputs/dpo-swanlab-out

# Training: epochs, batching, sequence length, packing
num_epochs: 4
micro_batch_size: 2
gradient_accumulation_steps: 4
sequence_len: 4096
sample_packing: false

# Optimizer and learning-rate schedule
optimizer: adamw_bnb_8bit
learning_rate: 0.0002
lr_scheduler: cosine
warmup_ratio: 0.1
weight_decay: 0.0

# Numeric precision
tf32: false
bf16: auto

# Memory / speed options
flash_attention: true
gradient_checkpointing: true

# Logging, checkpoint, and evaluation cadence
logging_steps: 1
saves_per_epoch: 1
evals_per_epoch: 4

# ============================================================================
# SwanLab Integration
# ============================================================================

# Register the SwanLab plugin
plugins: [axolotl.integrations.swanlab.SwanLabPlugin]

# Core SwanLab settings
use_swanlab: true
swanlab_mode: cloud  # one of: cloud, local, offline, disabled
swanlab_project: dpo-training
swanlab_experiment_name: llama-3-dpo-completions-demo
swanlab_description: 'DPO training with completion table logging'

# SwanLab Authentication
# Recommended: Set via environment variable
#   export SWANLAB_API_KEY=your-api-key
# Or set in config (less secure):
# swanlab_api_key: your-api-key

# Optional: Team workspace
# swanlab_workspace: my-research-team

# ============================================================================
# RLHF Completion Table Logging
# ============================================================================
#
# Automatically logs model completions to SwanLab for qualitative analysis:
# - Prompts from your DPO dataset
# - Chosen responses (preferred)
# - Rejected responses (non-preferred)
# - Reward differences
#
# View the table in SwanLab dashboard under "rlhf_completions"

swanlab_log_completions: true
swanlab_completion_max_buffer: 128    # retain the 128 most recent completions in memory
swanlab_completion_log_interval: 100  # write the table every 100 training steps

# Memory Usage Notes:
# - Buffer size 128: ~64 KB (default, recommended)
# - Buffer size 512: ~256 KB (for more historical completions)
# - Buffer size 1024: ~512 KB (maximum for very long training runs)

# Performance Notes:
# - Completion logging overhead: < 0.5% per training step
# - Only logs every N steps to minimize impact
# - Memory-bounded buffer prevents memory leaks

# ============================================================================
# Optional: Lark (Feishu) Team Notifications
# ============================================================================
#
# Get real-time training notifications in your team chat
# Uncomment to enable:

# swanlab_lark_webhook_url: https://open.feishu.cn/open-apis/bot/v2/hook/xxxxxxxxxx
# swanlab_lark_secret: your-webhook-secret  # Recommended for production

# Notifications sent for:
# - Training start
# - Training completion
# - Training errors
# - Metric milestones (if configured)

# ============================================================================
# Optional: Private SwanLab Deployment
# ============================================================================
#
# For enterprise users with private SwanLab deployment:

# swanlab_web_host: https://swanlab.yourcompany.com
# swanlab_api_host: https://api.swanlab.yourcompany.com

# ============================================================================
# Disable WandB if you're migrating from it
# ============================================================================

# wandb_project:
# wandb_entity:
# use_wandb: false