Files
axolotl/examples/streaming/streaming-sft.yml
2025-08-25 14:22:32 +00:00

53 lines
1.4 KiB
YAML

# Example configuration for streaming SFT training.
# Streaming lets you train on datasets larger than memory by reading them
# incrementally from the HuggingFace Hub instead of downloading them up front.

base_model: HuggingFaceTB/SmolLM2-135M

# Enable streaming mode for datasets
streaming: true

# When using streaming, max_steps is required
max_steps: 3  # Just test a few steps

# Training datasets - these will be streamed.
# Alternative form using the regular `datasets` key (kept for reference):
# datasets:
#   - path: tatsu-lab/alpaca
#     type: alpaca
#     split: train

pretraining_dataset:
  - path: tatsu-lab/alpaca
    type: alpaca
    split: train

# Dataset configuration
sequence_len: 2048
sample_packing: true  # Enable multipack batching
pretrain_multipack_attn: true  # Enable multipack attention masking
pretrain_multipack_buffer_size: 1000  # Buffer size for packing (smaller for streaming SFT)

special_tokens:
  # Quoted because the token consists of YAML indicator characters (| and >).
  pad_token: '<|endoftext|>'

# Training hyperparameters
gradient_accumulation_steps: 4
micro_batch_size: 1  # Always 1 for multipack - sequences are packed into single samples
num_epochs: 1  # With streaming, typically use max_steps instead
optimizer: adamw_torch
lr_scheduler: cosine
# Written with a decimal point so YAML 1.1 loaders also resolve it as a float.
learning_rate: 2.0e-5

# Enable efficient training
bf16: auto
tf32: false
gradient_checkpointing: true
flash_attention: true  # Enable flash attention with multipack

# Logging and checkpointing
# NOTE(review): all three intervals below are larger than max_steps: 3 above;
# lower them or raise max_steps for a real run - confirm intended.
logging_steps: 10
eval_steps: 100
save_steps: 200
output_dir: ./outputs/streaming-model

# Warmup
# NOTE(review): warmup_steps exceeds max_steps: 3, so the smoke-test run
# presumably ends while still in warmup - confirm intended.
warmup_steps: 100