update config.qmd and rename option

This commit is contained in:
Dan Saunders
2025-03-13 23:13:37 +00:00
parent 345a9dd831
commit 919b88f11b
11 changed files with 58 additions and 54 deletions

View File

@@ -620,6 +620,11 @@ ddp_timeout:
ddp_bucket_cap_mb:
ddp_broadcast_buffers:
# Sequence parallelism
# Set to a divisor of the number of available GPUs to split sequences into equal-sized chunks.
# Use in long-context training to prevent OOM when sequences cannot fit into a single GPU's VRAM.
sequence_parallel_degree:
# Path to torchdistx, used for the 'adamw_anyprecision' optimizer
torchdistx_path: