use warmup_ratio as a better default than warmup steps since it's data dependent (#2897) [skip ci]

* use warmup_ratio as a better default than warmup steps since it's data dependent

* replace remainder of warmup_steps
This commit is contained in:
Wing Lian
2025-07-30 06:44:06 -04:00
committed by GitHub
parent 2eb7ff95af
commit 22810c97b7
99 changed files with 100 additions and 100 deletions

View File

@@ -58,7 +58,7 @@ logging_steps: 1
evals_per_epoch: 1
saves_per_epoch: 1
warmup_steps: 10
warmup_ratio: 0.1
weight_decay: 0.0
fsdp:
- full_shard

View File

@@ -51,7 +51,7 @@ resume_from_checkpoint:
logging_steps: 1
flash_attention: true
warmup_steps: 100
warmup_ratio: 0.1
evals_per_epoch: 2
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -36,7 +36,7 @@ resume_from_checkpoint:
logging_steps: 1
flash_attention: true
warmup_steps: 100
warmup_ratio: 0.1
evals_per_epoch: 2
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -67,7 +67,7 @@ resume_from_checkpoint:
logging_steps: 1
flash_attention: true
warmup_steps: 10
warmup_ratio: 0.1
evals_per_epoch: 4
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -58,7 +58,7 @@ resume_from_checkpoint:
logging_steps: 1
flash_attention: true
warmup_steps: 10
warmup_ratio: 0.1
evals_per_epoch: 4
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -79,7 +79,7 @@ resume_from_checkpoint:
logging_steps: 1
flash_attention: true
warmup_steps: 10
warmup_ratio: 0.1
evals_per_epoch: 4
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -55,7 +55,7 @@ resume_from_checkpoint:
logging_steps: 1
flash_attention: true
warmup_steps: 10
warmup_ratio: 0.1
evals_per_epoch: 4
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -59,7 +59,7 @@ flash_attention: true
loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3
warmup_steps: 10
warmup_ratio: 0.1
evals_per_epoch: 4
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -53,7 +53,7 @@ flash_attention: true
loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3
warmup_steps: 10
warmup_ratio: 0.1
evals_per_epoch: 4
saves_per_epoch: 1

View File

@@ -57,7 +57,7 @@ resume_from_checkpoint:
logging_steps: 1
flash_attention: true
warmup_steps: 10
warmup_ratio: 0.1
evals_per_epoch: 4
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -54,7 +54,7 @@ flash_attention: true
loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3
warmup_steps: 10
warmup_ratio: 0.1
evals_per_epoch: 4
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -51,7 +51,7 @@ resume_from_checkpoint:
logging_steps: 1
flash_attention: true
warmup_steps: 10
warmup_ratio: 0.1
evals_per_epoch: 4
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -55,7 +55,7 @@ resume_from_checkpoint:
logging_steps: 1
flash_attention: true
warmup_steps: 20
warmup_ratio: 0.1
evals_per_epoch: 4
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -56,7 +56,7 @@ flash_attention: true
loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3
warmup_steps: 10
warmup_ratio: 0.1
evals_per_epoch: 4
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -41,7 +41,7 @@ gradient_checkpointing_kwargs:
logging_steps: 1
flash_attention: true
warmup_steps: 10
warmup_ratio: 0.1
evals_per_epoch: 4
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -50,7 +50,7 @@ resume_from_checkpoint:
logging_steps: 1
flash_attention: true
warmup_steps: 10
warmup_ratio: 0.1
evals_per_epoch: 4
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -48,7 +48,7 @@ resume_from_checkpoint:
logging_steps: 1
flash_attention: true
warmup_steps: 10
warmup_ratio: 0.1
evals_per_epoch: 4
saves_per_epoch: 1
weight_decay: 0.0

View File

@@ -47,7 +47,7 @@ logging_steps: 1
xformers_attention:
flash_attention: true
warmup_steps: 100
warmup_ratio: 0.1
evals_per_epoch: 2
eval_table_size:
saves_per_epoch: 1