@@ -512,49 +512,50 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
< span id = "cb1-4" > < a href = "#cb1-4" aria-hidden = "true" tabindex = "-1" > < / a > lr_quadratic_warmup< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb1-5" > < a href = "#cb1-5" aria-hidden = "true" tabindex = "-1" > < / a > pretraining< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb1-6" > < a href = "#cb1-6" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb1-7" > < a href = "#cb1-7" aria-hidden = "true" tabindex = "-1" > < / a > multipack_real_batches < span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb1-8" > < a href = "#cb1-8" aria-hidden = "true" tabindex = "-1" > < / a > eval_sample_packing< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-9" > < a href = "#cb1-9" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_efficiency < span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb1-10" > < a href = "#cb1-10" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_bin_size< span class = "op" > =< / span > < span class = "dv" > 200< / span > ,< / span >
< span id = "cb1-11" > < a href = "#cb1-11" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_group_size< span class = "op" > =< / span > < span class = "dv" > 100000< / span > ,< / span >
< span id = "cb1-12" > < a href = "#cb1-12" aria-hidden = "true" tabindex = "-1" > < / a > max_seq_length < span class = "op" > =< / span > < span class = "dv" > 2048 < / span > ,< / span >
< span id = "cb1-13" > < a href = "#cb1-13" aria-hidden = "true" tabindex = "-1" > < / a > relora_steps < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb1-14" > < a href = "#cb1-14" aria-hidden = "true" tabindex = "-1" > < / a > relora_warmup_steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-15" > < a href = "#cb1-15" aria-hidden = "true" tabindex = "-1" > < / a > relora_anneal_steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-16" > < a href = "#cb1-16" aria-hidden = "true" tabindex = "-1" > < / a > relora_prune_ratio < span class = "op" > =< / span > < span class = "fl " > 0.9 < / span > ,< / span >
< span id = "cb1-17" > < a href = "#cb1-17" aria-hidden = "true" tabindex = "-1" > < / a > bench_split < span class = "op" > =< / span > < span class = "st " > 'eval' < / span > ,< / span >
< span id = "cb1-18" > < a href = "#cb1-18" aria-hidden = "true" tabindex = "-1" > < / a > bench_dataset< span class = "op" > =< / span > < span class = "st" > 'pharaouk/dharma-1/dharma_1_mini.json'< / span > ,< / span >
< span id = "cb1-19" > < a href = "#cb1-19" aria-hidden = "true" tabindex = "-1" > < / a > do_bench_eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb1-20" > < a href = "#cb1-20" aria-hidden = "true" tabindex = "-1" > < / a > do_causal_lm_eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb1-21" > < a href = "#cb1-21" aria-hidden = "true" tabindex = "-1" > < / a > max_bench_samples< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-22" > < a href = "#cb1-22" aria-hidden = "true" tabindex = "-1" > < / a > bench_source_max_len< span class = "op" > =< / span > < span class = "dv" > 2048< / span > ,< / span >
< span id = "cb1-23" > < a href = "#cb1-23" aria-hidden = "true" tabindex = "-1" > < / a > dataloader_prefetch_factor < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb1-24" > < a href = "#cb1-24" aria-hidden = "true" tabindex = "-1" > < / a > cosine_min_lr_ratio < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-25" > < a href = "#cb1-25" aria-hidden = "true" tabindex = "-1" > < / a > cosine_constant_lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-26" > < a href = "#cb1-26" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-27" > < a href = "#cb1-27" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_embedding < span class = "op" > =< / span > < span class = "fl " > 1e-06 < / span > ,< / span >
< span id = "cb1-28" > < a href = "#cb1-28" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr_scale < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb1-29" > < a href = "#cb1-29" aria-hidden = "true" tabindex = "-1" > < / a > lr_groups < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-30" > < a href = "#cb1-30" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-31" > < a href = "#cb1-31" aria-hidden = "true" tabindex = "-1" > < / a > qlora< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb1-32" > < a href = "#cb1-32" aria-hidden = "true" tabindex = "-1" > < / a > orpo_alpha< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-33" > < a href = "#cb1-33" aria-hidden = "true" tabindex = "-1" > < / a > lisa_n_layers < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-34" > < a href = "#cb1-34" aria-hidden = "true" tabindex = "-1" > < / a > lisa_step_interval < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-35" > < a href = "#cb1-35" aria-hidden = "true" tabindex = "-1" > < / a > lisa_layers_attribute < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-36" > < a href = "#cb1-36" aria-hidden = "true" tabindex = "-1" > < / a > curriculum_sampling < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-37" > < a href = "#cb1-37" aria-hidden = "true" tabindex = "-1" > < / a > alternate_optimizer < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-38" > < a href = "#cb1-38" aria-hidden = "true" tabindex = "-1" > < / a > alternate_lr_scheduler_type < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-39" > < a href = "#cb1-39" aria-hidden = "true" tabindex = "-1" > < / a > chat_template< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-40" > < a href = "#cb1-40" aria-hidden = "true" tabindex = "-1" > < / a > kd_ce_alpha < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-41" > < a href = "#cb1-41" aria-hidden = "true" tabindex = "-1" > < / a > kd_alpha< span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb1-42" > < a href = "#cb1-42" aria-hidden = "true" tabindex = "-1" > < / a > kd_temperature < span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb1-43" > < a href = "#cb1-43" aria-hidden = "true" tabindex = "-1" > < / a > kd_zscore_base_temp < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb1-44" > < a href = "#cb1-44" aria-hidden = "true" tabindex = "-1" > < / a > kd_top_k_before_softmax < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-45" > < a href = "#cb1-45" aria-hidden = "true" tabindex = "-1" > < / a > sequence_parallel_degree< span class = "op" > =< / span > < span class = "dv" > 1< / span > ,< / span >
< span id = "cb1-46" > < a href = "#cb1-46" aria-hidden = "true" tabindex = "-1" > < / a > image_size< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-47" > < a href = "#cb1-47" aria-hidden = "true" tabindex = "-1" > < / a > image_resize_algorithm< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-48" > < a href = "#cb1-48" aria-hidden = "true" tabindex = "-1" > < / a > simpo_gamma< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-49" > < a href = "#cb1-49" aria-hidden = "true" tabindex = "-1" > < / a > )< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button " > < i class = "bi " > < / i > < / button > < / pre > < / div >
< span id = "cb1-7" > < a href = "#cb1-7" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_sequentially < span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb1-8" > < a href = "#cb1-8" aria-hidden = "true" tabindex = "-1" > < / a > multipack_real_batches< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb1-9" > < a href = "#cb1-9" aria-hidden = "true" tabindex = "-1" > < / a > eval_sample_packing< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-10" > < a href = "#cb1-10" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_efficiency< span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb1-11" > < a href = "#cb1-11" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_bin_size< span class = "op" > =< / span > < span class = "dv" > 200< / span > ,< / span >
< span id = "cb1-12" > < a href = "#cb1-12" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_group_size < span class = "op" > =< / span > < span class = "dv" > 100000 < / span > ,< / span >
< span id = "cb1-13" > < a href = "#cb1-13" aria-hidden = "true" tabindex = "-1" > < / a > max_seq_length< span class = "op" > =< / span > < span class = "dv" > 2048< / span > ,< / span >
< span id = "cb1-14" > < a href = "#cb1-14" aria-hidden = "true" tabindex = "-1" > < / a > relora_steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-15" > < a href = "#cb1-15" aria-hidden = "true" tabindex = "-1" > < / a > relora_warmup_steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-16" > < a href = "#cb1-16" aria-hidden = "true" tabindex = "-1" > < / a > relora_anneal_steps < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb1-17" > < a href = "#cb1-17" aria-hidden = "true" tabindex = "-1" > < / a > relora_prune_ratio < span class = "op" > =< / span > < span class = "fl " > 0.9 < / span > ,< / span >
< span id = "cb1-18" > < a href = "#cb1-18" aria-hidden = "true" tabindex = "-1" > < / a > bench_split< span class = "op" > =< / span > < span class = "st" > 'eval'< / span > ,< / span >
< span id = "cb1-19" > < a href = "#cb1-19" aria-hidden = "true" tabindex = "-1" > < / a > bench_dataset < span class = "op" > =< / span > < span class = "st " > 'pharaouk/dharma-1/dharma_1_mini.json' < / span > ,< / span >
< span id = "cb1-20" > < a href = "#cb1-20" aria-hidden = "true" tabindex = "-1" > < / a > do_bench_eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb1-21" > < a href = "#cb1-21" aria-hidden = "true" tabindex = "-1" > < / a > do_causal_lm_eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb1-22" > < a href = "#cb1-22" aria-hidden = "true" tabindex = "-1" > < / a > max_bench_samples< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-23" > < a href = "#cb1-23" aria-hidden = "true" tabindex = "-1" > < / a > bench_source_max_len< span class = "op" > =< / span > < span class = "dv" > 2048< / span > ,< / span >
< span id = "cb1-24" > < a href = "#cb1-24" aria-hidden = "true" tabindex = "-1" > < / a > dataloader_prefetch_factor < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-25" > < a href = "#cb1-25" aria-hidden = "true" tabindex = "-1" > < / a > cosine_min_lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-26" > < a href = "#cb1-26" aria-hidden = "true" tabindex = "-1" > < / a > cosine_constant_lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-27" > < a href = "#cb1-27" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_ratio < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb1-28" > < a href = "#cb1-28" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_embedding< span class = "op" > =< / span > < span class = "fl" > 1e-06< / span > ,< / span >
< span id = "cb1-29" > < a href = "#cb1-29" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr_scale < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-30" > < a href = "#cb1-30" aria-hidden = "true" tabindex = "-1" > < / a > lr_groups < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-31" > < a href = "#cb1-31" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-32" > < a href = "#cb1-32" aria-hidden = "true" tabindex = "-1" > < / a > qlora< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb1-33" > < a href = "#cb1-33" aria-hidden = "true" tabindex = "-1" > < / a > orpo_alpha < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-34" > < a href = "#cb1-34" aria-hidden = "true" tabindex = "-1" > < / a > lisa_n_layers < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-35" > < a href = "#cb1-35" aria-hidden = "true" tabindex = "-1" > < / a > lisa_step_interval < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-36" > < a href = "#cb1-36" aria-hidden = "true" tabindex = "-1" > < / a > lisa_layers_attribute < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-37" > < a href = "#cb1-37" aria-hidden = "true" tabindex = "-1" > < / a > curriculum_sampling < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-38" > < a href = "#cb1-38" aria-hidden = "true" tabindex = "-1" > < / a > alternate_optimizer < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-39" > < a href = "#cb1-39" aria-hidden = "true" tabindex = "-1" > < / a > alternate_lr_scheduler_type< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-40" > < a href = "#cb1-40" aria-hidden = "true" tabindex = "-1" > < / a > chat_template < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-41" > < a href = "#cb1-41" aria-hidden = "true" tabindex = "-1" > < / a > kd_ce_alpha< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-42" > < a href = "#cb1-42" aria-hidden = "true" tabindex = "-1" > < / a > kd_alpha < span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb1-43" > < a href = "#cb1-43" aria-hidden = "true" tabindex = "-1" > < / a > kd_temperature < span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb1-44" > < a href = "#cb1-44" aria-hidden = "true" tabindex = "-1" > < / a > kd_zscore_base_temp < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-45" > < a href = "#cb1-45" aria-hidden = "true" tabindex = "-1" > < / a > kd_top_k_before_softmax < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb1-46" > < a href = "#cb1-46" aria-hidden = "true" tabindex = "-1" > < / a > sequence_parallel_degree< span class = "op" > =< / span > < span class = "dv" > 1< / span > ,< / span >
< span id = "cb1-47" > < a href = "#cb1-47" aria-hidden = "true" tabindex = "-1" > < / a > image_size< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-48" > < a href = "#cb1-48" aria-hidden = "true" tabindex = "-1" > < / a > image_resize_algorithm < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb1-49" > < a href = "#cb1-49" aria-hidden = "true" tabindex = "-1" > < / a > simpo_gamma< span class = "op " > = < / span > < span class = "va " > None < / span > , < / span >
< span id = "cb1-50" > < a href = "#cb1-50" aria-hidden = "true" tabindex = "-1" > < / a > )< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< p > CPO config for CPO training< / p >
< / section >
< section id = "axolotl.core.training_args.AxolotlKTOConfig" class = "level3" >
@@ -565,48 +566,49 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
< span id = "cb2-4" > < a href = "#cb2-4" aria-hidden = "true" tabindex = "-1" > < / a > lr_quadratic_warmup< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb2-5" > < a href = "#cb2-5" aria-hidden = "true" tabindex = "-1" > < / a > pretraining< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb2-6" > < a href = "#cb2-6" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb2-7" > < a href = "#cb2-7" aria-hidden = "true" tabindex = "-1" > < / a > multipack_real_batches < span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb2-8" > < a href = "#cb2-8" aria-hidden = "true" tabindex = "-1" > < / a > eval_sample_packing< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-9" > < a href = "#cb2-9" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_efficiency < span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb2-10" > < a href = "#cb2-10" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_bin_size< span class = "op" > =< / span > < span class = "dv" > 200< / span > ,< / span >
< span id = "cb2-11" > < a href = "#cb2-11" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_group_size< span class = "op" > =< / span > < span class = "dv" > 100000< / span > ,< / span >
< span id = "cb2-12" > < a href = "#cb2-12" aria-hidden = "true" tabindex = "-1" > < / a > max_seq_length < span class = "op" > =< / span > < span class = "dv" > 2048 < / span > ,< / span >
< span id = "cb2-13" > < a href = "#cb2-13" aria-hidden = "true" tabindex = "-1" > < / a > relora_steps < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb2-14" > < a href = "#cb2-14" aria-hidden = "true" tabindex = "-1" > < / a > relora_warmup_steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-15" > < a href = "#cb2-15" aria-hidden = "true" tabindex = "-1" > < / a > relora_anneal_steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-16" > < a href = "#cb2-16" aria-hidden = "true" tabindex = "-1" > < / a > relora_prune_ratio < span class = "op" > =< / span > < span class = "fl " > 0.9 < / span > ,< / span >
< span id = "cb2-17" > < a href = "#cb2-17" aria-hidden = "true" tabindex = "-1" > < / a > bench_split < span class = "op" > =< / span > < span class = "st " > 'eval' < / span > ,< / span >
< span id = "cb2-18" > < a href = "#cb2-18" aria-hidden = "true" tabindex = "-1" > < / a > bench_dataset< span class = "op" > =< / span > < span class = "st" > 'pharaouk/dharma-1/dharma_1_mini.json'< / span > ,< / span >
< span id = "cb2-19" > < a href = "#cb2-19" aria-hidden = "true" tabindex = "-1" > < / a > do_bench_eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb2-20" > < a href = "#cb2-20" aria-hidden = "true" tabindex = "-1" > < / a > do_causal_lm_eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb2-21" > < a href = "#cb2-21" aria-hidden = "true" tabindex = "-1" > < / a > max_bench_samples< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-22" > < a href = "#cb2-22" aria-hidden = "true" tabindex = "-1" > < / a > bench_source_max_len< span class = "op" > =< / span > < span class = "dv" > 2048< / span > ,< / span >
< span id = "cb2-23" > < a href = "#cb2-23" aria-hidden = "true" tabindex = "-1" > < / a > dataloader_prefetch_factor < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb2-24" > < a href = "#cb2-24" aria-hidden = "true" tabindex = "-1" > < / a > cosine_min_lr_ratio < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-25" > < a href = "#cb2-25" aria-hidden = "true" tabindex = "-1" > < / a > cosine_constant_lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-26" > < a href = "#cb2-26" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-27" > < a href = "#cb2-27" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_embedding < span class = "op" > =< / span > < span class = "fl " > 1e-06 < / span > ,< / span >
< span id = "cb2-28" > < a href = "#cb2-28" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr_scale < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb2-29" > < a href = "#cb2-29" aria-hidden = "true" tabindex = "-1" > < / a > lr_groups < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-30" > < a href = "#cb2-30" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-31" > < a href = "#cb2-31" aria-hidden = "true" tabindex = "-1" > < / a > qlora< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb2-32" > < a href = "#cb2-32" aria-hidden = "true" tabindex = "-1" > < / a > orpo_alpha< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-33" > < a href = "#cb2-33" aria-hidden = "true" tabindex = "-1" > < / a > lisa_n_layers < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-34" > < a href = "#cb2-34" aria-hidden = "true" tabindex = "-1" > < / a > lisa_step_interval < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-35" > < a href = "#cb2-35" aria-hidden = "true" tabindex = "-1" > < / a > lisa_layers_attribute < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-36" > < a href = "#cb2-36" aria-hidden = "true" tabindex = "-1" > < / a > curriculum_sampling < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-37" > < a href = "#cb2-37" aria-hidden = "true" tabindex = "-1" > < / a > alternate_optimizer < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-38" > < a href = "#cb2-38" aria-hidden = "true" tabindex = "-1" > < / a > alternate_lr_scheduler_type < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-39" > < a href = "#cb2-39" aria-hidden = "true" tabindex = "-1" > < / a > chat_template< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-40" > < a href = "#cb2-40" aria-hidden = "true" tabindex = "-1" > < / a > kd_ce_alpha < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-41" > < a href = "#cb2-41" aria-hidden = "true" tabindex = "-1" > < / a > kd_alpha< span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb2-42" > < a href = "#cb2-42" aria-hidden = "true" tabindex = "-1" > < / a > kd_temperature < span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb2-43" > < a href = "#cb2-43" aria-hidden = "true" tabindex = "-1" > < / a > kd_zscore_base_temp < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb2-44" > < a href = "#cb2-44" aria-hidden = "true" tabindex = "-1" > < / a > kd_top_k_before_softmax < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-45" > < a href = "#cb2-45" aria-hidden = "true" tabindex = "-1" > < / a > sequence_parallel_degree< span class = "op" > =< / span > < span class = "dv" > 1< / span > ,< / span >
< span id = "cb2-46" > < a href = "#cb2-46" aria-hidden = "true" tabindex = "-1" > < / a > image_size< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-47" > < a href = "#cb2-47" aria-hidden = "true" tabindex = "-1" > < / a > image_resize_algorithm< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-48" > < a href = "#cb2-48" aria-hidden = "true" tabindex = "-1" > < / a > )< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button " > < i class = "bi " > < / i > < / button > < / pre > < / div >
< span id = "cb2-7" > < a href = "#cb2-7" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_sequentially < span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb2-8" > < a href = "#cb2-8" aria-hidden = "true" tabindex = "-1" > < / a > multipack_real_batches< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb2-9" > < a href = "#cb2-9" aria-hidden = "true" tabindex = "-1" > < / a > eval_sample_packing< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-10" > < a href = "#cb2-10" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_efficiency< span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb2-11" > < a href = "#cb2-11" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_bin_size< span class = "op" > =< / span > < span class = "dv" > 200< / span > ,< / span >
< span id = "cb2-12" > < a href = "#cb2-12" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_group_size < span class = "op" > =< / span > < span class = "dv" > 100000 < / span > ,< / span >
< span id = "cb2-13" > < a href = "#cb2-13" aria-hidden = "true" tabindex = "-1" > < / a > max_seq_length< span class = "op" > =< / span > < span class = "dv" > 2048< / span > ,< / span >
< span id = "cb2-14" > < a href = "#cb2-14" aria-hidden = "true" tabindex = "-1" > < / a > relora_steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-15" > < a href = "#cb2-15" aria-hidden = "true" tabindex = "-1" > < / a > relora_warmup_steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-16" > < a href = "#cb2-16" aria-hidden = "true" tabindex = "-1" > < / a > relora_anneal_steps < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb2-17" > < a href = "#cb2-17" aria-hidden = "true" tabindex = "-1" > < / a > relora_prune_ratio < span class = "op" > =< / span > < span class = "fl " > 0.9 < / span > ,< / span >
< span id = "cb2-18" > < a href = "#cb2-18" aria-hidden = "true" tabindex = "-1" > < / a > bench_split< span class = "op" > =< / span > < span class = "st" > 'eval'< / span > ,< / span >
< span id = "cb2-19" > < a href = "#cb2-19" aria-hidden = "true" tabindex = "-1" > < / a > bench_dataset < span class = "op" > =< / span > < span class = "st " > 'pharaouk/dharma-1/dharma_1_mini.json' < / span > ,< / span >
< span id = "cb2-20" > < a href = "#cb2-20" aria-hidden = "true" tabindex = "-1" > < / a > do_bench_eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb2-21" > < a href = "#cb2-21" aria-hidden = "true" tabindex = "-1" > < / a > do_causal_lm_eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb2-22" > < a href = "#cb2-22" aria-hidden = "true" tabindex = "-1" > < / a > max_bench_samples< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-23" > < a href = "#cb2-23" aria-hidden = "true" tabindex = "-1" > < / a > bench_source_max_len< span class = "op" > =< / span > < span class = "dv" > 2048< / span > ,< / span >
< span id = "cb2-24" > < a href = "#cb2-24" aria-hidden = "true" tabindex = "-1" > < / a > dataloader_prefetch_factor < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-25" > < a href = "#cb2-25" aria-hidden = "true" tabindex = "-1" > < / a > cosine_min_lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-26" > < a href = "#cb2-26" aria-hidden = "true" tabindex = "-1" > < / a > cosine_constant_lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-27" > < a href = "#cb2-27" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_ratio < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb2-28" > < a href = "#cb2-28" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_embedding< span class = "op" > =< / span > < span class = "fl" > 1e-06< / span > ,< / span >
< span id = "cb2-29" > < a href = "#cb2-29" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr_scale < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-30" > < a href = "#cb2-30" aria-hidden = "true" tabindex = "-1" > < / a > lr_groups < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-31" > < a href = "#cb2-31" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-32" > < a href = "#cb2-32" aria-hidden = "true" tabindex = "-1" > < / a > qlora< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb2-33" > < a href = "#cb2-33" aria-hidden = "true" tabindex = "-1" > < / a > orpo_alpha < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-34" > < a href = "#cb2-34" aria-hidden = "true" tabindex = "-1" > < / a > lisa_n_layers < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-35" > < a href = "#cb2-35" aria-hidden = "true" tabindex = "-1" > < / a > lisa_step_interval < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-36" > < a href = "#cb2-36" aria-hidden = "true" tabindex = "-1" > < / a > lisa_layers_attribute < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-37" > < a href = "#cb2-37" aria-hidden = "true" tabindex = "-1" > < / a > curriculum_sampling < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-38" > < a href = "#cb2-38" aria-hidden = "true" tabindex = "-1" > < / a > alternate_optimizer < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-39" > < a href = "#cb2-39" aria-hidden = "true" tabindex = "-1" > < / a > alternate_lr_scheduler_type< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-40" > < a href = "#cb2-40" aria-hidden = "true" tabindex = "-1" > < / a > chat_template < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-41" > < a href = "#cb2-41" aria-hidden = "true" tabindex = "-1" > < / a > kd_ce_alpha< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-42" > < a href = "#cb2-42" aria-hidden = "true" tabindex = "-1" > < / a > kd_alpha < span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb2-43" > < a href = "#cb2-43" aria-hidden = "true" tabindex = "-1" > < / a > kd_temperature < span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb2-44" > < a href = "#cb2-44" aria-hidden = "true" tabindex = "-1" > < / a > kd_zscore_base_temp < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-45" > < a href = "#cb2-45" aria-hidden = "true" tabindex = "-1" > < / a > kd_top_k_before_softmax < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb2-46" > < a href = "#cb2-46" aria-hidden = "true" tabindex = "-1" > < / a > sequence_parallel_degree< span class = "op" > =< / span > < span class = "dv" > 1< / span > ,< / span >
< span id = "cb2-47" > < a href = "#cb2-47" aria-hidden = "true" tabindex = "-1" > < / a > image_size< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb2-48" > < a href = "#cb2-48" aria-hidden = "true" tabindex = "-1" > < / a > image_resize_algorithm< span class = "op " > = < / span > < span class = "va " > None < / span > , < / span >
< span id = "cb2-49" > < a href = "#cb2-49" aria-hidden = "true" tabindex = "-1" > < / a > )< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< p > KTO config for KTO training< / p >
< / section >
< section id = "axolotl.core.training_args.AxolotlORPOConfig" class = "level3" >
@@ -617,48 +619,49 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
< span id = "cb3-4" > < a href = "#cb3-4" aria-hidden = "true" tabindex = "-1" > < / a > lr_quadratic_warmup< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb3-5" > < a href = "#cb3-5" aria-hidden = "true" tabindex = "-1" > < / a > pretraining< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb3-6" > < a href = "#cb3-6" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb3-7" > < a href = "#cb3-7" aria-hidden = "true" tabindex = "-1" > < / a > multipack_real_batches < span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb3-8" > < a href = "#cb3-8" aria-hidden = "true" tabindex = "-1" > < / a > eval_sample_packing < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb3-9" > < a href = "#cb3-9" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_efficiency < span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb3-10" > < a href = "#cb3-10" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_bin_size < span class = "op" > =< / span > < span class = "dv " > 20 0< / span > ,< / span >
< span id = "cb3-11" > < a href = "#cb3-11" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_group _size< span class = "op" > =< / span > < span class = "dv" > 1000 00< / span > ,< / span >
< span id = "cb3-12" > < a href = "#cb3-12" aria-hidden = "true" tabindex = "-1" > < / a > max_seq_length < span class = "op" > =< / span > < span class = "dv" > 2048 < / span > ,< / span >
< span id = "cb3-13" > < a href = "#cb3-13" aria-hidden = "true" tabindex = "-1" > < / a > relora_steps < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb3-14" > < a href = "#cb3-14" aria-hidden = "true" tabindex = "-1" > < / a > relora_warmup_ steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-15" > < a href = "#cb3-15" aria-hidden = "true" tabindex = "-1" > < / a > relora_anneal _steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-16" > < a href = "#cb3-16" aria-hidden = "true" tabindex = "-1" > < / a > relora_prune_ratio < span class = "op" > =< / span > < span class = "fl " > 0.9 < / span > ,< / span >
< span id = "cb3-17" > < a href = "#cb3-17" aria-hidden = "true" tabindex = "-1" > < / a > bench_split < span class = "op" > =< / span > < span class = "st " > 'eval' < / span > ,< / span >
< span id = "cb3-18" > < a href = "#cb3-18" aria-hidden = "true" tabindex = "-1" > < / a > bench_datase t< span class = "op" > =< / span > < span class = "st" > 'pharaouk/dharma-1/dharma_1_mini.json '< / span > ,< / span >
< span id = "cb3-19" > < a href = "#cb3-19" aria-hidden = "true" tabindex = "-1" > < / a > do_ bench_eval < span class = "op" > =< / span > < span class = "va " > False < / span > ,< / span >
< span id = "cb3-20" > < a href = "#cb3-20" aria-hidden = "true" tabindex = "-1" > < / a > do_causal_lm _eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb3-21" > < a href = "#cb3-21" aria-hidden = "true" tabindex = "-1" > < / a > max_bench_samples < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb3-22" > < a href = "#cb3-22" aria-hidden = "true" tabindex = "-1" > < / a > bench_source_max_ len < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb3-23" > < a href = "#cb3-23" aria-hidden = "true" tabindex = "-1" > < / a > dataloader_prefetch_factor < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb3-24" > < a href = "#cb3-24" aria-hidden = "true" tabindex = "-1" > < / a > cosine_min_lr_ratio < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-25" > < a href = "#cb3-25" aria-hidden = "true" tabindex = "-1" > < / a > cosine_constant _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-26" > < a href = "#cb3-26" aria-hidden = "true" tabindex = "-1" > < / a > loraplus _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-27" > < a href = "#cb3-27" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_embedding < span class = "op" > =< / span > < span class = "fl " > 1e-06 < / span > ,< / span >
< span id = "cb3-28" > < a href = "#cb3-28" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr_scale < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb3-29" > < a href = "#cb3-29" aria-hidden = "true" tabindex = "-1" > < / a > lr_groups < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-30" > < a href = "#cb3-30" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-31" > < a href = "#cb3-31" aria-hidden = "true" tabindex = "-1" > < / a > qlora < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb3-32" > < a href = "#cb3-32" aria-hidden = "true" tabindex = "-1" > < / a > orpo_alph a< span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb3-33" > < a href = "#cb3-33" aria-hidden = "true" tabindex = "-1" > < / a > lisa_n_layers < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-34" > < a href = "#cb3-34" aria-hidden = "true" tabindex = "-1" > < / a > lisa_step_interval < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-35" > < a href = "#cb3-35" aria-hidden = "true" tabindex = "-1" > < / a > lisa_layers_attribute < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-36" > < a href = "#cb3-36" aria-hidden = "true" tabindex = "-1" > < / a > curriculum_sampling < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-37" > < a href = "#cb3-37" aria-hidden = "true" tabindex = "-1" > < / a > alternate_optimizer < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-38" > < a href = "#cb3-38" aria-hidden = "true" tabindex = "-1" > < / a > alternate_lr_scheduler_type < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-39" > < a href = "#cb3-39" aria-hidden = "true" tabindex = "-1" > < / a > chat_templat e< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-40" > < a href = "#cb3-40" aria-hidden = "true" tabindex = "-1" > < / a > kd_ce_alpha < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-41" > < a href = "#cb3-41" aria-hidden = "true" tabindex = "-1" > < / a > kd_alpha< span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb3-42" > < a href = "#cb3-42" aria-hidden = "true" tabindex = "-1" > < / a > kd_temperature < span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb3-43" > < a href = "#cb3-43" aria-hidden = "true" tabindex = "-1" > < / a > kd_zscore_base_temp < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb3-44" > < a href = "#cb3-44" aria-hidden = "true" tabindex = "-1" > < / a > kd_top_k_before_softmax < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-45" > < a href = "#cb3-45" aria-hidden = "true" tabindex = "-1" > < / a > sequence_parallel_degree < span class = "op" > =< / span > < span class = "d v" > 1 < / span > ,< / span >
< span id = "cb3-46" > < a href = "#cb3-46" aria-hidden = "true" tabindex = "-1" > < / a > image_siz e< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb3-47" > < a href = "#cb3-47" aria-hidden = "true" tabindex = "-1" > < / a > image_re size_algorithm < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-48" > < a href = "#cb3-48" aria-hidden = "true" tabindex = "-1" > < / a > )< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button " > < i class = "bi " > < / i > < / button > < / pre > < / div >
< span id = "cb3-7" > < a href = "#cb3-7" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_sequentially < span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb3-8" > < a href = "#cb3-8" aria-hidden = "true" tabindex = "-1" > < / a > multipack_real_batches < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb3-9" > < a href = "#cb3-9" aria-hidden = "true" tabindex = "-1" > < / a > eval_ sample_packing< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb3-10" > < a href = "#cb3-10" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_efficiency < span class = "op" > =< / span > < span class = "fl " > 1. 0< / span > ,< / span >
< span id = "cb3-11" > < a href = "#cb3-11" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_bin _size< span class = "op" > =< / span > < span class = "dv" > 2 00< / span > ,< / span >
< span id = "cb3-12" > < a href = "#cb3-12" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_group_size < span class = "op" > =< / span > < span class = "dv" > 100000 < / span > ,< / span >
< span id = "cb3-13" > < a href = "#cb3-13" aria-hidden = "true" tabindex = "-1" > < / a > max_seq_length < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb3-14" > < a href = "#cb3-14" aria-hidden = "true" tabindex = "-1" > < / a > relora_steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-15" > < a href = "#cb3-15" aria-hidden = "true" tabindex = "-1" > < / a > relora_warmup _steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-16" > < a href = "#cb3-16" aria-hidden = "true" tabindex = "-1" > < / a > relora_anneal_steps < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb3-17" > < a href = "#cb3-17" aria-hidden = "true" tabindex = "-1" > < / a > relora_prune_ratio < span class = "op" > =< / span > < span class = "fl " > 0.9 < / span > ,< / span >
< span id = "cb3-18" > < a href = "#cb3-18" aria-hidden = "true" tabindex = "-1" > < / a > bench_spli t< span class = "op" > =< / span > < span class = "st" > 'eval '< / span > ,< / span >
< span id = "cb3-19" > < a href = "#cb3-19" aria-hidden = "true" tabindex = "-1" > < / a > bench_dataset < span class = "op" > =< / span > < span class = "st " > 'pharaouk/dharma-1/dharma_1_mini.json' < / span > ,< / span >
< span id = "cb3-20" > < a href = "#cb3-20" aria-hidden = "true" tabindex = "-1" > < / a > do_bench _eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb3-21" > < a href = "#cb3-21" aria-hidden = "true" tabindex = "-1" > < / a > do_causal_lm_eval < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb3-22" > < a href = "#cb3-22" aria-hidden = "true" tabindex = "-1" > < / a > max_ bench_samp les < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb3-23" > < a href = "#cb3-23" aria-hidden = "true" tabindex = "-1" > < / a > bench_source_max_len < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb3-24" > < a href = "#cb3-24" aria-hidden = "true" tabindex = "-1" > < / a > dataloader_prefetch_factor < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-25" > < a href = "#cb3-25" aria-hidden = "true" tabindex = "-1" > < / a > cosine_min _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-26" > < a href = "#cb3-26" aria-hidden = "true" tabindex = "-1" > < / a > cosine_constant _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-27" > < a href = "#cb3-27" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_ratio < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb3-28" > < a href = "#cb3-28" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_ embedding< span class = "op" > =< / span > < span class = "fl " > 1e-06 < / span > ,< / span >
< span id = "cb3-29" > < a href = "#cb3-29" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr_scale < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-30" > < a href = "#cb3-30" aria-hidden = "true" tabindex = "-1" > < / a > lr_groups < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-31" > < a href = "#cb3-31" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb3-32" > < a href = "#cb3-32" aria-hidden = "true" tabindex = "-1" > < / a > ql ora< span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb3-33" > < a href = "#cb3-33" aria-hidden = "true" tabindex = "-1" > < / a > orpo_alpha < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-34" > < a href = "#cb3-34" aria-hidden = "true" tabindex = "-1" > < / a > lisa_n_layers < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-35" > < a href = "#cb3-35" aria-hidden = "true" tabindex = "-1" > < / a > lisa_step_interval < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-36" > < a href = "#cb3-36" aria-hidden = "true" tabindex = "-1" > < / a > lisa_layers_attribute < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-37" > < a href = "#cb3-37" aria-hidden = "true" tabindex = "-1" > < / a > curriculum_sampling < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-38" > < a href = "#cb3-38" aria-hidden = "true" tabindex = "-1" > < / a > alternate_optimizer < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-39" > < a href = "#cb3-39" aria-hidden = "true" tabindex = "-1" > < / a > alternate_lr_scheduler_typ e< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-40" > < a href = "#cb3-40" aria-hidden = "true" tabindex = "-1" > < / a > chat_template < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-41" > < a href = "#cb3-41" aria-hidden = "true" tabindex = "-1" > < / a > kd_ce_ alpha< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb3-42" > < a href = "#cb3-42" aria-hidden = "true" tabindex = "-1" > < / a > kd_alpha < span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb3-43" > < a href = "#cb3-43" aria-hidden = "true" tabindex = "-1" > < / a > kd_temperature < span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb3-44" > < a href = "#cb3-44" aria-hidden = "true" tabindex = "-1" > < / a > kd_zscore_base_temp < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-45" > < a href = "#cb3-45" aria-hidden = "true" tabindex = "-1" > < / a > kd_top_k_before_softmax < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb3-46" > < a href = "#cb3-46" aria-hidden = "true" tabindex = "-1" > < / a > sequence_parallel_degre e< span class = "op" > =< / span > < span class = "d v" > 1 < / span > ,< / span >
< span id = "cb3-47" > < a href = "#cb3-47" aria-hidden = "true" tabindex = "-1" > < / a > image_size< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb3-48" > < a href = "#cb3-48" aria-hidden = "true" tabindex = "-1" > < / a > image_resize_algorithm< span class = "op " > = < / span > < span class = "va " > None < / span > , < / span >
< span id = "cb3-49" > < a href = "#cb3-49" aria-hidden = "true" tabindex = "-1" > < / a > )< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< p > ORPO config for ORPO training< / p >
< / section >
< section id = "axolotl.core.training_args.AxolotlPRMConfig" class = "level3" >
@@ -669,48 +672,49 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
< span id = "cb4-4" > < a href = "#cb4-4" aria-hidden = "true" tabindex = "-1" > < / a > lr_quadratic_warmup< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb4-5" > < a href = "#cb4-5" aria-hidden = "true" tabindex = "-1" > < / a > pretraining< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb4-6" > < a href = "#cb4-6" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb4-7" > < a href = "#cb4-7" aria-hidden = "true" tabindex = "-1" > < / a > multipack_real_batches < span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb4-8" > < a href = "#cb4-8" aria-hidden = "true" tabindex = "-1" > < / a > eval_sample_packing < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb4-9" > < a href = "#cb4-9" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_efficiency < span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb4-10" > < a href = "#cb4-10" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_bin_size < span class = "op" > =< / span > < span class = "dv " > 20 0< / span > ,< / span >
< span id = "cb4-11" > < a href = "#cb4-11" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_group _size< span class = "op" > =< / span > < span class = "dv" > 1000 00< / span > ,< / span >
< span id = "cb4-12" > < a href = "#cb4-12" aria-hidden = "true" tabindex = "-1" > < / a > max_seq_length < span class = "op" > =< / span > < span class = "dv" > 2048 < / span > ,< / span >
< span id = "cb4-13" > < a href = "#cb4-13" aria-hidden = "true" tabindex = "-1" > < / a > relora_steps < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb4-14" > < a href = "#cb4-14" aria-hidden = "true" tabindex = "-1" > < / a > relora_warmup_ steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-15" > < a href = "#cb4-15" aria-hidden = "true" tabindex = "-1" > < / a > relora_anneal _steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-16" > < a href = "#cb4-16" aria-hidden = "true" tabindex = "-1" > < / a > relora_prune_ratio < span class = "op" > =< / span > < span class = "fl " > 0.9 < / span > ,< / span >
< span id = "cb4-17" > < a href = "#cb4-17" aria-hidden = "true" tabindex = "-1" > < / a > bench_split < span class = "op" > =< / span > < span class = "st " > 'eval' < / span > ,< / span >
< span id = "cb4-18" > < a href = "#cb4-18" aria-hidden = "true" tabindex = "-1" > < / a > bench_datase t< span class = "op" > =< / span > < span class = "st" > 'pharaouk/dharma-1/dharma_1_mini.json '< / span > ,< / span >
< span id = "cb4-19" > < a href = "#cb4-19" aria-hidden = "true" tabindex = "-1" > < / a > do_ bench_eval < span class = "op" > =< / span > < span class = "va " > False < / span > ,< / span >
< span id = "cb4-20" > < a href = "#cb4-20" aria-hidden = "true" tabindex = "-1" > < / a > do_causal_lm _eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb4-21" > < a href = "#cb4-21" aria-hidden = "true" tabindex = "-1" > < / a > max_bench_samples < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb4-22" > < a href = "#cb4-22" aria-hidden = "true" tabindex = "-1" > < / a > bench_source_max_ len < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb4-23" > < a href = "#cb4-23" aria-hidden = "true" tabindex = "-1" > < / a > dataloader_prefetch_factor < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb4-24" > < a href = "#cb4-24" aria-hidden = "true" tabindex = "-1" > < / a > cosine_min_lr_ratio < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-25" > < a href = "#cb4-25" aria-hidden = "true" tabindex = "-1" > < / a > cosine_constant _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-26" > < a href = "#cb4-26" aria-hidden = "true" tabindex = "-1" > < / a > loraplus _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-27" > < a href = "#cb4-27" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_embedding < span class = "op" > =< / span > < span class = "fl " > 1e-06 < / span > ,< / span >
< span id = "cb4-28" > < a href = "#cb4-28" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr_scale < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb4-29" > < a href = "#cb4-29" aria-hidden = "true" tabindex = "-1" > < / a > lr_groups < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-30" > < a href = "#cb4-30" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-31" > < a href = "#cb4-31" aria-hidden = "true" tabindex = "-1" > < / a > qlora < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb4-32" > < a href = "#cb4-32" aria-hidden = "true" tabindex = "-1" > < / a > orpo_alph a< span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb4-33" > < a href = "#cb4-33" aria-hidden = "true" tabindex = "-1" > < / a > lisa_n_layers < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-34" > < a href = "#cb4-34" aria-hidden = "true" tabindex = "-1" > < / a > lisa_step_interval < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-35" > < a href = "#cb4-35" aria-hidden = "true" tabindex = "-1" > < / a > lisa_layers_attribute < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-36" > < a href = "#cb4-36" aria-hidden = "true" tabindex = "-1" > < / a > curriculum_sampling < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-37" > < a href = "#cb4-37" aria-hidden = "true" tabindex = "-1" > < / a > alternate_optimizer < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-38" > < a href = "#cb4-38" aria-hidden = "true" tabindex = "-1" > < / a > alternate_lr_scheduler_type < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-39" > < a href = "#cb4-39" aria-hidden = "true" tabindex = "-1" > < / a > chat_templat e< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-40" > < a href = "#cb4-40" aria-hidden = "true" tabindex = "-1" > < / a > kd_ce_alpha < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-41" > < a href = "#cb4-41" aria-hidden = "true" tabindex = "-1" > < / a > kd_alpha< span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb4-42" > < a href = "#cb4-42" aria-hidden = "true" tabindex = "-1" > < / a > kd_temperature < span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb4-43" > < a href = "#cb4-43" aria-hidden = "true" tabindex = "-1" > < / a > kd_zscore_base_temp < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb4-44" > < a href = "#cb4-44" aria-hidden = "true" tabindex = "-1" > < / a > kd_top_k_before_softmax < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-45" > < a href = "#cb4-45" aria-hidden = "true" tabindex = "-1" > < / a > sequence_parallel_degree < span class = "op" > =< / span > < span class = "d v" > 1 < / span > ,< / span >
< span id = "cb4-46" > < a href = "#cb4-46" aria-hidden = "true" tabindex = "-1" > < / a > image_siz e< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb4-47" > < a href = "#cb4-47" aria-hidden = "true" tabindex = "-1" > < / a > image_re size_algorithm < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-48" > < a href = "#cb4-48" aria-hidden = "true" tabindex = "-1" > < / a > )< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button " > < i class = "bi " > < / i > < / button > < / pre > < / div >
< span id = "cb4-7" > < a href = "#cb4-7" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_sequentially < span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb4-8" > < a href = "#cb4-8" aria-hidden = "true" tabindex = "-1" > < / a > multipack_real_batches < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb4-9" > < a href = "#cb4-9" aria-hidden = "true" tabindex = "-1" > < / a > eval_ sample_packing< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb4-10" > < a href = "#cb4-10" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_efficiency < span class = "op" > =< / span > < span class = "fl " > 1. 0< / span > ,< / span >
< span id = "cb4-11" > < a href = "#cb4-11" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_bin _size< span class = "op" > =< / span > < span class = "dv" > 2 00< / span > ,< / span >
< span id = "cb4-12" > < a href = "#cb4-12" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_group_size < span class = "op" > =< / span > < span class = "dv" > 100000 < / span > ,< / span >
< span id = "cb4-13" > < a href = "#cb4-13" aria-hidden = "true" tabindex = "-1" > < / a > max_seq_length < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb4-14" > < a href = "#cb4-14" aria-hidden = "true" tabindex = "-1" > < / a > relora_steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-15" > < a href = "#cb4-15" aria-hidden = "true" tabindex = "-1" > < / a > relora_warmup _steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-16" > < a href = "#cb4-16" aria-hidden = "true" tabindex = "-1" > < / a > relora_anneal_steps < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb4-17" > < a href = "#cb4-17" aria-hidden = "true" tabindex = "-1" > < / a > relora_prune_ratio < span class = "op" > =< / span > < span class = "fl " > 0.9 < / span > ,< / span >
< span id = "cb4-18" > < a href = "#cb4-18" aria-hidden = "true" tabindex = "-1" > < / a > bench_spli t< span class = "op" > =< / span > < span class = "st" > 'eval '< / span > ,< / span >
< span id = "cb4-19" > < a href = "#cb4-19" aria-hidden = "true" tabindex = "-1" > < / a > bench_dataset < span class = "op" > =< / span > < span class = "st " > 'pharaouk/dharma-1/dharma_1_mini.json' < / span > ,< / span >
< span id = "cb4-20" > < a href = "#cb4-20" aria-hidden = "true" tabindex = "-1" > < / a > do_bench _eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb4-21" > < a href = "#cb4-21" aria-hidden = "true" tabindex = "-1" > < / a > do_causal_lm_eval < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb4-22" > < a href = "#cb4-22" aria-hidden = "true" tabindex = "-1" > < / a > max_ bench_samp les < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb4-23" > < a href = "#cb4-23" aria-hidden = "true" tabindex = "-1" > < / a > bench_source_max_len < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb4-24" > < a href = "#cb4-24" aria-hidden = "true" tabindex = "-1" > < / a > dataloader_prefetch_factor < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-25" > < a href = "#cb4-25" aria-hidden = "true" tabindex = "-1" > < / a > cosine_min _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-26" > < a href = "#cb4-26" aria-hidden = "true" tabindex = "-1" > < / a > cosine_constant _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-27" > < a href = "#cb4-27" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_ratio < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb4-28" > < a href = "#cb4-28" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_ embedding< span class = "op" > =< / span > < span class = "fl " > 1e-06 < / span > ,< / span >
< span id = "cb4-29" > < a href = "#cb4-29" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr_scale < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-30" > < a href = "#cb4-30" aria-hidden = "true" tabindex = "-1" > < / a > lr_groups < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-31" > < a href = "#cb4-31" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb4-32" > < a href = "#cb4-32" aria-hidden = "true" tabindex = "-1" > < / a > ql ora< span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb4-33" > < a href = "#cb4-33" aria-hidden = "true" tabindex = "-1" > < / a > orpo_alpha < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-34" > < a href = "#cb4-34" aria-hidden = "true" tabindex = "-1" > < / a > lisa_n_layers < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-35" > < a href = "#cb4-35" aria-hidden = "true" tabindex = "-1" > < / a > lisa_step_interval < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-36" > < a href = "#cb4-36" aria-hidden = "true" tabindex = "-1" > < / a > lisa_layers_attribute < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-37" > < a href = "#cb4-37" aria-hidden = "true" tabindex = "-1" > < / a > curriculum_sampling < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-38" > < a href = "#cb4-38" aria-hidden = "true" tabindex = "-1" > < / a > alternate_optimizer < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-39" > < a href = "#cb4-39" aria-hidden = "true" tabindex = "-1" > < / a > alternate_lr_scheduler_typ e< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-40" > < a href = "#cb4-40" aria-hidden = "true" tabindex = "-1" > < / a > chat_template < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-41" > < a href = "#cb4-41" aria-hidden = "true" tabindex = "-1" > < / a > kd_ce_ alpha< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb4-42" > < a href = "#cb4-42" aria-hidden = "true" tabindex = "-1" > < / a > kd_alpha < span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb4-43" > < a href = "#cb4-43" aria-hidden = "true" tabindex = "-1" > < / a > kd_temperature < span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb4-44" > < a href = "#cb4-44" aria-hidden = "true" tabindex = "-1" > < / a > kd_zscore_base_temp < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-45" > < a href = "#cb4-45" aria-hidden = "true" tabindex = "-1" > < / a > kd_top_k_before_softmax < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb4-46" > < a href = "#cb4-46" aria-hidden = "true" tabindex = "-1" > < / a > sequence_parallel_degre e< span class = "op" > =< / span > < span class = "d v" > 1 < / span > ,< / span >
< span id = "cb4-47" > < a href = "#cb4-47" aria-hidden = "true" tabindex = "-1" > < / a > image_size< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb4-48" > < a href = "#cb4-48" aria-hidden = "true" tabindex = "-1" > < / a > image_resize_algorithm< span class = "op " > = < / span > < span class = "va " > None < / span > , < / span >
< span id = "cb4-49" > < a href = "#cb4-49" aria-hidden = "true" tabindex = "-1" > < / a > )< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< p > PRM config for PRM training< / p >
< / section >
< section id = "axolotl.core.training_args.AxolotlRewardConfig" class = "level3" >
@@ -721,48 +725,49 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
< span id = "cb5-4" > < a href = "#cb5-4" aria-hidden = "true" tabindex = "-1" > < / a > lr_quadratic_warmup< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb5-5" > < a href = "#cb5-5" aria-hidden = "true" tabindex = "-1" > < / a > pretraining< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb5-6" > < a href = "#cb5-6" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb5-7" > < a href = "#cb5-7" aria-hidden = "true" tabindex = "-1" > < / a > multipack_real_batches < span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb5-8" > < a href = "#cb5-8" aria-hidden = "true" tabindex = "-1" > < / a > eval_sample_packing < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb5-9" > < a href = "#cb5-9" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_efficiency < span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb5-10" > < a href = "#cb5-10" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_bin_size < span class = "op" > =< / span > < span class = "dv" > 200< / span > ,< / span >
< span id = "cb5-11" > < a href = "#cb5-11" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_group_size< span class = "op" > =< / span > < span class = "dv" > 100000< / span > ,< / span >
< span id = "cb5-12" > < a href = "#cb5-12" aria-hidden = "true" tabindex = "-1" > < / a > max_seq_length < span class = "op" > =< / span > < span class = "dv" > 2048 < / span > ,< / span >
< span id = "cb5-13" > < a href = "#cb5-13" aria-hidden = "true" tabindex = "-1" > < / a > relora_steps < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb5-14" > < a href = "#cb5-14" aria-hidden = "true" tabindex = "-1" > < / a > relora_warmup_ steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-15" > < a href = "#cb5-15" aria-hidden = "true" tabindex = "-1" > < / a > relora_anneal _steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-16" > < a href = "#cb5-16" aria-hidden = "true" tabindex = "-1" > < / a > relora_prune_ratio < span class = "op" > =< / span > < span class = "fl " > 0.9 < / span > ,< / span >
< span id = "cb5-17" > < a href = "#cb5-17" aria-hidden = "true" tabindex = "-1" > < / a > bench_split < span class = "op" > =< / span > < span class = "st " > 'eval' < / span > ,< / span >
< span id = "cb5-18" > < a href = "#cb5-18" aria-hidden = "true" tabindex = "-1" > < / a > bench_dataset< span class = "op" > =< / span > < span class = "st" > 'pharaouk/dharma-1/dharma_1_mini.json'< / span > ,< / span >
< span id = "cb5-19" > < a href = "#cb5-19" aria-hidden = "true" tabindex = "-1" > < / a > do_ bench_eval < span class = "op" > =< / span > < span class = "va " > False < / span > ,< / span >
< span id = "cb5-20" > < a href = "#cb5-20" aria-hidden = "true" tabindex = "-1" > < / a > do_causal_lm _eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb5-21" > < a href = "#cb5-21" aria-hidden = "true" tabindex = "-1" > < / a > max_bench_samples < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb5-22" > < a href = "#cb5-22" aria-hidden = "true" tabindex = "-1" > < / a > bench_source_max_ len < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb5-23" > < a href = "#cb5-23" aria-hidden = "true" tabindex = "-1" > < / a > dataloader_prefetch_factor < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb5-24" > < a href = "#cb5-24" aria-hidden = "true" tabindex = "-1" > < / a > cosine_min_lr_ratio < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-25" > < a href = "#cb5-25" aria-hidden = "true" tabindex = "-1" > < / a > cosine_constant _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-26" > < a href = "#cb5-26" aria-hidden = "true" tabindex = "-1" > < / a > loraplus _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-27" > < a href = "#cb5-27" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_embedding < span class = "op" > =< / span > < span class = "fl " > 1e-06 < / span > ,< / span >
< span id = "cb5-28" > < a href = "#cb5-28" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr_scale < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb5-29" > < a href = "#cb5-29" aria-hidden = "true" tabindex = "-1" > < / a > lr_groups < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-30" > < a href = "#cb5-30" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-31" > < a href = "#cb5-31" aria-hidden = "true" tabindex = "-1" > < / a > qlora < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb5-32" > < a href = "#cb5-32" aria-hidden = "true" tabindex = "-1" > < / a > orpo_alph a< span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb5-33" > < a href = "#cb5-33" aria-hidden = "true" tabindex = "-1" > < / a > lisa_n_layers < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-34" > < a href = "#cb5-34" aria-hidden = "true" tabindex = "-1" > < / a > lisa_step_interval < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-35" > < a href = "#cb5-35" aria-hidden = "true" tabindex = "-1" > < / a > lisa_layers_attribute < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-36" > < a href = "#cb5-36" aria-hidden = "true" tabindex = "-1" > < / a > curriculum_sampling < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-37" > < a href = "#cb5-37" aria-hidden = "true" tabindex = "-1" > < / a > alternate_optimizer < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-38" > < a href = "#cb5-38" aria-hidden = "true" tabindex = "-1" > < / a > alternate_lr_scheduler_type < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-39" > < a href = "#cb5-39" aria-hidden = "true" tabindex = "-1" > < / a > chat_templat e< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-40" > < a href = "#cb5-40" aria-hidden = "true" tabindex = "-1" > < / a > kd_ce_alpha < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-41" > < a href = "#cb5-41" aria-hidden = "true" tabindex = "-1" > < / a > kd_alpha< span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb5-42" > < a href = "#cb5-42" aria-hidden = "true" tabindex = "-1" > < / a > kd_temperature < span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb5-43" > < a href = "#cb5-43" aria-hidden = "true" tabindex = "-1" > < / a > kd_zscore_base_temp < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb5-44" > < a href = "#cb5-44" aria-hidden = "true" tabindex = "-1" > < / a > kd_top_k_before_softmax < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-45" > < a href = "#cb5-45" aria-hidden = "true" tabindex = "-1" > < / a > sequence_parallel_degree < span class = "op" > =< / span > < span class = "d v" > 1 < / span > ,< / span >
< span id = "cb5-46" > < a href = "#cb5-46" aria-hidden = "true" tabindex = "-1" > < / a > image_siz e< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb5-47" > < a href = "#cb5-47" aria-hidden = "true" tabindex = "-1" > < / a > image_re size_algorithm < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-48" > < a href = "#cb5-48" aria-hidden = "true" tabindex = "-1" > < / a > )< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button " > < i class = "bi " > < / i > < / button > < / pre > < / div >
< span id = "cb5-7" > < a href = "#cb5-7" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_sequentially < span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb5-8" > < a href = "#cb5-8" aria-hidden = "true" tabindex = "-1" > < / a > multipack_real_batches < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb5-9" > < a href = "#cb5-9" aria-hidden = "true" tabindex = "-1" > < / a > eval_ sample_packing< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb5-10" > < a href = "#cb5-10" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_efficiency < span class = "op" > =< / span > < span class = "fl " > 1. 0< / span > ,< / span >
< span id = "cb5-11" > < a href = "#cb5-11" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_bin _size< span class = "op" > =< / span > < span class = "dv" > 2 00< / span > ,< / span >
< span id = "cb5-12" > < a href = "#cb5-12" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_group_size < span class = "op" > =< / span > < span class = "dv" > 100000 < / span > ,< / span >
< span id = "cb5-13" > < a href = "#cb5-13" aria-hidden = "true" tabindex = "-1" > < / a > max_seq_length < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb5-14" > < a href = "#cb5-14" aria-hidden = "true" tabindex = "-1" > < / a > relora_steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-15" > < a href = "#cb5-15" aria-hidden = "true" tabindex = "-1" > < / a > relora_warmup _steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-16" > < a href = "#cb5-16" aria-hidden = "true" tabindex = "-1" > < / a > relora_anneal_steps < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb5-17" > < a href = "#cb5-17" aria-hidden = "true" tabindex = "-1" > < / a > relora_prune_ratio < span class = "op" > =< / span > < span class = "fl " > 0.9 < / span > ,< / span >
< span id = "cb5-18" > < a href = "#cb5-18" aria-hidden = "true" tabindex = "-1" > < / a > bench_spli t< span class = "op" > =< / span > < span class = "st" > 'eval '< / span > ,< / span >
< span id = "cb5-19" > < a href = "#cb5-19" aria-hidden = "true" tabindex = "-1" > < / a > bench_dataset < span class = "op" > =< / span > < span class = "st " > 'pharaouk/dharma-1/dharma_1_mini.json' < / span > ,< / span >
< span id = "cb5-20" > < a href = "#cb5-20" aria-hidden = "true" tabindex = "-1" > < / a > do_bench _eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb5-21" > < a href = "#cb5-21" aria-hidden = "true" tabindex = "-1" > < / a > do_causal_lm_eval < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb5-22" > < a href = "#cb5-22" aria-hidden = "true" tabindex = "-1" > < / a > max_ bench_samp les < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb5-23" > < a href = "#cb5-23" aria-hidden = "true" tabindex = "-1" > < / a > bench_source_max_len < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb5-24" > < a href = "#cb5-24" aria-hidden = "true" tabindex = "-1" > < / a > dataloader_prefetch_factor < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-25" > < a href = "#cb5-25" aria-hidden = "true" tabindex = "-1" > < / a > cosine_min _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-26" > < a href = "#cb5-26" aria-hidden = "true" tabindex = "-1" > < / a > cosine_constant _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-27" > < a href = "#cb5-27" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_ratio < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb5-28" > < a href = "#cb5-28" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_ embedding< span class = "op" > =< / span > < span class = "fl " > 1e-06 < / span > ,< / span >
< span id = "cb5-29" > < a href = "#cb5-29" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr_scale < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-30" > < a href = "#cb5-30" aria-hidden = "true" tabindex = "-1" > < / a > lr_groups < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-31" > < a href = "#cb5-31" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb5-32" > < a href = "#cb5-32" aria-hidden = "true" tabindex = "-1" > < / a > ql ora< span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb5-33" > < a href = "#cb5-33" aria-hidden = "true" tabindex = "-1" > < / a > orpo_alpha < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-34" > < a href = "#cb5-34" aria-hidden = "true" tabindex = "-1" > < / a > lisa_n_layers < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-35" > < a href = "#cb5-35" aria-hidden = "true" tabindex = "-1" > < / a > lisa_step_interval < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-36" > < a href = "#cb5-36" aria-hidden = "true" tabindex = "-1" > < / a > lisa_layers_attribute < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-37" > < a href = "#cb5-37" aria-hidden = "true" tabindex = "-1" > < / a > curriculum_sampling < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-38" > < a href = "#cb5-38" aria-hidden = "true" tabindex = "-1" > < / a > alternate_optimizer < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-39" > < a href = "#cb5-39" aria-hidden = "true" tabindex = "-1" > < / a > alternate_lr_scheduler_typ e< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-40" > < a href = "#cb5-40" aria-hidden = "true" tabindex = "-1" > < / a > chat_template < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-41" > < a href = "#cb5-41" aria-hidden = "true" tabindex = "-1" > < / a > kd_ce_ alpha< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb5-42" > < a href = "#cb5-42" aria-hidden = "true" tabindex = "-1" > < / a > kd_alpha < span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb5-43" > < a href = "#cb5-43" aria-hidden = "true" tabindex = "-1" > < / a > kd_temperature < span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb5-44" > < a href = "#cb5-44" aria-hidden = "true" tabindex = "-1" > < / a > kd_zscore_base_temp < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-45" > < a href = "#cb5-45" aria-hidden = "true" tabindex = "-1" > < / a > kd_top_k_before_softmax < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb5-46" > < a href = "#cb5-46" aria-hidden = "true" tabindex = "-1" > < / a > sequence_parallel_degre e< span class = "op" > =< / span > < span class = "d v" > 1 < / span > ,< / span >
< span id = "cb5-47" > < a href = "#cb5-47" aria-hidden = "true" tabindex = "-1" > < / a > image_size< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb5-48" > < a href = "#cb5-48" aria-hidden = "true" tabindex = "-1" > < / a > image_resize_algorithm< span class = "op " > = < / span > < span class = "va " > None < / span > , < / span >
< span id = "cb5-49" > < a href = "#cb5-49" aria-hidden = "true" tabindex = "-1" > < / a > )< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< p > Reward config for Reward training< / p >
< / section >
< section id = "axolotl.core.training_args.AxolotlTrainingArguments" class = "level3" >
@@ -773,48 +778,49 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
< span id = "cb6-4" > < a href = "#cb6-4" aria-hidden = "true" tabindex = "-1" > < / a > lr_quadratic_warmup< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb6-5" > < a href = "#cb6-5" aria-hidden = "true" tabindex = "-1" > < / a > pretraining< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb6-6" > < a href = "#cb6-6" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb6-7" > < a href = "#cb6-7" aria-hidden = "true" tabindex = "-1" > < / a > multipack_real_batches < span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb6-8" > < a href = "#cb6-8" aria-hidden = "true" tabindex = "-1" > < / a > eval_sample_packing < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb6-9" > < a href = "#cb6-9" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_efficiency < span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb6-10" > < a href = "#cb6-10" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_bin_size < span class = "op" > =< / span > < span class = "dv " > 20 0< / span > ,< / span >
< span id = "cb6-11" > < a href = "#cb6-11" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_group _size< span class = "op" > =< / span > < span class = "dv" > 1000 00< / span > ,< / span >
< span id = "cb6-12" > < a href = "#cb6-12" aria-hidden = "true" tabindex = "-1" > < / a > max_seq_length < span class = "op" > =< / span > < span class = "dv" > 2048 < / span > ,< / span >
< span id = "cb6-13" > < a href = "#cb6-13" aria-hidden = "true" tabindex = "-1" > < / a > relora_steps < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb6-14" > < a href = "#cb6-14" aria-hidden = "true" tabindex = "-1" > < / a > relora_warmup_ steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-15" > < a href = "#cb6-15" aria-hidden = "true" tabindex = "-1" > < / a > relora_anneal _steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-16" > < a href = "#cb6-16" aria-hidden = "true" tabindex = "-1" > < / a > relora_prune_ratio < span class = "op" > =< / span > < span class = "fl " > 0.9 < / span > ,< / span >
< span id = "cb6-17" > < a href = "#cb6-17" aria-hidden = "true" tabindex = "-1" > < / a > bench_split < span class = "op" > =< / span > < span class = "st " > 'eval' < / span > ,< / span >
< span id = "cb6-18" > < a href = "#cb6-18" aria-hidden = "true" tabindex = "-1" > < / a > bench_datase t< span class = "op" > =< / span > < span class = "st" > 'pharaouk/dharma-1/dharma_1_mini.json '< / span > ,< / span >
< span id = "cb6-19" > < a href = "#cb6-19" aria-hidden = "true" tabindex = "-1" > < / a > do_ bench_eval < span class = "op" > =< / span > < span class = "va " > False < / span > ,< / span >
< span id = "cb6-20" > < a href = "#cb6-20" aria-hidden = "true" tabindex = "-1" > < / a > do_causal_lm _eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb6-21" > < a href = "#cb6-21" aria-hidden = "true" tabindex = "-1" > < / a > max_bench_samples < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb6-22" > < a href = "#cb6-22" aria-hidden = "true" tabindex = "-1" > < / a > bench_source_max_ len < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb6-23" > < a href = "#cb6-23" aria-hidden = "true" tabindex = "-1" > < / a > dataloader_prefetch_factor < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb6-24" > < a href = "#cb6-24" aria-hidden = "true" tabindex = "-1" > < / a > cosine_min_lr_ratio < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-25" > < a href = "#cb6-25" aria-hidden = "true" tabindex = "-1" > < / a > cosine_constant _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-26" > < a href = "#cb6-26" aria-hidden = "true" tabindex = "-1" > < / a > loraplus _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-27" > < a href = "#cb6-27" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_embedding < span class = "op" > =< / span > < span class = "fl " > 1e-06 < / span > ,< / span >
< span id = "cb6-28" > < a href = "#cb6-28" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr_scale < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb6-29" > < a href = "#cb6-29" aria-hidden = "true" tabindex = "-1" > < / a > lr_groups < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-30" > < a href = "#cb6-30" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-31" > < a href = "#cb6-31" aria-hidden = "true" tabindex = "-1" > < / a > qlora < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb6-32" > < a href = "#cb6-32" aria-hidden = "true" tabindex = "-1" > < / a > orpo_alph a< span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb6-33" > < a href = "#cb6-33" aria-hidden = "true" tabindex = "-1" > < / a > lisa_n_layers < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-34" > < a href = "#cb6-34" aria-hidden = "true" tabindex = "-1" > < / a > lisa_step_interval < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-35" > < a href = "#cb6-35" aria-hidden = "true" tabindex = "-1" > < / a > lisa_layers_attribute < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-36" > < a href = "#cb6-36" aria-hidden = "true" tabindex = "-1" > < / a > curriculum_sampling < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-37" > < a href = "#cb6-37" aria-hidden = "true" tabindex = "-1" > < / a > alternate_optimizer < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-38" > < a href = "#cb6-38" aria-hidden = "true" tabindex = "-1" > < / a > alternate_lr_scheduler_type < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-39" > < a href = "#cb6-39" aria-hidden = "true" tabindex = "-1" > < / a > chat_templat e< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-40" > < a href = "#cb6-40" aria-hidden = "true" tabindex = "-1" > < / a > kd_ce_alpha < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-41" > < a href = "#cb6-41" aria-hidden = "true" tabindex = "-1" > < / a > kd_alpha< span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb6-42" > < a href = "#cb6-42" aria-hidden = "true" tabindex = "-1" > < / a > kd_temperature < span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb6-43" > < a href = "#cb6-43" aria-hidden = "true" tabindex = "-1" > < / a > kd_zscore_base_temp < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb6-44" > < a href = "#cb6-44" aria-hidden = "true" tabindex = "-1" > < / a > kd_top_k_before_softmax < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-45" > < a href = "#cb6-45" aria-hidden = "true" tabindex = "-1" > < / a > sequence_parallel_degree < span class = "op" > =< / span > < span class = "d v" > 1 < / span > ,< / span >
< span id = "cb6-46" > < a href = "#cb6-46" aria-hidden = "true" tabindex = "-1" > < / a > image_siz e< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb6-47" > < a href = "#cb6-47" aria-hidden = "true" tabindex = "-1" > < / a > image_re size_algorithm < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-48" > < a href = "#cb6-48" aria-hidden = "true" tabindex = "-1" > < / a > )< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button " > < i class = "bi " > < / i > < / button > < / pre > < / div >
< span id = "cb6-7" > < a href = "#cb6-7" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_sequentially < span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb6-8" > < a href = "#cb6-8" aria-hidden = "true" tabindex = "-1" > < / a > multipack_real_batches < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb6-9" > < a href = "#cb6-9" aria-hidden = "true" tabindex = "-1" > < / a > eval_ sample_packing< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb6-10" > < a href = "#cb6-10" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_efficiency < span class = "op" > =< / span > < span class = "fl " > 1. 0< / span > ,< / span >
< span id = "cb6-11" > < a href = "#cb6-11" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_bin _size< span class = "op" > =< / span > < span class = "dv" > 2 00< / span > ,< / span >
< span id = "cb6-12" > < a href = "#cb6-12" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_group_size < span class = "op" > =< / span > < span class = "dv" > 100000 < / span > ,< / span >
< span id = "cb6-13" > < a href = "#cb6-13" aria-hidden = "true" tabindex = "-1" > < / a > max_seq_length < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb6-14" > < a href = "#cb6-14" aria-hidden = "true" tabindex = "-1" > < / a > relora_steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-15" > < a href = "#cb6-15" aria-hidden = "true" tabindex = "-1" > < / a > relora_warmup _steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-16" > < a href = "#cb6-16" aria-hidden = "true" tabindex = "-1" > < / a > relora_anneal_steps < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb6-17" > < a href = "#cb6-17" aria-hidden = "true" tabindex = "-1" > < / a > relora_prune_ratio < span class = "op" > =< / span > < span class = "fl " > 0.9 < / span > ,< / span >
< span id = "cb6-18" > < a href = "#cb6-18" aria-hidden = "true" tabindex = "-1" > < / a > bench_spli t< span class = "op" > =< / span > < span class = "st" > 'eval '< / span > ,< / span >
< span id = "cb6-19" > < a href = "#cb6-19" aria-hidden = "true" tabindex = "-1" > < / a > bench_dataset < span class = "op" > =< / span > < span class = "st " > 'pharaouk/dharma-1/dharma_1_mini.json' < / span > ,< / span >
< span id = "cb6-20" > < a href = "#cb6-20" aria-hidden = "true" tabindex = "-1" > < / a > do_bench _eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb6-21" > < a href = "#cb6-21" aria-hidden = "true" tabindex = "-1" > < / a > do_causal_lm_eval < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb6-22" > < a href = "#cb6-22" aria-hidden = "true" tabindex = "-1" > < / a > max_ bench_samp les < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb6-23" > < a href = "#cb6-23" aria-hidden = "true" tabindex = "-1" > < / a > bench_source_max_len < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb6-24" > < a href = "#cb6-24" aria-hidden = "true" tabindex = "-1" > < / a > dataloader_prefetch_factor < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-25" > < a href = "#cb6-25" aria-hidden = "true" tabindex = "-1" > < / a > cosine_min _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-26" > < a href = "#cb6-26" aria-hidden = "true" tabindex = "-1" > < / a > cosine_constant _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-27" > < a href = "#cb6-27" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_ratio < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb6-28" > < a href = "#cb6-28" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_ embedding< span class = "op" > =< / span > < span class = "fl " > 1e-06 < / span > ,< / span >
< span id = "cb6-29" > < a href = "#cb6-29" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr_scale < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-30" > < a href = "#cb6-30" aria-hidden = "true" tabindex = "-1" > < / a > lr_groups < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-31" > < a href = "#cb6-31" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb6-32" > < a href = "#cb6-32" aria-hidden = "true" tabindex = "-1" > < / a > ql ora< span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb6-33" > < a href = "#cb6-33" aria-hidden = "true" tabindex = "-1" > < / a > orpo_alpha < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-34" > < a href = "#cb6-34" aria-hidden = "true" tabindex = "-1" > < / a > lisa_n_layers < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-35" > < a href = "#cb6-35" aria-hidden = "true" tabindex = "-1" > < / a > lisa_step_interval < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-36" > < a href = "#cb6-36" aria-hidden = "true" tabindex = "-1" > < / a > lisa_layers_attribute < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-37" > < a href = "#cb6-37" aria-hidden = "true" tabindex = "-1" > < / a > curriculum_sampling < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-38" > < a href = "#cb6-38" aria-hidden = "true" tabindex = "-1" > < / a > alternate_optimizer < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-39" > < a href = "#cb6-39" aria-hidden = "true" tabindex = "-1" > < / a > alternate_lr_scheduler_typ e< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-40" > < a href = "#cb6-40" aria-hidden = "true" tabindex = "-1" > < / a > chat_template < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-41" > < a href = "#cb6-41" aria-hidden = "true" tabindex = "-1" > < / a > kd_ce_ alpha< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb6-42" > < a href = "#cb6-42" aria-hidden = "true" tabindex = "-1" > < / a > kd_alpha < span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb6-43" > < a href = "#cb6-43" aria-hidden = "true" tabindex = "-1" > < / a > kd_temperature < span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb6-44" > < a href = "#cb6-44" aria-hidden = "true" tabindex = "-1" > < / a > kd_zscore_base_temp < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-45" > < a href = "#cb6-45" aria-hidden = "true" tabindex = "-1" > < / a > kd_top_k_before_softmax < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb6-46" > < a href = "#cb6-46" aria-hidden = "true" tabindex = "-1" > < / a > sequence_parallel_degre e< span class = "op" > =< / span > < span class = "d v" > 1 < / span > ,< / span >
< span id = "cb6-47" > < a href = "#cb6-47" aria-hidden = "true" tabindex = "-1" > < / a > image_size< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb6-48" > < a href = "#cb6-48" aria-hidden = "true" tabindex = "-1" > < / a > image_resize_algorithm< span class = "op " > = < / span > < span class = "va " > None < / span > , < / span >
< span id = "cb6-49" > < a href = "#cb6-49" aria-hidden = "true" tabindex = "-1" > < / a > )< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< p > Training arguments for Causal trainer< / p >
< p > This code is duplicated due to HF TrainingArguments not setting output_dir with a
default value so it can’t be used as a mixin.< / p >
@@ -827,48 +833,49 @@ default value so it can’ t be used as a mixin.</p>
< span id = "cb7-4" > < a href = "#cb7-4" aria-hidden = "true" tabindex = "-1" > < / a > lr_quadratic_warmup< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb7-5" > < a href = "#cb7-5" aria-hidden = "true" tabindex = "-1" > < / a > pretraining< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb7-6" > < a href = "#cb7-6" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb7-7" > < a href = "#cb7-7" aria-hidden = "true" tabindex = "-1" > < / a > multipack_real_batches < span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb7-8" > < a href = "#cb7-8" aria-hidden = "true" tabindex = "-1" > < / a > eval_sample_packing < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb7-9" > < a href = "#cb7-9" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_efficiency < span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb7-10" > < a href = "#cb7-10" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_bin_size < span class = "op" > =< / span > < span class = "dv " > 20 0< / span > ,< / span >
< span id = "cb7-11" > < a href = "#cb7-11" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_group _size< span class = "op" > =< / span > < span class = "dv" > 1000 00< / span > ,< / span >
< span id = "cb7-12" > < a href = "#cb7-12" aria-hidden = "true" tabindex = "-1" > < / a > max_seq_length < span class = "op" > =< / span > < span class = "dv" > 2048 < / span > ,< / span >
< span id = "cb7-13" > < a href = "#cb7-13" aria-hidden = "true" tabindex = "-1" > < / a > relora_steps < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb7-14" > < a href = "#cb7-14" aria-hidden = "true" tabindex = "-1" > < / a > relora_warmup_ steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-15" > < a href = "#cb7-15" aria-hidden = "true" tabindex = "-1" > < / a > relora_anneal _steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-16" > < a href = "#cb7-16" aria-hidden = "true" tabindex = "-1" > < / a > relora_prune_ratio < span class = "op" > =< / span > < span class = "fl " > 0.9 < / span > ,< / span >
< span id = "cb7-17" > < a href = "#cb7-17" aria-hidden = "true" tabindex = "-1" > < / a > bench_split < span class = "op" > =< / span > < span class = "st " > 'eval' < / span > ,< / span >
< span id = "cb7-18" > < a href = "#cb7-18" aria-hidden = "true" tabindex = "-1" > < / a > bench_datase t< span class = "op" > =< / span > < span class = "st" > 'pharaouk/dharma-1/dharma_1_mini.json '< / span > ,< / span >
< span id = "cb7-19" > < a href = "#cb7-19" aria-hidden = "true" tabindex = "-1" > < / a > do_ bench_eval < span class = "op" > =< / span > < span class = "va " > False < / span > ,< / span >
< span id = "cb7-20" > < a href = "#cb7-20" aria-hidden = "true" tabindex = "-1" > < / a > do_causal_lm _eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb7-21" > < a href = "#cb7-21" aria-hidden = "true" tabindex = "-1" > < / a > max_bench_samples < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb7-22" > < a href = "#cb7-22" aria-hidden = "true" tabindex = "-1" > < / a > bench_source_max_ len < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb7-23" > < a href = "#cb7-23" aria-hidden = "true" tabindex = "-1" > < / a > dataloader_prefetch_factor < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb7-24" > < a href = "#cb7-24" aria-hidden = "true" tabindex = "-1" > < / a > cosine_min_lr_ratio < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-25" > < a href = "#cb7-25" aria-hidden = "true" tabindex = "-1" > < / a > cosine_constant _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-26" > < a href = "#cb7-26" aria-hidden = "true" tabindex = "-1" > < / a > loraplus _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-27" > < a href = "#cb7-27" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_embedding < span class = "op" > =< / span > < span class = "fl " > 1e-06 < / span > ,< / span >
< span id = "cb7-28" > < a href = "#cb7-28" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr_scale < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb7-29" > < a href = "#cb7-29" aria-hidden = "true" tabindex = "-1" > < / a > lr_groups < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-30" > < a href = "#cb7-30" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-31" > < a href = "#cb7-31" aria-hidden = "true" tabindex = "-1" > < / a > qlora < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb7-32" > < a href = "#cb7-32" aria-hidden = "true" tabindex = "-1" > < / a > orpo_alph a< span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb7-33" > < a href = "#cb7-33" aria-hidden = "true" tabindex = "-1" > < / a > lisa_n_layers < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-34" > < a href = "#cb7-34" aria-hidden = "true" tabindex = "-1" > < / a > lisa_step_interval < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-35" > < a href = "#cb7-35" aria-hidden = "true" tabindex = "-1" > < / a > lisa_layers_attribute < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-36" > < a href = "#cb7-36" aria-hidden = "true" tabindex = "-1" > < / a > curriculum_sampling < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-37" > < a href = "#cb7-37" aria-hidden = "true" tabindex = "-1" > < / a > alternate_optimizer < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-38" > < a href = "#cb7-38" aria-hidden = "true" tabindex = "-1" > < / a > alternate_lr_scheduler_type < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-39" > < a href = "#cb7-39" aria-hidden = "true" tabindex = "-1" > < / a > chat_templat e< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-40" > < a href = "#cb7-40" aria-hidden = "true" tabindex = "-1" > < / a > kd_ce_alpha < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-41" > < a href = "#cb7-41" aria-hidden = "true" tabindex = "-1" > < / a > kd_alpha< span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb7-42" > < a href = "#cb7-42" aria-hidden = "true" tabindex = "-1" > < / a > kd_temperature < span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb7-43" > < a href = "#cb7-43" aria-hidden = "true" tabindex = "-1" > < / a > kd_zscore_base_temp < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb7-44" > < a href = "#cb7-44" aria-hidden = "true" tabindex = "-1" > < / a > kd_top_k_before_softmax < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-45" > < a href = "#cb7-45" aria-hidden = "true" tabindex = "-1" > < / a > sequence_parallel_degree < span class = "op" > =< / span > < span class = "d v" > 1 < / span > ,< / span >
< span id = "cb7-46" > < a href = "#cb7-46" aria-hidden = "true" tabindex = "-1" > < / a > image_siz e< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb7-47" > < a href = "#cb7-47" aria-hidden = "true" tabindex = "-1" > < / a > image_re size_algorithm < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-48" > < a href = "#cb7-48" aria-hidden = "true" tabindex = "-1" > < / a > )< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button " > < i class = "bi " > < / i > < / button > < / pre > < / div >
< span id = "cb7-7" > < a href = "#cb7-7" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_sequentially < span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb7-8" > < a href = "#cb7-8" aria-hidden = "true" tabindex = "-1" > < / a > multipack_real_batches < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb7-9" > < a href = "#cb7-9" aria-hidden = "true" tabindex = "-1" > < / a > eval_ sample_packing< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb7-10" > < a href = "#cb7-10" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_efficiency < span class = "op" > =< / span > < span class = "fl " > 1. 0< / span > ,< / span >
< span id = "cb7-11" > < a href = "#cb7-11" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_bin _size< span class = "op" > =< / span > < span class = "dv" > 2 00< / span > ,< / span >
< span id = "cb7-12" > < a href = "#cb7-12" aria-hidden = "true" tabindex = "-1" > < / a > sample_packing_group_size < span class = "op" > =< / span > < span class = "dv" > 100000 < / span > ,< / span >
< span id = "cb7-13" > < a href = "#cb7-13" aria-hidden = "true" tabindex = "-1" > < / a > max_seq_length < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb7-14" > < a href = "#cb7-14" aria-hidden = "true" tabindex = "-1" > < / a > relora_steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-15" > < a href = "#cb7-15" aria-hidden = "true" tabindex = "-1" > < / a > relora_warmup _steps< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-16" > < a href = "#cb7-16" aria-hidden = "true" tabindex = "-1" > < / a > relora_anneal_steps < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb7-17" > < a href = "#cb7-17" aria-hidden = "true" tabindex = "-1" > < / a > relora_prune_ratio < span class = "op" > =< / span > < span class = "fl " > 0.9 < / span > ,< / span >
< span id = "cb7-18" > < a href = "#cb7-18" aria-hidden = "true" tabindex = "-1" > < / a > bench_spli t< span class = "op" > =< / span > < span class = "st" > 'eval '< / span > ,< / span >
< span id = "cb7-19" > < a href = "#cb7-19" aria-hidden = "true" tabindex = "-1" > < / a > bench_dataset < span class = "op" > =< / span > < span class = "st " > 'pharaouk/dharma-1/dharma_1_mini.json' < / span > ,< / span >
< span id = "cb7-20" > < a href = "#cb7-20" aria-hidden = "true" tabindex = "-1" > < / a > do_bench _eval< span class = "op" > =< / span > < span class = "va" > False< / span > ,< / span >
< span id = "cb7-21" > < a href = "#cb7-21" aria-hidden = "true" tabindex = "-1" > < / a > do_causal_lm_eval < span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb7-22" > < a href = "#cb7-22" aria-hidden = "true" tabindex = "-1" > < / a > max_ bench_samp les < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb7-23" > < a href = "#cb7-23" aria-hidden = "true" tabindex = "-1" > < / a > bench_source_max_len < span class = "op" > =< / span > < span class = "d v" > 2048 < / span > ,< / span >
< span id = "cb7-24" > < a href = "#cb7-24" aria-hidden = "true" tabindex = "-1" > < / a > dataloader_prefetch_factor < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-25" > < a href = "#cb7-25" aria-hidden = "true" tabindex = "-1" > < / a > cosine_min _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-26" > < a href = "#cb7-26" aria-hidden = "true" tabindex = "-1" > < / a > cosine_constant _lr_ratio< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-27" > < a href = "#cb7-27" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_ratio < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb7-28" > < a href = "#cb7-28" aria-hidden = "true" tabindex = "-1" > < / a > loraplus_lr_ embedding< span class = "op" > =< / span > < span class = "fl " > 1e-06 < / span > ,< / span >
< span id = "cb7-29" > < a href = "#cb7-29" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr_scale < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-30" > < a href = "#cb7-30" aria-hidden = "true" tabindex = "-1" > < / a > lr_groups < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-31" > < a href = "#cb7-31" aria-hidden = "true" tabindex = "-1" > < / a > embedding_lr < span class = "op" > =< / span > < span class = "va" > Non e< / span > ,< / span >
< span id = "cb7-32" > < a href = "#cb7-32" aria-hidden = "true" tabindex = "-1" > < / a > ql ora< span class = "op" > =< / span > < span class = "va" > Fals e< / span > ,< / span >
< span id = "cb7-33" > < a href = "#cb7-33" aria-hidden = "true" tabindex = "-1" > < / a > orpo_alpha < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-34" > < a href = "#cb7-34" aria-hidden = "true" tabindex = "-1" > < / a > lisa_n_layers < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-35" > < a href = "#cb7-35" aria-hidden = "true" tabindex = "-1" > < / a > lisa_step_interval < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-36" > < a href = "#cb7-36" aria-hidden = "true" tabindex = "-1" > < / a > lisa_layers_attribute < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-37" > < a href = "#cb7-37" aria-hidden = "true" tabindex = "-1" > < / a > curriculum_sampling < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-38" > < a href = "#cb7-38" aria-hidden = "true" tabindex = "-1" > < / a > alternate_optimizer < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-39" > < a href = "#cb7-39" aria-hidden = "true" tabindex = "-1" > < / a > alternate_lr_scheduler_typ e< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-40" > < a href = "#cb7-40" aria-hidden = "true" tabindex = "-1" > < / a > chat_template < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-41" > < a href = "#cb7-41" aria-hidden = "true" tabindex = "-1" > < / a > kd_ce_ alpha< span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb7-42" > < a href = "#cb7-42" aria-hidden = "true" tabindex = "-1" > < / a > kd_alpha < span class = "op" > =< / span > < span class = "fl" > 1.0< / span > ,< / span >
< span id = "cb7-43" > < a href = "#cb7-43" aria-hidden = "true" tabindex = "-1" > < / a > kd_temperature < span class = "op" > =< / span > < span class = "fl " > 1.0 < / span > ,< / span >
< span id = "cb7-44" > < a href = "#cb7-44" aria-hidden = "true" tabindex = "-1" > < / a > kd_zscore_base_temp < span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-45" > < a href = "#cb7-45" aria-hidden = "true" tabindex = "-1" > < / a > kd_top_k_before_softmax < span class = "op" > =< / span > < span class = "va " > None < / span > ,< / span >
< span id = "cb7-46" > < a href = "#cb7-46" aria-hidden = "true" tabindex = "-1" > < / a > sequence_parallel_degre e< span class = "op" > =< / span > < span class = "d v" > 1 < / span > ,< / span >
< span id = "cb7-47" > < a href = "#cb7-47" aria-hidden = "true" tabindex = "-1" > < / a > image_size< span class = "op" > =< / span > < span class = "va" > None< / span > ,< / span >
< span id = "cb7-48" > < a href = "#cb7-48" aria-hidden = "true" tabindex = "-1" > < / a > image_resize_algorithm< span class = "op " > = < / span > < span class = "va " > None < / span > , < / span >
< span id = "cb7-49" > < a href = "#cb7-49" aria-hidden = "true" tabindex = "-1" > < / a > )< / span > < / code > < button title = "Copy to Clipboard" class = "code-copy-button" > < i class = "bi" > < / i > < / button > < / pre > < / div >
< p > Mixin class for the Axolotl training args.< / p >