Built site for gh-pages

This commit is contained in:
Quarto GHA Workflow Runner
2025-06-12 17:25:50 +00:00
parent f465e840cc
commit eac3a4860e
8 changed files with 501 additions and 702 deletions

View File

@@ -547,43 +547,44 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
<span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a> sample_packing_bin_size<span class="op">=</span><span class="dv">200</span>,</span>
<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a> sample_packing_group_size<span class="op">=</span><span class="dv">100000</span>,</span>
<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a> max_seq_length<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb1-13"><a href="#cb1-13" aria-hidden="true" tabindex="-1"></a> relora_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-14"><a href="#cb1-14" aria-hidden="true" tabindex="-1"></a> relora_warmup_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-15"><a href="#cb1-15" aria-hidden="true" tabindex="-1"></a> relora_anneal_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-16"><a href="#cb1-16" aria-hidden="true" tabindex="-1"></a> relora_prune_ratio<span class="op">=</span><span class="fl">0.9</span>,</span>
<span id="cb1-17"><a href="#cb1-17" aria-hidden="true" tabindex="-1"></a> bench_split<span class="op">=</span><span class="st">'eval'</span>,</span>
<span id="cb1-18"><a href="#cb1-18" aria-hidden="true" tabindex="-1"></a> bench_dataset<span class="op">=</span><span class="st">'pharaouk/dharma-1/dharma_1_mini.json'</span>,</span>
<span id="cb1-19"><a href="#cb1-19" aria-hidden="true" tabindex="-1"></a> do_bench_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb1-20"><a href="#cb1-20" aria-hidden="true" tabindex="-1"></a> do_causal_lm_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb1-21"><a href="#cb1-21" aria-hidden="true" tabindex="-1"></a> max_bench_samples<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-22"><a href="#cb1-22" aria-hidden="true" tabindex="-1"></a> bench_source_max_len<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb1-23"><a href="#cb1-23" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-24"><a href="#cb1-24" aria-hidden="true" tabindex="-1"></a> cosine_min_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-25"><a href="#cb1-25" aria-hidden="true" tabindex="-1"></a> cosine_constant_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-26"><a href="#cb1-26" aria-hidden="true" tabindex="-1"></a> loraplus_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-27"><a href="#cb1-27" aria-hidden="true" tabindex="-1"></a> loraplus_lr_embedding<span class="op">=</span><span class="fl">1e-06</span>,</span>
<span id="cb1-28"><a href="#cb1-28" aria-hidden="true" tabindex="-1"></a> embedding_lr_scale<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-29"><a href="#cb1-29" aria-hidden="true" tabindex="-1"></a> lr_groups<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-30"><a href="#cb1-30" aria-hidden="true" tabindex="-1"></a> embedding_lr<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-31"><a href="#cb1-31" aria-hidden="true" tabindex="-1"></a> qlora<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb1-32"><a href="#cb1-32" aria-hidden="true" tabindex="-1"></a> orpo_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-33"><a href="#cb1-33" aria-hidden="true" tabindex="-1"></a> lisa_n_layers<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-34"><a href="#cb1-34" aria-hidden="true" tabindex="-1"></a> lisa_step_interval<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-35"><a href="#cb1-35" aria-hidden="true" tabindex="-1"></a> lisa_layers_attribute<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-36"><a href="#cb1-36" aria-hidden="true" tabindex="-1"></a> curriculum_sampling<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-37"><a href="#cb1-37" aria-hidden="true" tabindex="-1"></a> alternate_lr_scheduler_type<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-38"><a href="#cb1-38" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-39"><a href="#cb1-39" aria-hidden="true" tabindex="-1"></a> kd_ce_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-40"><a href="#cb1-40" aria-hidden="true" tabindex="-1"></a> kd_alpha<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb1-41"><a href="#cb1-41" aria-hidden="true" tabindex="-1"></a> kd_temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb1-42"><a href="#cb1-42" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-43"><a href="#cb1-43" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-44"><a href="#cb1-44" aria-hidden="true" tabindex="-1"></a> adam_beta3<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-45"><a href="#cb1-45" aria-hidden="true" tabindex="-1"></a> adam_epsilon2<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-46"><a href="#cb1-46" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-47"><a href="#cb1-47" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-48"><a href="#cb1-48" aria-hidden="true" tabindex="-1"></a> simpo_gamma<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-49"><a href="#cb1-49" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb1-13"><a href="#cb1-13" aria-hidden="true" tabindex="-1"></a> dataset_num_proc<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-14"><a href="#cb1-14" aria-hidden="true" tabindex="-1"></a> relora_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-15"><a href="#cb1-15" aria-hidden="true" tabindex="-1"></a> relora_warmup_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-16"><a href="#cb1-16" aria-hidden="true" tabindex="-1"></a> relora_anneal_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-17"><a href="#cb1-17" aria-hidden="true" tabindex="-1"></a> relora_prune_ratio<span class="op">=</span><span class="fl">0.9</span>,</span>
<span id="cb1-18"><a href="#cb1-18" aria-hidden="true" tabindex="-1"></a> bench_split<span class="op">=</span><span class="st">'eval'</span>,</span>
<span id="cb1-19"><a href="#cb1-19" aria-hidden="true" tabindex="-1"></a> bench_dataset<span class="op">=</span><span class="st">'pharaouk/dharma-1/dharma_1_mini.json'</span>,</span>
<span id="cb1-20"><a href="#cb1-20" aria-hidden="true" tabindex="-1"></a> do_bench_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb1-21"><a href="#cb1-21" aria-hidden="true" tabindex="-1"></a> do_causal_lm_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb1-22"><a href="#cb1-22" aria-hidden="true" tabindex="-1"></a> max_bench_samples<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-23"><a href="#cb1-23" aria-hidden="true" tabindex="-1"></a> bench_source_max_len<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb1-24"><a href="#cb1-24" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-25"><a href="#cb1-25" aria-hidden="true" tabindex="-1"></a> cosine_min_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-26"><a href="#cb1-26" aria-hidden="true" tabindex="-1"></a> cosine_constant_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-27"><a href="#cb1-27" aria-hidden="true" tabindex="-1"></a> loraplus_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-28"><a href="#cb1-28" aria-hidden="true" tabindex="-1"></a> loraplus_lr_embedding<span class="op">=</span><span class="fl">1e-06</span>,</span>
<span id="cb1-29"><a href="#cb1-29" aria-hidden="true" tabindex="-1"></a> embedding_lr_scale<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-30"><a href="#cb1-30" aria-hidden="true" tabindex="-1"></a> lr_groups<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-31"><a href="#cb1-31" aria-hidden="true" tabindex="-1"></a> embedding_lr<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-32"><a href="#cb1-32" aria-hidden="true" tabindex="-1"></a> qlora<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb1-33"><a href="#cb1-33" aria-hidden="true" tabindex="-1"></a> orpo_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-34"><a href="#cb1-34" aria-hidden="true" tabindex="-1"></a> lisa_n_layers<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-35"><a href="#cb1-35" aria-hidden="true" tabindex="-1"></a> lisa_step_interval<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-36"><a href="#cb1-36" aria-hidden="true" tabindex="-1"></a> lisa_layers_attribute<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-37"><a href="#cb1-37" aria-hidden="true" tabindex="-1"></a> curriculum_sampling<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-38"><a href="#cb1-38" aria-hidden="true" tabindex="-1"></a> alternate_lr_scheduler_type<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-39"><a href="#cb1-39" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-40"><a href="#cb1-40" aria-hidden="true" tabindex="-1"></a> kd_ce_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-41"><a href="#cb1-41" aria-hidden="true" tabindex="-1"></a> kd_alpha<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb1-42"><a href="#cb1-42" aria-hidden="true" tabindex="-1"></a> kd_temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb1-43"><a href="#cb1-43" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-44"><a href="#cb1-44" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-45"><a href="#cb1-45" aria-hidden="true" tabindex="-1"></a> adam_beta3<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-46"><a href="#cb1-46" aria-hidden="true" tabindex="-1"></a> adam_epsilon2<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-47"><a href="#cb1-47" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-48"><a href="#cb1-48" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-49"><a href="#cb1-49" aria-hidden="true" tabindex="-1"></a> simpo_gamma<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-50"><a href="#cb1-50" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>CPO config for CPO training</p>
</section>
<section id="axolotl.core.training_args.AxolotlKTOConfig" class="level3">
@@ -600,42 +601,43 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
<span id="cb2-10"><a href="#cb2-10" aria-hidden="true" tabindex="-1"></a> sample_packing_bin_size<span class="op">=</span><span class="dv">200</span>,</span>
<span id="cb2-11"><a href="#cb2-11" aria-hidden="true" tabindex="-1"></a> sample_packing_group_size<span class="op">=</span><span class="dv">100000</span>,</span>
<span id="cb2-12"><a href="#cb2-12" aria-hidden="true" tabindex="-1"></a> max_seq_length<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb2-13"><a href="#cb2-13" aria-hidden="true" tabindex="-1"></a> relora_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-14"><a href="#cb2-14" aria-hidden="true" tabindex="-1"></a> relora_warmup_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-15"><a href="#cb2-15" aria-hidden="true" tabindex="-1"></a> relora_anneal_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-16"><a href="#cb2-16" aria-hidden="true" tabindex="-1"></a> relora_prune_ratio<span class="op">=</span><span class="fl">0.9</span>,</span>
<span id="cb2-17"><a href="#cb2-17" aria-hidden="true" tabindex="-1"></a> bench_split<span class="op">=</span><span class="st">'eval'</span>,</span>
<span id="cb2-18"><a href="#cb2-18" aria-hidden="true" tabindex="-1"></a> bench_dataset<span class="op">=</span><span class="st">'pharaouk/dharma-1/dharma_1_mini.json'</span>,</span>
<span id="cb2-19"><a href="#cb2-19" aria-hidden="true" tabindex="-1"></a> do_bench_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb2-20"><a href="#cb2-20" aria-hidden="true" tabindex="-1"></a> do_causal_lm_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb2-21"><a href="#cb2-21" aria-hidden="true" tabindex="-1"></a> max_bench_samples<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-22"><a href="#cb2-22" aria-hidden="true" tabindex="-1"></a> bench_source_max_len<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb2-23"><a href="#cb2-23" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-24"><a href="#cb2-24" aria-hidden="true" tabindex="-1"></a> cosine_min_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-25"><a href="#cb2-25" aria-hidden="true" tabindex="-1"></a> cosine_constant_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-26"><a href="#cb2-26" aria-hidden="true" tabindex="-1"></a> loraplus_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-27"><a href="#cb2-27" aria-hidden="true" tabindex="-1"></a> loraplus_lr_embedding<span class="op">=</span><span class="fl">1e-06</span>,</span>
<span id="cb2-28"><a href="#cb2-28" aria-hidden="true" tabindex="-1"></a> embedding_lr_scale<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-29"><a href="#cb2-29" aria-hidden="true" tabindex="-1"></a> lr_groups<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-30"><a href="#cb2-30" aria-hidden="true" tabindex="-1"></a> embedding_lr<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-31"><a href="#cb2-31" aria-hidden="true" tabindex="-1"></a> qlora<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb2-32"><a href="#cb2-32" aria-hidden="true" tabindex="-1"></a> orpo_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-33"><a href="#cb2-33" aria-hidden="true" tabindex="-1"></a> lisa_n_layers<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-34"><a href="#cb2-34" aria-hidden="true" tabindex="-1"></a> lisa_step_interval<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-35"><a href="#cb2-35" aria-hidden="true" tabindex="-1"></a> lisa_layers_attribute<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-36"><a href="#cb2-36" aria-hidden="true" tabindex="-1"></a> curriculum_sampling<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-37"><a href="#cb2-37" aria-hidden="true" tabindex="-1"></a> alternate_lr_scheduler_type<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-38"><a href="#cb2-38" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-39"><a href="#cb2-39" aria-hidden="true" tabindex="-1"></a> kd_ce_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-40"><a href="#cb2-40" aria-hidden="true" tabindex="-1"></a> kd_alpha<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb2-41"><a href="#cb2-41" aria-hidden="true" tabindex="-1"></a> kd_temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb2-42"><a href="#cb2-42" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-43"><a href="#cb2-43" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-44"><a href="#cb2-44" aria-hidden="true" tabindex="-1"></a> adam_beta3<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-45"><a href="#cb2-45" aria-hidden="true" tabindex="-1"></a> adam_epsilon2<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-46"><a href="#cb2-46" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-47"><a href="#cb2-47" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-48"><a href="#cb2-48" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb2-13"><a href="#cb2-13" aria-hidden="true" tabindex="-1"></a> dataset_num_proc<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-14"><a href="#cb2-14" aria-hidden="true" tabindex="-1"></a> relora_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-15"><a href="#cb2-15" aria-hidden="true" tabindex="-1"></a> relora_warmup_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-16"><a href="#cb2-16" aria-hidden="true" tabindex="-1"></a> relora_anneal_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-17"><a href="#cb2-17" aria-hidden="true" tabindex="-1"></a> relora_prune_ratio<span class="op">=</span><span class="fl">0.9</span>,</span>
<span id="cb2-18"><a href="#cb2-18" aria-hidden="true" tabindex="-1"></a> bench_split<span class="op">=</span><span class="st">'eval'</span>,</span>
<span id="cb2-19"><a href="#cb2-19" aria-hidden="true" tabindex="-1"></a> bench_dataset<span class="op">=</span><span class="st">'pharaouk/dharma-1/dharma_1_mini.json'</span>,</span>
<span id="cb2-20"><a href="#cb2-20" aria-hidden="true" tabindex="-1"></a> do_bench_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb2-21"><a href="#cb2-21" aria-hidden="true" tabindex="-1"></a> do_causal_lm_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb2-22"><a href="#cb2-22" aria-hidden="true" tabindex="-1"></a> max_bench_samples<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-23"><a href="#cb2-23" aria-hidden="true" tabindex="-1"></a> bench_source_max_len<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb2-24"><a href="#cb2-24" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-25"><a href="#cb2-25" aria-hidden="true" tabindex="-1"></a> cosine_min_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-26"><a href="#cb2-26" aria-hidden="true" tabindex="-1"></a> cosine_constant_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-27"><a href="#cb2-27" aria-hidden="true" tabindex="-1"></a> loraplus_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-28"><a href="#cb2-28" aria-hidden="true" tabindex="-1"></a> loraplus_lr_embedding<span class="op">=</span><span class="fl">1e-06</span>,</span>
<span id="cb2-29"><a href="#cb2-29" aria-hidden="true" tabindex="-1"></a> embedding_lr_scale<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-30"><a href="#cb2-30" aria-hidden="true" tabindex="-1"></a> lr_groups<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-31"><a href="#cb2-31" aria-hidden="true" tabindex="-1"></a> embedding_lr<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-32"><a href="#cb2-32" aria-hidden="true" tabindex="-1"></a> qlora<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb2-33"><a href="#cb2-33" aria-hidden="true" tabindex="-1"></a> orpo_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-34"><a href="#cb2-34" aria-hidden="true" tabindex="-1"></a> lisa_n_layers<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-35"><a href="#cb2-35" aria-hidden="true" tabindex="-1"></a> lisa_step_interval<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-36"><a href="#cb2-36" aria-hidden="true" tabindex="-1"></a> lisa_layers_attribute<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-37"><a href="#cb2-37" aria-hidden="true" tabindex="-1"></a> curriculum_sampling<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-38"><a href="#cb2-38" aria-hidden="true" tabindex="-1"></a> alternate_lr_scheduler_type<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-39"><a href="#cb2-39" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-40"><a href="#cb2-40" aria-hidden="true" tabindex="-1"></a> kd_ce_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-41"><a href="#cb2-41" aria-hidden="true" tabindex="-1"></a> kd_alpha<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb2-42"><a href="#cb2-42" aria-hidden="true" tabindex="-1"></a> kd_temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb2-43"><a href="#cb2-43" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-44"><a href="#cb2-44" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-45"><a href="#cb2-45" aria-hidden="true" tabindex="-1"></a> adam_beta3<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-46"><a href="#cb2-46" aria-hidden="true" tabindex="-1"></a> adam_epsilon2<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-47"><a href="#cb2-47" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-48"><a href="#cb2-48" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-49"><a href="#cb2-49" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>KTO config for KTO training</p>
</section>
<section id="axolotl.core.training_args.AxolotlORPOConfig" class="level3">
@@ -652,42 +654,43 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a> sample_packing_bin_size<span class="op">=</span><span class="dv">200</span>,</span>
<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a> sample_packing_group_size<span class="op">=</span><span class="dv">100000</span>,</span>
<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a> max_seq_length<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a> relora_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a> relora_warmup_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a> relora_anneal_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a> relora_prune_ratio<span class="op">=</span><span class="fl">0.9</span>,</span>
<span id="cb3-17"><a href="#cb3-17" aria-hidden="true" tabindex="-1"></a> bench_split<span class="op">=</span><span class="st">'eval'</span>,</span>
<span id="cb3-18"><a href="#cb3-18" aria-hidden="true" tabindex="-1"></a> bench_dataset<span class="op">=</span><span class="st">'pharaouk/dharma-1/dharma_1_mini.json'</span>,</span>
<span id="cb3-19"><a href="#cb3-19" aria-hidden="true" tabindex="-1"></a> do_bench_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb3-20"><a href="#cb3-20" aria-hidden="true" tabindex="-1"></a> do_causal_lm_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb3-21"><a href="#cb3-21" aria-hidden="true" tabindex="-1"></a> max_bench_samples<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-22"><a href="#cb3-22" aria-hidden="true" tabindex="-1"></a> bench_source_max_len<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb3-23"><a href="#cb3-23" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-24"><a href="#cb3-24" aria-hidden="true" tabindex="-1"></a> cosine_min_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-25"><a href="#cb3-25" aria-hidden="true" tabindex="-1"></a> cosine_constant_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-26"><a href="#cb3-26" aria-hidden="true" tabindex="-1"></a> loraplus_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-27"><a href="#cb3-27" aria-hidden="true" tabindex="-1"></a> loraplus_lr_embedding<span class="op">=</span><span class="fl">1e-06</span>,</span>
<span id="cb3-28"><a href="#cb3-28" aria-hidden="true" tabindex="-1"></a> embedding_lr_scale<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-29"><a href="#cb3-29" aria-hidden="true" tabindex="-1"></a> lr_groups<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-30"><a href="#cb3-30" aria-hidden="true" tabindex="-1"></a> embedding_lr<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-31"><a href="#cb3-31" aria-hidden="true" tabindex="-1"></a> qlora<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb3-32"><a href="#cb3-32" aria-hidden="true" tabindex="-1"></a> orpo_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-33"><a href="#cb3-33" aria-hidden="true" tabindex="-1"></a> lisa_n_layers<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-34"><a href="#cb3-34" aria-hidden="true" tabindex="-1"></a> lisa_step_interval<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-35"><a href="#cb3-35" aria-hidden="true" tabindex="-1"></a> lisa_layers_attribute<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-36"><a href="#cb3-36" aria-hidden="true" tabindex="-1"></a> curriculum_sampling<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-37"><a href="#cb3-37" aria-hidden="true" tabindex="-1"></a> alternate_lr_scheduler_type<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-38"><a href="#cb3-38" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-39"><a href="#cb3-39" aria-hidden="true" tabindex="-1"></a> kd_ce_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-40"><a href="#cb3-40" aria-hidden="true" tabindex="-1"></a> kd_alpha<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb3-41"><a href="#cb3-41" aria-hidden="true" tabindex="-1"></a> kd_temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb3-42"><a href="#cb3-42" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-43"><a href="#cb3-43" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-44"><a href="#cb3-44" aria-hidden="true" tabindex="-1"></a> adam_beta3<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-45"><a href="#cb3-45" aria-hidden="true" tabindex="-1"></a> adam_epsilon2<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-46"><a href="#cb3-46" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-47"><a href="#cb3-47" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-48"><a href="#cb3-48" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a> dataset_num_proc<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a> relora_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a> relora_warmup_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a> relora_anneal_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-17"><a href="#cb3-17" aria-hidden="true" tabindex="-1"></a> relora_prune_ratio<span class="op">=</span><span class="fl">0.9</span>,</span>
<span id="cb3-18"><a href="#cb3-18" aria-hidden="true" tabindex="-1"></a> bench_split<span class="op">=</span><span class="st">'eval'</span>,</span>
<span id="cb3-19"><a href="#cb3-19" aria-hidden="true" tabindex="-1"></a> bench_dataset<span class="op">=</span><span class="st">'pharaouk/dharma-1/dharma_1_mini.json'</span>,</span>
<span id="cb3-20"><a href="#cb3-20" aria-hidden="true" tabindex="-1"></a> do_bench_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb3-21"><a href="#cb3-21" aria-hidden="true" tabindex="-1"></a> do_causal_lm_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb3-22"><a href="#cb3-22" aria-hidden="true" tabindex="-1"></a> max_bench_samples<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-23"><a href="#cb3-23" aria-hidden="true" tabindex="-1"></a> bench_source_max_len<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb3-24"><a href="#cb3-24" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-25"><a href="#cb3-25" aria-hidden="true" tabindex="-1"></a> cosine_min_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-26"><a href="#cb3-26" aria-hidden="true" tabindex="-1"></a> cosine_constant_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-27"><a href="#cb3-27" aria-hidden="true" tabindex="-1"></a> loraplus_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-28"><a href="#cb3-28" aria-hidden="true" tabindex="-1"></a> loraplus_lr_embedding<span class="op">=</span><span class="fl">1e-06</span>,</span>
<span id="cb3-29"><a href="#cb3-29" aria-hidden="true" tabindex="-1"></a> embedding_lr_scale<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-30"><a href="#cb3-30" aria-hidden="true" tabindex="-1"></a> lr_groups<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-31"><a href="#cb3-31" aria-hidden="true" tabindex="-1"></a> embedding_lr<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-32"><a href="#cb3-32" aria-hidden="true" tabindex="-1"></a> qlora<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb3-33"><a href="#cb3-33" aria-hidden="true" tabindex="-1"></a> orpo_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-34"><a href="#cb3-34" aria-hidden="true" tabindex="-1"></a> lisa_n_layers<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-35"><a href="#cb3-35" aria-hidden="true" tabindex="-1"></a> lisa_step_interval<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-36"><a href="#cb3-36" aria-hidden="true" tabindex="-1"></a> lisa_layers_attribute<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-37"><a href="#cb3-37" aria-hidden="true" tabindex="-1"></a> curriculum_sampling<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-38"><a href="#cb3-38" aria-hidden="true" tabindex="-1"></a> alternate_lr_scheduler_type<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-39"><a href="#cb3-39" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-40"><a href="#cb3-40" aria-hidden="true" tabindex="-1"></a> kd_ce_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-41"><a href="#cb3-41" aria-hidden="true" tabindex="-1"></a> kd_alpha<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb3-42"><a href="#cb3-42" aria-hidden="true" tabindex="-1"></a> kd_temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb3-43"><a href="#cb3-43" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-44"><a href="#cb3-44" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-45"><a href="#cb3-45" aria-hidden="true" tabindex="-1"></a> adam_beta3<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-46"><a href="#cb3-46" aria-hidden="true" tabindex="-1"></a> adam_epsilon2<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-47"><a href="#cb3-47" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-48"><a href="#cb3-48" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-49"><a href="#cb3-49" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>ORPO config for ORPO training</p>
</section>
<section id="axolotl.core.training_args.AxolotlPRMConfig" class="level3">
@@ -704,42 +707,43 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
<span id="cb4-10"><a href="#cb4-10" aria-hidden="true" tabindex="-1"></a> sample_packing_bin_size<span class="op">=</span><span class="dv">200</span>,</span>
<span id="cb4-11"><a href="#cb4-11" aria-hidden="true" tabindex="-1"></a> sample_packing_group_size<span class="op">=</span><span class="dv">100000</span>,</span>
<span id="cb4-12"><a href="#cb4-12" aria-hidden="true" tabindex="-1"></a> max_seq_length<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb4-13"><a href="#cb4-13" aria-hidden="true" tabindex="-1"></a> relora_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-14"><a href="#cb4-14" aria-hidden="true" tabindex="-1"></a> relora_warmup_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-15"><a href="#cb4-15" aria-hidden="true" tabindex="-1"></a> relora_anneal_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-16"><a href="#cb4-16" aria-hidden="true" tabindex="-1"></a> relora_prune_ratio<span class="op">=</span><span class="fl">0.9</span>,</span>
<span id="cb4-17"><a href="#cb4-17" aria-hidden="true" tabindex="-1"></a> bench_split<span class="op">=</span><span class="st">'eval'</span>,</span>
<span id="cb4-18"><a href="#cb4-18" aria-hidden="true" tabindex="-1"></a> bench_dataset<span class="op">=</span><span class="st">'pharaouk/dharma-1/dharma_1_mini.json'</span>,</span>
<span id="cb4-19"><a href="#cb4-19" aria-hidden="true" tabindex="-1"></a> do_bench_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb4-20"><a href="#cb4-20" aria-hidden="true" tabindex="-1"></a> do_causal_lm_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb4-21"><a href="#cb4-21" aria-hidden="true" tabindex="-1"></a> max_bench_samples<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-22"><a href="#cb4-22" aria-hidden="true" tabindex="-1"></a> bench_source_max_len<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb4-23"><a href="#cb4-23" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-24"><a href="#cb4-24" aria-hidden="true" tabindex="-1"></a> cosine_min_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-25"><a href="#cb4-25" aria-hidden="true" tabindex="-1"></a> cosine_constant_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-26"><a href="#cb4-26" aria-hidden="true" tabindex="-1"></a> loraplus_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-27"><a href="#cb4-27" aria-hidden="true" tabindex="-1"></a> loraplus_lr_embedding<span class="op">=</span><span class="fl">1e-06</span>,</span>
<span id="cb4-28"><a href="#cb4-28" aria-hidden="true" tabindex="-1"></a> embedding_lr_scale<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-29"><a href="#cb4-29" aria-hidden="true" tabindex="-1"></a> lr_groups<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-30"><a href="#cb4-30" aria-hidden="true" tabindex="-1"></a> embedding_lr<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-31"><a href="#cb4-31" aria-hidden="true" tabindex="-1"></a> qlora<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb4-32"><a href="#cb4-32" aria-hidden="true" tabindex="-1"></a> orpo_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-33"><a href="#cb4-33" aria-hidden="true" tabindex="-1"></a> lisa_n_layers<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-34"><a href="#cb4-34" aria-hidden="true" tabindex="-1"></a> lisa_step_interval<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-35"><a href="#cb4-35" aria-hidden="true" tabindex="-1"></a> lisa_layers_attribute<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-36"><a href="#cb4-36" aria-hidden="true" tabindex="-1"></a> curriculum_sampling<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-37"><a href="#cb4-37" aria-hidden="true" tabindex="-1"></a> alternate_lr_scheduler_type<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-38"><a href="#cb4-38" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-39"><a href="#cb4-39" aria-hidden="true" tabindex="-1"></a> kd_ce_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-40"><a href="#cb4-40" aria-hidden="true" tabindex="-1"></a> kd_alpha<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb4-41"><a href="#cb4-41" aria-hidden="true" tabindex="-1"></a> kd_temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb4-42"><a href="#cb4-42" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-43"><a href="#cb4-43" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-44"><a href="#cb4-44" aria-hidden="true" tabindex="-1"></a> adam_beta3<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-45"><a href="#cb4-45" aria-hidden="true" tabindex="-1"></a> adam_epsilon2<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-46"><a href="#cb4-46" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-47"><a href="#cb4-47" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-48"><a href="#cb4-48" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb4-13"><a href="#cb4-13" aria-hidden="true" tabindex="-1"></a> dataset_num_proc<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-14"><a href="#cb4-14" aria-hidden="true" tabindex="-1"></a> relora_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-15"><a href="#cb4-15" aria-hidden="true" tabindex="-1"></a> relora_warmup_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-16"><a href="#cb4-16" aria-hidden="true" tabindex="-1"></a> relora_anneal_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-17"><a href="#cb4-17" aria-hidden="true" tabindex="-1"></a> relora_prune_ratio<span class="op">=</span><span class="fl">0.9</span>,</span>
<span id="cb4-18"><a href="#cb4-18" aria-hidden="true" tabindex="-1"></a> bench_split<span class="op">=</span><span class="st">'eval'</span>,</span>
<span id="cb4-19"><a href="#cb4-19" aria-hidden="true" tabindex="-1"></a> bench_dataset<span class="op">=</span><span class="st">'pharaouk/dharma-1/dharma_1_mini.json'</span>,</span>
<span id="cb4-20"><a href="#cb4-20" aria-hidden="true" tabindex="-1"></a> do_bench_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb4-21"><a href="#cb4-21" aria-hidden="true" tabindex="-1"></a> do_causal_lm_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb4-22"><a href="#cb4-22" aria-hidden="true" tabindex="-1"></a> max_bench_samples<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-23"><a href="#cb4-23" aria-hidden="true" tabindex="-1"></a> bench_source_max_len<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb4-24"><a href="#cb4-24" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-25"><a href="#cb4-25" aria-hidden="true" tabindex="-1"></a> cosine_min_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-26"><a href="#cb4-26" aria-hidden="true" tabindex="-1"></a> cosine_constant_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-27"><a href="#cb4-27" aria-hidden="true" tabindex="-1"></a> loraplus_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-28"><a href="#cb4-28" aria-hidden="true" tabindex="-1"></a> loraplus_lr_embedding<span class="op">=</span><span class="fl">1e-06</span>,</span>
<span id="cb4-29"><a href="#cb4-29" aria-hidden="true" tabindex="-1"></a> embedding_lr_scale<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-30"><a href="#cb4-30" aria-hidden="true" tabindex="-1"></a> lr_groups<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-31"><a href="#cb4-31" aria-hidden="true" tabindex="-1"></a> embedding_lr<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-32"><a href="#cb4-32" aria-hidden="true" tabindex="-1"></a> qlora<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb4-33"><a href="#cb4-33" aria-hidden="true" tabindex="-1"></a> orpo_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-34"><a href="#cb4-34" aria-hidden="true" tabindex="-1"></a> lisa_n_layers<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-35"><a href="#cb4-35" aria-hidden="true" tabindex="-1"></a> lisa_step_interval<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-36"><a href="#cb4-36" aria-hidden="true" tabindex="-1"></a> lisa_layers_attribute<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-37"><a href="#cb4-37" aria-hidden="true" tabindex="-1"></a> curriculum_sampling<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-38"><a href="#cb4-38" aria-hidden="true" tabindex="-1"></a> alternate_lr_scheduler_type<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-39"><a href="#cb4-39" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-40"><a href="#cb4-40" aria-hidden="true" tabindex="-1"></a> kd_ce_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-41"><a href="#cb4-41" aria-hidden="true" tabindex="-1"></a> kd_alpha<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb4-42"><a href="#cb4-42" aria-hidden="true" tabindex="-1"></a> kd_temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb4-43"><a href="#cb4-43" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-44"><a href="#cb4-44" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-45"><a href="#cb4-45" aria-hidden="true" tabindex="-1"></a> adam_beta3<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-46"><a href="#cb4-46" aria-hidden="true" tabindex="-1"></a> adam_epsilon2<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-47"><a href="#cb4-47" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-48"><a href="#cb4-48" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-49"><a href="#cb4-49" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>PRM config for PRM training</p>
</section>
<section id="axolotl.core.training_args.AxolotlRewardConfig" class="level3">
@@ -756,42 +760,43 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
<span id="cb5-10"><a href="#cb5-10" aria-hidden="true" tabindex="-1"></a> sample_packing_bin_size<span class="op">=</span><span class="dv">200</span>,</span>
<span id="cb5-11"><a href="#cb5-11" aria-hidden="true" tabindex="-1"></a> sample_packing_group_size<span class="op">=</span><span class="dv">100000</span>,</span>
<span id="cb5-12"><a href="#cb5-12" aria-hidden="true" tabindex="-1"></a> max_seq_length<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb5-13"><a href="#cb5-13" aria-hidden="true" tabindex="-1"></a> relora_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-14"><a href="#cb5-14" aria-hidden="true" tabindex="-1"></a> relora_warmup_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-15"><a href="#cb5-15" aria-hidden="true" tabindex="-1"></a> relora_anneal_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-16"><a href="#cb5-16" aria-hidden="true" tabindex="-1"></a> relora_prune_ratio<span class="op">=</span><span class="fl">0.9</span>,</span>
<span id="cb5-17"><a href="#cb5-17" aria-hidden="true" tabindex="-1"></a> bench_split<span class="op">=</span><span class="st">'eval'</span>,</span>
<span id="cb5-18"><a href="#cb5-18" aria-hidden="true" tabindex="-1"></a> bench_dataset<span class="op">=</span><span class="st">'pharaouk/dharma-1/dharma_1_mini.json'</span>,</span>
<span id="cb5-19"><a href="#cb5-19" aria-hidden="true" tabindex="-1"></a> do_bench_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb5-20"><a href="#cb5-20" aria-hidden="true" tabindex="-1"></a> do_causal_lm_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb5-21"><a href="#cb5-21" aria-hidden="true" tabindex="-1"></a> max_bench_samples<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-22"><a href="#cb5-22" aria-hidden="true" tabindex="-1"></a> bench_source_max_len<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb5-23"><a href="#cb5-23" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-24"><a href="#cb5-24" aria-hidden="true" tabindex="-1"></a> cosine_min_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-25"><a href="#cb5-25" aria-hidden="true" tabindex="-1"></a> cosine_constant_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-26"><a href="#cb5-26" aria-hidden="true" tabindex="-1"></a> loraplus_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-27"><a href="#cb5-27" aria-hidden="true" tabindex="-1"></a> loraplus_lr_embedding<span class="op">=</span><span class="fl">1e-06</span>,</span>
<span id="cb5-28"><a href="#cb5-28" aria-hidden="true" tabindex="-1"></a> embedding_lr_scale<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-29"><a href="#cb5-29" aria-hidden="true" tabindex="-1"></a> lr_groups<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-30"><a href="#cb5-30" aria-hidden="true" tabindex="-1"></a> embedding_lr<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-31"><a href="#cb5-31" aria-hidden="true" tabindex="-1"></a> qlora<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb5-32"><a href="#cb5-32" aria-hidden="true" tabindex="-1"></a> orpo_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-33"><a href="#cb5-33" aria-hidden="true" tabindex="-1"></a> lisa_n_layers<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-34"><a href="#cb5-34" aria-hidden="true" tabindex="-1"></a> lisa_step_interval<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-35"><a href="#cb5-35" aria-hidden="true" tabindex="-1"></a> lisa_layers_attribute<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-36"><a href="#cb5-36" aria-hidden="true" tabindex="-1"></a> curriculum_sampling<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-37"><a href="#cb5-37" aria-hidden="true" tabindex="-1"></a> alternate_lr_scheduler_type<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-38"><a href="#cb5-38" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-39"><a href="#cb5-39" aria-hidden="true" tabindex="-1"></a> kd_ce_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-40"><a href="#cb5-40" aria-hidden="true" tabindex="-1"></a> kd_alpha<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb5-41"><a href="#cb5-41" aria-hidden="true" tabindex="-1"></a> kd_temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb5-42"><a href="#cb5-42" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-43"><a href="#cb5-43" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-44"><a href="#cb5-44" aria-hidden="true" tabindex="-1"></a> adam_beta3<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-45"><a href="#cb5-45" aria-hidden="true" tabindex="-1"></a> adam_epsilon2<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-46"><a href="#cb5-46" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-47"><a href="#cb5-47" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-48"><a href="#cb5-48" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb5-13"><a href="#cb5-13" aria-hidden="true" tabindex="-1"></a> dataset_num_proc<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-14"><a href="#cb5-14" aria-hidden="true" tabindex="-1"></a> relora_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-15"><a href="#cb5-15" aria-hidden="true" tabindex="-1"></a> relora_warmup_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-16"><a href="#cb5-16" aria-hidden="true" tabindex="-1"></a> relora_anneal_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-17"><a href="#cb5-17" aria-hidden="true" tabindex="-1"></a> relora_prune_ratio<span class="op">=</span><span class="fl">0.9</span>,</span>
<span id="cb5-18"><a href="#cb5-18" aria-hidden="true" tabindex="-1"></a> bench_split<span class="op">=</span><span class="st">'eval'</span>,</span>
<span id="cb5-19"><a href="#cb5-19" aria-hidden="true" tabindex="-1"></a> bench_dataset<span class="op">=</span><span class="st">'pharaouk/dharma-1/dharma_1_mini.json'</span>,</span>
<span id="cb5-20"><a href="#cb5-20" aria-hidden="true" tabindex="-1"></a> do_bench_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb5-21"><a href="#cb5-21" aria-hidden="true" tabindex="-1"></a> do_causal_lm_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb5-22"><a href="#cb5-22" aria-hidden="true" tabindex="-1"></a> max_bench_samples<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-23"><a href="#cb5-23" aria-hidden="true" tabindex="-1"></a> bench_source_max_len<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb5-24"><a href="#cb5-24" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-25"><a href="#cb5-25" aria-hidden="true" tabindex="-1"></a> cosine_min_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-26"><a href="#cb5-26" aria-hidden="true" tabindex="-1"></a> cosine_constant_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-27"><a href="#cb5-27" aria-hidden="true" tabindex="-1"></a> loraplus_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-28"><a href="#cb5-28" aria-hidden="true" tabindex="-1"></a> loraplus_lr_embedding<span class="op">=</span><span class="fl">1e-06</span>,</span>
<span id="cb5-29"><a href="#cb5-29" aria-hidden="true" tabindex="-1"></a> embedding_lr_scale<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-30"><a href="#cb5-30" aria-hidden="true" tabindex="-1"></a> lr_groups<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-31"><a href="#cb5-31" aria-hidden="true" tabindex="-1"></a> embedding_lr<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-32"><a href="#cb5-32" aria-hidden="true" tabindex="-1"></a> qlora<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb5-33"><a href="#cb5-33" aria-hidden="true" tabindex="-1"></a> orpo_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-34"><a href="#cb5-34" aria-hidden="true" tabindex="-1"></a> lisa_n_layers<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-35"><a href="#cb5-35" aria-hidden="true" tabindex="-1"></a> lisa_step_interval<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-36"><a href="#cb5-36" aria-hidden="true" tabindex="-1"></a> lisa_layers_attribute<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-37"><a href="#cb5-37" aria-hidden="true" tabindex="-1"></a> curriculum_sampling<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-38"><a href="#cb5-38" aria-hidden="true" tabindex="-1"></a> alternate_lr_scheduler_type<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-39"><a href="#cb5-39" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-40"><a href="#cb5-40" aria-hidden="true" tabindex="-1"></a> kd_ce_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-41"><a href="#cb5-41" aria-hidden="true" tabindex="-1"></a> kd_alpha<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb5-42"><a href="#cb5-42" aria-hidden="true" tabindex="-1"></a> kd_temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb5-43"><a href="#cb5-43" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-44"><a href="#cb5-44" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-45"><a href="#cb5-45" aria-hidden="true" tabindex="-1"></a> adam_beta3<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-46"><a href="#cb5-46" aria-hidden="true" tabindex="-1"></a> adam_epsilon2<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-47"><a href="#cb5-47" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-48"><a href="#cb5-48" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-49"><a href="#cb5-49" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Reward config for Reward training</p>
</section>
<section id="axolotl.core.training_args.AxolotlTrainingArguments" class="level3">
@@ -808,42 +813,43 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a> sample_packing_bin_size<span class="op">=</span><span class="dv">200</span>,</span>
<span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a> sample_packing_group_size<span class="op">=</span><span class="dv">100000</span>,</span>
<span id="cb6-12"><a href="#cb6-12" aria-hidden="true" tabindex="-1"></a> max_seq_length<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb6-13"><a href="#cb6-13" aria-hidden="true" tabindex="-1"></a> relora_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-14"><a href="#cb6-14" aria-hidden="true" tabindex="-1"></a> relora_warmup_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-15"><a href="#cb6-15" aria-hidden="true" tabindex="-1"></a> relora_anneal_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-16"><a href="#cb6-16" aria-hidden="true" tabindex="-1"></a> relora_prune_ratio<span class="op">=</span><span class="fl">0.9</span>,</span>
<span id="cb6-17"><a href="#cb6-17" aria-hidden="true" tabindex="-1"></a> bench_split<span class="op">=</span><span class="st">'eval'</span>,</span>
<span id="cb6-18"><a href="#cb6-18" aria-hidden="true" tabindex="-1"></a> bench_dataset<span class="op">=</span><span class="st">'pharaouk/dharma-1/dharma_1_mini.json'</span>,</span>
<span id="cb6-19"><a href="#cb6-19" aria-hidden="true" tabindex="-1"></a> do_bench_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb6-20"><a href="#cb6-20" aria-hidden="true" tabindex="-1"></a> do_causal_lm_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb6-21"><a href="#cb6-21" aria-hidden="true" tabindex="-1"></a> max_bench_samples<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-22"><a href="#cb6-22" aria-hidden="true" tabindex="-1"></a> bench_source_max_len<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb6-23"><a href="#cb6-23" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-24"><a href="#cb6-24" aria-hidden="true" tabindex="-1"></a> cosine_min_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-25"><a href="#cb6-25" aria-hidden="true" tabindex="-1"></a> cosine_constant_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-26"><a href="#cb6-26" aria-hidden="true" tabindex="-1"></a> loraplus_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-27"><a href="#cb6-27" aria-hidden="true" tabindex="-1"></a> loraplus_lr_embedding<span class="op">=</span><span class="fl">1e-06</span>,</span>
<span id="cb6-28"><a href="#cb6-28" aria-hidden="true" tabindex="-1"></a> embedding_lr_scale<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-29"><a href="#cb6-29" aria-hidden="true" tabindex="-1"></a> lr_groups<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-30"><a href="#cb6-30" aria-hidden="true" tabindex="-1"></a> embedding_lr<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-31"><a href="#cb6-31" aria-hidden="true" tabindex="-1"></a> qlora<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb6-32"><a href="#cb6-32" aria-hidden="true" tabindex="-1"></a> orpo_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-33"><a href="#cb6-33" aria-hidden="true" tabindex="-1"></a> lisa_n_layers<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-34"><a href="#cb6-34" aria-hidden="true" tabindex="-1"></a> lisa_step_interval<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-35"><a href="#cb6-35" aria-hidden="true" tabindex="-1"></a> lisa_layers_attribute<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-36"><a href="#cb6-36" aria-hidden="true" tabindex="-1"></a> curriculum_sampling<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-37"><a href="#cb6-37" aria-hidden="true" tabindex="-1"></a> alternate_lr_scheduler_type<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-38"><a href="#cb6-38" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-39"><a href="#cb6-39" aria-hidden="true" tabindex="-1"></a> kd_ce_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-40"><a href="#cb6-40" aria-hidden="true" tabindex="-1"></a> kd_alpha<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb6-41"><a href="#cb6-41" aria-hidden="true" tabindex="-1"></a> kd_temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb6-42"><a href="#cb6-42" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-43"><a href="#cb6-43" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-44"><a href="#cb6-44" aria-hidden="true" tabindex="-1"></a> adam_beta3<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-45"><a href="#cb6-45" aria-hidden="true" tabindex="-1"></a> adam_epsilon2<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-46"><a href="#cb6-46" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-47"><a href="#cb6-47" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-48"><a href="#cb6-48" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb6-13"><a href="#cb6-13" aria-hidden="true" tabindex="-1"></a> dataset_num_proc<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-14"><a href="#cb6-14" aria-hidden="true" tabindex="-1"></a> relora_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-15"><a href="#cb6-15" aria-hidden="true" tabindex="-1"></a> relora_warmup_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-16"><a href="#cb6-16" aria-hidden="true" tabindex="-1"></a> relora_anneal_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-17"><a href="#cb6-17" aria-hidden="true" tabindex="-1"></a> relora_prune_ratio<span class="op">=</span><span class="fl">0.9</span>,</span>
<span id="cb6-18"><a href="#cb6-18" aria-hidden="true" tabindex="-1"></a> bench_split<span class="op">=</span><span class="st">'eval'</span>,</span>
<span id="cb6-19"><a href="#cb6-19" aria-hidden="true" tabindex="-1"></a> bench_dataset<span class="op">=</span><span class="st">'pharaouk/dharma-1/dharma_1_mini.json'</span>,</span>
<span id="cb6-20"><a href="#cb6-20" aria-hidden="true" tabindex="-1"></a> do_bench_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb6-21"><a href="#cb6-21" aria-hidden="true" tabindex="-1"></a> do_causal_lm_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb6-22"><a href="#cb6-22" aria-hidden="true" tabindex="-1"></a> max_bench_samples<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-23"><a href="#cb6-23" aria-hidden="true" tabindex="-1"></a> bench_source_max_len<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb6-24"><a href="#cb6-24" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-25"><a href="#cb6-25" aria-hidden="true" tabindex="-1"></a> cosine_min_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-26"><a href="#cb6-26" aria-hidden="true" tabindex="-1"></a> cosine_constant_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-27"><a href="#cb6-27" aria-hidden="true" tabindex="-1"></a> loraplus_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-28"><a href="#cb6-28" aria-hidden="true" tabindex="-1"></a> loraplus_lr_embedding<span class="op">=</span><span class="fl">1e-06</span>,</span>
<span id="cb6-29"><a href="#cb6-29" aria-hidden="true" tabindex="-1"></a> embedding_lr_scale<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-30"><a href="#cb6-30" aria-hidden="true" tabindex="-1"></a> lr_groups<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-31"><a href="#cb6-31" aria-hidden="true" tabindex="-1"></a> embedding_lr<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-32"><a href="#cb6-32" aria-hidden="true" tabindex="-1"></a> qlora<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb6-33"><a href="#cb6-33" aria-hidden="true" tabindex="-1"></a> orpo_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-34"><a href="#cb6-34" aria-hidden="true" tabindex="-1"></a> lisa_n_layers<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-35"><a href="#cb6-35" aria-hidden="true" tabindex="-1"></a> lisa_step_interval<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-36"><a href="#cb6-36" aria-hidden="true" tabindex="-1"></a> lisa_layers_attribute<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-37"><a href="#cb6-37" aria-hidden="true" tabindex="-1"></a> curriculum_sampling<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-38"><a href="#cb6-38" aria-hidden="true" tabindex="-1"></a> alternate_lr_scheduler_type<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-39"><a href="#cb6-39" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-40"><a href="#cb6-40" aria-hidden="true" tabindex="-1"></a> kd_ce_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-41"><a href="#cb6-41" aria-hidden="true" tabindex="-1"></a> kd_alpha<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb6-42"><a href="#cb6-42" aria-hidden="true" tabindex="-1"></a> kd_temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb6-43"><a href="#cb6-43" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-44"><a href="#cb6-44" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-45"><a href="#cb6-45" aria-hidden="true" tabindex="-1"></a> adam_beta3<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-46"><a href="#cb6-46" aria-hidden="true" tabindex="-1"></a> adam_epsilon2<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-47"><a href="#cb6-47" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-48"><a href="#cb6-48" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-49"><a href="#cb6-49" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Training arguments for Causal trainer</p>
<p>This code is duplicated due to HF TrainingArguments not setting output_dir with a
default value so it cant be used as a mixin.</p>
@@ -862,42 +868,43 @@ default value so it cant be used as a mixin.</p>
<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a> sample_packing_bin_size<span class="op">=</span><span class="dv">200</span>,</span>
<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a> sample_packing_group_size<span class="op">=</span><span class="dv">100000</span>,</span>
<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a> max_seq_length<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb7-13"><a href="#cb7-13" aria-hidden="true" tabindex="-1"></a> relora_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-14"><a href="#cb7-14" aria-hidden="true" tabindex="-1"></a> relora_warmup_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-15"><a href="#cb7-15" aria-hidden="true" tabindex="-1"></a> relora_anneal_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-16"><a href="#cb7-16" aria-hidden="true" tabindex="-1"></a> relora_prune_ratio<span class="op">=</span><span class="fl">0.9</span>,</span>
<span id="cb7-17"><a href="#cb7-17" aria-hidden="true" tabindex="-1"></a> bench_split<span class="op">=</span><span class="st">'eval'</span>,</span>
<span id="cb7-18"><a href="#cb7-18" aria-hidden="true" tabindex="-1"></a> bench_dataset<span class="op">=</span><span class="st">'pharaouk/dharma-1/dharma_1_mini.json'</span>,</span>
<span id="cb7-19"><a href="#cb7-19" aria-hidden="true" tabindex="-1"></a> do_bench_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb7-20"><a href="#cb7-20" aria-hidden="true" tabindex="-1"></a> do_causal_lm_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb7-21"><a href="#cb7-21" aria-hidden="true" tabindex="-1"></a> max_bench_samples<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-22"><a href="#cb7-22" aria-hidden="true" tabindex="-1"></a> bench_source_max_len<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb7-23"><a href="#cb7-23" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-24"><a href="#cb7-24" aria-hidden="true" tabindex="-1"></a> cosine_min_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-25"><a href="#cb7-25" aria-hidden="true" tabindex="-1"></a> cosine_constant_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-26"><a href="#cb7-26" aria-hidden="true" tabindex="-1"></a> loraplus_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-27"><a href="#cb7-27" aria-hidden="true" tabindex="-1"></a> loraplus_lr_embedding<span class="op">=</span><span class="fl">1e-06</span>,</span>
<span id="cb7-28"><a href="#cb7-28" aria-hidden="true" tabindex="-1"></a> embedding_lr_scale<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-29"><a href="#cb7-29" aria-hidden="true" tabindex="-1"></a> lr_groups<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-30"><a href="#cb7-30" aria-hidden="true" tabindex="-1"></a> embedding_lr<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-31"><a href="#cb7-31" aria-hidden="true" tabindex="-1"></a> qlora<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb7-32"><a href="#cb7-32" aria-hidden="true" tabindex="-1"></a> orpo_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-33"><a href="#cb7-33" aria-hidden="true" tabindex="-1"></a> lisa_n_layers<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-34"><a href="#cb7-34" aria-hidden="true" tabindex="-1"></a> lisa_step_interval<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-35"><a href="#cb7-35" aria-hidden="true" tabindex="-1"></a> lisa_layers_attribute<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-36"><a href="#cb7-36" aria-hidden="true" tabindex="-1"></a> curriculum_sampling<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-37"><a href="#cb7-37" aria-hidden="true" tabindex="-1"></a> alternate_lr_scheduler_type<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-38"><a href="#cb7-38" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-39"><a href="#cb7-39" aria-hidden="true" tabindex="-1"></a> kd_ce_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-40"><a href="#cb7-40" aria-hidden="true" tabindex="-1"></a> kd_alpha<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb7-41"><a href="#cb7-41" aria-hidden="true" tabindex="-1"></a> kd_temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb7-42"><a href="#cb7-42" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-43"><a href="#cb7-43" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-44"><a href="#cb7-44" aria-hidden="true" tabindex="-1"></a> adam_beta3<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-45"><a href="#cb7-45" aria-hidden="true" tabindex="-1"></a> adam_epsilon2<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-46"><a href="#cb7-46" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-47"><a href="#cb7-47" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-48"><a href="#cb7-48" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb7-13"><a href="#cb7-13" aria-hidden="true" tabindex="-1"></a> dataset_num_proc<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-14"><a href="#cb7-14" aria-hidden="true" tabindex="-1"></a> relora_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-15"><a href="#cb7-15" aria-hidden="true" tabindex="-1"></a> relora_warmup_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-16"><a href="#cb7-16" aria-hidden="true" tabindex="-1"></a> relora_anneal_steps<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-17"><a href="#cb7-17" aria-hidden="true" tabindex="-1"></a> relora_prune_ratio<span class="op">=</span><span class="fl">0.9</span>,</span>
<span id="cb7-18"><a href="#cb7-18" aria-hidden="true" tabindex="-1"></a> bench_split<span class="op">=</span><span class="st">'eval'</span>,</span>
<span id="cb7-19"><a href="#cb7-19" aria-hidden="true" tabindex="-1"></a> bench_dataset<span class="op">=</span><span class="st">'pharaouk/dharma-1/dharma_1_mini.json'</span>,</span>
<span id="cb7-20"><a href="#cb7-20" aria-hidden="true" tabindex="-1"></a> do_bench_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb7-21"><a href="#cb7-21" aria-hidden="true" tabindex="-1"></a> do_causal_lm_eval<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb7-22"><a href="#cb7-22" aria-hidden="true" tabindex="-1"></a> max_bench_samples<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-23"><a href="#cb7-23" aria-hidden="true" tabindex="-1"></a> bench_source_max_len<span class="op">=</span><span class="dv">2048</span>,</span>
<span id="cb7-24"><a href="#cb7-24" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-25"><a href="#cb7-25" aria-hidden="true" tabindex="-1"></a> cosine_min_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-26"><a href="#cb7-26" aria-hidden="true" tabindex="-1"></a> cosine_constant_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-27"><a href="#cb7-27" aria-hidden="true" tabindex="-1"></a> loraplus_lr_ratio<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-28"><a href="#cb7-28" aria-hidden="true" tabindex="-1"></a> loraplus_lr_embedding<span class="op">=</span><span class="fl">1e-06</span>,</span>
<span id="cb7-29"><a href="#cb7-29" aria-hidden="true" tabindex="-1"></a> embedding_lr_scale<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-30"><a href="#cb7-30" aria-hidden="true" tabindex="-1"></a> lr_groups<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-31"><a href="#cb7-31" aria-hidden="true" tabindex="-1"></a> embedding_lr<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-32"><a href="#cb7-32" aria-hidden="true" tabindex="-1"></a> qlora<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb7-33"><a href="#cb7-33" aria-hidden="true" tabindex="-1"></a> orpo_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-34"><a href="#cb7-34" aria-hidden="true" tabindex="-1"></a> lisa_n_layers<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-35"><a href="#cb7-35" aria-hidden="true" tabindex="-1"></a> lisa_step_interval<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-36"><a href="#cb7-36" aria-hidden="true" tabindex="-1"></a> lisa_layers_attribute<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-37"><a href="#cb7-37" aria-hidden="true" tabindex="-1"></a> curriculum_sampling<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-38"><a href="#cb7-38" aria-hidden="true" tabindex="-1"></a> alternate_lr_scheduler_type<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-39"><a href="#cb7-39" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-40"><a href="#cb7-40" aria-hidden="true" tabindex="-1"></a> kd_ce_alpha<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-41"><a href="#cb7-41" aria-hidden="true" tabindex="-1"></a> kd_alpha<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb7-42"><a href="#cb7-42" aria-hidden="true" tabindex="-1"></a> kd_temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb7-43"><a href="#cb7-43" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-44"><a href="#cb7-44" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-45"><a href="#cb7-45" aria-hidden="true" tabindex="-1"></a> adam_beta3<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-46"><a href="#cb7-46" aria-hidden="true" tabindex="-1"></a> adam_epsilon2<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-47"><a href="#cb7-47" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-48"><a href="#cb7-48" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-49"><a href="#cb7-49" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Mixin class for the Axolotl training args.</p>

View File

@@ -520,7 +520,7 @@ into fixed-capacity batches to optimize memory usage and training throughput.</p
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a> lengths,</span>
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a> packing_efficiency_estimate<span class="op">=</span><span class="fl">1.0</span>,</span>
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a> drop_last<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a> num_count_samples<span class="op">=</span><span class="dv">16</span>,</span>
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a> num_count_samples<span class="op">=</span><span class="dv">8</span>,</span>
<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a> sequential<span class="op">=</span><span class="va">False</span>,</span>
<span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a> group_size<span class="op">=</span><span class="dv">100000</span>,</span>
<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a> bin_size<span class="op">=</span><span class="dv">200</span>,</span>