Built site for gh-pages
This commit is contained in:
@@ -592,7 +592,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<span id="cb1-284"><a href="#cb1-284" aria-hidden="true" tabindex="-1"></a><span class="fu">logging_steps</span><span class="kw">:</span></span>
|
||||
<span id="cb1-285"><a href="#cb1-285" aria-hidden="true" tabindex="-1"></a><span class="fu">eval_steps</span><span class="kw">:</span><span class="co"> # Leave empty to eval at each epoch, integers for every N steps. decimal for fraction of total steps</span></span>
|
||||
<span id="cb1-286"><a href="#cb1-286" aria-hidden="true" tabindex="-1"></a><span class="fu">evals_per_epoch</span><span class="kw">:</span><span class="co"> # number of times per epoch to run evals, mutually exclusive with eval_steps</span></span>
|
||||
<span id="cb1-287"><a href="#cb1-287" aria-hidden="true" tabindex="-1"></a><span class="fu">save_strategy</span><span class="kw">:</span><span class="co"> # Set to `no` to skip checkpoint saves</span></span>
|
||||
<span id="cb1-287"><a href="#cb1-287" aria-hidden="true" tabindex="-1"></a><span class="fu">save_strategy</span><span class="kw">:</span><span class="co"> # Set to `"no"` to skip checkpoint saves</span></span>
|
||||
<span id="cb1-288"><a href="#cb1-288" aria-hidden="true" tabindex="-1"></a><span class="fu">save_steps</span><span class="kw">:</span><span class="co"> # Leave empty to save at each epoch</span></span>
|
||||
<span id="cb1-289"><a href="#cb1-289" aria-hidden="true" tabindex="-1"></a><span class="fu">saves_per_epoch</span><span class="kw">:</span><span class="co"> # number of times per epoch to save a checkpoint, mutually exclusive with save_steps</span></span>
|
||||
<span id="cb1-290"><a href="#cb1-290" aria-hidden="true" tabindex="-1"></a><span class="fu">save_total_limit</span><span class="kw">:</span><span class="co"> # Checkpoints saved at a time</span></span>
|
||||
|
||||
@@ -351,7 +351,7 @@ Description
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="list">
|
||||
<tr data-index="0" data-listing-file-modified-sort="1716499948222" data-listing-reading-time-sort="1" data-listing-word-count-sort="47" data-listing-title-sort="Pre-training" data-listing-filename-sort="pretraining.qmd">
|
||||
<tr data-index="0" data-listing-file-modified-sort="1716909922575" data-listing-reading-time-sort="1" data-listing-word-count-sort="47" data-listing-title-sort="Pre-training" data-listing-filename-sort="pretraining.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/pretraining.html" class="title listing-title">Pre-training</a>
|
||||
</td>
|
||||
@@ -359,7 +359,7 @@ Description
|
||||
<span class="listing-description">Data format for a pre-training completion task.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="1" data-listing-file-modified-sort="1716499948222" data-listing-reading-time-sort="2" data-listing-word-count-sort="308" data-listing-title-sort="Instruction Tuning" data-listing-filename-sort="inst_tune.qmd">
|
||||
<tr data-index="1" data-listing-file-modified-sort="1716909922575" data-listing-reading-time-sort="2" data-listing-word-count-sort="308" data-listing-title-sort="Instruction Tuning" data-listing-filename-sort="inst_tune.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/inst_tune.html" class="title listing-title">Instruction Tuning</a>
|
||||
</td>
|
||||
@@ -367,7 +367,7 @@ Description
|
||||
<span class="listing-description">Instruction tuning formats for supervised fine-tuning.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="2" data-listing-file-modified-sort="1716499948222" data-listing-reading-time-sort="2" data-listing-word-count-sort="235" data-listing-title-sort="Conversation" data-listing-filename-sort="conversation.qmd">
|
||||
<tr data-index="2" data-listing-file-modified-sort="1716909922575" data-listing-reading-time-sort="2" data-listing-word-count-sort="235" data-listing-title-sort="Conversation" data-listing-filename-sort="conversation.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/conversation.html" class="title listing-title">Conversation</a>
|
||||
</td>
|
||||
@@ -375,7 +375,7 @@ Description
|
||||
<span class="listing-description">Conversation format for supervised fine-tuning.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="3" data-listing-file-modified-sort="1716499948222" data-listing-reading-time-sort="1" data-listing-word-count-sort="3" data-listing-title-sort="Template-Free" data-listing-filename-sort="template_free.qmd">
|
||||
<tr data-index="3" data-listing-file-modified-sort="1716909922575" data-listing-reading-time-sort="1" data-listing-word-count-sort="3" data-listing-title-sort="Template-Free" data-listing-filename-sort="template_free.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/template_free.html" class="title listing-title">Template-Free</a>
|
||||
</td>
|
||||
@@ -383,7 +383,7 @@ Description
|
||||
<span class="listing-description">Construct prompts without a template.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="4" data-listing-file-modified-sort="1716499948222" data-listing-reading-time-sort="1" data-listing-word-count-sort="22" data-listing-title-sort="Custom Pre-Tokenized Dataset" data-listing-filename-sort="tokenized.qmd">
|
||||
<tr data-index="4" data-listing-file-modified-sort="1716909922575" data-listing-reading-time-sort="1" data-listing-word-count-sort="22" data-listing-title-sort="Custom Pre-Tokenized Dataset" data-listing-filename-sort="tokenized.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/tokenized.html" class="title listing-title">Custom Pre-Tokenized Dataset</a>
|
||||
</td>
|
||||
|
||||
@@ -320,7 +320,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="op">!</span>pip install torch<span class="op">==</span><span class="st">"2.1.2"</span></span>
|
||||
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="op">!</span>pip install <span class="op">-</span>e git<span class="op">+</span>https:<span class="op">//</span>github.com<span class="op">/</span>OpenAccess<span class="op">-</span>AI<span class="op">-</span>Collective<span class="op">/</span>axolotl<span class="co">#egg=axolotl</span></span>
|
||||
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="op">!</span>pip install flash<span class="op">-</span>attn<span class="op">==</span><span class="st">"2.5.0"</span></span>
|
||||
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a><span class="op">!</span>pip install deepspeed<span class="op">==</span><span class="st">"0.13.1"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a><span class="op">!</span>pip install deepspeed<span class="op">==</span><span class="st">"0.13.1"</span><span class="op">!</span>pip install mlflow<span class="op">==</span><span class="st">"2.13.0"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="create-an-yaml-config-file" class="level2">
|
||||
@@ -333,24 +333,24 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a><span class="st">base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T</span></span>
|
||||
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a><span class="st">model_type: LlamaForCausalLM</span></span>
|
||||
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a><span class="st">tokenizer_type: LlamaTokenizer</span></span>
|
||||
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a><span class="st">is_llama_derived_model: true</span></span>
|
||||
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a><span class="st">load_in_8bit: false</span></span>
|
||||
<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a><span class="st">load_in_4bit: true</span></span>
|
||||
<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a><span class="st">strict: false</span></span>
|
||||
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a><span class="st">datasets:</span></span>
|
||||
<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a><span class="st"> - path: mhenrichsen/alpaca_2k_test</span></span>
|
||||
<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a><span class="st"> type: alpaca</span></span>
|
||||
<span id="cb3-17"><a href="#cb3-17" aria-hidden="true" tabindex="-1"></a><span class="st">dataset_prepared_path:</span></span>
|
||||
<span id="cb3-18"><a href="#cb3-18" aria-hidden="true" tabindex="-1"></a><span class="st">val_set_size: 0.05</span></span>
|
||||
<span id="cb3-19"><a href="#cb3-19" aria-hidden="true" tabindex="-1"></a><span class="st">output_dir: ./outputs/qlora-out</span></span>
|
||||
<span id="cb3-20"><a href="#cb3-20" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-21"><a href="#cb3-21" aria-hidden="true" tabindex="-1"></a><span class="st">adapter: qlora</span></span>
|
||||
<span id="cb3-22"><a href="#cb3-22" aria-hidden="true" tabindex="-1"></a><span class="st">lora_model_dir:</span></span>
|
||||
<span id="cb3-23"><a href="#cb3-23" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-24"><a href="#cb3-24" aria-hidden="true" tabindex="-1"></a><span class="st">sequence_len: 1096</span></span>
|
||||
<span id="cb3-25"><a href="#cb3-25" aria-hidden="true" tabindex="-1"></a><span class="st">sample_packing: true</span></span>
|
||||
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a><span class="st">load_in_8bit: false</span></span>
|
||||
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a><span class="st">load_in_4bit: true</span></span>
|
||||
<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a><span class="st">strict: false</span></span>
|
||||
<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a><span class="st">datasets:</span></span>
|
||||
<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a><span class="st"> - path: mhenrichsen/alpaca_2k_test</span></span>
|
||||
<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a><span class="st"> type: alpaca</span></span>
|
||||
<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a><span class="st">dataset_prepared_path:</span></span>
|
||||
<span id="cb3-17"><a href="#cb3-17" aria-hidden="true" tabindex="-1"></a><span class="st">val_set_size: 0.05</span></span>
|
||||
<span id="cb3-18"><a href="#cb3-18" aria-hidden="true" tabindex="-1"></a><span class="st">output_dir: ./outputs/qlora-out</span></span>
|
||||
<span id="cb3-19"><a href="#cb3-19" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-20"><a href="#cb3-20" aria-hidden="true" tabindex="-1"></a><span class="st">adapter: qlora</span></span>
|
||||
<span id="cb3-21"><a href="#cb3-21" aria-hidden="true" tabindex="-1"></a><span class="st">lora_model_dir:</span></span>
|
||||
<span id="cb3-22"><a href="#cb3-22" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-23"><a href="#cb3-23" aria-hidden="true" tabindex="-1"></a><span class="st">sequence_len: 4096</span></span>
|
||||
<span id="cb3-24"><a href="#cb3-24" aria-hidden="true" tabindex="-1"></a><span class="st">sample_packing: true</span></span>
|
||||
<span id="cb3-25"><a href="#cb3-25" aria-hidden="true" tabindex="-1"></a><span class="st">eval_sample_packing: false</span></span>
|
||||
<span id="cb3-26"><a href="#cb3-26" aria-hidden="true" tabindex="-1"></a><span class="st">pad_to_sequence_len: true</span></span>
|
||||
<span id="cb3-27"><a href="#cb3-27" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-28"><a href="#cb3-28" aria-hidden="true" tabindex="-1"></a><span class="st">lora_r: 32</span></span>
|
||||
@@ -366,51 +366,48 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<span id="cb3-38"><a href="#cb3-38" aria-hidden="true" tabindex="-1"></a><span class="st">wandb_name:</span></span>
|
||||
<span id="cb3-39"><a href="#cb3-39" aria-hidden="true" tabindex="-1"></a><span class="st">wandb_log_model:</span></span>
|
||||
<span id="cb3-40"><a href="#cb3-40" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-41"><a href="#cb3-41" aria-hidden="true" tabindex="-1"></a><span class="st">mlflow_experiment_name: colab-example</span></span>
|
||||
<span id="cb3-42"><a href="#cb3-42" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-43"><a href="#cb3-43" aria-hidden="true" tabindex="-1"></a><span class="st">gradient_accumulation_steps: 1</span></span>
|
||||
<span id="cb3-44"><a href="#cb3-44" aria-hidden="true" tabindex="-1"></a><span class="st">micro_batch_size: 1</span></span>
|
||||
<span id="cb3-45"><a href="#cb3-45" aria-hidden="true" tabindex="-1"></a><span class="st">num_epochs: 4</span></span>
|
||||
<span id="cb3-46"><a href="#cb3-46" aria-hidden="true" tabindex="-1"></a><span class="st">max_steps: 20</span></span>
|
||||
<span id="cb3-47"><a href="#cb3-47" aria-hidden="true" tabindex="-1"></a><span class="st">optimizer: paged_adamw_32bit</span></span>
|
||||
<span id="cb3-48"><a href="#cb3-48" aria-hidden="true" tabindex="-1"></a><span class="st">lr_scheduler: cosine</span></span>
|
||||
<span id="cb3-49"><a href="#cb3-49" aria-hidden="true" tabindex="-1"></a><span class="st">learning_rate: 0.0002</span></span>
|
||||
<span id="cb3-50"><a href="#cb3-50" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-51"><a href="#cb3-51" aria-hidden="true" tabindex="-1"></a><span class="st">train_on_inputs: false</span></span>
|
||||
<span id="cb3-52"><a href="#cb3-52" aria-hidden="true" tabindex="-1"></a><span class="st">group_by_length: false</span></span>
|
||||
<span id="cb3-53"><a href="#cb3-53" aria-hidden="true" tabindex="-1"></a><span class="st">bf16: false</span></span>
|
||||
<span id="cb3-54"><a href="#cb3-54" aria-hidden="true" tabindex="-1"></a><span class="st">fp16: true</span></span>
|
||||
<span id="cb3-55"><a href="#cb3-55" aria-hidden="true" tabindex="-1"></a><span class="st">tf32: false</span></span>
|
||||
<span id="cb3-56"><a href="#cb3-56" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-57"><a href="#cb3-57" aria-hidden="true" tabindex="-1"></a><span class="st">gradient_checkpointing: true</span></span>
|
||||
<span id="cb3-58"><a href="#cb3-58" aria-hidden="true" tabindex="-1"></a><span class="st">early_stopping_patience:</span></span>
|
||||
<span id="cb3-59"><a href="#cb3-59" aria-hidden="true" tabindex="-1"></a><span class="st">resume_from_checkpoint:</span></span>
|
||||
<span id="cb3-60"><a href="#cb3-60" aria-hidden="true" tabindex="-1"></a><span class="st">local_rank:</span></span>
|
||||
<span id="cb3-61"><a href="#cb3-61" aria-hidden="true" tabindex="-1"></a><span class="st">logging_steps: 1</span></span>
|
||||
<span id="cb3-62"><a href="#cb3-62" aria-hidden="true" tabindex="-1"></a><span class="st">xformers_attention:</span></span>
|
||||
<span id="cb3-63"><a href="#cb3-63" aria-hidden="true" tabindex="-1"></a><span class="st">flash_attention: false</span></span>
|
||||
<span id="cb3-64"><a href="#cb3-64" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-65"><a href="#cb3-65" aria-hidden="true" tabindex="-1"></a><span class="st">warmup_steps: 10</span></span>
|
||||
<span id="cb3-66"><a href="#cb3-66" aria-hidden="true" tabindex="-1"></a><span class="st">evals_per_epoch:</span></span>
|
||||
<span id="cb3-67"><a href="#cb3-67" aria-hidden="true" tabindex="-1"></a><span class="st">saves_per_epoch:</span></span>
|
||||
<span id="cb3-68"><a href="#cb3-68" aria-hidden="true" tabindex="-1"></a><span class="st">debug:</span></span>
|
||||
<span id="cb3-69"><a href="#cb3-69" aria-hidden="true" tabindex="-1"></a><span class="st">deepspeed:</span></span>
|
||||
<span id="cb3-70"><a href="#cb3-70" aria-hidden="true" tabindex="-1"></a><span class="st">weight_decay: 0.0</span></span>
|
||||
<span id="cb3-71"><a href="#cb3-71" aria-hidden="true" tabindex="-1"></a><span class="st">fsdp:</span></span>
|
||||
<span id="cb3-72"><a href="#cb3-72" aria-hidden="true" tabindex="-1"></a><span class="st">fsdp_config:</span></span>
|
||||
<span id="cb3-73"><a href="#cb3-73" aria-hidden="true" tabindex="-1"></a><span class="st">special_tokens:</span></span>
|
||||
<span id="cb3-74"><a href="#cb3-74" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-75"><a href="#cb3-75" aria-hidden="true" tabindex="-1"></a><span class="st">"""</span></span>
|
||||
<span id="cb3-41"><a href="#cb3-41" aria-hidden="true" tabindex="-1"></a><span class="st">gradient_accumulation_steps: 4</span></span>
|
||||
<span id="cb3-42"><a href="#cb3-42" aria-hidden="true" tabindex="-1"></a><span class="st">micro_batch_size: 2</span></span>
|
||||
<span id="cb3-43"><a href="#cb3-43" aria-hidden="true" tabindex="-1"></a><span class="st">num_epochs: 4</span></span>
|
||||
<span id="cb3-44"><a href="#cb3-44" aria-hidden="true" tabindex="-1"></a><span class="st">optimizer: paged_adamw_32bit</span></span>
|
||||
<span id="cb3-45"><a href="#cb3-45" aria-hidden="true" tabindex="-1"></a><span class="st">lr_scheduler: cosine</span></span>
|
||||
<span id="cb3-46"><a href="#cb3-46" aria-hidden="true" tabindex="-1"></a><span class="st">learning_rate: 0.0002</span></span>
|
||||
<span id="cb3-47"><a href="#cb3-47" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-48"><a href="#cb3-48" aria-hidden="true" tabindex="-1"></a><span class="st">train_on_inputs: false</span></span>
|
||||
<span id="cb3-49"><a href="#cb3-49" aria-hidden="true" tabindex="-1"></a><span class="st">group_by_length: false</span></span>
|
||||
<span id="cb3-50"><a href="#cb3-50" aria-hidden="true" tabindex="-1"></a><span class="st">bf16: auto</span></span>
|
||||
<span id="cb3-51"><a href="#cb3-51" aria-hidden="true" tabindex="-1"></a><span class="st">fp16:</span></span>
|
||||
<span id="cb3-52"><a href="#cb3-52" aria-hidden="true" tabindex="-1"></a><span class="st">tf32: false</span></span>
|
||||
<span id="cb3-53"><a href="#cb3-53" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-54"><a href="#cb3-54" aria-hidden="true" tabindex="-1"></a><span class="st">gradient_checkpointing: true</span></span>
|
||||
<span id="cb3-55"><a href="#cb3-55" aria-hidden="true" tabindex="-1"></a><span class="st">early_stopping_patience:</span></span>
|
||||
<span id="cb3-56"><a href="#cb3-56" aria-hidden="true" tabindex="-1"></a><span class="st">resume_from_checkpoint:</span></span>
|
||||
<span id="cb3-57"><a href="#cb3-57" aria-hidden="true" tabindex="-1"></a><span class="st">local_rank:</span></span>
|
||||
<span id="cb3-58"><a href="#cb3-58" aria-hidden="true" tabindex="-1"></a><span class="st">logging_steps: 1</span></span>
|
||||
<span id="cb3-59"><a href="#cb3-59" aria-hidden="true" tabindex="-1"></a><span class="st">xformers_attention:</span></span>
|
||||
<span id="cb3-60"><a href="#cb3-60" aria-hidden="true" tabindex="-1"></a><span class="st">flash_attention: true</span></span>
|
||||
<span id="cb3-61"><a href="#cb3-61" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-62"><a href="#cb3-62" aria-hidden="true" tabindex="-1"></a><span class="st">warmup_steps: 10</span></span>
|
||||
<span id="cb3-63"><a href="#cb3-63" aria-hidden="true" tabindex="-1"></a><span class="st">evals_per_epoch: 4</span></span>
|
||||
<span id="cb3-64"><a href="#cb3-64" aria-hidden="true" tabindex="-1"></a><span class="st">saves_per_epoch: 1</span></span>
|
||||
<span id="cb3-65"><a href="#cb3-65" aria-hidden="true" tabindex="-1"></a><span class="st">debug:</span></span>
|
||||
<span id="cb3-66"><a href="#cb3-66" aria-hidden="true" tabindex="-1"></a><span class="st">deepspeed:</span></span>
|
||||
<span id="cb3-67"><a href="#cb3-67" aria-hidden="true" tabindex="-1"></a><span class="st">weight_decay: 0.0</span></span>
|
||||
<span id="cb3-68"><a href="#cb3-68" aria-hidden="true" tabindex="-1"></a><span class="st">fsdp:</span></span>
|
||||
<span id="cb3-69"><a href="#cb3-69" aria-hidden="true" tabindex="-1"></a><span class="st">fsdp_config:</span></span>
|
||||
<span id="cb3-70"><a href="#cb3-70" aria-hidden="true" tabindex="-1"></a><span class="st">special_tokens:</span></span>
|
||||
<span id="cb3-71"><a href="#cb3-71" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-72"><a href="#cb3-72" aria-hidden="true" tabindex="-1"></a><span class="st">"""</span></span>
|
||||
<span id="cb3-73"><a href="#cb3-73" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-74"><a href="#cb3-74" aria-hidden="true" tabindex="-1"></a><span class="co"># Convert the YAML string to a Python dictionary</span></span>
|
||||
<span id="cb3-75"><a href="#cb3-75" aria-hidden="true" tabindex="-1"></a>yaml_dict <span class="op">=</span> yaml.safe_load(yaml_string)</span>
|
||||
<span id="cb3-76"><a href="#cb3-76" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-77"><a href="#cb3-77" aria-hidden="true" tabindex="-1"></a><span class="co"># Convert the YAML string to a Python dictionary</span></span>
|
||||
<span id="cb3-78"><a href="#cb3-78" aria-hidden="true" tabindex="-1"></a>yaml_dict <span class="op">=</span> yaml.safe_load(yaml_string)</span>
|
||||
<span id="cb3-77"><a href="#cb3-77" aria-hidden="true" tabindex="-1"></a><span class="co"># Specify your file path</span></span>
|
||||
<span id="cb3-78"><a href="#cb3-78" aria-hidden="true" tabindex="-1"></a>file_path <span class="op">=</span> <span class="st">'test_axolotl.yaml'</span></span>
|
||||
<span id="cb3-79"><a href="#cb3-79" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-80"><a href="#cb3-80" aria-hidden="true" tabindex="-1"></a><span class="co"># Specify your file path</span></span>
|
||||
<span id="cb3-81"><a href="#cb3-81" aria-hidden="true" tabindex="-1"></a>file_path <span class="op">=</span> <span class="st">'test_axolotl.yaml'</span></span>
|
||||
<span id="cb3-82"><a href="#cb3-82" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-83"><a href="#cb3-83" aria-hidden="true" tabindex="-1"></a><span class="co"># Write the YAML file</span></span>
|
||||
<span id="cb3-84"><a href="#cb3-84" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> <span class="bu">open</span>(file_path, <span class="st">'w'</span>) <span class="im">as</span> <span class="bu">file</span>:</span>
|
||||
<span id="cb3-85"><a href="#cb3-85" aria-hidden="true" tabindex="-1"></a> yaml.dump(yaml_dict, <span class="bu">file</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb3-80"><a href="#cb3-80" aria-hidden="true" tabindex="-1"></a><span class="co"># Write the YAML file</span></span>
|
||||
<span id="cb3-81"><a href="#cb3-81" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> <span class="bu">open</span>(file_path, <span class="st">'w'</span>) <span class="im">as</span> <span class="bu">file</span>:</span>
|
||||
<span id="cb3-82"><a href="#cb3-82" aria-hidden="true" tabindex="-1"></a> yaml.dump(yaml_dict, <span class="bu">file</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="launch-the-training" class="level2">
|
||||
|
||||
File diff suppressed because one or more lines are too long
44
sitemap.xml
44
sitemap.xml
@@ -2,90 +2,90 @@
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/examples/colab-notebooks/colab-axolotl-example.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.226Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/FAQS.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.222Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.571Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/multi-node.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.226Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset_preprocessing.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.222Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/config.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.222Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/inst_tune.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.222Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/pretraining.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.222Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/tokenized.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.222Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/batch_vs_grad.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.222Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/debugging.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.222Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/faq.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.222Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/fsdp_qlora.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.222Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/input_output.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.226Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/multipack.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.226Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/mac.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.226Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/index.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.222Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/conversation.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.222Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/template_free.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.222Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/rlhf.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.226Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/nccl.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.226Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.575Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/index.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.234Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.587Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/TODO.html</loc>
|
||||
<lastmod>2024-05-23T21:32:28.222Z</lastmod>
|
||||
<lastmod>2024-05-28T15:25:22.571Z</lastmod>
|
||||
</url>
|
||||
</urlset>
|
||||
|
||||
Reference in New Issue
Block a user