Built site for gh-pages
This commit is contained in:
@@ -629,7 +629,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<span id="cb1-309"><a href="#cb1-309" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb1-310"><a href="#cb1-310" aria-hidden="true" tabindex="-1"></a><span class="fu">eval_table_size</span><span class="kw">:</span><span class="co"> # Approximate number of predictions sent to wandb depending on batch size. Enabled above 0. Default is 0</span></span>
|
||||
<span id="cb1-311"><a href="#cb1-311" aria-hidden="true" tabindex="-1"></a><span class="fu">eval_max_new_tokens</span><span class="kw">:</span><span class="co"> # Total number of tokens generated for predictions sent to wandb. Default is 128</span></span>
|
||||
<span id="cb1-312"><a href="#cb1-312" aria-hidden="true" tabindex="-1"></a><span class="fu">eval_causal_lm_metrics</span><span class="kw">:</span><span class="co"> # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", chrf]</span></span>
|
||||
<span id="cb1-312"><a href="#cb1-312" aria-hidden="true" tabindex="-1"></a><span class="fu">eval_causal_lm_metrics</span><span class="kw">:</span><span class="co"> # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", "chrf", "perplexity"]</span></span>
|
||||
<span id="cb1-313"><a href="#cb1-313" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb1-314"><a href="#cb1-314" aria-hidden="true" tabindex="-1"></a><span class="fu">loss_watchdog_threshold</span><span class="kw">:</span><span class="co"> # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)</span></span>
|
||||
<span id="cb1-315"><a href="#cb1-315" aria-hidden="true" tabindex="-1"></a><span class="fu">loss_watchdog_patience</span><span class="kw">:</span><span class="co"> # Number of high-loss steps in a row before the trainer aborts (default: 3)</span></span>
|
||||
|
||||
@@ -363,7 +363,7 @@ Description
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="list">
|
||||
<tr data-index="0" data-listing-file-modified-sort="1728783650684" data-listing-reading-time-sort="1" data-listing-word-count-sort="47" data-listing-title-sort="Pre-training" data-listing-filename-sort="pretraining.qmd">
|
||||
<tr data-index="0" data-listing-file-modified-sort="1728783714748" data-listing-reading-time-sort="1" data-listing-word-count-sort="47" data-listing-title-sort="Pre-training" data-listing-filename-sort="pretraining.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/pretraining.html" class="title listing-title">Pre-training</a>
|
||||
</td>
|
||||
@@ -371,7 +371,7 @@ Description
|
||||
<span class="listing-description">Data format for a pre-training completion task.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="1" data-listing-file-modified-sort="1728783650684" data-listing-reading-time-sort="2" data-listing-word-count-sort="308" data-listing-title-sort="Instruction Tuning" data-listing-filename-sort="inst_tune.qmd">
|
||||
<tr data-index="1" data-listing-file-modified-sort="1728783714748" data-listing-reading-time-sort="2" data-listing-word-count-sort="308" data-listing-title-sort="Instruction Tuning" data-listing-filename-sort="inst_tune.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/inst_tune.html" class="title listing-title">Instruction Tuning</a>
|
||||
</td>
|
||||
@@ -379,7 +379,7 @@ Description
|
||||
<span class="listing-description">Instruction tuning formats for supervised fine-tuning.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="2" data-listing-file-modified-sort="1728783650684" data-listing-reading-time-sort="2" data-listing-word-count-sort="254" data-listing-title-sort="Conversation" data-listing-filename-sort="conversation.qmd">
|
||||
<tr data-index="2" data-listing-file-modified-sort="1728783714748" data-listing-reading-time-sort="2" data-listing-word-count-sort="254" data-listing-title-sort="Conversation" data-listing-filename-sort="conversation.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/conversation.html" class="title listing-title">Conversation</a>
|
||||
</td>
|
||||
@@ -387,7 +387,7 @@ Description
|
||||
<span class="listing-description">Conversation format for supervised fine-tuning.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="3" data-listing-file-modified-sort="1728783650684" data-listing-reading-time-sort="1" data-listing-word-count-sort="3" data-listing-title-sort="Template-Free" data-listing-filename-sort="template_free.qmd">
|
||||
<tr data-index="3" data-listing-file-modified-sort="1728783714748" data-listing-reading-time-sort="1" data-listing-word-count-sort="3" data-listing-title-sort="Template-Free" data-listing-filename-sort="template_free.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/template_free.html" class="title listing-title">Template-Free</a>
|
||||
</td>
|
||||
@@ -395,7 +395,7 @@ Description
|
||||
<span class="listing-description">Construct prompts without a template.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="4" data-listing-file-modified-sort="1728783650684" data-listing-reading-time-sort="1" data-listing-word-count-sort="92" data-listing-title-sort="Custom Pre-Tokenized Dataset" data-listing-filename-sort="tokenized.qmd">
|
||||
<tr data-index="4" data-listing-file-modified-sort="1728783714748" data-listing-reading-time-sort="1" data-listing-word-count-sort="92" data-listing-title-sort="Custom Pre-Tokenized Dataset" data-listing-filename-sort="tokenized.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/tokenized.html" class="title listing-title">Custom Pre-Tokenized Dataset</a>
|
||||
</td>
|
||||
|
||||
Reference in New Issue
Block a user