Built site for gh-pages

This commit is contained in:
Quarto GHA Workflow Runner
2024-11-08 18:46:41 +00:00
parent fc97455393
commit b4e3feb6ef
7 changed files with 557 additions and 674 deletions

View File

@@ -763,31 +763,30 @@ cd skypilot/llm/axolotl</code></pre>
<span id="cb18-17"><a href="#cb18-17" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> typescript</span></span>
<span id="cb18-18"><a href="#cb18-18" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> ...</span><span class="co"> # unimplemented custom format</span></span>
<span id="cb18-19"><a href="#cb18-19" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-20"><a href="#cb18-20" aria-hidden="true" tabindex="-1"></a><span class="co"> # fastchat conversation (deprecation soon, use chat_template https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/conversation.html#chat_template)</span></span>
<span id="cb18-21"><a href="#cb18-21" aria-hidden="true" tabindex="-1"></a><span class="co"> # See 'conversation' options: https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py</span></span>
<span id="cb18-22"><a href="#cb18-22" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> ...</span></span>
<span id="cb18-23"><a href="#cb18-23" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> sharegpt</span></span>
<span id="cb18-24"><a href="#cb18-24" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">conversation</span><span class="kw">:</span><span class="at"> chatml</span><span class="co"> # default: vicuna_v1.1</span></span>
<span id="cb18-25"><a href="#cb18-25" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-26"><a href="#cb18-26" aria-hidden="true" tabindex="-1"></a><span class="co"> # local</span></span>
<span id="cb18-27"><a href="#cb18-27" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> data.jsonl</span><span class="co"> # or json</span></span>
<span id="cb18-28"><a href="#cb18-28" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">ds_type</span><span class="kw">:</span><span class="at"> json</span><span class="co"> # see other options below</span></span>
<span id="cb18-29"><a href="#cb18-29" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> alpaca</span></span>
<span id="cb18-30"><a href="#cb18-30" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-31"><a href="#cb18-31" aria-hidden="true" tabindex="-1"></a><span class="co"> # dataset with splits, but no train split</span></span>
<span id="cb18-32"><a href="#cb18-32" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> knowrohit07/know_sql</span></span>
<span id="cb18-33"><a href="#cb18-33" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> context_qa.load_v2</span></span>
<span id="cb18-34"><a href="#cb18-34" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">train_on_split</span><span class="kw">:</span><span class="at"> validation</span></span>
<span id="cb18-35"><a href="#cb18-35" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-36"><a href="#cb18-36" aria-hidden="true" tabindex="-1"></a><span class="co"> # loading from s3 or gcs</span></span>
<span id="cb18-37"><a href="#cb18-37" aria-hidden="true" tabindex="-1"></a><span class="co"> # s3 creds will be loaded from the system default and gcs only supports public access</span></span>
<span id="cb18-38"><a href="#cb18-38" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> s3://path_to_ds</span><span class="co"> # Accepts folder with arrow/parquet or file path like above. Supports s3, gcs.</span></span>
<span id="cb18-39"><a href="#cb18-39" aria-hidden="true" tabindex="-1"></a><span class="at"> ...</span></span>
<span id="cb18-40"><a href="#cb18-40" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-41"><a href="#cb18-41" aria-hidden="true" tabindex="-1"></a><span class="co"> # Loading Data From a Public URL</span></span>
<span id="cb18-42"><a href="#cb18-42" aria-hidden="true" tabindex="-1"></a><span class="co"> # - The file format is `json` (which includes `jsonl`) by default. For different formats, adjust the `ds_type` option accordingly.</span></span>
<span id="cb18-43"><a href="#cb18-43" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> https://some.url.com/yourdata.jsonl</span><span class="co"> # The URL should be a direct link to the file you wish to load. URLs must use HTTPS protocol, not HTTP.</span></span>
<span id="cb18-44"><a href="#cb18-44" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">ds_type</span><span class="kw">:</span><span class="at"> json</span><span class="co"> # this is the default, see other options below.</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div></li>
<span id="cb18-20"><a href="#cb18-20" aria-hidden="true" tabindex="-1"></a><span class="co"> # chat_template https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/conversation.html#chat_template</span></span>
<span id="cb18-21"><a href="#cb18-21" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> ...</span></span>
<span id="cb18-22"><a href="#cb18-22" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chat_template</span></span>
<span id="cb18-23"><a href="#cb18-23" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">chat_template</span><span class="kw">:</span><span class="at"> chatml</span><span class="co"> # defaults to tokenizer's chat_template</span></span>
<span id="cb18-24"><a href="#cb18-24" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-25"><a href="#cb18-25" aria-hidden="true" tabindex="-1"></a><span class="co"> # local</span></span>
<span id="cb18-26"><a href="#cb18-26" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> data.jsonl</span><span class="co"> # or json</span></span>
<span id="cb18-27"><a href="#cb18-27" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">ds_type</span><span class="kw">:</span><span class="at"> json</span><span class="co"> # see other options below</span></span>
<span id="cb18-28"><a href="#cb18-28" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> alpaca</span></span>
<span id="cb18-29"><a href="#cb18-29" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-30"><a href="#cb18-30" aria-hidden="true" tabindex="-1"></a><span class="co"> # dataset with splits, but no train split</span></span>
<span id="cb18-31"><a href="#cb18-31" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> knowrohit07/know_sql</span></span>
<span id="cb18-32"><a href="#cb18-32" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> context_qa.load_v2</span></span>
<span id="cb18-33"><a href="#cb18-33" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">train_on_split</span><span class="kw">:</span><span class="at"> validation</span></span>
<span id="cb18-34"><a href="#cb18-34" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-35"><a href="#cb18-35" aria-hidden="true" tabindex="-1"></a><span class="co"> # loading from s3 or gcs</span></span>
<span id="cb18-36"><a href="#cb18-36" aria-hidden="true" tabindex="-1"></a><span class="co"> # s3 creds will be loaded from the system default and gcs only supports public access</span></span>
<span id="cb18-37"><a href="#cb18-37" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> s3://path_to_ds</span><span class="co"> # Accepts folder with arrow/parquet or file path like above. Supports s3, gcs.</span></span>
<span id="cb18-38"><a href="#cb18-38" aria-hidden="true" tabindex="-1"></a><span class="at"> ...</span></span>
<span id="cb18-39"><a href="#cb18-39" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-40"><a href="#cb18-40" aria-hidden="true" tabindex="-1"></a><span class="co"> # Loading Data From a Public URL</span></span>
<span id="cb18-41"><a href="#cb18-41" aria-hidden="true" tabindex="-1"></a><span class="co"> # - The file format is `json` (which includes `jsonl`) by default. For different formats, adjust the `ds_type` option accordingly.</span></span>
<span id="cb18-42"><a href="#cb18-42" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> https://some.url.com/yourdata.jsonl</span><span class="co"> # The URL should be a direct link to the file you wish to load. URLs must use HTTPS protocol, not HTTP.</span></span>
<span id="cb18-43"><a href="#cb18-43" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">ds_type</span><span class="kw">:</span><span class="at"> json</span><span class="co"> # this is the default, see other options below.</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div></li>
<li><p>loading</p>
<div class="sourceCode" id="cb19"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="fu">load_in_4bit</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a><span class="fu">load_in_8bit</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>