Built site for gh-pages
This commit is contained in:
@@ -170,8 +170,11 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<span class="menu-text">docs/fdsp_qlora.qmd</span>
|
||||
</li>
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
@@ -206,8 +209,8 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a href="../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
@@ -215,6 +218,47 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/config.html" class="sidebar-item-text sidebar-link active">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
|
||||
820
docs/dataset-formats/conversation.html
Normal file
820
docs/dataset-formats/conversation.html
Normal file
@@ -0,0 +1,820 @@
|
||||
<!DOCTYPE html>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
|
||||
|
||||
<meta charset="utf-8">
|
||||
<meta name="generator" content="quarto-1.4.552">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
|
||||
|
||||
<meta name="description" content="Conversation format for supervised fine-tuning.">
|
||||
|
||||
<title>Axolotl - Conversation</title>
|
||||
<style>
|
||||
code{white-space: pre-wrap;}
|
||||
span.smallcaps{font-variant: small-caps;}
|
||||
div.columns{display: flex; gap: min(4vw, 1.5em);}
|
||||
div.column{flex: auto; overflow-x: auto;}
|
||||
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
|
||||
ul.task-list{list-style: none;}
|
||||
ul.task-list li input[type="checkbox"] {
|
||||
width: 0.8em;
|
||||
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
|
||||
vertical-align: middle;
|
||||
}
|
||||
/* CSS for syntax highlighting */
|
||||
pre > code.sourceCode { white-space: pre; position: relative; }
|
||||
pre > code.sourceCode > span { line-height: 1.25; }
|
||||
pre > code.sourceCode > span:empty { height: 1.2em; }
|
||||
.sourceCode { overflow: visible; }
|
||||
code.sourceCode > span { color: inherit; text-decoration: inherit; }
|
||||
div.sourceCode { margin: 1em 0; }
|
||||
pre.sourceCode { margin: 0; }
|
||||
@media screen {
|
||||
div.sourceCode { overflow: auto; }
|
||||
}
|
||||
@media print {
|
||||
pre > code.sourceCode { white-space: pre-wrap; }
|
||||
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
|
||||
}
|
||||
pre.numberSource code
|
||||
{ counter-reset: source-line 0; }
|
||||
pre.numberSource code > span
|
||||
{ position: relative; left: -4em; counter-increment: source-line; }
|
||||
pre.numberSource code > span > a:first-child::before
|
||||
{ content: counter(source-line);
|
||||
position: relative; left: -1em; text-align: right; vertical-align: baseline;
|
||||
border: none; display: inline-block;
|
||||
-webkit-touch-callout: none; -webkit-user-select: none;
|
||||
-khtml-user-select: none; -moz-user-select: none;
|
||||
-ms-user-select: none; user-select: none;
|
||||
padding: 0 4px; width: 4em;
|
||||
}
|
||||
pre.numberSource { margin-left: 3em; padding-left: 4px; }
|
||||
div.sourceCode
|
||||
{ }
|
||||
@media screen {
|
||||
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
<script src="../../site_libs/quarto-nav/quarto-nav.js"></script>
|
||||
<script src="../../site_libs/clipboard/clipboard.min.js"></script>
|
||||
<script src="../../site_libs/quarto-search/autocomplete.umd.js"></script>
|
||||
<script src="../../site_libs/quarto-search/fuse.min.js"></script>
|
||||
<script src="../../site_libs/quarto-search/quarto-search.js"></script>
|
||||
<meta name="quarto:offset" content="../../">
|
||||
<link href="../../favicon.jpg" rel="icon" type="image/jpeg">
|
||||
<script src="../../site_libs/quarto-html/quarto.js"></script>
|
||||
<script src="../../site_libs/quarto-html/popper.min.js"></script>
|
||||
<script src="../../site_libs/quarto-html/tippy.umd.min.js"></script>
|
||||
<script src="../../site_libs/quarto-html/anchor.min.js"></script>
|
||||
<link href="../../site_libs/quarto-html/tippy.css" rel="stylesheet">
|
||||
<link href="../../site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<script src="../../site_libs/bootstrap/bootstrap.min.js"></script>
|
||||
<link href="../../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
|
||||
<link href="../../site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
|
||||
<script id="quarto-search-options" type="application/json">{
|
||||
"location": "navbar",
|
||||
"copy-button": false,
|
||||
"collapse-after": 3,
|
||||
"panel-placement": "end",
|
||||
"type": "overlay",
|
||||
"limit": 50,
|
||||
"keyboard-shortcut": [
|
||||
"f",
|
||||
"/",
|
||||
"s"
|
||||
],
|
||||
"show-item-context": false,
|
||||
"language": {
|
||||
"search-no-results-text": "No results",
|
||||
"search-matching-documents-text": "matching documents",
|
||||
"search-copy-link-title": "Copy link to search",
|
||||
"search-hide-matches-text": "Hide additional matches",
|
||||
"search-more-match-text": "more match in this document",
|
||||
"search-more-matches-text": "more matches in this document",
|
||||
"search-clear-button-title": "Clear",
|
||||
"search-text-placeholder": "",
|
||||
"search-detached-cancel-button-title": "Cancel",
|
||||
"search-submit-button-title": "Submit",
|
||||
"search-label": "Search"
|
||||
}
|
||||
}</script>
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../styles.css">
|
||||
</head>
|
||||
|
||||
<body class="nav-sidebar docked nav-fixed">
|
||||
|
||||
<div id="quarto-search-results"></div>
|
||||
<header id="quarto-header" class="headroom fixed-top">
|
||||
<nav class="navbar navbar-expand " data-bs-theme="dark">
|
||||
<div class="navbar-container container-fluid">
|
||||
<div class="navbar-brand-container mx-auto">
|
||||
<a class="navbar-brand" href="../../index.html">
|
||||
<span class="navbar-title">Axolotl</span>
|
||||
</a>
|
||||
</div>
|
||||
<div class="quarto-navbar-tools tools-wide tools-end">
|
||||
<a href="https://twitter.com/axolotl_ai" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-twitter"></i></a>
|
||||
<a href="https://github.com/OpenAccess-AI-Collective/axolotl/" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-github"></i></a>
|
||||
<a href="https://discord.gg/7m9sfhzaf3" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-discord"></i></a>
|
||||
</div>
|
||||
<div id="quarto-search" class="" title="Search"></div>
|
||||
</div> <!-- /container-fluid -->
|
||||
</nav>
|
||||
<nav class="quarto-secondary-nav">
|
||||
<div class="container-fluid d-flex">
|
||||
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
<i class="bi bi-layout-text-sidebar-reverse"></i>
|
||||
</button>
|
||||
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../docs/dataset-formats/index.html">Dataset Formats</a></li><li class="breadcrumb-item"><a href="../../docs/dataset-formats/conversation.html">Conversation</a></li></ol></nav>
|
||||
<a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
</a>
|
||||
</div>
|
||||
</nav>
|
||||
</header>
|
||||
<!-- content -->
|
||||
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
|
||||
<!-- sidebar -->
|
||||
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto">
|
||||
<div class="sidebar-menu-container">
|
||||
<ul class="list-unstyled mt-1">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Home</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true">
|
||||
<span class="menu-text">How-To Guides</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/debugging.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Debugging</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/multipack.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multipack (Sample Packing)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-free prompt construction</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/rlhf.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">RLHF (Beta)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/nccl.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">NCCL</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/mac.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Mac M-series</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/multi-node.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multi Node</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link active">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/faq.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FAQ</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
|
||||
<!-- margin-sidebar -->
|
||||
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
|
||||
<nav id="TOC" role="doc-toc" class="toc-active">
|
||||
<h2 id="toc-title">On this page</h2>
|
||||
|
||||
<ul>
|
||||
<li><a href="#formats" id="toc-formats" class="nav-link active" data-scroll-target="#formats">Formats</a>
|
||||
<ul class="collapse">
|
||||
<li><a href="#sharegpt" id="toc-sharegpt" class="nav-link" data-scroll-target="#sharegpt">sharegpt</a></li>
|
||||
<li><a href="#pygmalion" id="toc-pygmalion" class="nav-link" data-scroll-target="#pygmalion">pygmalion</a></li>
|
||||
<li><a href="#sharegpt.load_role" id="toc-sharegpt.load_role" class="nav-link" data-scroll-target="#sharegpt.load_role">sharegpt.load_role</a></li>
|
||||
<li><a href="#sharegpt.load_guanaco" id="toc-sharegpt.load_guanaco" class="nav-link" data-scroll-target="#sharegpt.load_guanaco">sharegpt.load_guanaco</a></li>
|
||||
<li><a href="#sharegpt_jokes" id="toc-sharegpt_jokes" class="nav-link" data-scroll-target="#sharegpt_jokes">sharegpt_jokes</a></li>
|
||||
</ul></li>
|
||||
<li><a href="#how-to-add-custom-prompts-for-instruction-tuning" id="toc-how-to-add-custom-prompts-for-instruction-tuning" class="nav-link" data-scroll-target="#how-to-add-custom-prompts-for-instruction-tuning">How to add custom prompts for instruction-tuning</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
<!-- main -->
|
||||
<main class="content" id="quarto-document-content">
|
||||
|
||||
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../docs/dataset-formats/index.html">Dataset Formats</a></li><li class="breadcrumb-item"><a href="../../docs/dataset-formats/conversation.html">Conversation</a></li></ol></nav>
|
||||
<div class="quarto-title">
|
||||
<h1 class="title">Conversation</h1>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class="description">
|
||||
Conversation format for supervised fine-tuning.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="quarto-title-meta">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</header>
|
||||
|
||||
|
||||
<section id="formats" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="formats">Formats</h2>
|
||||
<section id="sharegpt" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="sharegpt">sharegpt</h3>
|
||||
<p>conversations where <code>from</code> is <code>human</code>/<code>gpt</code>. (optional: first row with role <code>system</code> to override default system prompt)</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb1" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"conversations"</span><span class="fu">:</span> <span class="ot">[</span><span class="fu">{</span><span class="dt">"from"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"value"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">]</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
<p>Note: <code>type: sharegpt</code> opens a special config <code>conversation:</code> that enables conversions to many Conversation types. See <a href="../docs/config.qmd">the docs</a> for all config options.</p>
|
||||
</section>
|
||||
<section id="pygmalion" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="pygmalion">pygmalion</h3>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb2" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"conversations"</span><span class="fu">:</span> <span class="ot">[</span><span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"value"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">]</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="sharegpt.load_role" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="sharegpt.load_role">sharegpt.load_role</h3>
|
||||
<p>conversations where <code>role</code> is used instead of <code>from</code></p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb3" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"conversations"</span><span class="fu">:</span> <span class="ot">[</span><span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"value"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">]</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="sharegpt.load_guanaco" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="sharegpt.load_guanaco">sharegpt.load_guanaco</h3>
|
||||
<p>conversations where <code>from</code> is <code>prompter</code> <code>assistant</code> instead of default sharegpt</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb4" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"conversations"</span><span class="fu">:</span> <span class="ot">[</span><span class="fu">{</span><span class="dt">"from"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"value"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">]</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="sharegpt_jokes" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="sharegpt_jokes">sharegpt_jokes</h3>
|
||||
<p>creates a chat where bot is asked to tell a joke, then explain why the joke is funny</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb5" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"conversations"</span><span class="fu">:</span> <span class="ot">[</span><span class="fu">{</span><span class="dt">"title"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"text"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"explanation"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">]</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="how-to-add-custom-prompts-for-instruction-tuning" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="how-to-add-custom-prompts-for-instruction-tuning">How to add custom prompts for instruction-tuning</h2>
|
||||
<p>For a dataset that is preprocessed for instruction purposes:</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb6" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"input"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"output"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
<p>You can use this example in your YAML config:</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>config.yaml</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb7" data-filename="config.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> repo</span></span>
|
||||
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span></span>
|
||||
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">system_prompt</span><span class="kw">:</span><span class="at"> </span><span class="st">""</span></span>
|
||||
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_system</span><span class="kw">:</span><span class="at"> system</span></span>
|
||||
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_instruction</span><span class="kw">:</span><span class="at"> input</span></span>
|
||||
<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_output</span><span class="kw">:</span><span class="at"> output</span></span>
|
||||
<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">format</span><span class="kw">:</span><span class="at"> </span><span class="st">"[INST] {instruction} [/INST]"</span></span>
|
||||
<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">no_input_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"[INST] {instruction} [/INST]"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
<p>See full config options under <a href="../docs/config.qmd">here</a>.</p>
|
||||
|
||||
|
||||
</section>
|
||||
|
||||
</main> <!-- /main -->
|
||||
<script id="quarto-html-after-body" type="application/javascript">
|
||||
window.document.addEventListener("DOMContentLoaded", function (event) {
|
||||
const toggleBodyColorMode = (bsSheetEl) => {
|
||||
const mode = bsSheetEl.getAttribute("data-mode");
|
||||
const bodyEl = window.document.querySelector("body");
|
||||
if (mode === "dark") {
|
||||
bodyEl.classList.add("quarto-dark");
|
||||
bodyEl.classList.remove("quarto-light");
|
||||
} else {
|
||||
bodyEl.classList.add("quarto-light");
|
||||
bodyEl.classList.remove("quarto-dark");
|
||||
}
|
||||
}
|
||||
const toggleBodyColorPrimary = () => {
|
||||
const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
|
||||
if (bsSheetEl) {
|
||||
toggleBodyColorMode(bsSheetEl);
|
||||
}
|
||||
}
|
||||
toggleBodyColorPrimary();
|
||||
const icon = "";
|
||||
const anchorJS = new window.AnchorJS();
|
||||
anchorJS.options = {
|
||||
placement: 'right',
|
||||
icon: icon
|
||||
};
|
||||
anchorJS.add('.anchored');
|
||||
const isCodeAnnotation = (el) => {
|
||||
for (const clz of el.classList) {
|
||||
if (clz.startsWith('code-annotation-')) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
const clipboard = new window.ClipboardJS('.code-copy-button', {
|
||||
text: function(trigger) {
|
||||
const codeEl = trigger.previousElementSibling.cloneNode(true);
|
||||
for (const childEl of codeEl.children) {
|
||||
if (isCodeAnnotation(childEl)) {
|
||||
childEl.remove();
|
||||
}
|
||||
}
|
||||
return codeEl.innerText;
|
||||
}
|
||||
});
|
||||
clipboard.on('success', function(e) {
|
||||
// button target
|
||||
const button = e.trigger;
|
||||
// don't keep focus
|
||||
button.blur();
|
||||
// flash "checked"
|
||||
button.classList.add('code-copy-button-checked');
|
||||
var currentTitle = button.getAttribute("title");
|
||||
button.setAttribute("title", "Copied!");
|
||||
let tooltip;
|
||||
if (window.bootstrap) {
|
||||
button.setAttribute("data-bs-toggle", "tooltip");
|
||||
button.setAttribute("data-bs-placement", "left");
|
||||
button.setAttribute("data-bs-title", "Copied!");
|
||||
tooltip = new bootstrap.Tooltip(button,
|
||||
{ trigger: "manual",
|
||||
customClass: "code-copy-button-tooltip",
|
||||
offset: [0, -8]});
|
||||
tooltip.show();
|
||||
}
|
||||
setTimeout(function() {
|
||||
if (tooltip) {
|
||||
tooltip.hide();
|
||||
button.removeAttribute("data-bs-title");
|
||||
button.removeAttribute("data-bs-toggle");
|
||||
button.removeAttribute("data-bs-placement");
|
||||
}
|
||||
button.setAttribute("title", currentTitle);
|
||||
button.classList.remove('code-copy-button-checked');
|
||||
}, 1000);
|
||||
// clear code selection
|
||||
e.clearSelection();
|
||||
});
|
||||
var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
|
||||
var mailtoRegex = new RegExp(/^mailto:/);
|
||||
var filterRegex = new RegExp("https:\/\/OpenAccess-AI-Collective\.github\.io\/axolotl\/");
|
||||
var isInternal = (href) => {
|
||||
return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
|
||||
}
|
||||
// Inspect non-navigation links and adorn them if external
|
||||
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool)');
|
||||
for (var i=0; i<links.length; i++) {
|
||||
const link = links[i];
|
||||
if (!isInternal(link.href)) {
|
||||
// undo the damage that might have been done by quarto-nav.js in the case of
|
||||
// links that we want to consider external
|
||||
if (link.dataset.originalHref !== undefined) {
|
||||
link.href = link.dataset.originalHref;
|
||||
}
|
||||
}
|
||||
}
|
||||
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
|
||||
const config = {
|
||||
allowHTML: true,
|
||||
maxWidth: 500,
|
||||
delay: 100,
|
||||
arrow: false,
|
||||
appendTo: function(el) {
|
||||
return el.parentElement;
|
||||
},
|
||||
interactive: true,
|
||||
interactiveBorder: 10,
|
||||
theme: 'quarto',
|
||||
placement: 'bottom-start',
|
||||
};
|
||||
if (contentFn) {
|
||||
config.content = contentFn;
|
||||
}
|
||||
if (onTriggerFn) {
|
||||
config.onTrigger = onTriggerFn;
|
||||
}
|
||||
if (onUntriggerFn) {
|
||||
config.onUntrigger = onUntriggerFn;
|
||||
}
|
||||
window.tippy(el, config);
|
||||
}
|
||||
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
|
||||
for (var i=0; i<noterefs.length; i++) {
|
||||
const ref = noterefs[i];
|
||||
tippyHover(ref, function() {
|
||||
// use id or data attribute instead here
|
||||
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
|
||||
try { href = new URL(href).hash; } catch {}
|
||||
const id = href.replace(/^#\/?/, "");
|
||||
const note = window.document.getElementById(id);
|
||||
if (note) {
|
||||
return note.innerHTML;
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
});
|
||||
}
|
||||
const xrefs = window.document.querySelectorAll('a.quarto-xref');
|
||||
const processXRef = (id, note) => {
|
||||
// Strip column container classes
|
||||
const stripColumnClz = (el) => {
|
||||
el.classList.remove("page-full", "page-columns");
|
||||
if (el.children) {
|
||||
for (const child of el.children) {
|
||||
stripColumnClz(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
stripColumnClz(note)
|
||||
if (id === null || id.startsWith('sec-')) {
|
||||
// Special case sections, only their first couple elements
|
||||
const container = document.createElement("div");
|
||||
if (note.children && note.children.length > 2) {
|
||||
container.appendChild(note.children[0].cloneNode(true));
|
||||
for (let i = 1; i < note.children.length; i++) {
|
||||
const child = note.children[i];
|
||||
if (child.tagName === "P" && child.innerText === "") {
|
||||
continue;
|
||||
} else {
|
||||
container.appendChild(child.cloneNode(true));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(container);
|
||||
}
|
||||
return container.innerHTML
|
||||
} else {
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(note);
|
||||
}
|
||||
return note.innerHTML;
|
||||
}
|
||||
} else {
|
||||
// Remove any anchor links if they are present
|
||||
const anchorLink = note.querySelector('a.anchorjs-link');
|
||||
if (anchorLink) {
|
||||
anchorLink.remove();
|
||||
}
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(note);
|
||||
}
|
||||
// TODO in 1.5, we should make sure this works without a callout special case
|
||||
if (note.classList.contains("callout")) {
|
||||
return note.outerHTML;
|
||||
} else {
|
||||
return note.innerHTML;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (var i=0; i<xrefs.length; i++) {
|
||||
const xref = xrefs[i];
|
||||
tippyHover(xref, undefined, function(instance) {
|
||||
instance.disable();
|
||||
let url = xref.getAttribute('href');
|
||||
let hash = undefined;
|
||||
if (url.startsWith('#')) {
|
||||
hash = url;
|
||||
} else {
|
||||
try { hash = new URL(url).hash; } catch {}
|
||||
}
|
||||
if (hash) {
|
||||
const id = hash.replace(/^#\/?/, "");
|
||||
const note = window.document.getElementById(id);
|
||||
if (note !== null) {
|
||||
try {
|
||||
const html = processXRef(id, note.cloneNode(true));
|
||||
instance.setContent(html);
|
||||
} finally {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
}
|
||||
} else {
|
||||
// See if we can fetch this
|
||||
fetch(url.split('#')[0])
|
||||
.then(res => res.text())
|
||||
.then(html => {
|
||||
const parser = new DOMParser();
|
||||
const htmlDoc = parser.parseFromString(html, "text/html");
|
||||
const note = htmlDoc.getElementById(id);
|
||||
if (note !== null) {
|
||||
const html = processXRef(id, note);
|
||||
instance.setContent(html);
|
||||
}
|
||||
}).finally(() => {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// See if we can fetch a full url (with no hash to target)
|
||||
// This is a special case and we should probably do some content thinning / targeting
|
||||
fetch(url)
|
||||
.then(res => res.text())
|
||||
.then(html => {
|
||||
const parser = new DOMParser();
|
||||
const htmlDoc = parser.parseFromString(html, "text/html");
|
||||
const note = htmlDoc.querySelector('main.content');
|
||||
if (note !== null) {
|
||||
// This should only happen for chapter cross references
|
||||
// (since there is no id in the URL)
|
||||
// remove the first header
|
||||
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
|
||||
note.children[0].remove();
|
||||
}
|
||||
const html = processXRef(null, note);
|
||||
instance.setContent(html);
|
||||
}
|
||||
}).finally(() => {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
});
|
||||
}
|
||||
}, function(instance) {
|
||||
});
|
||||
}
|
||||
let selectedAnnoteEl;
|
||||
const selectorForAnnotation = ( cell, annotation) => {
|
||||
let cellAttr = 'data-code-cell="' + cell + '"';
|
||||
let lineAttr = 'data-code-annotation="' + annotation + '"';
|
||||
const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
|
||||
return selector;
|
||||
}
|
||||
const selectCodeLines = (annoteEl) => {
|
||||
const doc = window.document;
|
||||
const targetCell = annoteEl.getAttribute("data-target-cell");
|
||||
const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
|
||||
const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
|
||||
const lines = annoteSpan.getAttribute("data-code-lines").split(",");
|
||||
const lineIds = lines.map((line) => {
|
||||
return targetCell + "-" + line;
|
||||
})
|
||||
let top = null;
|
||||
let height = null;
|
||||
let parent = null;
|
||||
if (lineIds.length > 0) {
|
||||
//compute the position of the single el (top and bottom and make a div)
|
||||
const el = window.document.getElementById(lineIds[0]);
|
||||
top = el.offsetTop;
|
||||
height = el.offsetHeight;
|
||||
parent = el.parentElement.parentElement;
|
||||
if (lineIds.length > 1) {
|
||||
const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
|
||||
const bottom = lastEl.offsetTop + lastEl.offsetHeight;
|
||||
height = bottom - top;
|
||||
}
|
||||
if (top !== null && height !== null && parent !== null) {
|
||||
// cook up a div (if necessary) and position it
|
||||
let div = window.document.getElementById("code-annotation-line-highlight");
|
||||
if (div === null) {
|
||||
div = window.document.createElement("div");
|
||||
div.setAttribute("id", "code-annotation-line-highlight");
|
||||
div.style.position = 'absolute';
|
||||
parent.appendChild(div);
|
||||
}
|
||||
div.style.top = top - 2 + "px";
|
||||
div.style.height = height + 4 + "px";
|
||||
div.style.left = 0;
|
||||
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
|
||||
if (gutterDiv === null) {
|
||||
gutterDiv = window.document.createElement("div");
|
||||
gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
|
||||
gutterDiv.style.position = 'absolute';
|
||||
const codeCell = window.document.getElementById(targetCell);
|
||||
const gutter = codeCell.querySelector('.code-annotation-gutter');
|
||||
gutter.appendChild(gutterDiv);
|
||||
}
|
||||
gutterDiv.style.top = top - 2 + "px";
|
||||
gutterDiv.style.height = height + 4 + "px";
|
||||
}
|
||||
selectedAnnoteEl = annoteEl;
|
||||
}
|
||||
};
|
||||
const unselectCodeLines = () => {
|
||||
const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
|
||||
elementsIds.forEach((elId) => {
|
||||
const div = window.document.getElementById(elId);
|
||||
if (div) {
|
||||
div.remove();
|
||||
}
|
||||
});
|
||||
selectedAnnoteEl = undefined;
|
||||
};
|
||||
// Handle positioning of the toggle
|
||||
window.addEventListener(
|
||||
"resize",
|
||||
throttle(() => {
|
||||
elRect = undefined;
|
||||
if (selectedAnnoteEl) {
|
||||
selectCodeLines(selectedAnnoteEl);
|
||||
}
|
||||
}, 10)
|
||||
);
|
||||
function throttle(fn, ms) {
|
||||
let throttle = false;
|
||||
let timer;
|
||||
return (...args) => {
|
||||
if(!throttle) { // first call gets through
|
||||
fn.apply(this, args);
|
||||
throttle = true;
|
||||
} else { // all the others get throttled
|
||||
if(timer) clearTimeout(timer); // cancel #2
|
||||
timer = setTimeout(() => {
|
||||
fn.apply(this, args);
|
||||
timer = throttle = false;
|
||||
}, ms);
|
||||
}
|
||||
};
|
||||
}
|
||||
// Attach click handler to the DT
|
||||
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
|
||||
for (const annoteDlNode of annoteDls) {
|
||||
annoteDlNode.addEventListener('click', (event) => {
|
||||
const clickedEl = event.target;
|
||||
if (clickedEl !== selectedAnnoteEl) {
|
||||
unselectCodeLines();
|
||||
const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
|
||||
if (activeEl) {
|
||||
activeEl.classList.remove('code-annotation-active');
|
||||
}
|
||||
selectCodeLines(clickedEl);
|
||||
clickedEl.classList.add('code-annotation-active');
|
||||
} else {
|
||||
// Unselect the line
|
||||
unselectCodeLines();
|
||||
clickedEl.classList.remove('code-annotation-active');
|
||||
}
|
||||
});
|
||||
}
|
||||
const findCites = (el) => {
|
||||
const parentEl = el.parentElement;
|
||||
if (parentEl) {
|
||||
const cites = parentEl.dataset.cites;
|
||||
if (cites) {
|
||||
return {
|
||||
el,
|
||||
cites: cites.split(' ')
|
||||
};
|
||||
} else {
|
||||
return findCites(el.parentElement)
|
||||
}
|
||||
} else {
|
||||
return undefined;
|
||||
}
|
||||
};
|
||||
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
|
||||
for (var i=0; i<bibliorefs.length; i++) {
|
||||
const ref = bibliorefs[i];
|
||||
const citeInfo = findCites(ref);
|
||||
if (citeInfo) {
|
||||
tippyHover(citeInfo.el, function() {
|
||||
var popup = window.document.createElement('div');
|
||||
citeInfo.cites.forEach(function(cite) {
|
||||
var citeDiv = window.document.createElement('div');
|
||||
citeDiv.classList.add('hanging-indent');
|
||||
citeDiv.classList.add('csl-entry');
|
||||
var biblioDiv = window.document.getElementById('ref-' + cite);
|
||||
if (biblioDiv) {
|
||||
citeDiv.innerHTML = biblioDiv.innerHTML;
|
||||
}
|
||||
popup.appendChild(citeDiv);
|
||||
});
|
||||
return popup.innerHTML;
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>
|
||||
</div> <!-- /content -->
|
||||
|
||||
|
||||
|
||||
|
||||
</body></html>
|
||||
814
docs/dataset-formats/index.html
Normal file
814
docs/dataset-formats/index.html
Normal file
@@ -0,0 +1,814 @@
|
||||
<!DOCTYPE html>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
|
||||
|
||||
<meta charset="utf-8">
|
||||
<meta name="generator" content="quarto-1.4.552">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
|
||||
|
||||
<meta name="description" content="Supported dataset formats.">
|
||||
|
||||
<title>Axolotl - Dataset Formats</title>
|
||||
<style>
|
||||
code{white-space: pre-wrap;}
|
||||
span.smallcaps{font-variant: small-caps;}
|
||||
div.columns{display: flex; gap: min(4vw, 1.5em);}
|
||||
div.column{flex: auto; overflow-x: auto;}
|
||||
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
|
||||
ul.task-list{list-style: none;}
|
||||
ul.task-list li input[type="checkbox"] {
|
||||
width: 0.8em;
|
||||
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
|
||||
vertical-align: middle;
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
<script src="../../site_libs/quarto-nav/quarto-nav.js"></script>
|
||||
<script src="../../site_libs/clipboard/clipboard.min.js"></script>
|
||||
<script src="../../site_libs/quarto-search/autocomplete.umd.js"></script>
|
||||
<script src="../../site_libs/quarto-search/fuse.min.js"></script>
|
||||
<script src="../../site_libs/quarto-search/quarto-search.js"></script>
|
||||
<meta name="quarto:offset" content="../../">
|
||||
<link href="../../favicon.jpg" rel="icon" type="image/jpeg">
|
||||
<script src="../../site_libs/quarto-listing/list.min.js"></script>
|
||||
<script src="../../site_libs/quarto-listing/quarto-listing.js"></script>
|
||||
<script src="../../site_libs/quarto-html/quarto.js"></script>
|
||||
<script src="../../site_libs/quarto-html/popper.min.js"></script>
|
||||
<script src="../../site_libs/quarto-html/tippy.umd.min.js"></script>
|
||||
<script src="../../site_libs/quarto-html/anchor.min.js"></script>
|
||||
<link href="../../site_libs/quarto-html/tippy.css" rel="stylesheet">
|
||||
<link href="../../site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<script src="../../site_libs/bootstrap/bootstrap.min.js"></script>
|
||||
<link href="../../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
|
||||
<link href="../../site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
|
||||
<script id="quarto-search-options" type="application/json">{
|
||||
"location": "navbar",
|
||||
"copy-button": false,
|
||||
"collapse-after": 3,
|
||||
"panel-placement": "end",
|
||||
"type": "overlay",
|
||||
"limit": 50,
|
||||
"keyboard-shortcut": [
|
||||
"f",
|
||||
"/",
|
||||
"s"
|
||||
],
|
||||
"show-item-context": false,
|
||||
"language": {
|
||||
"search-no-results-text": "No results",
|
||||
"search-matching-documents-text": "matching documents",
|
||||
"search-copy-link-title": "Copy link to search",
|
||||
"search-hide-matches-text": "Hide additional matches",
|
||||
"search-more-match-text": "more match in this document",
|
||||
"search-more-matches-text": "more matches in this document",
|
||||
"search-clear-button-title": "Clear",
|
||||
"search-text-placeholder": "",
|
||||
"search-detached-cancel-button-title": "Cancel",
|
||||
"search-submit-button-title": "Submit",
|
||||
"search-label": "Search"
|
||||
}
|
||||
}</script>
|
||||
<script>
|
||||
|
||||
window.document.addEventListener("DOMContentLoaded", function (_event) {
|
||||
const listingTargetEl = window.document.querySelector('#listing-listing .list');
|
||||
if (!listingTargetEl) {
|
||||
// No listing discovered, do not attach.
|
||||
return;
|
||||
}
|
||||
|
||||
const options = {
|
||||
valueNames: ['listing-title','listing-description',{ data: ['index'] },{ data: ['categories'] },{ data: ['listing-date-sort'] },{ data: ['listing-title-sort'] }],
|
||||
|
||||
searchColumns: ["listing-title","listing-author"],
|
||||
};
|
||||
|
||||
window['quarto-listings'] = window['quarto-listings'] || {};
|
||||
window['quarto-listings']['listing-listing'] = new List('listing-listing', options);
|
||||
|
||||
if (window['quarto-listing-loaded']) {
|
||||
window['quarto-listing-loaded']();
|
||||
}
|
||||
});
|
||||
|
||||
window.addEventListener('hashchange',() => {
|
||||
if (window['quarto-listing-loaded']) {
|
||||
window['quarto-listing-loaded']();
|
||||
}
|
||||
})
|
||||
</script>
|
||||
|
||||
<script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>
|
||||
|
||||
<script type="text/javascript">
|
||||
const typesetMath = (el) => {
|
||||
if (window.MathJax) {
|
||||
// MathJax Typeset
|
||||
window.MathJax.typeset([el]);
|
||||
} else if (window.katex) {
|
||||
// KaTeX Render
|
||||
var mathElements = el.getElementsByClassName("math");
|
||||
var macros = [];
|
||||
for (var i = 0; i < mathElements.length; i++) {
|
||||
var texText = mathElements[i].firstChild;
|
||||
if (mathElements[i].tagName == "SPAN") {
|
||||
window.katex.render(texText.data, mathElements[i], {
|
||||
displayMode: mathElements[i].classList.contains('display'),
|
||||
throwOnError: false,
|
||||
macros: macros,
|
||||
fleqn: false
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
window.Quarto = {
|
||||
typesetMath
|
||||
};
|
||||
</script>
|
||||
|
||||
<link rel="stylesheet" href="../../styles.css">
|
||||
</head>
|
||||
|
||||
<body class="nav-sidebar docked nav-fixed">
|
||||
|
||||
<div id="quarto-search-results"></div>
|
||||
<header id="quarto-header" class="headroom fixed-top">
|
||||
<nav class="navbar navbar-expand " data-bs-theme="dark">
|
||||
<div class="navbar-container container-fluid">
|
||||
<div class="navbar-brand-container mx-auto">
|
||||
<a class="navbar-brand" href="../../index.html">
|
||||
<span class="navbar-title">Axolotl</span>
|
||||
</a>
|
||||
</div>
|
||||
<div class="quarto-navbar-tools tools-wide tools-end">
|
||||
<a href="https://twitter.com/axolotl_ai" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-twitter"></i></a>
|
||||
<a href="https://github.com/OpenAccess-AI-Collective/axolotl/" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-github"></i></a>
|
||||
<a href="https://discord.gg/7m9sfhzaf3" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-discord"></i></a>
|
||||
</div>
|
||||
<div id="quarto-search" class="" title="Search"></div>
|
||||
</div> <!-- /container-fluid -->
|
||||
</nav>
|
||||
<nav class="quarto-secondary-nav">
|
||||
<div class="container-fluid d-flex">
|
||||
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
<i class="bi bi-layout-text-sidebar-reverse"></i>
|
||||
</button>
|
||||
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../docs/dataset-formats/index.html">Dataset Formats</a></li></ol></nav>
|
||||
<a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
</a>
|
||||
</div>
|
||||
</nav>
|
||||
</header>
|
||||
<!-- content -->
|
||||
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
|
||||
<!-- sidebar -->
|
||||
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto">
|
||||
<div class="sidebar-menu-container">
|
||||
<ul class="list-unstyled mt-1">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Home</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true">
|
||||
<span class="menu-text">How-To Guides</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/debugging.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Debugging</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/multipack.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multipack (Sample Packing)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-free prompt construction</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/rlhf.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">RLHF (Beta)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/nccl.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">NCCL</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/mac.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Mac M-series</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/multi-node.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multi Node</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link active">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/faq.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FAQ</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
|
||||
<!-- margin-sidebar -->
|
||||
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
|
||||
|
||||
</div>
|
||||
<!-- main -->
|
||||
<main class="content" id="quarto-document-content">
|
||||
|
||||
<header id="title-block-header" class="quarto-title-block default">
|
||||
<div class="quarto-title">
|
||||
<h1 class="title">Dataset Formats</h1>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class="description">
|
||||
Supported dataset formats.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="quarto-title-meta">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</header>
|
||||
|
||||
|
||||
<p>Axolotl supports a variety of dataset formats. It is recommended to use a JSONL format. The schema of the JSONL depends upon the task and the prompt template you wish to use. Instead of a JSONL, you can also use a HuggingFace dataset with columns for each JSONL field.</p>
|
||||
<p>Below are these various formats organized by task:</p>
|
||||
|
||||
|
||||
|
||||
|
||||
<div class="quarto-listing quarto-listing-container-table" id="listing-listing">
|
||||
<table class="quarto-listing-table table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>
|
||||
Title
|
||||
</th>
|
||||
<th>
|
||||
Description
|
||||
</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="list">
|
||||
<tr data-index="0" data-listing-file-modified-sort="1711983664848" data-listing-reading-time-sort="1" data-listing-word-count-sort="189" data-listing-title-sort="Conversation" data-listing-filename-sort="conversation.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/conversation.html" class="title listing-title">Conversation</a>
|
||||
</td>
|
||||
<td>
|
||||
<span class="listing-description">Conversation format for supervised fine-tuning.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="1" data-listing-file-modified-sort="1711983664848" data-listing-reading-time-sort="2" data-listing-word-count-sort="222" data-listing-title-sort="Instruction Tuning" data-listing-filename-sort="inst_tune.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/inst_tune.html" class="title listing-title">Instruction Tuning</a>
|
||||
</td>
|
||||
<td>
|
||||
<span class="listing-description">Instruction tuning formats for supervised fine-tuning.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="2" data-listing-file-modified-sort="1711983664848" data-listing-reading-time-sort="1" data-listing-word-count-sort="47" data-listing-title-sort="Pre-training" data-listing-filename-sort="pretraining.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/pretraining.html" class="title listing-title">Pre-training</a>
|
||||
</td>
|
||||
<td>
|
||||
<span class="listing-description">Data format for a pre-training completion task.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="3" data-listing-file-modified-sort="1711983664848" data-listing-reading-time-sort="1" data-listing-word-count-sort="3" data-listing-title-sort="Template-Free" data-listing-filename-sort="template_free.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/template_free.html" class="title listing-title">Template-Free</a>
|
||||
</td>
|
||||
<td>
|
||||
<span class="listing-description">Construct prompts without a template.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="4" data-listing-file-modified-sort="1711983664848" data-listing-reading-time-sort="1" data-listing-word-count-sort="22" data-listing-title-sort="Custom Pre-Tokenized Dataset" data-listing-filename-sort="tokenized.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/tokenized.html" class="title listing-title">Custom Pre-Tokenized Dataset</a>
|
||||
</td>
|
||||
<td>
|
||||
<span class="listing-description">How to use a custom pre-tokenized dataset.</span>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<div class="listing-no-matching d-none">
|
||||
No matching items
|
||||
</div>
|
||||
</div></main> <!-- /main -->
|
||||
<script id="quarto-html-after-body" type="application/javascript">
|
||||
window.document.addEventListener("DOMContentLoaded", function (event) {
|
||||
const toggleBodyColorMode = (bsSheetEl) => {
|
||||
const mode = bsSheetEl.getAttribute("data-mode");
|
||||
const bodyEl = window.document.querySelector("body");
|
||||
if (mode === "dark") {
|
||||
bodyEl.classList.add("quarto-dark");
|
||||
bodyEl.classList.remove("quarto-light");
|
||||
} else {
|
||||
bodyEl.classList.add("quarto-light");
|
||||
bodyEl.classList.remove("quarto-dark");
|
||||
}
|
||||
}
|
||||
const toggleBodyColorPrimary = () => {
|
||||
const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
|
||||
if (bsSheetEl) {
|
||||
toggleBodyColorMode(bsSheetEl);
|
||||
}
|
||||
}
|
||||
toggleBodyColorPrimary();
|
||||
const icon = "";
|
||||
const anchorJS = new window.AnchorJS();
|
||||
anchorJS.options = {
|
||||
placement: 'right',
|
||||
icon: icon
|
||||
};
|
||||
anchorJS.add('.anchored');
|
||||
const isCodeAnnotation = (el) => {
|
||||
for (const clz of el.classList) {
|
||||
if (clz.startsWith('code-annotation-')) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
const clipboard = new window.ClipboardJS('.code-copy-button', {
|
||||
text: function(trigger) {
|
||||
const codeEl = trigger.previousElementSibling.cloneNode(true);
|
||||
for (const childEl of codeEl.children) {
|
||||
if (isCodeAnnotation(childEl)) {
|
||||
childEl.remove();
|
||||
}
|
||||
}
|
||||
return codeEl.innerText;
|
||||
}
|
||||
});
|
||||
clipboard.on('success', function(e) {
|
||||
// button target
|
||||
const button = e.trigger;
|
||||
// don't keep focus
|
||||
button.blur();
|
||||
// flash "checked"
|
||||
button.classList.add('code-copy-button-checked');
|
||||
var currentTitle = button.getAttribute("title");
|
||||
button.setAttribute("title", "Copied!");
|
||||
let tooltip;
|
||||
if (window.bootstrap) {
|
||||
button.setAttribute("data-bs-toggle", "tooltip");
|
||||
button.setAttribute("data-bs-placement", "left");
|
||||
button.setAttribute("data-bs-title", "Copied!");
|
||||
tooltip = new bootstrap.Tooltip(button,
|
||||
{ trigger: "manual",
|
||||
customClass: "code-copy-button-tooltip",
|
||||
offset: [0, -8]});
|
||||
tooltip.show();
|
||||
}
|
||||
setTimeout(function() {
|
||||
if (tooltip) {
|
||||
tooltip.hide();
|
||||
button.removeAttribute("data-bs-title");
|
||||
button.removeAttribute("data-bs-toggle");
|
||||
button.removeAttribute("data-bs-placement");
|
||||
}
|
||||
button.setAttribute("title", currentTitle);
|
||||
button.classList.remove('code-copy-button-checked');
|
||||
}, 1000);
|
||||
// clear code selection
|
||||
e.clearSelection();
|
||||
});
|
||||
var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
|
||||
var mailtoRegex = new RegExp(/^mailto:/);
|
||||
var filterRegex = new RegExp("https:\/\/OpenAccess-AI-Collective\.github\.io\/axolotl\/");
|
||||
var isInternal = (href) => {
|
||||
return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
|
||||
}
|
||||
// Inspect non-navigation links and adorn them if external
|
||||
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool)');
|
||||
for (var i=0; i<links.length; i++) {
|
||||
const link = links[i];
|
||||
if (!isInternal(link.href)) {
|
||||
// undo the damage that might have been done by quarto-nav.js in the case of
|
||||
// links that we want to consider external
|
||||
if (link.dataset.originalHref !== undefined) {
|
||||
link.href = link.dataset.originalHref;
|
||||
}
|
||||
}
|
||||
}
|
||||
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
|
||||
const config = {
|
||||
allowHTML: true,
|
||||
maxWidth: 500,
|
||||
delay: 100,
|
||||
arrow: false,
|
||||
appendTo: function(el) {
|
||||
return el.parentElement;
|
||||
},
|
||||
interactive: true,
|
||||
interactiveBorder: 10,
|
||||
theme: 'quarto',
|
||||
placement: 'bottom-start',
|
||||
};
|
||||
if (contentFn) {
|
||||
config.content = contentFn;
|
||||
}
|
||||
if (onTriggerFn) {
|
||||
config.onTrigger = onTriggerFn;
|
||||
}
|
||||
if (onUntriggerFn) {
|
||||
config.onUntrigger = onUntriggerFn;
|
||||
}
|
||||
window.tippy(el, config);
|
||||
}
|
||||
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
|
||||
for (var i=0; i<noterefs.length; i++) {
|
||||
const ref = noterefs[i];
|
||||
tippyHover(ref, function() {
|
||||
// use id or data attribute instead here
|
||||
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
|
||||
try { href = new URL(href).hash; } catch {}
|
||||
const id = href.replace(/^#\/?/, "");
|
||||
const note = window.document.getElementById(id);
|
||||
if (note) {
|
||||
return note.innerHTML;
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
});
|
||||
}
|
||||
const xrefs = window.document.querySelectorAll('a.quarto-xref');
|
||||
const processXRef = (id, note) => {
|
||||
// Strip column container classes
|
||||
const stripColumnClz = (el) => {
|
||||
el.classList.remove("page-full", "page-columns");
|
||||
if (el.children) {
|
||||
for (const child of el.children) {
|
||||
stripColumnClz(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
stripColumnClz(note)
|
||||
if (id === null || id.startsWith('sec-')) {
|
||||
// Special case sections, only their first couple elements
|
||||
const container = document.createElement("div");
|
||||
if (note.children && note.children.length > 2) {
|
||||
container.appendChild(note.children[0].cloneNode(true));
|
||||
for (let i = 1; i < note.children.length; i++) {
|
||||
const child = note.children[i];
|
||||
if (child.tagName === "P" && child.innerText === "") {
|
||||
continue;
|
||||
} else {
|
||||
container.appendChild(child.cloneNode(true));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(container);
|
||||
}
|
||||
return container.innerHTML
|
||||
} else {
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(note);
|
||||
}
|
||||
return note.innerHTML;
|
||||
}
|
||||
} else {
|
||||
// Remove any anchor links if they are present
|
||||
const anchorLink = note.querySelector('a.anchorjs-link');
|
||||
if (anchorLink) {
|
||||
anchorLink.remove();
|
||||
}
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(note);
|
||||
}
|
||||
// TODO in 1.5, we should make sure this works without a callout special case
|
||||
if (note.classList.contains("callout")) {
|
||||
return note.outerHTML;
|
||||
} else {
|
||||
return note.innerHTML;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (var i=0; i<xrefs.length; i++) {
|
||||
const xref = xrefs[i];
|
||||
tippyHover(xref, undefined, function(instance) {
|
||||
instance.disable();
|
||||
let url = xref.getAttribute('href');
|
||||
let hash = undefined;
|
||||
if (url.startsWith('#')) {
|
||||
hash = url;
|
||||
} else {
|
||||
try { hash = new URL(url).hash; } catch {}
|
||||
}
|
||||
if (hash) {
|
||||
const id = hash.replace(/^#\/?/, "");
|
||||
const note = window.document.getElementById(id);
|
||||
if (note !== null) {
|
||||
try {
|
||||
const html = processXRef(id, note.cloneNode(true));
|
||||
instance.setContent(html);
|
||||
} finally {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
}
|
||||
} else {
|
||||
// See if we can fetch this
|
||||
fetch(url.split('#')[0])
|
||||
.then(res => res.text())
|
||||
.then(html => {
|
||||
const parser = new DOMParser();
|
||||
const htmlDoc = parser.parseFromString(html, "text/html");
|
||||
const note = htmlDoc.getElementById(id);
|
||||
if (note !== null) {
|
||||
const html = processXRef(id, note);
|
||||
instance.setContent(html);
|
||||
}
|
||||
}).finally(() => {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// See if we can fetch a full url (with no hash to target)
|
||||
// This is a special case and we should probably do some content thinning / targeting
|
||||
fetch(url)
|
||||
.then(res => res.text())
|
||||
.then(html => {
|
||||
const parser = new DOMParser();
|
||||
const htmlDoc = parser.parseFromString(html, "text/html");
|
||||
const note = htmlDoc.querySelector('main.content');
|
||||
if (note !== null) {
|
||||
// This should only happen for chapter cross references
|
||||
// (since there is no id in the URL)
|
||||
// remove the first header
|
||||
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
|
||||
note.children[0].remove();
|
||||
}
|
||||
const html = processXRef(null, note);
|
||||
instance.setContent(html);
|
||||
}
|
||||
}).finally(() => {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
});
|
||||
}
|
||||
}, function(instance) {
|
||||
});
|
||||
}
|
||||
let selectedAnnoteEl;
|
||||
const selectorForAnnotation = ( cell, annotation) => {
|
||||
let cellAttr = 'data-code-cell="' + cell + '"';
|
||||
let lineAttr = 'data-code-annotation="' + annotation + '"';
|
||||
const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
|
||||
return selector;
|
||||
}
|
||||
const selectCodeLines = (annoteEl) => {
|
||||
const doc = window.document;
|
||||
const targetCell = annoteEl.getAttribute("data-target-cell");
|
||||
const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
|
||||
const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
|
||||
const lines = annoteSpan.getAttribute("data-code-lines").split(",");
|
||||
const lineIds = lines.map((line) => {
|
||||
return targetCell + "-" + line;
|
||||
})
|
||||
let top = null;
|
||||
let height = null;
|
||||
let parent = null;
|
||||
if (lineIds.length > 0) {
|
||||
//compute the position of the single el (top and bottom and make a div)
|
||||
const el = window.document.getElementById(lineIds[0]);
|
||||
top = el.offsetTop;
|
||||
height = el.offsetHeight;
|
||||
parent = el.parentElement.parentElement;
|
||||
if (lineIds.length > 1) {
|
||||
const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
|
||||
const bottom = lastEl.offsetTop + lastEl.offsetHeight;
|
||||
height = bottom - top;
|
||||
}
|
||||
if (top !== null && height !== null && parent !== null) {
|
||||
// cook up a div (if necessary) and position it
|
||||
let div = window.document.getElementById("code-annotation-line-highlight");
|
||||
if (div === null) {
|
||||
div = window.document.createElement("div");
|
||||
div.setAttribute("id", "code-annotation-line-highlight");
|
||||
div.style.position = 'absolute';
|
||||
parent.appendChild(div);
|
||||
}
|
||||
div.style.top = top - 2 + "px";
|
||||
div.style.height = height + 4 + "px";
|
||||
div.style.left = 0;
|
||||
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
|
||||
if (gutterDiv === null) {
|
||||
gutterDiv = window.document.createElement("div");
|
||||
gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
|
||||
gutterDiv.style.position = 'absolute';
|
||||
const codeCell = window.document.getElementById(targetCell);
|
||||
const gutter = codeCell.querySelector('.code-annotation-gutter');
|
||||
gutter.appendChild(gutterDiv);
|
||||
}
|
||||
gutterDiv.style.top = top - 2 + "px";
|
||||
gutterDiv.style.height = height + 4 + "px";
|
||||
}
|
||||
selectedAnnoteEl = annoteEl;
|
||||
}
|
||||
};
|
||||
const unselectCodeLines = () => {
|
||||
const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
|
||||
elementsIds.forEach((elId) => {
|
||||
const div = window.document.getElementById(elId);
|
||||
if (div) {
|
||||
div.remove();
|
||||
}
|
||||
});
|
||||
selectedAnnoteEl = undefined;
|
||||
};
|
||||
// Handle positioning of the toggle
|
||||
window.addEventListener(
|
||||
"resize",
|
||||
throttle(() => {
|
||||
elRect = undefined;
|
||||
if (selectedAnnoteEl) {
|
||||
selectCodeLines(selectedAnnoteEl);
|
||||
}
|
||||
}, 10)
|
||||
);
|
||||
function throttle(fn, ms) {
|
||||
let throttle = false;
|
||||
let timer;
|
||||
return (...args) => {
|
||||
if(!throttle) { // first call gets through
|
||||
fn.apply(this, args);
|
||||
throttle = true;
|
||||
} else { // all the others get throttled
|
||||
if(timer) clearTimeout(timer); // cancel #2
|
||||
timer = setTimeout(() => {
|
||||
fn.apply(this, args);
|
||||
timer = throttle = false;
|
||||
}, ms);
|
||||
}
|
||||
};
|
||||
}
|
||||
// Attach click handler to the DT
|
||||
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
|
||||
for (const annoteDlNode of annoteDls) {
|
||||
annoteDlNode.addEventListener('click', (event) => {
|
||||
const clickedEl = event.target;
|
||||
if (clickedEl !== selectedAnnoteEl) {
|
||||
unselectCodeLines();
|
||||
const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
|
||||
if (activeEl) {
|
||||
activeEl.classList.remove('code-annotation-active');
|
||||
}
|
||||
selectCodeLines(clickedEl);
|
||||
clickedEl.classList.add('code-annotation-active');
|
||||
} else {
|
||||
// Unselect the line
|
||||
unselectCodeLines();
|
||||
clickedEl.classList.remove('code-annotation-active');
|
||||
}
|
||||
});
|
||||
}
|
||||
const findCites = (el) => {
|
||||
const parentEl = el.parentElement;
|
||||
if (parentEl) {
|
||||
const cites = parentEl.dataset.cites;
|
||||
if (cites) {
|
||||
return {
|
||||
el,
|
||||
cites: cites.split(' ')
|
||||
};
|
||||
} else {
|
||||
return findCites(el.parentElement)
|
||||
}
|
||||
} else {
|
||||
return undefined;
|
||||
}
|
||||
};
|
||||
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
|
||||
for (var i=0; i<bibliorefs.length; i++) {
|
||||
const ref = bibliorefs[i];
|
||||
const citeInfo = findCites(ref);
|
||||
if (citeInfo) {
|
||||
tippyHover(citeInfo.el, function() {
|
||||
var popup = window.document.createElement('div');
|
||||
citeInfo.cites.forEach(function(cite) {
|
||||
var citeDiv = window.document.createElement('div');
|
||||
citeDiv.classList.add('hanging-indent');
|
||||
citeDiv.classList.add('csl-entry');
|
||||
var biblioDiv = window.document.getElementById('ref-' + cite);
|
||||
if (biblioDiv) {
|
||||
citeDiv.innerHTML = biblioDiv.innerHTML;
|
||||
}
|
||||
popup.appendChild(citeDiv);
|
||||
});
|
||||
return popup.innerHTML;
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>
|
||||
</div> <!-- /content -->
|
||||
|
||||
|
||||
|
||||
|
||||
</body></html>
|
||||
952
docs/dataset-formats/inst_tune.html
Normal file
952
docs/dataset-formats/inst_tune.html
Normal file
@@ -0,0 +1,952 @@
|
||||
<!DOCTYPE html>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
|
||||
|
||||
<meta charset="utf-8">
|
||||
<meta name="generator" content="quarto-1.4.552">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
|
||||
|
||||
<meta name="description" content="Instruction tuning formats for supervised fine-tuning.">
|
||||
|
||||
<title>Axolotl - Instruction Tuning</title>
|
||||
<style>
|
||||
code{white-space: pre-wrap;}
|
||||
span.smallcaps{font-variant: small-caps;}
|
||||
div.columns{display: flex; gap: min(4vw, 1.5em);}
|
||||
div.column{flex: auto; overflow-x: auto;}
|
||||
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
|
||||
ul.task-list{list-style: none;}
|
||||
ul.task-list li input[type="checkbox"] {
|
||||
width: 0.8em;
|
||||
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
|
||||
vertical-align: middle;
|
||||
}
|
||||
/* CSS for syntax highlighting */
|
||||
pre > code.sourceCode { white-space: pre; position: relative; }
|
||||
pre > code.sourceCode > span { line-height: 1.25; }
|
||||
pre > code.sourceCode > span:empty { height: 1.2em; }
|
||||
.sourceCode { overflow: visible; }
|
||||
code.sourceCode > span { color: inherit; text-decoration: inherit; }
|
||||
div.sourceCode { margin: 1em 0; }
|
||||
pre.sourceCode { margin: 0; }
|
||||
@media screen {
|
||||
div.sourceCode { overflow: auto; }
|
||||
}
|
||||
@media print {
|
||||
pre > code.sourceCode { white-space: pre-wrap; }
|
||||
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
|
||||
}
|
||||
pre.numberSource code
|
||||
{ counter-reset: source-line 0; }
|
||||
pre.numberSource code > span
|
||||
{ position: relative; left: -4em; counter-increment: source-line; }
|
||||
pre.numberSource code > span > a:first-child::before
|
||||
{ content: counter(source-line);
|
||||
position: relative; left: -1em; text-align: right; vertical-align: baseline;
|
||||
border: none; display: inline-block;
|
||||
-webkit-touch-callout: none; -webkit-user-select: none;
|
||||
-khtml-user-select: none; -moz-user-select: none;
|
||||
-ms-user-select: none; user-select: none;
|
||||
padding: 0 4px; width: 4em;
|
||||
}
|
||||
pre.numberSource { margin-left: 3em; padding-left: 4px; }
|
||||
div.sourceCode
|
||||
{ }
|
||||
@media screen {
|
||||
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
<script src="../../site_libs/quarto-nav/quarto-nav.js"></script>
|
||||
<script src="../../site_libs/clipboard/clipboard.min.js"></script>
|
||||
<script src="../../site_libs/quarto-search/autocomplete.umd.js"></script>
|
||||
<script src="../../site_libs/quarto-search/fuse.min.js"></script>
|
||||
<script src="../../site_libs/quarto-search/quarto-search.js"></script>
|
||||
<meta name="quarto:offset" content="../../">
|
||||
<link href="../../favicon.jpg" rel="icon" type="image/jpeg">
|
||||
<script src="../../site_libs/quarto-html/quarto.js"></script>
|
||||
<script src="../../site_libs/quarto-html/popper.min.js"></script>
|
||||
<script src="../../site_libs/quarto-html/tippy.umd.min.js"></script>
|
||||
<script src="../../site_libs/quarto-html/anchor.min.js"></script>
|
||||
<link href="../../site_libs/quarto-html/tippy.css" rel="stylesheet">
|
||||
<link href="../../site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<script src="../../site_libs/bootstrap/bootstrap.min.js"></script>
|
||||
<link href="../../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
|
||||
<link href="../../site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
|
||||
<script id="quarto-search-options" type="application/json">{
|
||||
"location": "navbar",
|
||||
"copy-button": false,
|
||||
"collapse-after": 3,
|
||||
"panel-placement": "end",
|
||||
"type": "overlay",
|
||||
"limit": 50,
|
||||
"keyboard-shortcut": [
|
||||
"f",
|
||||
"/",
|
||||
"s"
|
||||
],
|
||||
"show-item-context": false,
|
||||
"language": {
|
||||
"search-no-results-text": "No results",
|
||||
"search-matching-documents-text": "matching documents",
|
||||
"search-copy-link-title": "Copy link to search",
|
||||
"search-hide-matches-text": "Hide additional matches",
|
||||
"search-more-match-text": "more match in this document",
|
||||
"search-more-matches-text": "more matches in this document",
|
||||
"search-clear-button-title": "Clear",
|
||||
"search-text-placeholder": "",
|
||||
"search-detached-cancel-button-title": "Cancel",
|
||||
"search-submit-button-title": "Submit",
|
||||
"search-label": "Search"
|
||||
}
|
||||
}</script>
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../styles.css">
|
||||
</head>
|
||||
|
||||
<body class="nav-sidebar docked nav-fixed">
|
||||
|
||||
<div id="quarto-search-results"></div>
|
||||
<header id="quarto-header" class="headroom fixed-top">
|
||||
<nav class="navbar navbar-expand " data-bs-theme="dark">
|
||||
<div class="navbar-container container-fluid">
|
||||
<div class="navbar-brand-container mx-auto">
|
||||
<a class="navbar-brand" href="../../index.html">
|
||||
<span class="navbar-title">Axolotl</span>
|
||||
</a>
|
||||
</div>
|
||||
<div class="quarto-navbar-tools tools-wide tools-end">
|
||||
<a href="https://twitter.com/axolotl_ai" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-twitter"></i></a>
|
||||
<a href="https://github.com/OpenAccess-AI-Collective/axolotl/" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-github"></i></a>
|
||||
<a href="https://discord.gg/7m9sfhzaf3" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-discord"></i></a>
|
||||
</div>
|
||||
<div id="quarto-search" class="" title="Search"></div>
|
||||
</div> <!-- /container-fluid -->
|
||||
</nav>
|
||||
<nav class="quarto-secondary-nav">
|
||||
<div class="container-fluid d-flex">
|
||||
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
<i class="bi bi-layout-text-sidebar-reverse"></i>
|
||||
</button>
|
||||
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../docs/dataset-formats/index.html">Dataset Formats</a></li><li class="breadcrumb-item"><a href="../../docs/dataset-formats/inst_tune.html">Instruction Tuning</a></li></ol></nav>
|
||||
<a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
</a>
|
||||
</div>
|
||||
</nav>
|
||||
</header>
|
||||
<!-- content -->
|
||||
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
|
||||
<!-- sidebar -->
|
||||
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto">
|
||||
<div class="sidebar-menu-container">
|
||||
<ul class="list-unstyled mt-1">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Home</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true">
|
||||
<span class="menu-text">How-To Guides</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/debugging.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Debugging</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/multipack.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multipack (Sample Packing)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-free prompt construction</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/rlhf.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">RLHF (Beta)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/nccl.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">NCCL</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/mac.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Mac M-series</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/multi-node.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multi Node</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link active">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/faq.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FAQ</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
|
||||
<!-- margin-sidebar -->
|
||||
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
|
||||
<nav id="TOC" role="doc-toc" class="toc-active">
|
||||
<h2 id="toc-title">On this page</h2>
|
||||
|
||||
<ul>
|
||||
<li><a href="#alpaca" id="toc-alpaca" class="nav-link active" data-scroll-target="#alpaca">alpaca</a></li>
|
||||
<li><a href="#jeopardy" id="toc-jeopardy" class="nav-link" data-scroll-target="#jeopardy">jeopardy</a></li>
|
||||
<li><a href="#oasst" id="toc-oasst" class="nav-link" data-scroll-target="#oasst">oasst</a></li>
|
||||
<li><a href="#gpteacher" id="toc-gpteacher" class="nav-link" data-scroll-target="#gpteacher">gpteacher</a></li>
|
||||
<li><a href="#reflection" id="toc-reflection" class="nav-link" data-scroll-target="#reflection">reflection</a></li>
|
||||
<li><a href="#explainchoice" id="toc-explainchoice" class="nav-link" data-scroll-target="#explainchoice">explainchoice</a></li>
|
||||
<li><a href="#concisechoice" id="toc-concisechoice" class="nav-link" data-scroll-target="#concisechoice">concisechoice</a></li>
|
||||
<li><a href="#summarizetldr" id="toc-summarizetldr" class="nav-link" data-scroll-target="#summarizetldr">summarizetldr</a></li>
|
||||
<li><a href="#alpaca_chat" id="toc-alpaca_chat" class="nav-link" data-scroll-target="#alpaca_chat">alpaca_chat</a></li>
|
||||
<li><a href="#alpaca_chat.load_qa" id="toc-alpaca_chat.load_qa" class="nav-link" data-scroll-target="#alpaca_chat.load_qa">alpaca_chat.load_qa</a></li>
|
||||
<li><a href="#alpaca_chat.load_concise" id="toc-alpaca_chat.load_concise" class="nav-link" data-scroll-target="#alpaca_chat.load_concise">alpaca_chat.load_concise</a></li>
|
||||
<li><a href="#alpaca_chat.load_camel_ai" id="toc-alpaca_chat.load_camel_ai" class="nav-link" data-scroll-target="#alpaca_chat.load_camel_ai">alpaca_chat.load_camel_ai</a></li>
|
||||
<li><a href="#alpaca_w_system.load_open_orca" id="toc-alpaca_w_system.load_open_orca" class="nav-link" data-scroll-target="#alpaca_w_system.load_open_orca">alpaca_w_system.load_open_orca</a></li>
|
||||
<li><a href="#context_qa" id="toc-context_qa" class="nav-link" data-scroll-target="#context_qa">context_qa</a></li>
|
||||
<li><a href="#context_qa.load_v2" id="toc-context_qa.load_v2" class="nav-link" data-scroll-target="#context_qa.load_v2">context_qa.load_v2</a></li>
|
||||
<li><a href="#context_qa.load_404" id="toc-context_qa.load_404" class="nav-link" data-scroll-target="#context_qa.load_404">context_qa.load_404</a></li>
|
||||
<li><a href="#creative_acr.load_answer" id="toc-creative_acr.load_answer" class="nav-link" data-scroll-target="#creative_acr.load_answer">creative_acr.load_answer</a></li>
|
||||
<li><a href="#creative_acr.load_critique" id="toc-creative_acr.load_critique" class="nav-link" data-scroll-target="#creative_acr.load_critique">creative_acr.load_critique</a></li>
|
||||
<li><a href="#creative_acr.load_revise" id="toc-creative_acr.load_revise" class="nav-link" data-scroll-target="#creative_acr.load_revise">creative_acr.load_revise</a></li>
|
||||
<li><a href="#metharme" id="toc-metharme" class="nav-link" data-scroll-target="#metharme">metharme</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
<!-- main -->
|
||||
<main class="content" id="quarto-document-content">
|
||||
|
||||
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../docs/dataset-formats/index.html">Dataset Formats</a></li><li class="breadcrumb-item"><a href="../../docs/dataset-formats/inst_tune.html">Instruction Tuning</a></li></ol></nav>
|
||||
<div class="quarto-title">
|
||||
<h1 class="title">Instruction Tuning</h1>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class="description">
|
||||
Instruction tuning formats for supervised fine-tuning.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="quarto-title-meta">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</header>
|
||||
|
||||
|
||||
<section id="alpaca" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="alpaca">alpaca</h2>
|
||||
<p>instruction; input(optional)</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb1" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"input"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"output"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="jeopardy" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="jeopardy">jeopardy</h2>
|
||||
<p>question and answer</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb2" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"category"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"answer"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="oasst" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="oasst">oasst</h2>
|
||||
<p>instruction</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb3" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"INSTRUCTION"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"RESPONSE"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="gpteacher" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="gpteacher">gpteacher</h2>
|
||||
<p>instruction; input(optional)</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb4" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"input"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"response"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="reflection" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="reflection">reflection</h2>
|
||||
<p>instruction with reflect; input(optional)</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb5" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"input"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"output"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"reflection"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"corrected"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="explainchoice" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="explainchoice">explainchoice</h2>
|
||||
<p>question, choices, (solution OR explanation)</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb6" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"choices"</span><span class="fu">:</span> <span class="ot">[</span><span class="st">"..."</span><span class="ot">]</span><span class="fu">,</span> <span class="dt">"solution"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"explanation"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="concisechoice" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="concisechoice">concisechoice</h2>
|
||||
<p>question, choices, (solution OR explanation)</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb7" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"choices"</span><span class="fu">:</span> <span class="ot">[</span><span class="st">"..."</span><span class="ot">]</span><span class="fu">,</span> <span class="dt">"solution"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"explanation"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="summarizetldr" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="summarizetldr">summarizetldr</h2>
|
||||
<p>article and summary</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb8" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"article"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"summary"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="alpaca_chat" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="alpaca_chat">alpaca_chat</h2>
|
||||
<p>basic instruct for alpaca chat</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb9" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"input"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"response"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="alpaca_chat.load_qa" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="alpaca_chat.load_qa">alpaca_chat.load_qa</h2>
|
||||
<p>question and answer for alpaca chat</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb10" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"answer"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="alpaca_chat.load_concise" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="alpaca_chat.load_concise">alpaca_chat.load_concise</h2>
|
||||
<p>question and answer for alpaca chat, for concise answers</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb11" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"input"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"response"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="alpaca_chat.load_camel_ai" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="alpaca_chat.load_camel_ai">alpaca_chat.load_camel_ai</h2>
|
||||
<p>question and answer for alpaca chat, for load_camel_ai</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb12" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"message_1"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"message_2"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="alpaca_w_system.load_open_orca" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="alpaca_w_system.load_open_orca">alpaca_w_system.load_open_orca</h2>
|
||||
<p>support for open orca datasets with included system prompts, instruct</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb13" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"system_prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"response"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="context_qa" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="context_qa">context_qa</h2>
|
||||
<p>in context question answering from an article</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb14" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"article"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"answer"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="context_qa.load_v2" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="context_qa.load_v2">context_qa.load_v2</h2>
|
||||
<p>in context question answering (alternate)</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb15" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"context"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"answer"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="context_qa.load_404" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="context_qa.load_404">context_qa.load_404</h2>
|
||||
<p>in context question answering from an article, with default response for no answer from context</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb16" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"article"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"unanswerable_question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="creative_acr.load_answer" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="creative_acr.load_answer">creative_acr.load_answer</h2>
|
||||
<p>instruction and revision</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb17" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"revision"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="creative_acr.load_critique" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="creative_acr.load_critique">creative_acr.load_critique</h2>
|
||||
<p>critique</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb18" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"scores"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"critiques"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"answer"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="creative_acr.load_revise" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="creative_acr.load_revise">creative_acr.load_revise</h2>
|
||||
<p>critique and revise</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb19" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"scores"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"critiques"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"answer"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"revision"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="metharme" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="metharme">metharme</h2>
|
||||
<p>instruction, adds additional eos tokens</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb20" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="dt">"generation"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
|
||||
|
||||
</section>
|
||||
|
||||
</main> <!-- /main -->
|
||||
<script id="quarto-html-after-body" type="application/javascript">
|
||||
window.document.addEventListener("DOMContentLoaded", function (event) {
|
||||
const toggleBodyColorMode = (bsSheetEl) => {
|
||||
const mode = bsSheetEl.getAttribute("data-mode");
|
||||
const bodyEl = window.document.querySelector("body");
|
||||
if (mode === "dark") {
|
||||
bodyEl.classList.add("quarto-dark");
|
||||
bodyEl.classList.remove("quarto-light");
|
||||
} else {
|
||||
bodyEl.classList.add("quarto-light");
|
||||
bodyEl.classList.remove("quarto-dark");
|
||||
}
|
||||
}
|
||||
const toggleBodyColorPrimary = () => {
|
||||
const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
|
||||
if (bsSheetEl) {
|
||||
toggleBodyColorMode(bsSheetEl);
|
||||
}
|
||||
}
|
||||
toggleBodyColorPrimary();
|
||||
const icon = "";
|
||||
const anchorJS = new window.AnchorJS();
|
||||
anchorJS.options = {
|
||||
placement: 'right',
|
||||
icon: icon
|
||||
};
|
||||
anchorJS.add('.anchored');
|
||||
const isCodeAnnotation = (el) => {
|
||||
for (const clz of el.classList) {
|
||||
if (clz.startsWith('code-annotation-')) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
const clipboard = new window.ClipboardJS('.code-copy-button', {
|
||||
text: function(trigger) {
|
||||
const codeEl = trigger.previousElementSibling.cloneNode(true);
|
||||
for (const childEl of codeEl.children) {
|
||||
if (isCodeAnnotation(childEl)) {
|
||||
childEl.remove();
|
||||
}
|
||||
}
|
||||
return codeEl.innerText;
|
||||
}
|
||||
});
|
||||
clipboard.on('success', function(e) {
|
||||
// button target
|
||||
const button = e.trigger;
|
||||
// don't keep focus
|
||||
button.blur();
|
||||
// flash "checked"
|
||||
button.classList.add('code-copy-button-checked');
|
||||
var currentTitle = button.getAttribute("title");
|
||||
button.setAttribute("title", "Copied!");
|
||||
let tooltip;
|
||||
if (window.bootstrap) {
|
||||
button.setAttribute("data-bs-toggle", "tooltip");
|
||||
button.setAttribute("data-bs-placement", "left");
|
||||
button.setAttribute("data-bs-title", "Copied!");
|
||||
tooltip = new bootstrap.Tooltip(button,
|
||||
{ trigger: "manual",
|
||||
customClass: "code-copy-button-tooltip",
|
||||
offset: [0, -8]});
|
||||
tooltip.show();
|
||||
}
|
||||
setTimeout(function() {
|
||||
if (tooltip) {
|
||||
tooltip.hide();
|
||||
button.removeAttribute("data-bs-title");
|
||||
button.removeAttribute("data-bs-toggle");
|
||||
button.removeAttribute("data-bs-placement");
|
||||
}
|
||||
button.setAttribute("title", currentTitle);
|
||||
button.classList.remove('code-copy-button-checked');
|
||||
}, 1000);
|
||||
// clear code selection
|
||||
e.clearSelection();
|
||||
});
|
||||
var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
|
||||
var mailtoRegex = new RegExp(/^mailto:/);
|
||||
var filterRegex = new RegExp("https:\/\/OpenAccess-AI-Collective\.github\.io\/axolotl\/");
|
||||
var isInternal = (href) => {
|
||||
return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
|
||||
}
|
||||
// Inspect non-navigation links and adorn them if external
|
||||
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool)');
|
||||
for (var i=0; i<links.length; i++) {
|
||||
const link = links[i];
|
||||
if (!isInternal(link.href)) {
|
||||
// undo the damage that might have been done by quarto-nav.js in the case of
|
||||
// links that we want to consider external
|
||||
if (link.dataset.originalHref !== undefined) {
|
||||
link.href = link.dataset.originalHref;
|
||||
}
|
||||
}
|
||||
}
|
||||
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
|
||||
const config = {
|
||||
allowHTML: true,
|
||||
maxWidth: 500,
|
||||
delay: 100,
|
||||
arrow: false,
|
||||
appendTo: function(el) {
|
||||
return el.parentElement;
|
||||
},
|
||||
interactive: true,
|
||||
interactiveBorder: 10,
|
||||
theme: 'quarto',
|
||||
placement: 'bottom-start',
|
||||
};
|
||||
if (contentFn) {
|
||||
config.content = contentFn;
|
||||
}
|
||||
if (onTriggerFn) {
|
||||
config.onTrigger = onTriggerFn;
|
||||
}
|
||||
if (onUntriggerFn) {
|
||||
config.onUntrigger = onUntriggerFn;
|
||||
}
|
||||
window.tippy(el, config);
|
||||
}
|
||||
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
|
||||
for (var i=0; i<noterefs.length; i++) {
|
||||
const ref = noterefs[i];
|
||||
tippyHover(ref, function() {
|
||||
// use id or data attribute instead here
|
||||
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
|
||||
try { href = new URL(href).hash; } catch {}
|
||||
const id = href.replace(/^#\/?/, "");
|
||||
const note = window.document.getElementById(id);
|
||||
if (note) {
|
||||
return note.innerHTML;
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
});
|
||||
}
|
||||
const xrefs = window.document.querySelectorAll('a.quarto-xref');
|
||||
const processXRef = (id, note) => {
|
||||
// Strip column container classes
|
||||
const stripColumnClz = (el) => {
|
||||
el.classList.remove("page-full", "page-columns");
|
||||
if (el.children) {
|
||||
for (const child of el.children) {
|
||||
stripColumnClz(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
stripColumnClz(note)
|
||||
if (id === null || id.startsWith('sec-')) {
|
||||
// Special case sections, only their first couple elements
|
||||
const container = document.createElement("div");
|
||||
if (note.children && note.children.length > 2) {
|
||||
container.appendChild(note.children[0].cloneNode(true));
|
||||
for (let i = 1; i < note.children.length; i++) {
|
||||
const child = note.children[i];
|
||||
if (child.tagName === "P" && child.innerText === "") {
|
||||
continue;
|
||||
} else {
|
||||
container.appendChild(child.cloneNode(true));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(container);
|
||||
}
|
||||
return container.innerHTML
|
||||
} else {
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(note);
|
||||
}
|
||||
return note.innerHTML;
|
||||
}
|
||||
} else {
|
||||
// Remove any anchor links if they are present
|
||||
const anchorLink = note.querySelector('a.anchorjs-link');
|
||||
if (anchorLink) {
|
||||
anchorLink.remove();
|
||||
}
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(note);
|
||||
}
|
||||
// TODO in 1.5, we should make sure this works without a callout special case
|
||||
if (note.classList.contains("callout")) {
|
||||
return note.outerHTML;
|
||||
} else {
|
||||
return note.innerHTML;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (var i=0; i<xrefs.length; i++) {
|
||||
const xref = xrefs[i];
|
||||
tippyHover(xref, undefined, function(instance) {
|
||||
instance.disable();
|
||||
let url = xref.getAttribute('href');
|
||||
let hash = undefined;
|
||||
if (url.startsWith('#')) {
|
||||
hash = url;
|
||||
} else {
|
||||
try { hash = new URL(url).hash; } catch {}
|
||||
}
|
||||
if (hash) {
|
||||
const id = hash.replace(/^#\/?/, "");
|
||||
const note = window.document.getElementById(id);
|
||||
if (note !== null) {
|
||||
try {
|
||||
const html = processXRef(id, note.cloneNode(true));
|
||||
instance.setContent(html);
|
||||
} finally {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
}
|
||||
} else {
|
||||
// See if we can fetch this
|
||||
fetch(url.split('#')[0])
|
||||
.then(res => res.text())
|
||||
.then(html => {
|
||||
const parser = new DOMParser();
|
||||
const htmlDoc = parser.parseFromString(html, "text/html");
|
||||
const note = htmlDoc.getElementById(id);
|
||||
if (note !== null) {
|
||||
const html = processXRef(id, note);
|
||||
instance.setContent(html);
|
||||
}
|
||||
}).finally(() => {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// See if we can fetch a full url (with no hash to target)
|
||||
// This is a special case and we should probably do some content thinning / targeting
|
||||
fetch(url)
|
||||
.then(res => res.text())
|
||||
.then(html => {
|
||||
const parser = new DOMParser();
|
||||
const htmlDoc = parser.parseFromString(html, "text/html");
|
||||
const note = htmlDoc.querySelector('main.content');
|
||||
if (note !== null) {
|
||||
// This should only happen for chapter cross references
|
||||
// (since there is no id in the URL)
|
||||
// remove the first header
|
||||
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
|
||||
note.children[0].remove();
|
||||
}
|
||||
const html = processXRef(null, note);
|
||||
instance.setContent(html);
|
||||
}
|
||||
}).finally(() => {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
});
|
||||
}
|
||||
}, function(instance) {
|
||||
});
|
||||
}
|
||||
let selectedAnnoteEl;
|
||||
const selectorForAnnotation = ( cell, annotation) => {
|
||||
let cellAttr = 'data-code-cell="' + cell + '"';
|
||||
let lineAttr = 'data-code-annotation="' + annotation + '"';
|
||||
const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
|
||||
return selector;
|
||||
}
|
||||
const selectCodeLines = (annoteEl) => {
|
||||
const doc = window.document;
|
||||
const targetCell = annoteEl.getAttribute("data-target-cell");
|
||||
const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
|
||||
const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
|
||||
const lines = annoteSpan.getAttribute("data-code-lines").split(",");
|
||||
const lineIds = lines.map((line) => {
|
||||
return targetCell + "-" + line;
|
||||
})
|
||||
let top = null;
|
||||
let height = null;
|
||||
let parent = null;
|
||||
if (lineIds.length > 0) {
|
||||
//compute the position of the single el (top and bottom and make a div)
|
||||
const el = window.document.getElementById(lineIds[0]);
|
||||
top = el.offsetTop;
|
||||
height = el.offsetHeight;
|
||||
parent = el.parentElement.parentElement;
|
||||
if (lineIds.length > 1) {
|
||||
const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
|
||||
const bottom = lastEl.offsetTop + lastEl.offsetHeight;
|
||||
height = bottom - top;
|
||||
}
|
||||
if (top !== null && height !== null && parent !== null) {
|
||||
// cook up a div (if necessary) and position it
|
||||
let div = window.document.getElementById("code-annotation-line-highlight");
|
||||
if (div === null) {
|
||||
div = window.document.createElement("div");
|
||||
div.setAttribute("id", "code-annotation-line-highlight");
|
||||
div.style.position = 'absolute';
|
||||
parent.appendChild(div);
|
||||
}
|
||||
div.style.top = top - 2 + "px";
|
||||
div.style.height = height + 4 + "px";
|
||||
div.style.left = 0;
|
||||
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
|
||||
if (gutterDiv === null) {
|
||||
gutterDiv = window.document.createElement("div");
|
||||
gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
|
||||
gutterDiv.style.position = 'absolute';
|
||||
const codeCell = window.document.getElementById(targetCell);
|
||||
const gutter = codeCell.querySelector('.code-annotation-gutter');
|
||||
gutter.appendChild(gutterDiv);
|
||||
}
|
||||
gutterDiv.style.top = top - 2 + "px";
|
||||
gutterDiv.style.height = height + 4 + "px";
|
||||
}
|
||||
selectedAnnoteEl = annoteEl;
|
||||
}
|
||||
};
|
||||
const unselectCodeLines = () => {
|
||||
const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
|
||||
elementsIds.forEach((elId) => {
|
||||
const div = window.document.getElementById(elId);
|
||||
if (div) {
|
||||
div.remove();
|
||||
}
|
||||
});
|
||||
selectedAnnoteEl = undefined;
|
||||
};
|
||||
// Handle positioning of the toggle
|
||||
window.addEventListener(
|
||||
"resize",
|
||||
throttle(() => {
|
||||
elRect = undefined;
|
||||
if (selectedAnnoteEl) {
|
||||
selectCodeLines(selectedAnnoteEl);
|
||||
}
|
||||
}, 10)
|
||||
);
|
||||
function throttle(fn, ms) {
|
||||
let throttle = false;
|
||||
let timer;
|
||||
return (...args) => {
|
||||
if(!throttle) { // first call gets through
|
||||
fn.apply(this, args);
|
||||
throttle = true;
|
||||
} else { // all the others get throttled
|
||||
if(timer) clearTimeout(timer); // cancel #2
|
||||
timer = setTimeout(() => {
|
||||
fn.apply(this, args);
|
||||
timer = throttle = false;
|
||||
}, ms);
|
||||
}
|
||||
};
|
||||
}
|
||||
// Attach click handler to the DT
|
||||
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
|
||||
for (const annoteDlNode of annoteDls) {
|
||||
annoteDlNode.addEventListener('click', (event) => {
|
||||
const clickedEl = event.target;
|
||||
if (clickedEl !== selectedAnnoteEl) {
|
||||
unselectCodeLines();
|
||||
const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
|
||||
if (activeEl) {
|
||||
activeEl.classList.remove('code-annotation-active');
|
||||
}
|
||||
selectCodeLines(clickedEl);
|
||||
clickedEl.classList.add('code-annotation-active');
|
||||
} else {
|
||||
// Unselect the line
|
||||
unselectCodeLines();
|
||||
clickedEl.classList.remove('code-annotation-active');
|
||||
}
|
||||
});
|
||||
}
|
||||
const findCites = (el) => {
|
||||
const parentEl = el.parentElement;
|
||||
if (parentEl) {
|
||||
const cites = parentEl.dataset.cites;
|
||||
if (cites) {
|
||||
return {
|
||||
el,
|
||||
cites: cites.split(' ')
|
||||
};
|
||||
} else {
|
||||
return findCites(el.parentElement)
|
||||
}
|
||||
} else {
|
||||
return undefined;
|
||||
}
|
||||
};
|
||||
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
|
||||
for (var i=0; i<bibliorefs.length; i++) {
|
||||
const ref = bibliorefs[i];
|
||||
const citeInfo = findCites(ref);
|
||||
if (citeInfo) {
|
||||
tippyHover(citeInfo.el, function() {
|
||||
var popup = window.document.createElement('div');
|
||||
citeInfo.cites.forEach(function(cite) {
|
||||
var citeDiv = window.document.createElement('div');
|
||||
citeDiv.classList.add('hanging-indent');
|
||||
citeDiv.classList.add('csl-entry');
|
||||
var biblioDiv = window.document.getElementById('ref-' + cite);
|
||||
if (biblioDiv) {
|
||||
citeDiv.innerHTML = biblioDiv.innerHTML;
|
||||
}
|
||||
popup.appendChild(citeDiv);
|
||||
});
|
||||
return popup.innerHTML;
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>
|
||||
</div> <!-- /content -->
|
||||
|
||||
|
||||
|
||||
|
||||
</body></html>
|
||||
756
docs/dataset-formats/pretraining.html
Normal file
756
docs/dataset-formats/pretraining.html
Normal file
@@ -0,0 +1,756 @@
|
||||
<!DOCTYPE html>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
|
||||
|
||||
<meta charset="utf-8">
|
||||
<meta name="generator" content="quarto-1.4.552">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
|
||||
|
||||
<meta name="description" content="Data format for a pre-training completion task.">
|
||||
|
||||
<title>Axolotl - Pre-training</title>
|
||||
<style>
|
||||
code{white-space: pre-wrap;}
|
||||
span.smallcaps{font-variant: small-caps;}
|
||||
div.columns{display: flex; gap: min(4vw, 1.5em);}
|
||||
div.column{flex: auto; overflow-x: auto;}
|
||||
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
|
||||
ul.task-list{list-style: none;}
|
||||
ul.task-list li input[type="checkbox"] {
|
||||
width: 0.8em;
|
||||
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
|
||||
vertical-align: middle;
|
||||
}
|
||||
/* CSS for syntax highlighting */
|
||||
pre > code.sourceCode { white-space: pre; position: relative; }
|
||||
pre > code.sourceCode > span { line-height: 1.25; }
|
||||
pre > code.sourceCode > span:empty { height: 1.2em; }
|
||||
.sourceCode { overflow: visible; }
|
||||
code.sourceCode > span { color: inherit; text-decoration: inherit; }
|
||||
div.sourceCode { margin: 1em 0; }
|
||||
pre.sourceCode { margin: 0; }
|
||||
@media screen {
|
||||
div.sourceCode { overflow: auto; }
|
||||
}
|
||||
@media print {
|
||||
pre > code.sourceCode { white-space: pre-wrap; }
|
||||
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
|
||||
}
|
||||
pre.numberSource code
|
||||
{ counter-reset: source-line 0; }
|
||||
pre.numberSource code > span
|
||||
{ position: relative; left: -4em; counter-increment: source-line; }
|
||||
pre.numberSource code > span > a:first-child::before
|
||||
{ content: counter(source-line);
|
||||
position: relative; left: -1em; text-align: right; vertical-align: baseline;
|
||||
border: none; display: inline-block;
|
||||
-webkit-touch-callout: none; -webkit-user-select: none;
|
||||
-khtml-user-select: none; -moz-user-select: none;
|
||||
-ms-user-select: none; user-select: none;
|
||||
padding: 0 4px; width: 4em;
|
||||
}
|
||||
pre.numberSource { margin-left: 3em; padding-left: 4px; }
|
||||
div.sourceCode
|
||||
{ }
|
||||
@media screen {
|
||||
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
<script src="../../site_libs/quarto-nav/quarto-nav.js"></script>
|
||||
<script src="../../site_libs/clipboard/clipboard.min.js"></script>
|
||||
<script src="../../site_libs/quarto-search/autocomplete.umd.js"></script>
|
||||
<script src="../../site_libs/quarto-search/fuse.min.js"></script>
|
||||
<script src="../../site_libs/quarto-search/quarto-search.js"></script>
|
||||
<meta name="quarto:offset" content="../../">
|
||||
<link href="../../favicon.jpg" rel="icon" type="image/jpeg">
|
||||
<script src="../../site_libs/quarto-html/quarto.js"></script>
|
||||
<script src="../../site_libs/quarto-html/popper.min.js"></script>
|
||||
<script src="../../site_libs/quarto-html/tippy.umd.min.js"></script>
|
||||
<script src="../../site_libs/quarto-html/anchor.min.js"></script>
|
||||
<link href="../../site_libs/quarto-html/tippy.css" rel="stylesheet">
|
||||
<link href="../../site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<script src="../../site_libs/bootstrap/bootstrap.min.js"></script>
|
||||
<link href="../../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
|
||||
<link href="../../site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
|
||||
<script id="quarto-search-options" type="application/json">{
|
||||
"location": "navbar",
|
||||
"copy-button": false,
|
||||
"collapse-after": 3,
|
||||
"panel-placement": "end",
|
||||
"type": "overlay",
|
||||
"limit": 50,
|
||||
"keyboard-shortcut": [
|
||||
"f",
|
||||
"/",
|
||||
"s"
|
||||
],
|
||||
"show-item-context": false,
|
||||
"language": {
|
||||
"search-no-results-text": "No results",
|
||||
"search-matching-documents-text": "matching documents",
|
||||
"search-copy-link-title": "Copy link to search",
|
||||
"search-hide-matches-text": "Hide additional matches",
|
||||
"search-more-match-text": "more match in this document",
|
||||
"search-more-matches-text": "more matches in this document",
|
||||
"search-clear-button-title": "Clear",
|
||||
"search-text-placeholder": "",
|
||||
"search-detached-cancel-button-title": "Cancel",
|
||||
"search-submit-button-title": "Submit",
|
||||
"search-label": "Search"
|
||||
}
|
||||
}</script>
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../styles.css">
|
||||
</head>
|
||||
|
||||
<body class="nav-sidebar docked nav-fixed">
|
||||
|
||||
<div id="quarto-search-results"></div>
|
||||
<header id="quarto-header" class="headroom fixed-top">
|
||||
<nav class="navbar navbar-expand " data-bs-theme="dark">
|
||||
<div class="navbar-container container-fluid">
|
||||
<div class="navbar-brand-container mx-auto">
|
||||
<a class="navbar-brand" href="../../index.html">
|
||||
<span class="navbar-title">Axolotl</span>
|
||||
</a>
|
||||
</div>
|
||||
<div class="quarto-navbar-tools tools-wide tools-end">
|
||||
<a href="https://twitter.com/axolotl_ai" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-twitter"></i></a>
|
||||
<a href="https://github.com/OpenAccess-AI-Collective/axolotl/" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-github"></i></a>
|
||||
<a href="https://discord.gg/7m9sfhzaf3" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-discord"></i></a>
|
||||
</div>
|
||||
<div id="quarto-search" class="" title="Search"></div>
|
||||
</div> <!-- /container-fluid -->
|
||||
</nav>
|
||||
<nav class="quarto-secondary-nav">
|
||||
<div class="container-fluid d-flex">
|
||||
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
<i class="bi bi-layout-text-sidebar-reverse"></i>
|
||||
</button>
|
||||
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../docs/dataset-formats/index.html">Dataset Formats</a></li><li class="breadcrumb-item"><a href="../../docs/dataset-formats/pretraining.html">Pre-training</a></li></ol></nav>
|
||||
<a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
</a>
|
||||
</div>
|
||||
</nav>
|
||||
</header>
|
||||
<!-- content -->
|
||||
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
|
||||
<!-- sidebar -->
|
||||
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto">
|
||||
<div class="sidebar-menu-container">
|
||||
<ul class="list-unstyled mt-1">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Home</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true">
|
||||
<span class="menu-text">How-To Guides</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/debugging.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Debugging</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/multipack.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multipack (Sample Packing)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-free prompt construction</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/rlhf.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">RLHF (Beta)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/nccl.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">NCCL</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/mac.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Mac M-series</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/multi-node.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multi Node</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link active">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/faq.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FAQ</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
|
||||
<!-- margin-sidebar -->
|
||||
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
|
||||
|
||||
</div>
|
||||
<!-- main -->
|
||||
<main class="content" id="quarto-document-content">
|
||||
|
||||
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../docs/dataset-formats/index.html">Dataset Formats</a></li><li class="breadcrumb-item"><a href="../../docs/dataset-formats/pretraining.html">Pre-training</a></li></ol></nav>
|
||||
<div class="quarto-title">
|
||||
<h1 class="title">Pre-training</h1>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class="description">
|
||||
Data format for a pre-training completion task.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="quarto-title-meta">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</header>
|
||||
|
||||
|
||||
<p>For pretraining, there is no prompt template or roles. The only required field is <code>text</code>:</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>data.jsonl</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb1" data-filename="data.jsonl"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"text"</span><span class="fu">:</span> <span class="st">"first row"</span><span class="fu">}</span></span>
|
||||
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span><span class="dt">"text"</span><span class="fu">:</span> <span class="st">"second row"</span><span class="fu">}</span></span>
|
||||
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="er">...</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
<div class="callout callout-style-default callout-note callout-titled">
|
||||
<div class="callout-header d-flex align-content-center">
|
||||
<div class="callout-icon-container">
|
||||
<i class="callout-icon"></i>
|
||||
</div>
|
||||
<div class="callout-title-container flex-fill">
|
||||
Streaming is recommended for large datasets
|
||||
</div>
|
||||
</div>
|
||||
<div class="callout-body-container callout-body">
|
||||
<p>Axolotl usually loads the entire dataset into memory. This will be challenging for large datasets. Use the following config to enable streaming:</p>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>config.yaml</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb2" data-filename="config.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">pretraining_dataset</span><span class="kw">:</span><span class="co"> # hf path only</span></span>
|
||||
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="co">...</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</main> <!-- /main -->
|
||||
<script id="quarto-html-after-body" type="application/javascript">
|
||||
window.document.addEventListener("DOMContentLoaded", function (event) {
|
||||
const toggleBodyColorMode = (bsSheetEl) => {
|
||||
const mode = bsSheetEl.getAttribute("data-mode");
|
||||
const bodyEl = window.document.querySelector("body");
|
||||
if (mode === "dark") {
|
||||
bodyEl.classList.add("quarto-dark");
|
||||
bodyEl.classList.remove("quarto-light");
|
||||
} else {
|
||||
bodyEl.classList.add("quarto-light");
|
||||
bodyEl.classList.remove("quarto-dark");
|
||||
}
|
||||
}
|
||||
const toggleBodyColorPrimary = () => {
|
||||
const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
|
||||
if (bsSheetEl) {
|
||||
toggleBodyColorMode(bsSheetEl);
|
||||
}
|
||||
}
|
||||
toggleBodyColorPrimary();
|
||||
const icon = "";
|
||||
const anchorJS = new window.AnchorJS();
|
||||
anchorJS.options = {
|
||||
placement: 'right',
|
||||
icon: icon
|
||||
};
|
||||
anchorJS.add('.anchored');
|
||||
const isCodeAnnotation = (el) => {
|
||||
for (const clz of el.classList) {
|
||||
if (clz.startsWith('code-annotation-')) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
const clipboard = new window.ClipboardJS('.code-copy-button', {
|
||||
text: function(trigger) {
|
||||
const codeEl = trigger.previousElementSibling.cloneNode(true);
|
||||
for (const childEl of codeEl.children) {
|
||||
if (isCodeAnnotation(childEl)) {
|
||||
childEl.remove();
|
||||
}
|
||||
}
|
||||
return codeEl.innerText;
|
||||
}
|
||||
});
|
||||
clipboard.on('success', function(e) {
|
||||
// button target
|
||||
const button = e.trigger;
|
||||
// don't keep focus
|
||||
button.blur();
|
||||
// flash "checked"
|
||||
button.classList.add('code-copy-button-checked');
|
||||
var currentTitle = button.getAttribute("title");
|
||||
button.setAttribute("title", "Copied!");
|
||||
let tooltip;
|
||||
if (window.bootstrap) {
|
||||
button.setAttribute("data-bs-toggle", "tooltip");
|
||||
button.setAttribute("data-bs-placement", "left");
|
||||
button.setAttribute("data-bs-title", "Copied!");
|
||||
tooltip = new bootstrap.Tooltip(button,
|
||||
{ trigger: "manual",
|
||||
customClass: "code-copy-button-tooltip",
|
||||
offset: [0, -8]});
|
||||
tooltip.show();
|
||||
}
|
||||
setTimeout(function() {
|
||||
if (tooltip) {
|
||||
tooltip.hide();
|
||||
button.removeAttribute("data-bs-title");
|
||||
button.removeAttribute("data-bs-toggle");
|
||||
button.removeAttribute("data-bs-placement");
|
||||
}
|
||||
button.setAttribute("title", currentTitle);
|
||||
button.classList.remove('code-copy-button-checked');
|
||||
}, 1000);
|
||||
// clear code selection
|
||||
e.clearSelection();
|
||||
});
|
||||
var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
|
||||
var mailtoRegex = new RegExp(/^mailto:/);
|
||||
var filterRegex = new RegExp("https:\/\/OpenAccess-AI-Collective\.github\.io\/axolotl\/");
|
||||
var isInternal = (href) => {
|
||||
return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
|
||||
}
|
||||
// Inspect non-navigation links and adorn them if external
|
||||
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool)');
|
||||
for (var i=0; i<links.length; i++) {
|
||||
const link = links[i];
|
||||
if (!isInternal(link.href)) {
|
||||
// undo the damage that might have been done by quarto-nav.js in the case of
|
||||
// links that we want to consider external
|
||||
if (link.dataset.originalHref !== undefined) {
|
||||
link.href = link.dataset.originalHref;
|
||||
}
|
||||
}
|
||||
}
|
||||
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
|
||||
const config = {
|
||||
allowHTML: true,
|
||||
maxWidth: 500,
|
||||
delay: 100,
|
||||
arrow: false,
|
||||
appendTo: function(el) {
|
||||
return el.parentElement;
|
||||
},
|
||||
interactive: true,
|
||||
interactiveBorder: 10,
|
||||
theme: 'quarto',
|
||||
placement: 'bottom-start',
|
||||
};
|
||||
if (contentFn) {
|
||||
config.content = contentFn;
|
||||
}
|
||||
if (onTriggerFn) {
|
||||
config.onTrigger = onTriggerFn;
|
||||
}
|
||||
if (onUntriggerFn) {
|
||||
config.onUntrigger = onUntriggerFn;
|
||||
}
|
||||
window.tippy(el, config);
|
||||
}
|
||||
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
|
||||
for (var i=0; i<noterefs.length; i++) {
|
||||
const ref = noterefs[i];
|
||||
tippyHover(ref, function() {
|
||||
// use id or data attribute instead here
|
||||
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
|
||||
try { href = new URL(href).hash; } catch {}
|
||||
const id = href.replace(/^#\/?/, "");
|
||||
const note = window.document.getElementById(id);
|
||||
if (note) {
|
||||
return note.innerHTML;
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
});
|
||||
}
|
||||
const xrefs = window.document.querySelectorAll('a.quarto-xref');
|
||||
const processXRef = (id, note) => {
|
||||
// Strip column container classes
|
||||
const stripColumnClz = (el) => {
|
||||
el.classList.remove("page-full", "page-columns");
|
||||
if (el.children) {
|
||||
for (const child of el.children) {
|
||||
stripColumnClz(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
stripColumnClz(note)
|
||||
if (id === null || id.startsWith('sec-')) {
|
||||
// Special case sections, only their first couple elements
|
||||
const container = document.createElement("div");
|
||||
if (note.children && note.children.length > 2) {
|
||||
container.appendChild(note.children[0].cloneNode(true));
|
||||
for (let i = 1; i < note.children.length; i++) {
|
||||
const child = note.children[i];
|
||||
if (child.tagName === "P" && child.innerText === "") {
|
||||
continue;
|
||||
} else {
|
||||
container.appendChild(child.cloneNode(true));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(container);
|
||||
}
|
||||
return container.innerHTML
|
||||
} else {
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(note);
|
||||
}
|
||||
return note.innerHTML;
|
||||
}
|
||||
} else {
|
||||
// Remove any anchor links if they are present
|
||||
const anchorLink = note.querySelector('a.anchorjs-link');
|
||||
if (anchorLink) {
|
||||
anchorLink.remove();
|
||||
}
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(note);
|
||||
}
|
||||
// TODO in 1.5, we should make sure this works without a callout special case
|
||||
if (note.classList.contains("callout")) {
|
||||
return note.outerHTML;
|
||||
} else {
|
||||
return note.innerHTML;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (var i=0; i<xrefs.length; i++) {
|
||||
const xref = xrefs[i];
|
||||
tippyHover(xref, undefined, function(instance) {
|
||||
instance.disable();
|
||||
let url = xref.getAttribute('href');
|
||||
let hash = undefined;
|
||||
if (url.startsWith('#')) {
|
||||
hash = url;
|
||||
} else {
|
||||
try { hash = new URL(url).hash; } catch {}
|
||||
}
|
||||
if (hash) {
|
||||
const id = hash.replace(/^#\/?/, "");
|
||||
const note = window.document.getElementById(id);
|
||||
if (note !== null) {
|
||||
try {
|
||||
const html = processXRef(id, note.cloneNode(true));
|
||||
instance.setContent(html);
|
||||
} finally {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
}
|
||||
} else {
|
||||
// See if we can fetch this
|
||||
fetch(url.split('#')[0])
|
||||
.then(res => res.text())
|
||||
.then(html => {
|
||||
const parser = new DOMParser();
|
||||
const htmlDoc = parser.parseFromString(html, "text/html");
|
||||
const note = htmlDoc.getElementById(id);
|
||||
if (note !== null) {
|
||||
const html = processXRef(id, note);
|
||||
instance.setContent(html);
|
||||
}
|
||||
}).finally(() => {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// See if we can fetch a full url (with no hash to target)
|
||||
// This is a special case and we should probably do some content thinning / targeting
|
||||
fetch(url)
|
||||
.then(res => res.text())
|
||||
.then(html => {
|
||||
const parser = new DOMParser();
|
||||
const htmlDoc = parser.parseFromString(html, "text/html");
|
||||
const note = htmlDoc.querySelector('main.content');
|
||||
if (note !== null) {
|
||||
// This should only happen for chapter cross references
|
||||
// (since there is no id in the URL)
|
||||
// remove the first header
|
||||
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
|
||||
note.children[0].remove();
|
||||
}
|
||||
const html = processXRef(null, note);
|
||||
instance.setContent(html);
|
||||
}
|
||||
}).finally(() => {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
});
|
||||
}
|
||||
}, function(instance) {
|
||||
});
|
||||
}
|
||||
let selectedAnnoteEl;
|
||||
const selectorForAnnotation = ( cell, annotation) => {
|
||||
let cellAttr = 'data-code-cell="' + cell + '"';
|
||||
let lineAttr = 'data-code-annotation="' + annotation + '"';
|
||||
const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
|
||||
return selector;
|
||||
}
|
||||
const selectCodeLines = (annoteEl) => {
|
||||
const doc = window.document;
|
||||
const targetCell = annoteEl.getAttribute("data-target-cell");
|
||||
const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
|
||||
const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
|
||||
const lines = annoteSpan.getAttribute("data-code-lines").split(",");
|
||||
const lineIds = lines.map((line) => {
|
||||
return targetCell + "-" + line;
|
||||
})
|
||||
let top = null;
|
||||
let height = null;
|
||||
let parent = null;
|
||||
if (lineIds.length > 0) {
|
||||
//compute the position of the single el (top and bottom and make a div)
|
||||
const el = window.document.getElementById(lineIds[0]);
|
||||
top = el.offsetTop;
|
||||
height = el.offsetHeight;
|
||||
parent = el.parentElement.parentElement;
|
||||
if (lineIds.length > 1) {
|
||||
const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
|
||||
const bottom = lastEl.offsetTop + lastEl.offsetHeight;
|
||||
height = bottom - top;
|
||||
}
|
||||
if (top !== null && height !== null && parent !== null) {
|
||||
// cook up a div (if necessary) and position it
|
||||
let div = window.document.getElementById("code-annotation-line-highlight");
|
||||
if (div === null) {
|
||||
div = window.document.createElement("div");
|
||||
div.setAttribute("id", "code-annotation-line-highlight");
|
||||
div.style.position = 'absolute';
|
||||
parent.appendChild(div);
|
||||
}
|
||||
div.style.top = top - 2 + "px";
|
||||
div.style.height = height + 4 + "px";
|
||||
div.style.left = 0;
|
||||
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
|
||||
if (gutterDiv === null) {
|
||||
gutterDiv = window.document.createElement("div");
|
||||
gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
|
||||
gutterDiv.style.position = 'absolute';
|
||||
const codeCell = window.document.getElementById(targetCell);
|
||||
const gutter = codeCell.querySelector('.code-annotation-gutter');
|
||||
gutter.appendChild(gutterDiv);
|
||||
}
|
||||
gutterDiv.style.top = top - 2 + "px";
|
||||
gutterDiv.style.height = height + 4 + "px";
|
||||
}
|
||||
selectedAnnoteEl = annoteEl;
|
||||
}
|
||||
};
|
||||
const unselectCodeLines = () => {
|
||||
const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
|
||||
elementsIds.forEach((elId) => {
|
||||
const div = window.document.getElementById(elId);
|
||||
if (div) {
|
||||
div.remove();
|
||||
}
|
||||
});
|
||||
selectedAnnoteEl = undefined;
|
||||
};
|
||||
// Handle positioning of the toggle
|
||||
window.addEventListener(
|
||||
"resize",
|
||||
throttle(() => {
|
||||
elRect = undefined;
|
||||
if (selectedAnnoteEl) {
|
||||
selectCodeLines(selectedAnnoteEl);
|
||||
}
|
||||
}, 10)
|
||||
);
|
||||
function throttle(fn, ms) {
|
||||
let throttle = false;
|
||||
let timer;
|
||||
return (...args) => {
|
||||
if(!throttle) { // first call gets through
|
||||
fn.apply(this, args);
|
||||
throttle = true;
|
||||
} else { // all the others get throttled
|
||||
if(timer) clearTimeout(timer); // cancel #2
|
||||
timer = setTimeout(() => {
|
||||
fn.apply(this, args);
|
||||
timer = throttle = false;
|
||||
}, ms);
|
||||
}
|
||||
};
|
||||
}
|
||||
// Attach click handler to the DT
|
||||
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
|
||||
for (const annoteDlNode of annoteDls) {
|
||||
annoteDlNode.addEventListener('click', (event) => {
|
||||
const clickedEl = event.target;
|
||||
if (clickedEl !== selectedAnnoteEl) {
|
||||
unselectCodeLines();
|
||||
const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
|
||||
if (activeEl) {
|
||||
activeEl.classList.remove('code-annotation-active');
|
||||
}
|
||||
selectCodeLines(clickedEl);
|
||||
clickedEl.classList.add('code-annotation-active');
|
||||
} else {
|
||||
// Unselect the line
|
||||
unselectCodeLines();
|
||||
clickedEl.classList.remove('code-annotation-active');
|
||||
}
|
||||
});
|
||||
}
|
||||
const findCites = (el) => {
|
||||
const parentEl = el.parentElement;
|
||||
if (parentEl) {
|
||||
const cites = parentEl.dataset.cites;
|
||||
if (cites) {
|
||||
return {
|
||||
el,
|
||||
cites: cites.split(' ')
|
||||
};
|
||||
} else {
|
||||
return findCites(el.parentElement)
|
||||
}
|
||||
} else {
|
||||
return undefined;
|
||||
}
|
||||
};
|
||||
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
|
||||
for (var i=0; i<bibliorefs.length; i++) {
|
||||
const ref = bibliorefs[i];
|
||||
const citeInfo = findCites(ref);
|
||||
if (citeInfo) {
|
||||
tippyHover(citeInfo.el, function() {
|
||||
var popup = window.document.createElement('div');
|
||||
citeInfo.cites.forEach(function(cite) {
|
||||
var citeDiv = window.document.createElement('div');
|
||||
citeDiv.classList.add('hanging-indent');
|
||||
citeDiv.classList.add('csl-entry');
|
||||
var biblioDiv = window.document.getElementById('ref-' + cite);
|
||||
if (biblioDiv) {
|
||||
citeDiv.innerHTML = biblioDiv.innerHTML;
|
||||
}
|
||||
popup.appendChild(citeDiv);
|
||||
});
|
||||
return popup.innerHTML;
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>
|
||||
</div> <!-- /content -->
|
||||
|
||||
|
||||
|
||||
|
||||
</body></html>
|
||||
694
docs/dataset-formats/template_free.html
Normal file
694
docs/dataset-formats/template_free.html
Normal file
@@ -0,0 +1,694 @@
|
||||
<!DOCTYPE html>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
|
||||
|
||||
<meta charset="utf-8">
|
||||
<meta name="generator" content="quarto-1.4.552">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
|
||||
|
||||
<meta name="description" content="Construct prompts without a template.">
|
||||
|
||||
<title>Axolotl - Template-Free</title>
|
||||
<style>
|
||||
code{white-space: pre-wrap;}
|
||||
span.smallcaps{font-variant: small-caps;}
|
||||
div.columns{display: flex; gap: min(4vw, 1.5em);}
|
||||
div.column{flex: auto; overflow-x: auto;}
|
||||
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
|
||||
ul.task-list{list-style: none;}
|
||||
ul.task-list li input[type="checkbox"] {
|
||||
width: 0.8em;
|
||||
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
|
||||
vertical-align: middle;
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
<script src="../../site_libs/quarto-nav/quarto-nav.js"></script>
|
||||
<script src="../../site_libs/clipboard/clipboard.min.js"></script>
|
||||
<script src="../../site_libs/quarto-search/autocomplete.umd.js"></script>
|
||||
<script src="../../site_libs/quarto-search/fuse.min.js"></script>
|
||||
<script src="../../site_libs/quarto-search/quarto-search.js"></script>
|
||||
<meta name="quarto:offset" content="../../">
|
||||
<link href="../../favicon.jpg" rel="icon" type="image/jpeg">
|
||||
<script src="../../site_libs/quarto-html/quarto.js"></script>
|
||||
<script src="../../site_libs/quarto-html/popper.min.js"></script>
|
||||
<script src="../../site_libs/quarto-html/tippy.umd.min.js"></script>
|
||||
<script src="../../site_libs/quarto-html/anchor.min.js"></script>
|
||||
<link href="../../site_libs/quarto-html/tippy.css" rel="stylesheet">
|
||||
<link href="../../site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<script src="../../site_libs/bootstrap/bootstrap.min.js"></script>
|
||||
<link href="../../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
|
||||
<link href="../../site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
|
||||
<script id="quarto-search-options" type="application/json">{
|
||||
"location": "navbar",
|
||||
"copy-button": false,
|
||||
"collapse-after": 3,
|
||||
"panel-placement": "end",
|
||||
"type": "overlay",
|
||||
"limit": 50,
|
||||
"keyboard-shortcut": [
|
||||
"f",
|
||||
"/",
|
||||
"s"
|
||||
],
|
||||
"show-item-context": false,
|
||||
"language": {
|
||||
"search-no-results-text": "No results",
|
||||
"search-matching-documents-text": "matching documents",
|
||||
"search-copy-link-title": "Copy link to search",
|
||||
"search-hide-matches-text": "Hide additional matches",
|
||||
"search-more-match-text": "more match in this document",
|
||||
"search-more-matches-text": "more matches in this document",
|
||||
"search-clear-button-title": "Clear",
|
||||
"search-text-placeholder": "",
|
||||
"search-detached-cancel-button-title": "Cancel",
|
||||
"search-submit-button-title": "Submit",
|
||||
"search-label": "Search"
|
||||
}
|
||||
}</script>
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../styles.css">
|
||||
</head>
|
||||
|
||||
<body class="nav-sidebar docked nav-fixed">
|
||||
|
||||
<div id="quarto-search-results"></div>
|
||||
<header id="quarto-header" class="headroom fixed-top">
|
||||
<nav class="navbar navbar-expand " data-bs-theme="dark">
|
||||
<div class="navbar-container container-fluid">
|
||||
<div class="navbar-brand-container mx-auto">
|
||||
<a class="navbar-brand" href="../../index.html">
|
||||
<span class="navbar-title">Axolotl</span>
|
||||
</a>
|
||||
</div>
|
||||
<div class="quarto-navbar-tools tools-wide tools-end">
|
||||
<a href="https://twitter.com/axolotl_ai" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-twitter"></i></a>
|
||||
<a href="https://github.com/OpenAccess-AI-Collective/axolotl/" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-github"></i></a>
|
||||
<a href="https://discord.gg/7m9sfhzaf3" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-discord"></i></a>
|
||||
</div>
|
||||
<div id="quarto-search" class="" title="Search"></div>
|
||||
</div> <!-- /container-fluid -->
|
||||
</nav>
|
||||
<nav class="quarto-secondary-nav">
|
||||
<div class="container-fluid d-flex">
|
||||
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
<i class="bi bi-layout-text-sidebar-reverse"></i>
|
||||
</button>
|
||||
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../docs/dataset-formats/index.html">Dataset Formats</a></li><li class="breadcrumb-item"><a href="../../docs/dataset-formats/template_free.html">Template-Free</a></li></ol></nav>
|
||||
<a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
</a>
|
||||
</div>
|
||||
</nav>
|
||||
</header>
|
||||
<!-- content -->
|
||||
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
|
||||
<!-- sidebar -->
|
||||
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto">
|
||||
<div class="sidebar-menu-container">
|
||||
<ul class="list-unstyled mt-1">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Home</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true">
|
||||
<span class="menu-text">How-To Guides</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/debugging.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Debugging</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/multipack.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multipack (Sample Packing)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-free prompt construction</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/rlhf.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">RLHF (Beta)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/nccl.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">NCCL</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/mac.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Mac M-series</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/multi-node.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multi Node</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link active">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/faq.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FAQ</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
|
||||
<!-- margin-sidebar -->
|
||||
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
|
||||
|
||||
</div>
|
||||
<!-- main -->
|
||||
<main class="content" id="quarto-document-content">
|
||||
|
||||
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../docs/dataset-formats/index.html">Dataset Formats</a></li><li class="breadcrumb-item"><a href="../../docs/dataset-formats/template_free.html">Template-Free</a></li></ol></nav>
|
||||
<div class="quarto-title">
|
||||
<h1 class="title">Template-Free</h1>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class="description">
|
||||
Construct prompts without a template.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="quarto-title-meta">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</header>
|
||||
|
||||
|
||||
<p>See <a href="../../docs/input_output.html">these docs</a>.</p>
|
||||
|
||||
|
||||
|
||||
</main> <!-- /main -->
|
||||
<script id="quarto-html-after-body" type="application/javascript">
|
||||
window.document.addEventListener("DOMContentLoaded", function (event) {
|
||||
const toggleBodyColorMode = (bsSheetEl) => {
|
||||
const mode = bsSheetEl.getAttribute("data-mode");
|
||||
const bodyEl = window.document.querySelector("body");
|
||||
if (mode === "dark") {
|
||||
bodyEl.classList.add("quarto-dark");
|
||||
bodyEl.classList.remove("quarto-light");
|
||||
} else {
|
||||
bodyEl.classList.add("quarto-light");
|
||||
bodyEl.classList.remove("quarto-dark");
|
||||
}
|
||||
}
|
||||
const toggleBodyColorPrimary = () => {
|
||||
const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
|
||||
if (bsSheetEl) {
|
||||
toggleBodyColorMode(bsSheetEl);
|
||||
}
|
||||
}
|
||||
toggleBodyColorPrimary();
|
||||
const icon = "";
|
||||
const anchorJS = new window.AnchorJS();
|
||||
anchorJS.options = {
|
||||
placement: 'right',
|
||||
icon: icon
|
||||
};
|
||||
anchorJS.add('.anchored');
|
||||
const isCodeAnnotation = (el) => {
|
||||
for (const clz of el.classList) {
|
||||
if (clz.startsWith('code-annotation-')) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
const clipboard = new window.ClipboardJS('.code-copy-button', {
|
||||
text: function(trigger) {
|
||||
const codeEl = trigger.previousElementSibling.cloneNode(true);
|
||||
for (const childEl of codeEl.children) {
|
||||
if (isCodeAnnotation(childEl)) {
|
||||
childEl.remove();
|
||||
}
|
||||
}
|
||||
return codeEl.innerText;
|
||||
}
|
||||
});
|
||||
clipboard.on('success', function(e) {
|
||||
// button target
|
||||
const button = e.trigger;
|
||||
// don't keep focus
|
||||
button.blur();
|
||||
// flash "checked"
|
||||
button.classList.add('code-copy-button-checked');
|
||||
var currentTitle = button.getAttribute("title");
|
||||
button.setAttribute("title", "Copied!");
|
||||
let tooltip;
|
||||
if (window.bootstrap) {
|
||||
button.setAttribute("data-bs-toggle", "tooltip");
|
||||
button.setAttribute("data-bs-placement", "left");
|
||||
button.setAttribute("data-bs-title", "Copied!");
|
||||
tooltip = new bootstrap.Tooltip(button,
|
||||
{ trigger: "manual",
|
||||
customClass: "code-copy-button-tooltip",
|
||||
offset: [0, -8]});
|
||||
tooltip.show();
|
||||
}
|
||||
setTimeout(function() {
|
||||
if (tooltip) {
|
||||
tooltip.hide();
|
||||
button.removeAttribute("data-bs-title");
|
||||
button.removeAttribute("data-bs-toggle");
|
||||
button.removeAttribute("data-bs-placement");
|
||||
}
|
||||
button.setAttribute("title", currentTitle);
|
||||
button.classList.remove('code-copy-button-checked');
|
||||
}, 1000);
|
||||
// clear code selection
|
||||
e.clearSelection();
|
||||
});
|
||||
var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
|
||||
var mailtoRegex = new RegExp(/^mailto:/);
|
||||
var filterRegex = new RegExp("https:\/\/OpenAccess-AI-Collective\.github\.io\/axolotl\/");
|
||||
var isInternal = (href) => {
|
||||
return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
|
||||
}
|
||||
// Inspect non-navigation links and adorn them if external
|
||||
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool)');
|
||||
for (var i=0; i<links.length; i++) {
|
||||
const link = links[i];
|
||||
if (!isInternal(link.href)) {
|
||||
// undo the damage that might have been done by quarto-nav.js in the case of
|
||||
// links that we want to consider external
|
||||
if (link.dataset.originalHref !== undefined) {
|
||||
link.href = link.dataset.originalHref;
|
||||
}
|
||||
}
|
||||
}
|
||||
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
|
||||
const config = {
|
||||
allowHTML: true,
|
||||
maxWidth: 500,
|
||||
delay: 100,
|
||||
arrow: false,
|
||||
appendTo: function(el) {
|
||||
return el.parentElement;
|
||||
},
|
||||
interactive: true,
|
||||
interactiveBorder: 10,
|
||||
theme: 'quarto',
|
||||
placement: 'bottom-start',
|
||||
};
|
||||
if (contentFn) {
|
||||
config.content = contentFn;
|
||||
}
|
||||
if (onTriggerFn) {
|
||||
config.onTrigger = onTriggerFn;
|
||||
}
|
||||
if (onUntriggerFn) {
|
||||
config.onUntrigger = onUntriggerFn;
|
||||
}
|
||||
window.tippy(el, config);
|
||||
}
|
||||
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
|
||||
for (var i=0; i<noterefs.length; i++) {
|
||||
const ref = noterefs[i];
|
||||
tippyHover(ref, function() {
|
||||
// use id or data attribute instead here
|
||||
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
|
||||
try { href = new URL(href).hash; } catch {}
|
||||
const id = href.replace(/^#\/?/, "");
|
||||
const note = window.document.getElementById(id);
|
||||
if (note) {
|
||||
return note.innerHTML;
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
});
|
||||
}
|
||||
const xrefs = window.document.querySelectorAll('a.quarto-xref');
|
||||
const processXRef = (id, note) => {
|
||||
// Strip column container classes
|
||||
const stripColumnClz = (el) => {
|
||||
el.classList.remove("page-full", "page-columns");
|
||||
if (el.children) {
|
||||
for (const child of el.children) {
|
||||
stripColumnClz(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
stripColumnClz(note)
|
||||
if (id === null || id.startsWith('sec-')) {
|
||||
// Special case sections, only their first couple elements
|
||||
const container = document.createElement("div");
|
||||
if (note.children && note.children.length > 2) {
|
||||
container.appendChild(note.children[0].cloneNode(true));
|
||||
for (let i = 1; i < note.children.length; i++) {
|
||||
const child = note.children[i];
|
||||
if (child.tagName === "P" && child.innerText === "") {
|
||||
continue;
|
||||
} else {
|
||||
container.appendChild(child.cloneNode(true));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(container);
|
||||
}
|
||||
return container.innerHTML
|
||||
} else {
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(note);
|
||||
}
|
||||
return note.innerHTML;
|
||||
}
|
||||
} else {
|
||||
// Remove any anchor links if they are present
|
||||
const anchorLink = note.querySelector('a.anchorjs-link');
|
||||
if (anchorLink) {
|
||||
anchorLink.remove();
|
||||
}
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(note);
|
||||
}
|
||||
// TODO in 1.5, we should make sure this works without a callout special case
|
||||
if (note.classList.contains("callout")) {
|
||||
return note.outerHTML;
|
||||
} else {
|
||||
return note.innerHTML;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (var i=0; i<xrefs.length; i++) {
|
||||
const xref = xrefs[i];
|
||||
tippyHover(xref, undefined, function(instance) {
|
||||
instance.disable();
|
||||
let url = xref.getAttribute('href');
|
||||
let hash = undefined;
|
||||
if (url.startsWith('#')) {
|
||||
hash = url;
|
||||
} else {
|
||||
try { hash = new URL(url).hash; } catch {}
|
||||
}
|
||||
if (hash) {
|
||||
const id = hash.replace(/^#\/?/, "");
|
||||
const note = window.document.getElementById(id);
|
||||
if (note !== null) {
|
||||
try {
|
||||
const html = processXRef(id, note.cloneNode(true));
|
||||
instance.setContent(html);
|
||||
} finally {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
}
|
||||
} else {
|
||||
// See if we can fetch this
|
||||
fetch(url.split('#')[0])
|
||||
.then(res => res.text())
|
||||
.then(html => {
|
||||
const parser = new DOMParser();
|
||||
const htmlDoc = parser.parseFromString(html, "text/html");
|
||||
const note = htmlDoc.getElementById(id);
|
||||
if (note !== null) {
|
||||
const html = processXRef(id, note);
|
||||
instance.setContent(html);
|
||||
}
|
||||
}).finally(() => {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// See if we can fetch a full url (with no hash to target)
|
||||
// This is a special case and we should probably do some content thinning / targeting
|
||||
fetch(url)
|
||||
.then(res => res.text())
|
||||
.then(html => {
|
||||
const parser = new DOMParser();
|
||||
const htmlDoc = parser.parseFromString(html, "text/html");
|
||||
const note = htmlDoc.querySelector('main.content');
|
||||
if (note !== null) {
|
||||
// This should only happen for chapter cross references
|
||||
// (since there is no id in the URL)
|
||||
// remove the first header
|
||||
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
|
||||
note.children[0].remove();
|
||||
}
|
||||
const html = processXRef(null, note);
|
||||
instance.setContent(html);
|
||||
}
|
||||
}).finally(() => {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
});
|
||||
}
|
||||
}, function(instance) {
|
||||
});
|
||||
}
|
||||
let selectedAnnoteEl;
|
||||
const selectorForAnnotation = ( cell, annotation) => {
|
||||
let cellAttr = 'data-code-cell="' + cell + '"';
|
||||
let lineAttr = 'data-code-annotation="' + annotation + '"';
|
||||
const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
|
||||
return selector;
|
||||
}
|
||||
const selectCodeLines = (annoteEl) => {
|
||||
const doc = window.document;
|
||||
const targetCell = annoteEl.getAttribute("data-target-cell");
|
||||
const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
|
||||
const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
|
||||
const lines = annoteSpan.getAttribute("data-code-lines").split(",");
|
||||
const lineIds = lines.map((line) => {
|
||||
return targetCell + "-" + line;
|
||||
})
|
||||
let top = null;
|
||||
let height = null;
|
||||
let parent = null;
|
||||
if (lineIds.length > 0) {
|
||||
//compute the position of the single el (top and bottom and make a div)
|
||||
const el = window.document.getElementById(lineIds[0]);
|
||||
top = el.offsetTop;
|
||||
height = el.offsetHeight;
|
||||
parent = el.parentElement.parentElement;
|
||||
if (lineIds.length > 1) {
|
||||
const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
|
||||
const bottom = lastEl.offsetTop + lastEl.offsetHeight;
|
||||
height = bottom - top;
|
||||
}
|
||||
if (top !== null && height !== null && parent !== null) {
|
||||
// cook up a div (if necessary) and position it
|
||||
let div = window.document.getElementById("code-annotation-line-highlight");
|
||||
if (div === null) {
|
||||
div = window.document.createElement("div");
|
||||
div.setAttribute("id", "code-annotation-line-highlight");
|
||||
div.style.position = 'absolute';
|
||||
parent.appendChild(div);
|
||||
}
|
||||
div.style.top = top - 2 + "px";
|
||||
div.style.height = height + 4 + "px";
|
||||
div.style.left = 0;
|
||||
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
|
||||
if (gutterDiv === null) {
|
||||
gutterDiv = window.document.createElement("div");
|
||||
gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
|
||||
gutterDiv.style.position = 'absolute';
|
||||
const codeCell = window.document.getElementById(targetCell);
|
||||
const gutter = codeCell.querySelector('.code-annotation-gutter');
|
||||
gutter.appendChild(gutterDiv);
|
||||
}
|
||||
gutterDiv.style.top = top - 2 + "px";
|
||||
gutterDiv.style.height = height + 4 + "px";
|
||||
}
|
||||
selectedAnnoteEl = annoteEl;
|
||||
}
|
||||
};
|
||||
const unselectCodeLines = () => {
|
||||
const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
|
||||
elementsIds.forEach((elId) => {
|
||||
const div = window.document.getElementById(elId);
|
||||
if (div) {
|
||||
div.remove();
|
||||
}
|
||||
});
|
||||
selectedAnnoteEl = undefined;
|
||||
};
|
||||
// Handle positioning of the toggle
|
||||
window.addEventListener(
|
||||
"resize",
|
||||
throttle(() => {
|
||||
elRect = undefined;
|
||||
if (selectedAnnoteEl) {
|
||||
selectCodeLines(selectedAnnoteEl);
|
||||
}
|
||||
}, 10)
|
||||
);
|
||||
function throttle(fn, ms) {
|
||||
let throttle = false;
|
||||
let timer;
|
||||
return (...args) => {
|
||||
if(!throttle) { // first call gets through
|
||||
fn.apply(this, args);
|
||||
throttle = true;
|
||||
} else { // all the others get throttled
|
||||
if(timer) clearTimeout(timer); // cancel #2
|
||||
timer = setTimeout(() => {
|
||||
fn.apply(this, args);
|
||||
timer = throttle = false;
|
||||
}, ms);
|
||||
}
|
||||
};
|
||||
}
|
||||
// Attach click handler to the DT
|
||||
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
|
||||
for (const annoteDlNode of annoteDls) {
|
||||
annoteDlNode.addEventListener('click', (event) => {
|
||||
const clickedEl = event.target;
|
||||
if (clickedEl !== selectedAnnoteEl) {
|
||||
unselectCodeLines();
|
||||
const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
|
||||
if (activeEl) {
|
||||
activeEl.classList.remove('code-annotation-active');
|
||||
}
|
||||
selectCodeLines(clickedEl);
|
||||
clickedEl.classList.add('code-annotation-active');
|
||||
} else {
|
||||
// Unselect the line
|
||||
unselectCodeLines();
|
||||
clickedEl.classList.remove('code-annotation-active');
|
||||
}
|
||||
});
|
||||
}
|
||||
const findCites = (el) => {
|
||||
const parentEl = el.parentElement;
|
||||
if (parentEl) {
|
||||
const cites = parentEl.dataset.cites;
|
||||
if (cites) {
|
||||
return {
|
||||
el,
|
||||
cites: cites.split(' ')
|
||||
};
|
||||
} else {
|
||||
return findCites(el.parentElement)
|
||||
}
|
||||
} else {
|
||||
return undefined;
|
||||
}
|
||||
};
|
||||
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
|
||||
for (var i=0; i<bibliorefs.length; i++) {
|
||||
const ref = bibliorefs[i];
|
||||
const citeInfo = findCites(ref);
|
||||
if (citeInfo) {
|
||||
tippyHover(citeInfo.el, function() {
|
||||
var popup = window.document.createElement('div');
|
||||
citeInfo.cites.forEach(function(cite) {
|
||||
var citeDiv = window.document.createElement('div');
|
||||
citeDiv.classList.add('hanging-indent');
|
||||
citeDiv.classList.add('csl-entry');
|
||||
var biblioDiv = window.document.getElementById('ref-' + cite);
|
||||
if (biblioDiv) {
|
||||
citeDiv.innerHTML = biblioDiv.innerHTML;
|
||||
}
|
||||
popup.appendChild(citeDiv);
|
||||
});
|
||||
return popup.innerHTML;
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>
|
||||
</div> <!-- /content -->
|
||||
|
||||
|
||||
|
||||
|
||||
</body></html>
|
||||
737
docs/dataset-formats/tokenized.html
Normal file
737
docs/dataset-formats/tokenized.html
Normal file
@@ -0,0 +1,737 @@
|
||||
<!DOCTYPE html>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
|
||||
|
||||
<meta charset="utf-8">
|
||||
<meta name="generator" content="quarto-1.4.552">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
|
||||
|
||||
<meta name="description" content="How to use a custom pre-tokenized dataset.">
|
||||
|
||||
<title>Axolotl - Custom Pre-Tokenized Dataset</title>
|
||||
<style>
|
||||
code{white-space: pre-wrap;}
|
||||
span.smallcaps{font-variant: small-caps;}
|
||||
div.columns{display: flex; gap: min(4vw, 1.5em);}
|
||||
div.column{flex: auto; overflow-x: auto;}
|
||||
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
|
||||
ul.task-list{list-style: none;}
|
||||
ul.task-list li input[type="checkbox"] {
|
||||
width: 0.8em;
|
||||
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
|
||||
vertical-align: middle;
|
||||
}
|
||||
/* CSS for syntax highlighting */
|
||||
pre > code.sourceCode { white-space: pre; position: relative; }
|
||||
pre > code.sourceCode > span { line-height: 1.25; }
|
||||
pre > code.sourceCode > span:empty { height: 1.2em; }
|
||||
.sourceCode { overflow: visible; }
|
||||
code.sourceCode > span { color: inherit; text-decoration: inherit; }
|
||||
div.sourceCode { margin: 1em 0; }
|
||||
pre.sourceCode { margin: 0; }
|
||||
@media screen {
|
||||
div.sourceCode { overflow: auto; }
|
||||
}
|
||||
@media print {
|
||||
pre > code.sourceCode { white-space: pre-wrap; }
|
||||
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
|
||||
}
|
||||
pre.numberSource code
|
||||
{ counter-reset: source-line 0; }
|
||||
pre.numberSource code > span
|
||||
{ position: relative; left: -4em; counter-increment: source-line; }
|
||||
pre.numberSource code > span > a:first-child::before
|
||||
{ content: counter(source-line);
|
||||
position: relative; left: -1em; text-align: right; vertical-align: baseline;
|
||||
border: none; display: inline-block;
|
||||
-webkit-touch-callout: none; -webkit-user-select: none;
|
||||
-khtml-user-select: none; -moz-user-select: none;
|
||||
-ms-user-select: none; user-select: none;
|
||||
padding: 0 4px; width: 4em;
|
||||
}
|
||||
pre.numberSource { margin-left: 3em; padding-left: 4px; }
|
||||
div.sourceCode
|
||||
{ }
|
||||
@media screen {
|
||||
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
<script src="../../site_libs/quarto-nav/quarto-nav.js"></script>
|
||||
<script src="../../site_libs/clipboard/clipboard.min.js"></script>
|
||||
<script src="../../site_libs/quarto-search/autocomplete.umd.js"></script>
|
||||
<script src="../../site_libs/quarto-search/fuse.min.js"></script>
|
||||
<script src="../../site_libs/quarto-search/quarto-search.js"></script>
|
||||
<meta name="quarto:offset" content="../../">
|
||||
<link href="../../favicon.jpg" rel="icon" type="image/jpeg">
|
||||
<script src="../../site_libs/quarto-html/quarto.js"></script>
|
||||
<script src="../../site_libs/quarto-html/popper.min.js"></script>
|
||||
<script src="../../site_libs/quarto-html/tippy.umd.min.js"></script>
|
||||
<script src="../../site_libs/quarto-html/anchor.min.js"></script>
|
||||
<link href="../../site_libs/quarto-html/tippy.css" rel="stylesheet">
|
||||
<link href="../../site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<script src="../../site_libs/bootstrap/bootstrap.min.js"></script>
|
||||
<link href="../../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
|
||||
<link href="../../site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
|
||||
<script id="quarto-search-options" type="application/json">{
|
||||
"location": "navbar",
|
||||
"copy-button": false,
|
||||
"collapse-after": 3,
|
||||
"panel-placement": "end",
|
||||
"type": "overlay",
|
||||
"limit": 50,
|
||||
"keyboard-shortcut": [
|
||||
"f",
|
||||
"/",
|
||||
"s"
|
||||
],
|
||||
"show-item-context": false,
|
||||
"language": {
|
||||
"search-no-results-text": "No results",
|
||||
"search-matching-documents-text": "matching documents",
|
||||
"search-copy-link-title": "Copy link to search",
|
||||
"search-hide-matches-text": "Hide additional matches",
|
||||
"search-more-match-text": "more match in this document",
|
||||
"search-more-matches-text": "more matches in this document",
|
||||
"search-clear-button-title": "Clear",
|
||||
"search-text-placeholder": "",
|
||||
"search-detached-cancel-button-title": "Cancel",
|
||||
"search-submit-button-title": "Submit",
|
||||
"search-label": "Search"
|
||||
}
|
||||
}</script>
|
||||
|
||||
|
||||
<link rel="stylesheet" href="../../styles.css">
|
||||
</head>
|
||||
|
||||
<body class="nav-sidebar docked nav-fixed">
|
||||
|
||||
<div id="quarto-search-results"></div>
|
||||
<header id="quarto-header" class="headroom fixed-top">
|
||||
<nav class="navbar navbar-expand " data-bs-theme="dark">
|
||||
<div class="navbar-container container-fluid">
|
||||
<div class="navbar-brand-container mx-auto">
|
||||
<a class="navbar-brand" href="../../index.html">
|
||||
<span class="navbar-title">Axolotl</span>
|
||||
</a>
|
||||
</div>
|
||||
<div class="quarto-navbar-tools tools-wide tools-end">
|
||||
<a href="https://twitter.com/axolotl_ai" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-twitter"></i></a>
|
||||
<a href="https://github.com/OpenAccess-AI-Collective/axolotl/" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-github"></i></a>
|
||||
<a href="https://discord.gg/7m9sfhzaf3" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-discord"></i></a>
|
||||
</div>
|
||||
<div id="quarto-search" class="" title="Search"></div>
|
||||
</div> <!-- /container-fluid -->
|
||||
</nav>
|
||||
<nav class="quarto-secondary-nav">
|
||||
<div class="container-fluid d-flex">
|
||||
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
<i class="bi bi-layout-text-sidebar-reverse"></i>
|
||||
</button>
|
||||
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../docs/dataset-formats/index.html">Dataset Formats</a></li><li class="breadcrumb-item"><a href="../../docs/dataset-formats/tokenized.html">Custom Pre-Tokenized Dataset</a></li></ol></nav>
|
||||
<a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
</a>
|
||||
</div>
|
||||
</nav>
|
||||
</header>
|
||||
<!-- content -->
|
||||
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
|
||||
<!-- sidebar -->
|
||||
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto">
|
||||
<div class="sidebar-menu-container">
|
||||
<ul class="list-unstyled mt-1">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Home</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true">
|
||||
<span class="menu-text">How-To Guides</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/debugging.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Debugging</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/multipack.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multipack (Sample Packing)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-free prompt construction</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/rlhf.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">RLHF (Beta)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/nccl.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">NCCL</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/mac.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Mac M-series</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/multi-node.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multi Node</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link active">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../../docs/faq.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FAQ</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
|
||||
<!-- margin-sidebar -->
|
||||
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
|
||||
|
||||
</div>
|
||||
<!-- main -->
|
||||
<main class="content" id="quarto-document-content">
|
||||
|
||||
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../docs/dataset-formats/index.html">Dataset Formats</a></li><li class="breadcrumb-item"><a href="../../docs/dataset-formats/tokenized.html">Custom Pre-Tokenized Dataset</a></li></ol></nav>
|
||||
<div class="quarto-title">
|
||||
<h1 class="title">Custom Pre-Tokenized Dataset</h1>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class="description">
|
||||
How to use a custom pre-tokenized dataset.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="quarto-title-meta">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</header>
|
||||
|
||||
|
||||
<ul>
|
||||
<li>Do not pass a <code>type:</code> in your axolotl config.</li>
|
||||
<li>Columns in Dataset must be exactly <code>input_ids</code>, <code>attention_mask</code>, <code>labels</code></li>
|
||||
</ul>
|
||||
<div class="code-with-filename">
|
||||
<div class="code-with-filename-file">
|
||||
<pre><strong>config.yml</strong></pre>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb1" data-filename="config.yml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> ...</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</main> <!-- /main -->
|
||||
<script id="quarto-html-after-body" type="application/javascript">
|
||||
window.document.addEventListener("DOMContentLoaded", function (event) {
|
||||
const toggleBodyColorMode = (bsSheetEl) => {
|
||||
const mode = bsSheetEl.getAttribute("data-mode");
|
||||
const bodyEl = window.document.querySelector("body");
|
||||
if (mode === "dark") {
|
||||
bodyEl.classList.add("quarto-dark");
|
||||
bodyEl.classList.remove("quarto-light");
|
||||
} else {
|
||||
bodyEl.classList.add("quarto-light");
|
||||
bodyEl.classList.remove("quarto-dark");
|
||||
}
|
||||
}
|
||||
const toggleBodyColorPrimary = () => {
|
||||
const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
|
||||
if (bsSheetEl) {
|
||||
toggleBodyColorMode(bsSheetEl);
|
||||
}
|
||||
}
|
||||
toggleBodyColorPrimary();
|
||||
const icon = "";
|
||||
const anchorJS = new window.AnchorJS();
|
||||
anchorJS.options = {
|
||||
placement: 'right',
|
||||
icon: icon
|
||||
};
|
||||
anchorJS.add('.anchored');
|
||||
const isCodeAnnotation = (el) => {
|
||||
for (const clz of el.classList) {
|
||||
if (clz.startsWith('code-annotation-')) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
const clipboard = new window.ClipboardJS('.code-copy-button', {
|
||||
text: function(trigger) {
|
||||
const codeEl = trigger.previousElementSibling.cloneNode(true);
|
||||
for (const childEl of codeEl.children) {
|
||||
if (isCodeAnnotation(childEl)) {
|
||||
childEl.remove();
|
||||
}
|
||||
}
|
||||
return codeEl.innerText;
|
||||
}
|
||||
});
|
||||
clipboard.on('success', function(e) {
|
||||
// button target
|
||||
const button = e.trigger;
|
||||
// don't keep focus
|
||||
button.blur();
|
||||
// flash "checked"
|
||||
button.classList.add('code-copy-button-checked');
|
||||
var currentTitle = button.getAttribute("title");
|
||||
button.setAttribute("title", "Copied!");
|
||||
let tooltip;
|
||||
if (window.bootstrap) {
|
||||
button.setAttribute("data-bs-toggle", "tooltip");
|
||||
button.setAttribute("data-bs-placement", "left");
|
||||
button.setAttribute("data-bs-title", "Copied!");
|
||||
tooltip = new bootstrap.Tooltip(button,
|
||||
{ trigger: "manual",
|
||||
customClass: "code-copy-button-tooltip",
|
||||
offset: [0, -8]});
|
||||
tooltip.show();
|
||||
}
|
||||
setTimeout(function() {
|
||||
if (tooltip) {
|
||||
tooltip.hide();
|
||||
button.removeAttribute("data-bs-title");
|
||||
button.removeAttribute("data-bs-toggle");
|
||||
button.removeAttribute("data-bs-placement");
|
||||
}
|
||||
button.setAttribute("title", currentTitle);
|
||||
button.classList.remove('code-copy-button-checked');
|
||||
}, 1000);
|
||||
// clear code selection
|
||||
e.clearSelection();
|
||||
});
|
||||
var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
|
||||
var mailtoRegex = new RegExp(/^mailto:/);
|
||||
var filterRegex = new RegExp("https:\/\/OpenAccess-AI-Collective\.github\.io\/axolotl\/");
|
||||
var isInternal = (href) => {
|
||||
return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
|
||||
}
|
||||
// Inspect non-navigation links and adorn them if external
|
||||
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool)');
|
||||
for (var i=0; i<links.length; i++) {
|
||||
const link = links[i];
|
||||
if (!isInternal(link.href)) {
|
||||
// undo the damage that might have been done by quarto-nav.js in the case of
|
||||
// links that we want to consider external
|
||||
if (link.dataset.originalHref !== undefined) {
|
||||
link.href = link.dataset.originalHref;
|
||||
}
|
||||
}
|
||||
}
|
||||
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
|
||||
const config = {
|
||||
allowHTML: true,
|
||||
maxWidth: 500,
|
||||
delay: 100,
|
||||
arrow: false,
|
||||
appendTo: function(el) {
|
||||
return el.parentElement;
|
||||
},
|
||||
interactive: true,
|
||||
interactiveBorder: 10,
|
||||
theme: 'quarto',
|
||||
placement: 'bottom-start',
|
||||
};
|
||||
if (contentFn) {
|
||||
config.content = contentFn;
|
||||
}
|
||||
if (onTriggerFn) {
|
||||
config.onTrigger = onTriggerFn;
|
||||
}
|
||||
if (onUntriggerFn) {
|
||||
config.onUntrigger = onUntriggerFn;
|
||||
}
|
||||
window.tippy(el, config);
|
||||
}
|
||||
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
|
||||
for (var i=0; i<noterefs.length; i++) {
|
||||
const ref = noterefs[i];
|
||||
tippyHover(ref, function() {
|
||||
// use id or data attribute instead here
|
||||
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
|
||||
try { href = new URL(href).hash; } catch {}
|
||||
const id = href.replace(/^#\/?/, "");
|
||||
const note = window.document.getElementById(id);
|
||||
if (note) {
|
||||
return note.innerHTML;
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
});
|
||||
}
|
||||
const xrefs = window.document.querySelectorAll('a.quarto-xref');
|
||||
const processXRef = (id, note) => {
|
||||
// Strip column container classes
|
||||
const stripColumnClz = (el) => {
|
||||
el.classList.remove("page-full", "page-columns");
|
||||
if (el.children) {
|
||||
for (const child of el.children) {
|
||||
stripColumnClz(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
stripColumnClz(note)
|
||||
if (id === null || id.startsWith('sec-')) {
|
||||
// Special case sections, only their first couple elements
|
||||
const container = document.createElement("div");
|
||||
if (note.children && note.children.length > 2) {
|
||||
container.appendChild(note.children[0].cloneNode(true));
|
||||
for (let i = 1; i < note.children.length; i++) {
|
||||
const child = note.children[i];
|
||||
if (child.tagName === "P" && child.innerText === "") {
|
||||
continue;
|
||||
} else {
|
||||
container.appendChild(child.cloneNode(true));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(container);
|
||||
}
|
||||
return container.innerHTML
|
||||
} else {
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(note);
|
||||
}
|
||||
return note.innerHTML;
|
||||
}
|
||||
} else {
|
||||
// Remove any anchor links if they are present
|
||||
const anchorLink = note.querySelector('a.anchorjs-link');
|
||||
if (anchorLink) {
|
||||
anchorLink.remove();
|
||||
}
|
||||
if (window.Quarto?.typesetMath) {
|
||||
window.Quarto.typesetMath(note);
|
||||
}
|
||||
// TODO in 1.5, we should make sure this works without a callout special case
|
||||
if (note.classList.contains("callout")) {
|
||||
return note.outerHTML;
|
||||
} else {
|
||||
return note.innerHTML;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (var i=0; i<xrefs.length; i++) {
|
||||
const xref = xrefs[i];
|
||||
tippyHover(xref, undefined, function(instance) {
|
||||
instance.disable();
|
||||
let url = xref.getAttribute('href');
|
||||
let hash = undefined;
|
||||
if (url.startsWith('#')) {
|
||||
hash = url;
|
||||
} else {
|
||||
try { hash = new URL(url).hash; } catch {}
|
||||
}
|
||||
if (hash) {
|
||||
const id = hash.replace(/^#\/?/, "");
|
||||
const note = window.document.getElementById(id);
|
||||
if (note !== null) {
|
||||
try {
|
||||
const html = processXRef(id, note.cloneNode(true));
|
||||
instance.setContent(html);
|
||||
} finally {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
}
|
||||
} else {
|
||||
// See if we can fetch this
|
||||
fetch(url.split('#')[0])
|
||||
.then(res => res.text())
|
||||
.then(html => {
|
||||
const parser = new DOMParser();
|
||||
const htmlDoc = parser.parseFromString(html, "text/html");
|
||||
const note = htmlDoc.getElementById(id);
|
||||
if (note !== null) {
|
||||
const html = processXRef(id, note);
|
||||
instance.setContent(html);
|
||||
}
|
||||
}).finally(() => {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// See if we can fetch a full url (with no hash to target)
|
||||
// This is a special case and we should probably do some content thinning / targeting
|
||||
fetch(url)
|
||||
.then(res => res.text())
|
||||
.then(html => {
|
||||
const parser = new DOMParser();
|
||||
const htmlDoc = parser.parseFromString(html, "text/html");
|
||||
const note = htmlDoc.querySelector('main.content');
|
||||
if (note !== null) {
|
||||
// This should only happen for chapter cross references
|
||||
// (since there is no id in the URL)
|
||||
// remove the first header
|
||||
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
|
||||
note.children[0].remove();
|
||||
}
|
||||
const html = processXRef(null, note);
|
||||
instance.setContent(html);
|
||||
}
|
||||
}).finally(() => {
|
||||
instance.enable();
|
||||
instance.show();
|
||||
});
|
||||
}
|
||||
}, function(instance) {
|
||||
});
|
||||
}
|
||||
let selectedAnnoteEl;
|
||||
const selectorForAnnotation = ( cell, annotation) => {
|
||||
let cellAttr = 'data-code-cell="' + cell + '"';
|
||||
let lineAttr = 'data-code-annotation="' + annotation + '"';
|
||||
const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
|
||||
return selector;
|
||||
}
|
||||
const selectCodeLines = (annoteEl) => {
|
||||
const doc = window.document;
|
||||
const targetCell = annoteEl.getAttribute("data-target-cell");
|
||||
const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
|
||||
const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
|
||||
const lines = annoteSpan.getAttribute("data-code-lines").split(",");
|
||||
const lineIds = lines.map((line) => {
|
||||
return targetCell + "-" + line;
|
||||
})
|
||||
let top = null;
|
||||
let height = null;
|
||||
let parent = null;
|
||||
if (lineIds.length > 0) {
|
||||
//compute the position of the single el (top and bottom and make a div)
|
||||
const el = window.document.getElementById(lineIds[0]);
|
||||
top = el.offsetTop;
|
||||
height = el.offsetHeight;
|
||||
parent = el.parentElement.parentElement;
|
||||
if (lineIds.length > 1) {
|
||||
const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
|
||||
const bottom = lastEl.offsetTop + lastEl.offsetHeight;
|
||||
height = bottom - top;
|
||||
}
|
||||
if (top !== null && height !== null && parent !== null) {
|
||||
// cook up a div (if necessary) and position it
|
||||
let div = window.document.getElementById("code-annotation-line-highlight");
|
||||
if (div === null) {
|
||||
div = window.document.createElement("div");
|
||||
div.setAttribute("id", "code-annotation-line-highlight");
|
||||
div.style.position = 'absolute';
|
||||
parent.appendChild(div);
|
||||
}
|
||||
div.style.top = top - 2 + "px";
|
||||
div.style.height = height + 4 + "px";
|
||||
div.style.left = 0;
|
||||
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
|
||||
if (gutterDiv === null) {
|
||||
gutterDiv = window.document.createElement("div");
|
||||
gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
|
||||
gutterDiv.style.position = 'absolute';
|
||||
const codeCell = window.document.getElementById(targetCell);
|
||||
const gutter = codeCell.querySelector('.code-annotation-gutter');
|
||||
gutter.appendChild(gutterDiv);
|
||||
}
|
||||
gutterDiv.style.top = top - 2 + "px";
|
||||
gutterDiv.style.height = height + 4 + "px";
|
||||
}
|
||||
selectedAnnoteEl = annoteEl;
|
||||
}
|
||||
};
|
||||
const unselectCodeLines = () => {
|
||||
const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
|
||||
elementsIds.forEach((elId) => {
|
||||
const div = window.document.getElementById(elId);
|
||||
if (div) {
|
||||
div.remove();
|
||||
}
|
||||
});
|
||||
selectedAnnoteEl = undefined;
|
||||
};
|
||||
// Handle positioning of the toggle
|
||||
window.addEventListener(
|
||||
"resize",
|
||||
throttle(() => {
|
||||
elRect = undefined;
|
||||
if (selectedAnnoteEl) {
|
||||
selectCodeLines(selectedAnnoteEl);
|
||||
}
|
||||
}, 10)
|
||||
);
|
||||
function throttle(fn, ms) {
|
||||
let throttle = false;
|
||||
let timer;
|
||||
return (...args) => {
|
||||
if(!throttle) { // first call gets through
|
||||
fn.apply(this, args);
|
||||
throttle = true;
|
||||
} else { // all the others get throttled
|
||||
if(timer) clearTimeout(timer); // cancel #2
|
||||
timer = setTimeout(() => {
|
||||
fn.apply(this, args);
|
||||
timer = throttle = false;
|
||||
}, ms);
|
||||
}
|
||||
};
|
||||
}
|
||||
// Attach click handler to the DT
|
||||
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
|
||||
for (const annoteDlNode of annoteDls) {
|
||||
annoteDlNode.addEventListener('click', (event) => {
|
||||
const clickedEl = event.target;
|
||||
if (clickedEl !== selectedAnnoteEl) {
|
||||
unselectCodeLines();
|
||||
const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
|
||||
if (activeEl) {
|
||||
activeEl.classList.remove('code-annotation-active');
|
||||
}
|
||||
selectCodeLines(clickedEl);
|
||||
clickedEl.classList.add('code-annotation-active');
|
||||
} else {
|
||||
// Unselect the line
|
||||
unselectCodeLines();
|
||||
clickedEl.classList.remove('code-annotation-active');
|
||||
}
|
||||
});
|
||||
}
|
||||
const findCites = (el) => {
|
||||
const parentEl = el.parentElement;
|
||||
if (parentEl) {
|
||||
const cites = parentEl.dataset.cites;
|
||||
if (cites) {
|
||||
return {
|
||||
el,
|
||||
cites: cites.split(' ')
|
||||
};
|
||||
} else {
|
||||
return findCites(el.parentElement)
|
||||
}
|
||||
} else {
|
||||
return undefined;
|
||||
}
|
||||
};
|
||||
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
|
||||
for (var i=0; i<bibliorefs.length; i++) {
|
||||
const ref = bibliorefs[i];
|
||||
const citeInfo = findCites(ref);
|
||||
if (citeInfo) {
|
||||
tippyHover(citeInfo.el, function() {
|
||||
var popup = window.document.createElement('div');
|
||||
citeInfo.cites.forEach(function(cite) {
|
||||
var citeDiv = window.document.createElement('div');
|
||||
citeDiv.classList.add('hanging-indent');
|
||||
citeDiv.classList.add('csl-entry');
|
||||
var biblioDiv = window.document.getElementById('ref-' + cite);
|
||||
if (biblioDiv) {
|
||||
citeDiv.innerHTML = biblioDiv.innerHTML;
|
||||
}
|
||||
popup.appendChild(citeDiv);
|
||||
});
|
||||
return popup.innerHTML;
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>
|
||||
</div> <!-- /content -->
|
||||
|
||||
|
||||
|
||||
|
||||
</body></html>
|
||||
@@ -170,8 +170,11 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<span class="menu-text">docs/fdsp_qlora.qmd</span>
|
||||
</li>
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
@@ -206,8 +209,8 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a href="../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
@@ -215,6 +218,47 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
|
||||
@@ -136,8 +136,11 @@ ul.task-list li input[type="checkbox"] {
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<span class="menu-text">docs/fdsp_qlora.qmd</span>
|
||||
</li>
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
@@ -172,8 +175,8 @@ ul.task-list li input[type="checkbox"] {
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a href="../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
@@ -181,6 +184,47 @@ ul.task-list li input[type="checkbox"] {
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
|
||||
@@ -96,7 +96,7 @@ ul.task-list li input[type="checkbox"] {
|
||||
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
<i class="bi bi-layout-text-sidebar-reverse"></i>
|
||||
</button>
|
||||
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item">FDSP + QLoRA</li></ol></nav>
|
||||
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/debugging.html">How-To Guides</a></li><li class="breadcrumb-item"><a href="../docs/fsdp_qlora.html">FDSP + QLoRA</a></li></ol></nav>
|
||||
<a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
</a>
|
||||
</div>
|
||||
@@ -136,8 +136,11 @@ ul.task-list li input[type="checkbox"] {
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<span class="menu-text">docs/fdsp_qlora.qmd</span>
|
||||
</li>
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link active">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
@@ -172,8 +175,8 @@ ul.task-list li input[type="checkbox"] {
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a href="../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
@@ -181,6 +184,47 @@ ul.task-list li input[type="checkbox"] {
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
@@ -213,7 +257,7 @@ ul.task-list li input[type="checkbox"] {
|
||||
<!-- main -->
|
||||
<main class="content" id="quarto-document-content">
|
||||
|
||||
<header id="title-block-header" class="quarto-title-block default">
|
||||
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/debugging.html">How-To Guides</a></li><li class="breadcrumb-item"><a href="../docs/fsdp_qlora.html">FDSP + QLoRA</a></li></ol></nav>
|
||||
<div class="quarto-title">
|
||||
<h1 class="title">FDSP + QLoRA</h1>
|
||||
</div>
|
||||
|
||||
@@ -170,8 +170,11 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<span class="menu-text">docs/fdsp_qlora.qmd</span>
|
||||
</li>
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/input_output.html" class="sidebar-item-text sidebar-link active">
|
||||
@@ -206,8 +209,8 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a href="../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
@@ -215,6 +218,47 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
@@ -336,29 +380,29 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<section id="prepare-data" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="prepare-data">1. Prepare Data</h3>
|
||||
<p>To use the <code>input_output</code> format, collect your data in the following format into a jsonl file (below is the first row from the file <code>output</code>.jsonl` pretty printed):</p>
|
||||
<div class="sourceCode" id="cb3"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="ex">$</span> head <span class="at">-n1</span> output.jsonl <span class="kw">|</span> <span class="ex">python</span> <span class="at">-m</span> json.tool</span>
|
||||
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="ex">{.cell-output</span> .cell-output-stdout}</span>
|
||||
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a> <span class="kw">{</span></span>
|
||||
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a> <span class="st">"segments"</span><span class="ex">:</span> [</span>
|
||||
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a> <span class="kw">{</span></span>
|
||||
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a> <span class="st">"label"</span><span class="ex">:</span> true,</span>
|
||||
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a> <span class="st">"text"</span><span class="ex">:</span> <span class="st">"<s>Hello\n"</span></span>
|
||||
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a> <span class="ex">},</span></span>
|
||||
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a> <span class="kw">{</span></span>
|
||||
<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a> <span class="st">"label"</span><span class="ex">:</span> true,</span>
|
||||
<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a> <span class="st">"text"</span><span class="ex">:</span> <span class="st">"hi there!. "</span></span>
|
||||
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a> <span class="ex">},</span></span>
|
||||
<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a> <span class="kw">{</span></span>
|
||||
<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a> <span class="st">"label"</span><span class="ex">:</span> false,</span>
|
||||
<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a> <span class="st">"text"</span><span class="ex">:</span> <span class="st">"goodbye "</span></span>
|
||||
<span id="cb3-17"><a href="#cb3-17" aria-hidden="true" tabindex="-1"></a> <span class="ex">},</span></span>
|
||||
<span id="cb3-18"><a href="#cb3-18" aria-hidden="true" tabindex="-1"></a> <span class="kw">{</span></span>
|
||||
<span id="cb3-19"><a href="#cb3-19" aria-hidden="true" tabindex="-1"></a> <span class="st">"label"</span><span class="ex">:</span> true,</span>
|
||||
<span id="cb3-20"><a href="#cb3-20" aria-hidden="true" tabindex="-1"></a> <span class="st">"text"</span><span class="ex">:</span> <span class="st">"farewell</s>"</span></span>
|
||||
<span id="cb3-21"><a href="#cb3-21" aria-hidden="true" tabindex="-1"></a> <span class="kw">}</span></span>
|
||||
<span id="cb3-22"><a href="#cb3-22" aria-hidden="true" tabindex="-1"></a> <span class="ex">]</span></span>
|
||||
<span id="cb3-23"><a href="#cb3-23" aria-hidden="true" tabindex="-1"></a> <span class="kw">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="sourceCode" id="cb3"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="ex">$</span> head <span class="at">-n1</span> output.jsonl <span class="kw">|</span> <span class="ex">python</span> <span class="at">-m</span> json.tool</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="cell-output cell-output-stdout">
|
||||
<pre><code>{
|
||||
"segments": [
|
||||
{
|
||||
"label": true,
|
||||
"text": "<s>Hello\n"
|
||||
},
|
||||
{
|
||||
"label": true,
|
||||
"text": "hi there!. "
|
||||
},
|
||||
{
|
||||
"label": false,
|
||||
"text": "goodbye "
|
||||
},
|
||||
{
|
||||
"label": true,
|
||||
"text": "farewell</s>"
|
||||
}
|
||||
]
|
||||
}</code></pre>
|
||||
</div>
|
||||
<p>Set <code>label:false</code> when you want to mask a segment of text so that the model isn’t trained on it. Some things to keep in mind:</p>
|
||||
<blockquote class="blockquote">
|
||||
<p>[!IMPORTANT] 1. <strong>EOS, BOS, spaces, newlines etc. are entirely up to you. Axolotl concatenates all the segments as-is.</strong> The tokenizer doesn’t add anything additional. Notice how I added spaces, newlines, <code><s></code> (BOS), and <code></s></code> (EOS) myself. 2. Make sure you check the materialized output to validate that the prompt is getting assembled how you like.</p>
|
||||
@@ -368,60 +412,60 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<section id="use-type-input_output" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="use-type-input_output">2. Use <code>type: input_output</code></h3>
|
||||
<p>Let’s materialize data with our <code>output.jsonl</code> file by setting <code>type: input_output</code> in our axolotl config:</p>
|
||||
<div class="sourceCode" id="cb4"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="co"># training_config.yaml</span></span>
|
||||
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a><span class="fu">base_model</span><span class="kw">:</span><span class="at"> mistralai/Mistral-7B-v0.1</span></span>
|
||||
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a><span class="fu">data_seed</span><span class="kw">:</span><span class="at"> </span><span class="dv">49</span></span>
|
||||
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a><span class="fu">seed</span><span class="kw">:</span><span class="at"> </span><span class="dv">49</span></span>
|
||||
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb4-7"><a href="#cb4-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> output.jsonl</span></span>
|
||||
<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> input_output</span></span>
|
||||
<span id="cb4-9"><a href="#cb4-9" aria-hidden="true" tabindex="-1"></a><span class="fu">val_set_size</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.1</span></span>
|
||||
<span id="cb4-10"><a href="#cb4-10" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb4-11"><a href="#cb4-11" aria-hidden="true" tabindex="-1"></a><span class="fu">sequence_len</span><span class="kw">:</span><span class="at"> </span><span class="dv">896</span></span>
|
||||
<span id="cb4-12"><a href="#cb4-12" aria-hidden="true" tabindex="-1"></a><span class="fu">sample_packing</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
|
||||
<span id="cb4-13"><a href="#cb4-13" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb4-14"><a href="#cb4-14" aria-hidden="true" tabindex="-1"></a><span class="fu">micro_batch_size</span><span class="kw">:</span><span class="at"> </span><span class="dv">2</span></span>
|
||||
<span id="cb4-15"><a href="#cb4-15" aria-hidden="true" tabindex="-1"></a><span class="fu">gradient_accumulation_steps</span><span class="kw">:</span><span class="at"> </span><span class="dv">3</span></span>
|
||||
<span id="cb4-16"><a href="#cb4-16" aria-hidden="true" tabindex="-1"></a><span class="fu">eval_batch_size</span><span class="kw">:</span><span class="at"> </span><span class="dv">2</span></span>
|
||||
<span id="cb4-17"><a href="#cb4-17" aria-hidden="true" tabindex="-1"></a><span class="fu">num_epochs</span><span class="kw">:</span><span class="at"> </span><span class="dv">1</span></span>
|
||||
<span id="cb4-18"><a href="#cb4-18" aria-hidden="true" tabindex="-1"></a><span class="fu">learning_rate</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.0002</span></span>
|
||||
<span id="cb4-19"><a href="#cb4-19" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb4-20"><a href="#cb4-20" aria-hidden="true" tabindex="-1"></a><span class="fu">train_on_inputs</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
|
||||
<span id="cb4-21"><a href="#cb4-21" aria-hidden="true" tabindex="-1"></a><span class="fu">special_tokens</span><span class="kw">:</span></span>
|
||||
<span id="cb4-22"><a href="#cb4-22" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">bos_token</span><span class="kw">:</span><span class="at"> </span><span class="st">"<s>"</span></span>
|
||||
<span id="cb4-23"><a href="#cb4-23" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">eos_token</span><span class="kw">:</span><span class="at"> </span><span class="st">"</s>"</span></span>
|
||||
<span id="cb4-24"><a href="#cb4-24" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">unk_token</span><span class="kw">:</span><span class="at"> </span><span class="st">"<unk>"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="sourceCode" id="cb5"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co"># training_config.yaml</span></span>
|
||||
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="fu">base_model</span><span class="kw">:</span><span class="at"> mistralai/Mistral-7B-v0.1</span></span>
|
||||
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a><span class="fu">data_seed</span><span class="kw">:</span><span class="at"> </span><span class="dv">49</span></span>
|
||||
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a><span class="fu">seed</span><span class="kw">:</span><span class="at"> </span><span class="dv">49</span></span>
|
||||
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> output.jsonl</span></span>
|
||||
<span id="cb5-8"><a href="#cb5-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> input_output</span></span>
|
||||
<span id="cb5-9"><a href="#cb5-9" aria-hidden="true" tabindex="-1"></a><span class="fu">val_set_size</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.1</span></span>
|
||||
<span id="cb5-10"><a href="#cb5-10" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb5-11"><a href="#cb5-11" aria-hidden="true" tabindex="-1"></a><span class="fu">sequence_len</span><span class="kw">:</span><span class="at"> </span><span class="dv">896</span></span>
|
||||
<span id="cb5-12"><a href="#cb5-12" aria-hidden="true" tabindex="-1"></a><span class="fu">sample_packing</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
|
||||
<span id="cb5-13"><a href="#cb5-13" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb5-14"><a href="#cb5-14" aria-hidden="true" tabindex="-1"></a><span class="fu">micro_batch_size</span><span class="kw">:</span><span class="at"> </span><span class="dv">2</span></span>
|
||||
<span id="cb5-15"><a href="#cb5-15" aria-hidden="true" tabindex="-1"></a><span class="fu">gradient_accumulation_steps</span><span class="kw">:</span><span class="at"> </span><span class="dv">3</span></span>
|
||||
<span id="cb5-16"><a href="#cb5-16" aria-hidden="true" tabindex="-1"></a><span class="fu">eval_batch_size</span><span class="kw">:</span><span class="at"> </span><span class="dv">2</span></span>
|
||||
<span id="cb5-17"><a href="#cb5-17" aria-hidden="true" tabindex="-1"></a><span class="fu">num_epochs</span><span class="kw">:</span><span class="at"> </span><span class="dv">1</span></span>
|
||||
<span id="cb5-18"><a href="#cb5-18" aria-hidden="true" tabindex="-1"></a><span class="fu">learning_rate</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.0002</span></span>
|
||||
<span id="cb5-19"><a href="#cb5-19" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb5-20"><a href="#cb5-20" aria-hidden="true" tabindex="-1"></a><span class="fu">train_on_inputs</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
|
||||
<span id="cb5-21"><a href="#cb5-21" aria-hidden="true" tabindex="-1"></a><span class="fu">special_tokens</span><span class="kw">:</span></span>
|
||||
<span id="cb5-22"><a href="#cb5-22" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">bos_token</span><span class="kw">:</span><span class="at"> </span><span class="st">"<s>"</span></span>
|
||||
<span id="cb5-23"><a href="#cb5-23" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">eos_token</span><span class="kw">:</span><span class="at"> </span><span class="st">"</s>"</span></span>
|
||||
<span id="cb5-24"><a href="#cb5-24" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">unk_token</span><span class="kw">:</span><span class="at"> </span><span class="st">"<unk>"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<p>You can use the following command to materialize your data. The <code>--debug</code> flag will print the tokens, along with the labels so you can verify that the correct items are being ignored:</p>
|
||||
<div class="sourceCode" id="cb5"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="ex">$</span> python <span class="at">-m</span> axolotl.cli.preprocess training_config.yaml <span class="at">--debug</span></span>
|
||||
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a><span class="ex">...</span></span>
|
||||
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a><span class="ex">[2024-03-05</span> 23:36:46,969] <span class="pp">[</span><span class="ss">INFO</span><span class="pp">]</span> <span class="pp">[</span><span class="ss">axolotl.check_example_labels:35</span><span class="pp">]</span> <span class="pp">[</span><span class="ss">PID:607731</span><span class="pp">]</span> <span class="pp">[</span><span class="ss">RANK:0</span><span class="pp">]</span> <span class="op"><</span>s<span class="op">>(</span><span class="ex">1,</span> 1<span class="op">)</span> Hello<span class="er">(</span><span class="ex">22557,</span> 22557<span class="kw">)</span></span>
|
||||
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a><span class="kw">(</span><span class="ex">13,</span> 13<span class="kw">)</span> <span class="ex">hi</span><span class="er">(</span><span class="ex">12014,</span> 12014<span class="kw">)</span> <span class="ex">there</span><span class="er">(</span><span class="ex">736,</span> 736<span class="kw">)</span> <span class="ex">!</span><span class="er">(</span><span class="ex">28808,</span> 28808<span class="kw">)</span> <span class="bu">.</span><span class="er">(</span><span class="ex">28723,</span> 28723<span class="kw">)</span> <span class="kw">(</span><span class="ex">28705,</span> 28705<span class="kw">)</span> <span class="ex">good</span><span class="er">(</span><span class="ex">-100,</span> 1179<span class="kw">)</span> <span class="ex">bye</span><span class="er">(</span><span class="ex">-100,</span> 17664<span class="kw">)</span> <span class="kw">(</span><span class="ex">-100,</span> 28705<span class="kw">)</span> <span class="ex">fare</span><span class="er">(</span><span class="ex">19111,</span> 19111<span class="kw">)</span> <span class="ex">well</span><span class="er">(</span><span class="ex">5458,</span> 5458<span class="kw">)</span> <span class="op"><</span>/s<span class="op">>(</span><span class="ex">2,</span> 2<span class="op">)</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="sourceCode" id="cb6"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="ex">$</span> python <span class="at">-m</span> axolotl.cli.preprocess training_config.yaml <span class="at">--debug</span></span>
|
||||
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a><span class="ex">...</span></span>
|
||||
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a><span class="ex">[2024-03-05</span> 23:36:46,969] <span class="pp">[</span><span class="ss">INFO</span><span class="pp">]</span> <span class="pp">[</span><span class="ss">axolotl.check_example_labels:35</span><span class="pp">]</span> <span class="pp">[</span><span class="ss">PID:607731</span><span class="pp">]</span> <span class="pp">[</span><span class="ss">RANK:0</span><span class="pp">]</span> <span class="op"><</span>s<span class="op">>(</span><span class="ex">1,</span> 1<span class="op">)</span> Hello<span class="er">(</span><span class="ex">22557,</span> 22557<span class="kw">)</span></span>
|
||||
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a><span class="kw">(</span><span class="ex">13,</span> 13<span class="kw">)</span> <span class="ex">hi</span><span class="er">(</span><span class="ex">12014,</span> 12014<span class="kw">)</span> <span class="ex">there</span><span class="er">(</span><span class="ex">736,</span> 736<span class="kw">)</span> <span class="ex">!</span><span class="er">(</span><span class="ex">28808,</span> 28808<span class="kw">)</span> <span class="bu">.</span><span class="er">(</span><span class="ex">28723,</span> 28723<span class="kw">)</span> <span class="kw">(</span><span class="ex">28705,</span> 28705<span class="kw">)</span> <span class="ex">good</span><span class="er">(</span><span class="ex">-100,</span> 1179<span class="kw">)</span> <span class="ex">bye</span><span class="er">(</span><span class="ex">-100,</span> 17664<span class="kw">)</span> <span class="kw">(</span><span class="ex">-100,</span> 28705<span class="kw">)</span> <span class="ex">fare</span><span class="er">(</span><span class="ex">19111,</span> 19111<span class="kw">)</span> <span class="ex">well</span><span class="er">(</span><span class="ex">5458,</span> 5458<span class="kw">)</span> <span class="op"><</span>/s<span class="op">>(</span><span class="ex">2,</span> 2<span class="op">)</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<p>The format is <code>decoded_token</code>(<code>label</code>, <code>token_id</code>), for example, <code><s>(1, 1)</code> means that the token is <code><s></code>, the label is <code>1</code> and the token_id is <code>1</code>. When the label is <code>-100</code> then that token is ignored for training.</p>
|
||||
<p><a id="markdown-3-check-the-prompts" name="3-check-the-prompts"></a></p>
|
||||
</section>
|
||||
<section id="check-the-prompts" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="check-the-prompts">3. Check the prompts</h3>
|
||||
<p>Here is another way to check the materialized output:</p>
|
||||
<div class="sourceCode" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> transformers <span class="im">import</span> AutoTokenizer</span>
|
||||
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> datasets <span class="im">import</span> load_from_disk</span>
|
||||
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> yaml</span>
|
||||
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a>directory <span class="op">=</span> <span class="op">!</span>ls last_run_prepared<span class="op">/</span></span>
|
||||
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> <span class="bu">open</span>(<span class="st">'training_config.yaml'</span>, <span class="st">'r'</span>) <span class="im">as</span> f:</span>
|
||||
<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a> cfg <span class="op">=</span> yaml.safe_load(f)</span>
|
||||
<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a>model_id <span class="op">=</span> cfg[<span class="st">'base_model'</span>]</span>
|
||||
<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a>tok <span class="op">=</span> AutoTokenizer.from_pretrained(model_id)</span>
|
||||
<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_from_disk(<span class="ss">f'last_run_prepared/</span><span class="sc">{</span>directory[<span class="dv">0</span>]<span class="sc">}</span><span class="ss">/'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="sourceCode" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> row <span class="op">=</span> ds[<span class="dv">0</span>]</span>
|
||||
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> <span class="bu">print</span>(tok.decode(row[<span class="st">'input_ids'</span>]))</span>
|
||||
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="op"><</span>s<span class="op">></span> Hello</span>
|
||||
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a> hi there<span class="op">!</span>. goodbye farewell<span class="op"></</span>s<span class="op">></span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="sourceCode" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> transformers <span class="im">import</span> AutoTokenizer</span>
|
||||
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> datasets <span class="im">import</span> load_from_disk</span>
|
||||
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> yaml</span>
|
||||
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a>directory <span class="op">=</span> <span class="op">!</span>ls last_run_prepared<span class="op">/</span></span>
|
||||
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a><span class="cf">with</span> <span class="bu">open</span>(<span class="st">'training_config.yaml'</span>, <span class="st">'r'</span>) <span class="im">as</span> f:</span>
|
||||
<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a> cfg <span class="op">=</span> yaml.safe_load(f)</span>
|
||||
<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a>model_id <span class="op">=</span> cfg[<span class="st">'base_model'</span>]</span>
|
||||
<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a>tok <span class="op">=</span> AutoTokenizer.from_pretrained(model_id)</span>
|
||||
<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a>ds <span class="op">=</span> load_from_disk(<span class="ss">f'last_run_prepared/</span><span class="sc">{</span>directory[<span class="dv">0</span>]<span class="sc">}</span><span class="ss">/'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="sourceCode" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> row <span class="op">=</span> ds[<span class="dv">0</span>]</span>
|
||||
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a><span class="op">>>></span> <span class="bu">print</span>(tok.decode(row[<span class="st">'input_ids'</span>]))</span>
|
||||
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a><span class="op"><</span>s<span class="op">></span> Hello</span>
|
||||
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a> hi there<span class="op">!</span>. goodbye farewell<span class="op"></</span>s<span class="op">></span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<p>We can check that the right tokens are ingored by comparing the labels to each token:</p>
|
||||
<div class="sourceCode" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
|
||||
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>pd.DataFrame([{<span class="st">'token'</span>: tok.decode(i), <span class="st">'label'</span>: l, <span class="st">'id'</span>:i} <span class="cf">for</span> i,l <span class="kw">in</span></span>
|
||||
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a> <span class="bu">zip</span>(row[<span class="st">'input_ids'</span>], row[<span class="st">'labels'</span>])])</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="sourceCode" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> pandas <span class="im">as</span> pd</span>
|
||||
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a>pd.DataFrame([{<span class="st">'token'</span>: tok.decode(i), <span class="st">'label'</span>: l, <span class="st">'id'</span>:i} <span class="cf">for</span> i,l <span class="kw">in</span></span>
|
||||
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a> <span class="bu">zip</span>(row[<span class="st">'input_ids'</span>], row[<span class="st">'labels'</span>])])</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr class="header">
|
||||
@@ -504,29 +548,29 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
</tbody>
|
||||
</table>
|
||||
<p>If we look at the input data, the above table seems correct! (The jsonl version is repeated below for reference):</p>
|
||||
<div class="sourceCode" id="cb9"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="ex">$</span> head <span class="at">-n1</span> output.jsonl <span class="kw">|</span> <span class="ex">python</span> <span class="at">-m</span> json.tool</span>
|
||||
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a><span class="ex">{.cell-output</span> .cell-output-stdout}</span>
|
||||
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a> <span class="kw">{</span></span>
|
||||
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a> <span class="st">"segments"</span><span class="ex">:</span> [</span>
|
||||
<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a> <span class="kw">{</span></span>
|
||||
<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a> <span class="st">"label"</span><span class="ex">:</span> true,</span>
|
||||
<span id="cb9-8"><a href="#cb9-8" aria-hidden="true" tabindex="-1"></a> <span class="st">"text"</span><span class="ex">:</span> <span class="st">"<s>Hello\n"</span></span>
|
||||
<span id="cb9-9"><a href="#cb9-9" aria-hidden="true" tabindex="-1"></a> <span class="ex">},</span></span>
|
||||
<span id="cb9-10"><a href="#cb9-10" aria-hidden="true" tabindex="-1"></a> <span class="kw">{</span></span>
|
||||
<span id="cb9-11"><a href="#cb9-11" aria-hidden="true" tabindex="-1"></a> <span class="st">"label"</span><span class="ex">:</span> true,</span>
|
||||
<span id="cb9-12"><a href="#cb9-12" aria-hidden="true" tabindex="-1"></a> <span class="st">"text"</span><span class="ex">:</span> <span class="st">"hi there!. "</span></span>
|
||||
<span id="cb9-13"><a href="#cb9-13" aria-hidden="true" tabindex="-1"></a> <span class="ex">},</span></span>
|
||||
<span id="cb9-14"><a href="#cb9-14" aria-hidden="true" tabindex="-1"></a> <span class="kw">{</span></span>
|
||||
<span id="cb9-15"><a href="#cb9-15" aria-hidden="true" tabindex="-1"></a> <span class="st">"label"</span><span class="ex">:</span> false,</span>
|
||||
<span id="cb9-16"><a href="#cb9-16" aria-hidden="true" tabindex="-1"></a> <span class="st">"text"</span><span class="ex">:</span> <span class="st">"goodbye "</span></span>
|
||||
<span id="cb9-17"><a href="#cb9-17" aria-hidden="true" tabindex="-1"></a> <span class="ex">},</span></span>
|
||||
<span id="cb9-18"><a href="#cb9-18" aria-hidden="true" tabindex="-1"></a> <span class="kw">{</span></span>
|
||||
<span id="cb9-19"><a href="#cb9-19" aria-hidden="true" tabindex="-1"></a> <span class="st">"label"</span><span class="ex">:</span> true,</span>
|
||||
<span id="cb9-20"><a href="#cb9-20" aria-hidden="true" tabindex="-1"></a> <span class="st">"text"</span><span class="ex">:</span> <span class="st">"farewell</s>"</span></span>
|
||||
<span id="cb9-21"><a href="#cb9-21" aria-hidden="true" tabindex="-1"></a> <span class="kw">}</span></span>
|
||||
<span id="cb9-22"><a href="#cb9-22" aria-hidden="true" tabindex="-1"></a> <span class="ex">]</span></span>
|
||||
<span id="cb9-23"><a href="#cb9-23" aria-hidden="true" tabindex="-1"></a> <span class="kw">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="sourceCode" id="cb10"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="ex">$</span> head <span class="at">-n1</span> output.jsonl <span class="kw">|</span> <span class="ex">python</span> <span class="at">-m</span> json.tool</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="cell-output cell-output-stdout">
|
||||
<pre><code>{
|
||||
"segments": [
|
||||
{
|
||||
"label": true,
|
||||
"text": "<s>Hello\n"
|
||||
},
|
||||
{
|
||||
"label": true,
|
||||
"text": "hi there!. "
|
||||
},
|
||||
{
|
||||
"label": false,
|
||||
"text": "goodbye "
|
||||
},
|
||||
{
|
||||
"label": true,
|
||||
"text": "farewell</s>"
|
||||
}
|
||||
]
|
||||
}</code></pre>
|
||||
</div>
|
||||
|
||||
|
||||
</section>
|
||||
|
||||
@@ -136,8 +136,11 @@ ul.task-list li input[type="checkbox"] {
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<span class="menu-text">docs/fdsp_qlora.qmd</span>
|
||||
</li>
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
@@ -172,8 +175,8 @@ ul.task-list li input[type="checkbox"] {
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a href="../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
@@ -181,6 +184,47 @@ ul.task-list li input[type="checkbox"] {
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
|
||||
@@ -170,8 +170,11 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<span class="menu-text">docs/fdsp_qlora.qmd</span>
|
||||
</li>
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
@@ -206,8 +209,8 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a href="../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
@@ -215,6 +218,47 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
|
||||
@@ -136,8 +136,11 @@ ul.task-list li input[type="checkbox"] {
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<span class="menu-text">docs/fdsp_qlora.qmd</span>
|
||||
</li>
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
@@ -172,8 +175,8 @@ ul.task-list li input[type="checkbox"] {
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a href="../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
@@ -181,6 +184,47 @@ ul.task-list li input[type="checkbox"] {
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
|
||||
@@ -136,8 +136,11 @@ ul.task-list li input[type="checkbox"] {
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<span class="menu-text">docs/fdsp_qlora.qmd</span>
|
||||
</li>
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
@@ -172,8 +175,8 @@ ul.task-list li input[type="checkbox"] {
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a href="../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
@@ -181,6 +184,47 @@ ul.task-list li input[type="checkbox"] {
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
|
||||
@@ -170,8 +170,11 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<span class="menu-text">docs/fdsp_qlora.qmd</span>
|
||||
</li>
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
@@ -206,8 +209,8 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a href="../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Formats</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
@@ -215,6 +218,47 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Conversation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Instruction Tuning</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Pre-training</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-Free</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user