1136 lines
64 KiB
HTML
1136 lines
64 KiB
HTML
<!DOCTYPE html>
|
||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
|
||
|
||
<meta charset="utf-8">
|
||
<meta name="generator" content="quarto-1.6.40">
|
||
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
|
||
|
||
|
||
<title>Inference Guide – Axolotl</title>
|
||
<style>
|
||
code{white-space: pre-wrap;}
|
||
span.smallcaps{font-variant: small-caps;}
|
||
div.columns{display: flex; gap: min(4vw, 1.5em);}
|
||
div.column{flex: auto; overflow-x: auto;}
|
||
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
|
||
ul.task-list{list-style: none;}
|
||
ul.task-list li input[type="checkbox"] {
|
||
width: 0.8em;
|
||
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
|
||
vertical-align: middle;
|
||
}
|
||
/* CSS for syntax highlighting */
|
||
pre > code.sourceCode { white-space: pre; position: relative; }
|
||
pre > code.sourceCode > span { line-height: 1.25; }
|
||
pre > code.sourceCode > span:empty { height: 1.2em; }
|
||
.sourceCode { overflow: visible; }
|
||
code.sourceCode > span { color: inherit; text-decoration: inherit; }
|
||
div.sourceCode { margin: 1em 0; }
|
||
pre.sourceCode { margin: 0; }
|
||
@media screen {
|
||
div.sourceCode { overflow: auto; }
|
||
}
|
||
@media print {
|
||
pre > code.sourceCode { white-space: pre-wrap; }
|
||
pre > code.sourceCode > span { display: inline-block; text-indent: -5em; padding-left: 5em; }
|
||
}
|
||
pre.numberSource code
|
||
{ counter-reset: source-line 0; }
|
||
pre.numberSource code > span
|
||
{ position: relative; left: -4em; counter-increment: source-line; }
|
||
pre.numberSource code > span > a:first-child::before
|
||
{ content: counter(source-line);
|
||
position: relative; left: -1em; text-align: right; vertical-align: baseline;
|
||
border: none; display: inline-block;
|
||
-webkit-touch-callout: none; -webkit-user-select: none;
|
||
-khtml-user-select: none; -moz-user-select: none;
|
||
-ms-user-select: none; user-select: none;
|
||
padding: 0 4px; width: 4em;
|
||
}
|
||
pre.numberSource { margin-left: 3em; padding-left: 4px; }
|
||
div.sourceCode
|
||
{ }
|
||
@media screen {
|
||
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
|
||
}
|
||
</style>
|
||
|
||
|
||
<script src="../site_libs/quarto-nav/quarto-nav.js"></script>
|
||
<script src="../site_libs/clipboard/clipboard.min.js"></script>
|
||
<script src="../site_libs/quarto-search/autocomplete.umd.js"></script>
|
||
<script src="../site_libs/quarto-search/fuse.min.js"></script>
|
||
<script src="../site_libs/quarto-search/quarto-search.js"></script>
|
||
<meta name="quarto:offset" content="../">
|
||
<link href="../favicon.jpg" rel="icon" type="image/jpeg">
|
||
<script src="../site_libs/quarto-html/quarto.js"></script>
|
||
<script src="../site_libs/quarto-html/popper.min.js"></script>
|
||
<script src="../site_libs/quarto-html/tippy.umd.min.js"></script>
|
||
<script src="../site_libs/quarto-html/anchor.min.js"></script>
|
||
<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet">
|
||
<link href="../site_libs/quarto-html/quarto-syntax-highlighting-549806ee2085284f45b00abea8c6df48.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||
<script src="../site_libs/bootstrap/bootstrap.min.js"></script>
|
||
<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
|
||
<link href="../site_libs/bootstrap/bootstrap-1d8d3285ed62e8239ae07b1b029f75b0.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
|
||
<script id="quarto-search-options" type="application/json">{
|
||
"location": "navbar",
|
||
"copy-button": false,
|
||
"collapse-after": 3,
|
||
"panel-placement": "end",
|
||
"type": "overlay",
|
||
"limit": 50,
|
||
"keyboard-shortcut": [
|
||
"f",
|
||
"/",
|
||
"s"
|
||
],
|
||
"show-item-context": false,
|
||
"language": {
|
||
"search-no-results-text": "No results",
|
||
"search-matching-documents-text": "matching documents",
|
||
"search-copy-link-title": "Copy link to search",
|
||
"search-hide-matches-text": "Hide additional matches",
|
||
"search-more-match-text": "more match in this document",
|
||
"search-more-matches-text": "more matches in this document",
|
||
"search-clear-button-title": "Clear",
|
||
"search-text-placeholder": "",
|
||
"search-detached-cancel-button-title": "Cancel",
|
||
"search-submit-button-title": "Submit",
|
||
"search-label": "Search"
|
||
}
|
||
}</script>
|
||
|
||
|
||
<link rel="stylesheet" href="../styles.css">
|
||
</head>
|
||
|
||
<body class="nav-sidebar docked nav-fixed">
|
||
|
||
<div id="quarto-search-results"></div>
|
||
<header id="quarto-header" class="headroom fixed-top">
|
||
<nav class="navbar navbar-expand " data-bs-theme="dark">
|
||
<div class="navbar-container container-fluid">
|
||
<div class="navbar-brand-container mx-auto">
|
||
<a class="navbar-brand" href="../index.html">
|
||
<span class="navbar-title">Axolotl</span>
|
||
</a>
|
||
</div>
|
||
<div class="quarto-navbar-tools tools-wide tools-end">
|
||
<!-- Icon-only link: aria-label/title provide the accessible name; the glyph itself is decorative. -->
<a href="https://twitter.com/axolotl_ai" title="Twitter" class="quarto-navigation-tool px-1" aria-label="Twitter"><i class="bi bi-twitter" aria-hidden="true"></i></a>
|
||
<!-- Icon-only link: aria-label/title provide the accessible name; the glyph itself is decorative. -->
<a href="https://github.com/axolotl-ai-cloud/axolotl/" title="GitHub" class="quarto-navigation-tool px-1" aria-label="GitHub"><i class="bi bi-github" aria-hidden="true"></i></a>
|
||
<!-- Icon-only link: aria-label/title provide the accessible name; the glyph itself is decorative. -->
<a href="https://discord.gg/7m9sfhzaf3" title="Discord" class="quarto-navigation-tool px-1" aria-label="Discord"><i class="bi bi-discord" aria-hidden="true"></i></a>
|
||
</div>
|
||
<div id="quarto-search" class="" title="Search"></div>
|
||
</div> <!-- /container-fluid -->
|
||
</nav>
|
||
<nav class="quarto-secondary-nav">
|
||
<div class="container-fluid d-flex">
|
||
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||
<i class="bi bi-layout-text-sidebar-reverse"></i>
|
||
</button>
|
||
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/getting-started.html">How-To Guides</a></li><li class="breadcrumb-item"><a href="../docs/inference.html">Inference Guide</a></li></ol></nav>
|
||
<a class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||
</a>
|
||
</div>
|
||
</nav>
|
||
</header>
|
||
<!-- content -->
|
||
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
|
||
<!-- sidebar -->
|
||
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto">
|
||
<div class="sidebar-menu-container">
|
||
<ul class="list-unstyled mt-1">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../index.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Home</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item sidebar-item-section">
|
||
<div class="sidebar-item-container">
|
||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true">
|
||
<span class="menu-text">How-To Guides</span></a>
|
||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||
<i class="bi bi-chevron-right ms-2"></i>
|
||
</a>
|
||
</div>
|
||
<ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/getting-started.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Getting Started with Axolotl</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/installation.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Installation Guide</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/debugging.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Debugging</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/inference.html" class="sidebar-item-text sidebar-link active">
|
||
<span class="menu-text">Inference Guide</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/multipack.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Multipack (Sample Packing)</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">FSDP + QLoRA</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Template-free prompt construction</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/rlhf.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">RLHF (Beta)</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/nccl.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">NCCL</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/mac.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Mac M-series</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/multi-gpu.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Multi-GPU Training Guide</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/multi-node.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Multi Node</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/unsloth.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Unsloth</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/amd_hpc.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Training with AMD GPUs on HPC Systems</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/ray-integration.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Ray Train integration</span></a>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="sidebar-item sidebar-item-section">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Dataset Formats</span></a>
|
||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||
<i class="bi bi-chevron-right ms-2"></i>
|
||
</a>
|
||
</div>
|
||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Pre-training</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Instruction Tuning</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Conversation</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset-formats/stepwise_supervised.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Stepwise Supervised Format</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Template-Free</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="sidebar-item sidebar-item-section">
|
||
<div class="sidebar-item-container">
|
||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="true">
|
||
<span class="menu-text">Reference</span></a>
|
||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||
<i class="bi bi-chevron-right ms-2"></i>
|
||
</a>
|
||
</div>
|
||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/config.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Config options</span></a>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/faq.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">FAQ</span></a>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</div>
|
||
</nav>
|
||
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
|
||
<!-- margin-sidebar -->
|
||
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
|
||
<nav id="TOC" role="doc-toc" class="toc-active">
|
||
<h2 id="toc-title">On this page</h2>
|
||
|
||
<ul>
|
||
<li><a href="#sec-quickstart" id="toc-sec-quickstart" class="nav-link active" data-scroll-target="#sec-quickstart"><span class="header-section-number">1</span> Quick Start</a>
|
||
<ul class="collapse">
|
||
<li><a href="#sec-basic" id="toc-sec-basic" class="nav-link" data-scroll-target="#sec-basic"><span class="header-section-number">1.1</span> Basic Inference</a></li>
|
||
</ul></li>
|
||
<li><a href="#sec-advanced" id="toc-sec-advanced" class="nav-link" data-scroll-target="#sec-advanced"><span class="header-section-number">2</span> Advanced Usage</a>
|
||
<ul class="collapse">
|
||
<li><a href="#sec-gradio" id="toc-sec-gradio" class="nav-link" data-scroll-target="#sec-gradio"><span class="header-section-number">2.1</span> Gradio Interface</a></li>
|
||
<li><a href="#sec-file-prompts" id="toc-sec-file-prompts" class="nav-link" data-scroll-target="#sec-file-prompts"><span class="header-section-number">2.2</span> File-based Prompts</a></li>
|
||
<li><a href="#sec-memory" id="toc-sec-memory" class="nav-link" data-scroll-target="#sec-memory"><span class="header-section-number">2.3</span> Memory Optimization</a></li>
|
||
</ul></li>
|
||
<li><a href="#sec-merging" id="toc-sec-merging" class="nav-link" data-scroll-target="#sec-merging"><span class="header-section-number">3</span> Merging LoRA Weights</a>
|
||
<ul class="collapse">
|
||
<li><a href="#sec-memory-management" id="toc-sec-memory-management" class="nav-link" data-scroll-target="#sec-memory-management"><span class="header-section-number">3.1</span> Memory Management for Merging</a></li>
|
||
</ul></li>
|
||
<li><a href="#sec-tokenization" id="toc-sec-tokenization" class="nav-link" data-scroll-target="#sec-tokenization"><span class="header-section-number">4</span> Tokenization</a>
|
||
<ul class="collapse">
|
||
<li><a href="#sec-tokenization-issues" id="toc-sec-tokenization-issues" class="nav-link" data-scroll-target="#sec-tokenization-issues"><span class="header-section-number">4.1</span> Common Issues</a></li>
|
||
<li><a href="#sec-special-tokens" id="toc-sec-special-tokens" class="nav-link" data-scroll-target="#sec-special-tokens"><span class="header-section-number">4.2</span> Special Tokens</a></li>
|
||
</ul></li>
|
||
<li><a href="#sec-troubleshooting" id="toc-sec-troubleshooting" class="nav-link" data-scroll-target="#sec-troubleshooting"><span class="header-section-number">5</span> Troubleshooting</a>
|
||
<ul class="collapse">
|
||
<li><a href="#sec-common-problems" id="toc-sec-common-problems" class="nav-link" data-scroll-target="#sec-common-problems"><span class="header-section-number">5.1</span> Common Problems</a></li>
|
||
</ul></li>
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
<!-- main -->
|
||
<main class="content" id="quarto-document-content">
|
||
|
||
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/getting-started.html">How-To Guides</a></li><li class="breadcrumb-item"><a href="../docs/inference.html">Inference Guide</a></li></ol></nav>
|
||
<div class="quarto-title">
|
||
<div class="quarto-title-block"><div><h1 class="title">Inference Guide</h1><button type="button" class="btn code-tools-button" id="quarto-code-tools-source"><i class="bi"></i> Code</button></div></div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="quarto-title-meta">
|
||
|
||
|
||
|
||
|
||
</div>
|
||
|
||
|
||
|
||
</header>
|
||
|
||
|
||
<p>This guide covers how to use your trained models for inference, including model loading, interactive testing, and common troubleshooting steps.</p>
|
||
<section id="sec-quickstart" class="level2" data-number="1">
|
||
<h2 data-number="1" class="anchored" data-anchor-id="sec-quickstart"><span class="header-section-number">1</span> Quick Start</h2>
|
||
<section id="sec-basic" class="level3" data-number="1.1">
|
||
<h3 data-number="1.1" class="anchored" data-anchor-id="sec-basic"><span class="header-section-number">1.1</span> Basic Inference</h3>
|
||
<div class="tabset-margin-container"></div><div class="panel-tabset">
|
||
<!-- Tab selection state is exposed via aria-selected only; aria-current is not used on tabs. -->
<ul class="nav nav-tabs" role="tablist"><li class="nav-item" role="presentation"><a class="nav-link active" id="tabset-1-1-tab" data-bs-toggle="tab" data-bs-target="#tabset-1-1" role="tab" aria-controls="tabset-1-1" aria-selected="true">LoRA Models</a></li><li class="nav-item" role="presentation"><a class="nav-link" id="tabset-1-2-tab" data-bs-toggle="tab" data-bs-target="#tabset-1-2" role="tab" aria-controls="tabset-1-2" aria-selected="false">Full Fine-tuned Models</a></li></ul>
|
||
<div class="tab-content">
|
||
<div id="tabset-1-1" class="tab-pane active" role="tabpanel" aria-labelledby="tabset-1-1-tab">
|
||
<div class="sourceCode" id="cb1"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="ex">axolotl</span> inference your_config.yml <span class="at">--lora-model-dir</span><span class="op">=</span><span class="st">"./lora-output-dir"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
</div>
|
||
<div id="tabset-1-2" class="tab-pane" role="tabpanel" aria-labelledby="tabset-1-2-tab">
|
||
<div class="sourceCode" id="cb2"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="ex">axolotl</span> inference your_config.yml <span class="at">--base-model</span><span class="op">=</span><span class="st">"./completed-model"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="sec-advanced" class="level2" data-number="2">
|
||
<h2 data-number="2" class="anchored" data-anchor-id="sec-advanced"><span class="header-section-number">2</span> Advanced Usage</h2>
|
||
<section id="sec-gradio" class="level3" data-number="2.1">
|
||
<h3 data-number="2.1" class="anchored" data-anchor-id="sec-gradio"><span class="header-section-number">2.1</span> Gradio Interface</h3>
|
||
<p>Launch an interactive web interface:</p>
|
||
<div class="sourceCode" id="cb3"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="ex">axolotl</span> inference your_config.yml <span class="at">--gradio</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
</section>
|
||
<section id="sec-file-prompts" class="level3" data-number="2.2">
|
||
<h3 data-number="2.2" class="anchored" data-anchor-id="sec-file-prompts"><span class="header-section-number">2.2</span> File-based Prompts</h3>
|
||
<p>Process prompts from a text file:</p>
|
||
<div class="sourceCode" id="cb4"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="fu">cat</span> /tmp/prompt.txt <span class="kw">|</span> <span class="ex">axolotl</span> inference your_config.yml <span class="dt">\</span></span>
|
||
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a> <span class="at">--base-model</span><span class="op">=</span><span class="st">"./completed-model"</span> <span class="at">--prompter</span><span class="op">=</span>None</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
</section>
|
||
<section id="sec-memory" class="level3" data-number="2.3">
|
||
<h3 data-number="2.3" class="anchored" data-anchor-id="sec-memory"><span class="header-section-number">2.3</span> Memory Optimization</h3>
|
||
<p>For large models or limited memory:</p>
|
||
<div class="sourceCode" id="cb5"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="ex">axolotl</span> inference your_config.yml <span class="at">--load-in-8bit</span><span class="op">=</span>True</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
</section>
|
||
</section>
|
||
<section id="sec-merging" class="level2" data-number="3">
|
||
<h2 data-number="3" class="anchored" data-anchor-id="sec-merging"><span class="header-section-number">3</span> Merging LoRA Weights</h2>
|
||
<p>Merge LoRA adapters with the base model:</p>
|
||
<div class="sourceCode" id="cb6"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="ex">axolotl</span> merge-lora your_config.yml <span class="at">--lora-model-dir</span><span class="op">=</span><span class="st">"./completed-model"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
<section id="sec-memory-management" class="level3" data-number="3.1">
|
||
<h3 data-number="3.1" class="anchored" data-anchor-id="sec-memory-management"><span class="header-section-number">3.1</span> Memory Management for Merging</h3>
|
||
<div class="tabset-margin-container"></div><div class="panel-tabset">
|
||
<ul class="nav nav-tabs" role="tablist"><li class="nav-item" role="presentation"><a class="nav-link active" id="tabset-2-1-tab" data-bs-toggle="tab" data-bs-target="#tabset-2-1" role="tab" aria-controls="tabset-2-1" aria-selected="true">Configuration Options</a></li><li class="nav-item" role="presentation"><a class="nav-link" id="tabset-2-2-tab" data-bs-toggle="tab" data-bs-target="#tabset-2-2" role="tab" aria-controls="tabset-2-2" aria-selected="false">Force CPU Merging</a></li></ul>
|
||
<div class="tab-content">
|
||
<div id="tabset-2-1" class="tab-pane active" role="tabpanel" aria-labelledby="tabset-2-1-tab">
|
||
<div class="sourceCode" id="cb7"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="fu">gpu_memory_limit</span><span class="kw">:</span><span class="at"> 20GiB</span><span class="co"> # Adjust based on your GPU</span></span>
|
||
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="fu">lora_on_cpu</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span><span class="co"> # Process on CPU if needed</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
</div>
|
||
<div id="tabset-2-2" class="tab-pane" role="tabpanel" aria-labelledby="tabset-2-2-tab">
|
||
<div class="sourceCode" id="cb8"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="va">CUDA_VISIBLE_DEVICES</span><span class="op">=</span><span class="st">""</span> <span class="ex">axolotl</span> merge-lora ...</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="sec-tokenization" class="level2" data-number="4">
|
||
<h2 data-number="4" class="anchored" data-anchor-id="sec-tokenization"><span class="header-section-number">4</span> Tokenization</h2>
|
||
<section id="sec-tokenization-issues" class="level3" data-number="4.1">
|
||
<h3 data-number="4.1" class="anchored" data-anchor-id="sec-tokenization-issues"><span class="header-section-number">4.1</span> Common Issues</h3>
|
||
<div class="callout callout-style-default callout-warning callout-titled">
|
||
<div class="callout-header d-flex align-content-center">
|
||
<div class="callout-icon-container">
|
||
<i class="callout-icon"></i>
|
||
</div>
|
||
<div class="callout-title-container flex-fill">
|
||
Warning
|
||
</div>
|
||
</div>
|
||
<div class="callout-body-container callout-body">
|
||
<p>Tokenization mismatches between training and inference are a common source of problems.</p>
|
||
</div>
|
||
</div>
|
||
<p>To debug:</p>
|
||
<ol type="1">
|
||
<li>Check training tokenization:</li>
|
||
</ol>
|
||
<div class="sourceCode" id="cb9"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="ex">axolotl</span> preprocess your_config.yml <span class="at">--debug</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
<ol start="2" type="1">
|
||
<li><p>Verify inference tokenization by decoding tokens before model input</p></li>
|
||
<li><p>Compare token IDs between training and inference</p></li>
|
||
</ol>
|
||
</section>
|
||
<section id="sec-special-tokens" class="level3" data-number="4.2">
|
||
<h3 data-number="4.2" class="anchored" data-anchor-id="sec-special-tokens"><span class="header-section-number">4.2</span> Special Tokens</h3>
|
||
<p>Configure special tokens in your YAML:</p>
|
||
<div class="sourceCode" id="cb10"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="fu">special_tokens</span><span class="kw">:</span></span>
|
||
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">bos_token</span><span class="kw">:</span><span class="at"> </span><span class="st">"&lt;s&gt;"</span></span>
|
||
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">eos_token</span><span class="kw">:</span><span class="at"> </span><span class="st">"&lt;/s&gt;"</span></span>
|
||
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">unk_token</span><span class="kw">:</span><span class="at"> </span><span class="st">"&lt;unk&gt;"</span></span>
|
||
<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a><span class="fu">tokens</span><span class="kw">:</span></span>
|
||
<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="st">"&lt;|im_start|&gt;"</span></span>
|
||
<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="st">"&lt;|im_end|&gt;"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
</section>
|
||
</section>
|
||
<section id="sec-troubleshooting" class="level2" data-number="5">
|
||
<h2 data-number="5" class="anchored" data-anchor-id="sec-troubleshooting"><span class="header-section-number">5</span> Troubleshooting</h2>
|
||
<section id="sec-common-problems" class="level3" data-number="5.1">
|
||
<h3 data-number="5.1" class="anchored" data-anchor-id="sec-common-problems"><span class="header-section-number">5.1</span> Common Problems</h3>
|
||
<div class="tabset-margin-container"></div><div class="panel-tabset">
|
||
<ul class="nav nav-tabs" role="tablist"><li class="nav-item" role="presentation"><a class="nav-link active" id="tabset-3-1-tab" data-bs-toggle="tab" data-bs-target="#tabset-3-1" role="tab" aria-controls="tabset-3-1" aria-selected="true">Memory Issues</a></li><li class="nav-item" role="presentation"><a class="nav-link" id="tabset-3-2-tab" data-bs-toggle="tab" data-bs-target="#tabset-3-2" role="tab" aria-controls="tabset-3-2" aria-selected="false">Token Issues</a></li><li class="nav-item" role="presentation"><a class="nav-link" id="tabset-3-3-tab" data-bs-toggle="tab" data-bs-target="#tabset-3-3" role="tab" aria-controls="tabset-3-3" aria-selected="false">Performance Issues</a></li></ul>
|
||
<div class="tab-content">
|
||
<div id="tabset-3-1" class="tab-pane active" role="tabpanel" aria-labelledby="tabset-3-1-tab">
|
||
<ul>
|
||
<li>Use 8-bit loading</li>
|
||
<li>Reduce batch sizes</li>
|
||
<li>Try CPU offloading</li>
|
||
</ul>
|
||
</div>
|
||
<div id="tabset-3-2" class="tab-pane" role="tabpanel" aria-labelledby="tabset-3-2-tab">
|
||
<ul>
|
||
<li>Verify special tokens</li>
|
||
<li>Check tokenizer settings</li>
|
||
<li>Compare training and inference preprocessing</li>
|
||
</ul>
|
||
</div>
|
||
<div id="tabset-3-3" class="tab-pane" role="tabpanel" aria-labelledby="tabset-3-3-tab">
|
||
<ul>
|
||
<li>Verify model loading</li>
|
||
<li>Check prompt formatting</li>
|
||
<li>Review temperature/sampling settings</li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<p>For more details, see our <a href="../docs/debugging.html">debugging guide</a>.</p>
|
||
|
||
|
||
<!-- -->
|
||
|
||
</section>
|
||
</section>
|
||
|
||
</main> <!-- /main -->
|
||
<script id="quarto-html-after-body" type="application/javascript">
|
||
window.document.addEventListener("DOMContentLoaded", function (event) {
|
||
// Mirrors the colour mode declared on the given stylesheet element onto <body>:
// a "data-mode" of "dark" selects the quarto-dark class, anything else selects
// quarto-light; the opposite class is removed.
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const body = window.document.querySelector("body");
  const classToSet = isDark ? "quarto-dark" : "quarto-light";
  const classToClear = isDark ? "quarto-light" : "quarto-dark";
  body.classList.add(classToSet);
  body.classList.remove(classToClear);
}
|
||
// Applies the body colour-mode class based on the primary Bootstrap stylesheet
// (link#quarto-bootstrap). Does nothing when that link element is absent.
const toggleBodyColorPrimary = () => {
  const primarySheet = window.document.querySelector("link#quarto-bootstrap");
  if (!primarySheet) {
    return;
  }
  toggleBodyColorMode(primarySheet);
}
|
||
toggleBodyColorPrimary();
|
||
const icon = "";
|
||
const anchorJS = new window.AnchorJS();
|
||
anchorJS.options = {
|
||
placement: 'right',
|
||
icon: icon
|
||
};
|
||
anchorJS.add('.anchored');
|
||
// True when any class on the element starts with "code-annotation-".
const isCodeAnnotation = (el) =>
  Array.from(el.classList).some((className) => className.startsWith('code-annotation-'));
|
||
// ClipboardJS "success" handler: flashes the copy button as "checked" for one
// second, temporarily replaces its title with "Copied!" and, when Bootstrap is
// available, shows a manual "Copied!" tooltip for the same period.
//
// e.trigger is the button that was clicked; e.clearSelection() drops the
// selection made for the copy.
const onCopySuccess = function(e) {
  // button target
  const button = e.trigger;
  // don't keep focus
  button.blur();
  // A flash started by an earlier click is still running. Starting a second one
  // would capture the temporary title as the value to restore, leaving the
  // button with the wrong title for good, so only clear the selection.
  if (button.classList.contains('code-copy-button-checked')) {
    e.clearSelection();
    return;
  }
  // flash "checked"
  button.classList.add('code-copy-button-checked');
  var currentTitle = button.getAttribute("title");
  button.setAttribute("title", "Copied!");
  let tooltip;
  if (window.bootstrap) {
    button.setAttribute("data-bs-toggle", "tooltip");
    button.setAttribute("data-bs-placement", "left");
    button.setAttribute("data-bs-title", "Copied!");
    tooltip = new bootstrap.Tooltip(button,
      { trigger: "manual",
        customClass: "code-copy-button-tooltip",
        offset: [0, -8]});
    tooltip.show();
  }
  setTimeout(function() {
    if (tooltip) {
      tooltip.hide();
      button.removeAttribute("data-bs-title");
      button.removeAttribute("data-bs-toggle");
      button.removeAttribute("data-bs-placement");
    }
    // restore the title captured before the flash and end the "checked" state
    button.setAttribute("title", currentTitle);
    button.classList.remove('code-copy-button-checked');
  }, 1000);
  // clear code selection
  e.clearSelection();
}
|
||
// Builds the text placed on the clipboard for a copy button: clones the element
// immediately before the button, removes every direct child that is a code
// annotation, and returns the remaining inner text.
const getTextToCopy = function(trigger) {
  const codeEl = trigger.previousElementSibling.cloneNode(true);
  // Iterate over a snapshot: `children` is a live collection, so removing an
  // element while iterating it directly would skip the element that follows.
  for (const childEl of Array.from(codeEl.children)) {
    if (isCodeAnnotation(childEl)) {
      childEl.remove();
    }
  }
  return codeEl.innerText;
}
|
||
// Copy buttons outside the embedded-source modal.
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', { text: getTextToCopy });
clipboard.on('success', onCopySuccess);

// Copy buttons inside the embedded-source modal get their own ClipboardJS
// instance with the modal as its container, when the modal exists on the page.
const embeddedSourceModal = window.document.getElementById('quarto-embedded-source-code-modal');
if (embeddedSourceModal) {
  const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
    text: getTextToCopy,
    container: embeddedSourceModal
  });
  clipboardModal.on('success', onCopySuccess);
}
|
||
// "View source" control: either of the two known element ids may be present.
const viewSource =
  window.document.getElementById('quarto-view-source') ||
  window.document.getElementById('quarto-code-tools-source');
if (viewSource) {
  const sourceUrl = viewSource.getAttribute("data-quarto-source-url");
  viewSource.addEventListener("click", function(e) {
    if (!sourceUrl) {
      // No external source URL: show the source embedded in the page modal.
      const modal = new bootstrap.Modal(document.getElementById('quarto-embedded-source-code-modal'));
      modal.show();
    } else if (/\bcapabilities=\b/.test(window.location)) {
      // rstudio viewer pane
      window.open(sourceUrl);
    } else {
      window.location.href = sourceUrl;
    }
    return false;
  });
}
|
||
// Returns a click handler that shows (show = true) or hides (show = false) all
// cell source code: <details> wrappers of ".cell > details > .sourceCode" are
// opened/closed, and ".cell > .sourceCode" blocks swap their "hidden" /
// "unhidden" class. The handler always returns false.
function toggleCodeHandler(show) {
  return function(e) {
    for (const foldedSource of window.document.querySelectorAll(".cell > details > .sourceCode")) {
      const wrapper = foldedSource.parentElement;
      if (show) {
        wrapper.open = true;
      } else {
        wrapper.removeAttribute("open");
      }
    }

    const cellSources = window.document.querySelectorAll(".cell > .sourceCode");
    const currentCls = show ? "hidden" : "unhidden";
    const nextCls = show ? "unhidden" : "hidden";
    for (const block of cellSources) {
      // only blocks currently carrying the opposite state are switched
      if (!block.classList.contains(currentCls)) {
        continue;
      }
      block.classList.remove(currentCls);
      block.classList.add(nextCls);
    }
    return false;
  }
}
|
||
// Wire the "hide all code" / "show all code" controls, when present, to the
// shared toggle handler.
for (const [controlId, show] of [["quarto-hide-all-code", false], ["quarto-show-all-code", true]]) {
  const control = window.document.getElementById(controlId);
  if (control) {
    control.addEventListener("click", toggleCodeHandler(show));
  }
}
|
||
// Patterns for links that count as internal to this site.
// Regex literals are used throughout: building the site pattern from a string
// literal would lose the backslashes, so the dots in the host name would match
// any character instead of a literal ".".
var localhostRegex = /^(?:http|https):\/\/localhost\:?[0-9]*\//;
var mailtoRegex = /^mailto:/;
var filterRegex = /https:\/\/axolotl-ai-cloud\.github\.io\/axolotl\//;

// Returns true when href points at the published site, at a localhost server,
// or is a mailto: link; false otherwise.
var isInternal = (href) => {
  return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
}
|
||
// Inspect non-navigation links; for the ones that are external, restore the
// original href when one was recorded in data-original-href.
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
for (const link of links) {
  if (isInternal(link.href)) {
    continue;
  }
  // undo the damage that might have been done by quarto-nav.js in the case of
  // links that we want to consider external
  const recordedHref = link.dataset.originalHref;
  if (recordedHref !== undefined) {
    link.href = recordedHref;
  }
}
|
||
// Attaches a tippy tooltip with the shared Quarto settings to `el`.
// contentFn, onTriggerFn and onUntriggerFn are optional; each one is passed to
// tippy (as content / onTrigger / onUntrigger) only when it is provided.
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  const config = {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // render the tooltip next to the reference element
    appendTo: (reference) => reference.parentElement,
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  };
  const optionalHooks = [
    ["content", contentFn],
    ["onTrigger", onTriggerFn],
    ["onUntrigger", onUntriggerFn],
  ];
  for (const [option, hook] of optionalHooks) {
    if (hook) {
      config[option] = hook;
    }
  }
  window.tippy(el, config);
}
|
||
// Footnote references: hovering shows the HTML of the footnote they point to.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (const ref of noterefs) {
  tippyHover(ref, function() {
    // use id or data attribute instead here
    let target = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    // an absolute URL is reduced to its fragment; anything else is used as is
    try { target = new URL(target).hash; } catch {}
    const noteId = target.replace(/^#\/?/, "");
    const note = window.document.getElementById(noteId);
    return note ? note.innerHTML : "";
  });
}
|
||
const xrefs = window.document.querySelectorAll('a.quarto-xref');
|
||
// Produces the tooltip HTML for a cross-reference target.
//   id   - target id, or null when the reference has no fragment
//   note - the target element (modified in place: column classes are stripped
//          and, for non-section targets, an AnchorJS link is removed)
// Sections (id null or starting with "sec-") with more than two children are
// reduced to their first child plus the first following child that is not an
// empty <p>; other targets return their inner HTML (outer HTML for callouts).
const processXRef = (id, note) => {
  const typesetIfAvailable = (el) => {
    if (window.Quarto?.typesetMath) {
      window.Quarto.typesetMath(el);
    }
  };

  // Strip column container classes
  const stripColumnClz = (el) => {
    el.classList.remove("page-full", "page-columns");
    if (el.children) {
      for (const child of el.children) {
        stripColumnClz(child);
      }
    }
  }
  stripColumnClz(note)

  const isSection = id === null || id.startsWith('sec-');
  if (!isSection) {
    // Remove any anchor links if they are present
    const anchorLink = note.querySelector('a.anchorjs-link');
    if (anchorLink) {
      anchorLink.remove();
    }
    typesetIfAvailable(note);
    return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
  }

  const hasManyChildren = note.children && note.children.length > 2;
  if (!hasManyChildren) {
    typesetIfAvailable(note);
    return note.innerHTML;
  }

  // Special case sections, only their first couple elements
  const container = document.createElement("div");
  container.appendChild(note.children[0].cloneNode(true));
  const firstBody = Array.from(note.children)
    .slice(1)
    .find((child) => !(child.tagName === "P" && child.innerText === ""));
  if (firstBody) {
    container.appendChild(firstBody.cloneNode(true));
  }
  typesetIfAvailable(container);
  return container.innerHTML
}
|
||
// Cross-reference tooltips. When a reference is triggered, the tooltip is
// disabled while its content is resolved (from this page, or by fetching the
// target page), then re-enabled and shown.
for (const xref of xrefs) {
  tippyHover(xref, undefined, function(instance) {
    instance.disable();
    const reveal = () => {
      instance.enable();
      instance.show();
    };
    const parseHtml = (html) => new DOMParser().parseFromString(html, "text/html");

    let url = xref.getAttribute('href');
    let hash = undefined;
    if (url.startsWith('#')) {
      hash = url;
    } else {
      try { hash = new URL(url).hash; } catch {}
    }

    if (!hash) {
      // See if we can fetch a full url (with no hash to target)
      // This is a special case and we should probably do some content thinning / targeting
      fetch(url)
        .then(res => res.text())
        .then(html => {
          const note = parseHtml(html).querySelector('main.content');
          if (note !== null) {
            // This should only happen for chapter cross references
            // (since there is no id in the URL)
            // remove the first header
            if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
              note.children[0].remove();
            }
            instance.setContent(processXRef(null, note));
          }
        }).finally(reveal);
      return;
    }

    const id = hash.replace(/^#\/?/, "");
    const localNote = window.document.getElementById(id);
    if (localNote !== null) {
      // target is on this page: work on a clone so the page itself is untouched
      try {
        instance.setContent(processXRef(id, localNote.cloneNode(true)));
      } finally {
        reveal();
      }
      return;
    }

    // See if we can fetch this
    fetch(url.split('#')[0])
      .then(res => res.text())
      .then(html => {
        const remoteNote = parseHtml(html).getElementById(id);
        if (remoteNote !== null) {
          instance.setContent(processXRef(id, remoteNote));
        }
      }).finally(reveal);
  }, function(instance) {
  });
}
|
||
let selectedAnnoteEl;
|
||
// CSS selector for the <span> that carries the given code cell id and
// annotation number in its data-code-cell / data-code-annotation attributes.
const selectorForAnnotation = (cell, annotation) =>
  `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
|
||
// Highlights the code lines that belong to an annotation entry.
//   annoteEl - element carrying data-target-cell and data-target-annotation
// The matching annotation <span> lists its lines in data-code-lines; a
// highlight <div> spanning the first to the last listed line is positioned
// over the code, a second one in the cell's annotation gutter, and annoteEl is
// remembered as the current selection.
// If the annotation span, its line list, or the line elements cannot be found,
// nothing is highlighted and the selection is left unchanged (instead of
// throwing inside the click/resize handlers that call this).
const selectCodeLines = (annoteEl) => {
  const doc = window.document;
  const targetCell = annoteEl.getAttribute("data-target-cell");
  const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
  const annoteSpan = doc.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
  const codeLines = annoteSpan ? annoteSpan.getAttribute("data-code-lines") : null;
  if (!codeLines) {
    return;
  }
  const lineIds = codeLines.split(",").map((line) => targetCell + "-" + line);

  // compute the vertical extent from the first and last annotated line
  const firstEl = doc.getElementById(lineIds[0]);
  const lastEl = doc.getElementById(lineIds[lineIds.length - 1]);
  if (firstEl === null || lastEl === null) {
    return;
  }
  const top = firstEl.offsetTop;
  const height = lineIds.length > 1
    ? lastEl.offsetTop + lastEl.offsetHeight - top
    : firstEl.offsetHeight;
  const parent = firstEl.parentElement ? firstEl.parentElement.parentElement : null;

  if (parent !== null) {
    // cook up a div (if necessary) and position it
    let div = doc.getElementById("code-annotation-line-highlight");
    if (div === null) {
      div = doc.createElement("div");
      div.setAttribute("id", "code-annotation-line-highlight");
      div.style.position = 'absolute';
      parent.appendChild(div);
    }
    div.style.top = top - 2 + "px";
    div.style.height = height + 4 + "px";
    div.style.left = 0;

    // same for the gutter marker, when the cell has an annotation gutter
    let gutterDiv = doc.getElementById("code-annotation-line-highlight-gutter");
    if (gutterDiv === null) {
      const gutter = doc.getElementById(targetCell)?.querySelector('.code-annotation-gutter');
      if (gutter) {
        gutterDiv = doc.createElement("div");
        gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
        gutterDiv.style.position = 'absolute';
        gutter.appendChild(gutterDiv);
      }
    }
    if (gutterDiv) {
      gutterDiv.style.top = top - 2 + "px";
      gutterDiv.style.height = height + 4 + "px";
    }
  }
  selectedAnnoteEl = annoteEl;
};
|
||
// Removes both annotation highlight overlays (code and gutter) if they exist
// and clears the remembered selection.
const unselectCodeLines = () => {
  for (const overlayId of ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"]) {
    const overlay = window.document.getElementById(overlayId);
    if (overlay) {
      overlay.remove();
    }
  }
  selectedAnnoteEl = undefined;
};
|
||
// Handle positioning of the toggle: on (throttled) window resize, re-apply the
// highlight for the currently selected annotation so it follows the new layout.
const repositionAnnotationHighlight = () => {
  // NOTE(review): elRect is not declared anywhere in this script; kept as is -
  // confirm whether anything still reads it.
  elRect = undefined;
  if (selectedAnnoteEl) {
    selectCodeLines(selectedAnnoteEl);
  }
};
window.addEventListener("resize", throttle(repositionAnnotationHighlight, 10));
|
||
// Wraps fn so that the first call runs immediately, while calls made after it
// are collapsed: each one cancels the pending timer and schedules fn (with the
// latest arguments) to run after `ms` milliseconds. Once that delayed call has
// run, the next call goes through immediately again.
function throttle(fn, ms) {
  let busy = false;
  let timer;
  return (...args) => {
    if (busy) {
      // all the others get throttled
      if (timer) clearTimeout(timer); // cancel #2
      timer = setTimeout(() => {
        fn.apply(this, args);
        busy = false;
        timer = false;
      }, ms);
      return;
    }
    // first call gets through
    fn.apply(this, args);
    busy = true;
  };
}
|
||
// Attach click handler to the DT: clicking an annotation entry highlights its
// code lines and marks it active; clicking the active entry again clears the
// highlight.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) {
  annoteDlNode.addEventListener('click', () => {
    // Always act on the <dt> the listener is bound to. The event's target may
    // be an element nested inside it, which has no data-target-* attributes and
    // would never compare equal to the remembered selection.
    const clickedEl = annoteDlNode;
    if (clickedEl !== selectedAnnoteEl) {
      unselectCodeLines();
      const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
      if (activeEl) {
        activeEl.classList.remove('code-annotation-active');
      }
      selectCodeLines(clickedEl);
      clickedEl.classList.add('code-annotation-active');
    } else {
      // Unselect the line
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
    }
  });
}
|
||
// Walks up from el looking for the nearest ancestor with a data-cites value.
// Returns { el, cites } where `el` is the child of that ancestor on the path
// from the starting element and `cites` is the space-separated list split into
// an array; returns undefined when no ancestor carries data-cites.
const findCites = (el) => {
  let current = el;
  while (current.parentElement) {
    const cites = current.parentElement.dataset.cites;
    if (cites) {
      return {
        el: current,
        cites: cites.split(' ')
      };
    }
    current = current.parentElement;
  }
  return undefined;
};
|
||
// Bibliography references: hovering a citation shows the matching bibliography
// entries (elements with id "ref-<cite>"), one hanging-indent block per cite.
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (const ref of bibliorefs) {
  const citeInfo = findCites(ref);
  if (!citeInfo) {
    continue;
  }
  tippyHover(citeInfo.el, function() {
    const popup = window.document.createElement('div');
    for (const cite of citeInfo.cites) {
      const entry = window.document.createElement('div');
      entry.classList.add('hanging-indent');
      entry.classList.add('csl-entry');
      const biblioDiv = window.document.getElementById('ref-' + cite);
      if (biblioDiv) {
        entry.innerHTML = biblioDiv.innerHTML;
      }
      // an entry is added even when no bibliography element was found
      popup.appendChild(entry);
    }
    return popup.innerHTML;
  });
}
|
||
});
|
||
</script><div class="modal fade" id="quarto-embedded-source-code-modal" tabindex="-1" aria-labelledby="quarto-embedded-source-code-modal-label" aria-hidden="true"><div class="modal-dialog modal-dialog-scrollable"><div class="modal-content"><div class="modal-header"><h5 class="modal-title" id="quarto-embedded-source-code-modal-label">Source Code</h5><button class="btn-close" data-bs-dismiss="modal"></button></div><div class="modal-body"><div class="">
|
||
<div class="sourceCode" id="cb11" data-shortcodes="false"><pre class="sourceCode markdown code-with-copy"><code class="sourceCode markdown"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="co">---</span></span>
|
||
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a><span class="an">title:</span><span class="co"> "Inference Guide"</span></span>
|
||
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a><span class="an">format:</span></span>
|
||
<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a><span class="co"> html:</span></span>
|
||
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a><span class="co"> toc: true</span></span>
|
||
<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a><span class="co"> toc-depth: 3</span></span>
|
||
<span id="cb11-7"><a href="#cb11-7" aria-hidden="true" tabindex="-1"></a><span class="co"> number-sections: true</span></span>
|
||
<span id="cb11-8"><a href="#cb11-8" aria-hidden="true" tabindex="-1"></a><span class="co"> code-tools: true</span></span>
|
||
<span id="cb11-9"><a href="#cb11-9" aria-hidden="true" tabindex="-1"></a><span class="an">execute:</span></span>
|
||
<span id="cb11-10"><a href="#cb11-10" aria-hidden="true" tabindex="-1"></a><span class="co"> enabled: false</span></span>
|
||
<span id="cb11-11"><a href="#cb11-11" aria-hidden="true" tabindex="-1"></a><span class="co">---</span></span>
|
||
<span id="cb11-12"><a href="#cb11-12" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-13"><a href="#cb11-13" aria-hidden="true" tabindex="-1"></a>This guide covers how to use your trained models for inference, including model loading, interactive testing, and common troubleshooting steps.</span>
|
||
<span id="cb11-14"><a href="#cb11-14" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-15"><a href="#cb11-15" aria-hidden="true" tabindex="-1"></a><span class="fu">## Quick Start {#sec-quickstart}</span></span>
|
||
<span id="cb11-16"><a href="#cb11-16" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-17"><a href="#cb11-17" aria-hidden="true" tabindex="-1"></a><span class="fu">### Basic Inference {#sec-basic}</span></span>
|
||
<span id="cb11-18"><a href="#cb11-18" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-19"><a href="#cb11-19" aria-hidden="true" tabindex="-1"></a>::: {.panel-tabset}</span>
|
||
<span id="cb11-20"><a href="#cb11-20" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-21"><a href="#cb11-21" aria-hidden="true" tabindex="-1"></a><span class="fu">## LoRA Models</span></span>
|
||
<span id="cb11-22"><a href="#cb11-22" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-23"><a href="#cb11-23" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
|
||
<span id="cb11-24"><a href="#cb11-24" aria-hidden="true" tabindex="-1"></a><span class="in">axolotl inference your_config.yml --lora-model-dir="./lora-output-dir"</span></span>
|
||
<span id="cb11-25"><a href="#cb11-25" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
|
||
<span id="cb11-26"><a href="#cb11-26" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-27"><a href="#cb11-27" aria-hidden="true" tabindex="-1"></a><span class="fu">## Full Fine-tuned Models</span></span>
|
||
<span id="cb11-28"><a href="#cb11-28" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-29"><a href="#cb11-29" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
|
||
<span id="cb11-30"><a href="#cb11-30" aria-hidden="true" tabindex="-1"></a><span class="in">axolotl inference your_config.yml --base-model="./completed-model"</span></span>
|
||
<span id="cb11-31"><a href="#cb11-31" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
|
||
<span id="cb11-32"><a href="#cb11-32" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-33"><a href="#cb11-33" aria-hidden="true" tabindex="-1"></a>:::</span>
|
||
<span id="cb11-34"><a href="#cb11-34" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-35"><a href="#cb11-35" aria-hidden="true" tabindex="-1"></a><span class="fu">## Advanced Usage {#sec-advanced}</span></span>
|
||
<span id="cb11-36"><a href="#cb11-36" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-37"><a href="#cb11-37" aria-hidden="true" tabindex="-1"></a><span class="fu">### Gradio Interface {#sec-gradio}</span></span>
|
||
<span id="cb11-38"><a href="#cb11-38" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-39"><a href="#cb11-39" aria-hidden="true" tabindex="-1"></a>Launch an interactive web interface:</span>
|
||
<span id="cb11-40"><a href="#cb11-40" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-41"><a href="#cb11-41" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
|
||
<span id="cb11-42"><a href="#cb11-42" aria-hidden="true" tabindex="-1"></a><span class="in">axolotl inference your_config.yml --gradio</span></span>
|
||
<span id="cb11-43"><a href="#cb11-43" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
|
||
<span id="cb11-44"><a href="#cb11-44" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-45"><a href="#cb11-45" aria-hidden="true" tabindex="-1"></a><span class="fu">### File-based Prompts {#sec-file-prompts}</span></span>
|
||
<span id="cb11-46"><a href="#cb11-46" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-47"><a href="#cb11-47" aria-hidden="true" tabindex="-1"></a>Process prompts from a text file:</span>
|
||
<span id="cb11-48"><a href="#cb11-48" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-49"><a href="#cb11-49" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
|
||
<span id="cb11-50"><a href="#cb11-50" aria-hidden="true" tabindex="-1"></a><span class="in">cat /tmp/prompt.txt | axolotl inference your_config.yml \</span></span>
|
||
<span id="cb11-51"><a href="#cb11-51" aria-hidden="true" tabindex="-1"></a><span class="in"> --base-model="./completed-model" --prompter=None</span></span>
|
||
<span id="cb11-52"><a href="#cb11-52" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
|
||
<span id="cb11-53"><a href="#cb11-53" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-54"><a href="#cb11-54" aria-hidden="true" tabindex="-1"></a><span class="fu">### Memory Optimization {#sec-memory}</span></span>
|
||
<span id="cb11-55"><a href="#cb11-55" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-56"><a href="#cb11-56" aria-hidden="true" tabindex="-1"></a>For large models or limited memory:</span>
|
||
<span id="cb11-57"><a href="#cb11-57" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-58"><a href="#cb11-58" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
|
||
<span id="cb11-59"><a href="#cb11-59" aria-hidden="true" tabindex="-1"></a><span class="in">axolotl inference your_config.yml --load-in-8bit=True</span></span>
|
||
<span id="cb11-60"><a href="#cb11-60" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
|
||
<span id="cb11-61"><a href="#cb11-61" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-62"><a href="#cb11-62" aria-hidden="true" tabindex="-1"></a><span class="fu">## Merging LoRA Weights {#sec-merging}</span></span>
|
||
<span id="cb11-63"><a href="#cb11-63" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-64"><a href="#cb11-64" aria-hidden="true" tabindex="-1"></a>Merge LoRA adapters with the base model:</span>
|
||
<span id="cb11-65"><a href="#cb11-65" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-66"><a href="#cb11-66" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
|
||
<span id="cb11-67"><a href="#cb11-67" aria-hidden="true" tabindex="-1"></a><span class="in">axolotl merge-lora your_config.yml --lora-model-dir="./completed-model"</span></span>
|
||
<span id="cb11-68"><a href="#cb11-68" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
|
||
<span id="cb11-69"><a href="#cb11-69" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-70"><a href="#cb11-70" aria-hidden="true" tabindex="-1"></a><span class="fu">### Memory Management for Merging {#sec-memory-management}</span></span>
|
||
<span id="cb11-71"><a href="#cb11-71" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-72"><a href="#cb11-72" aria-hidden="true" tabindex="-1"></a>::: {.panel-tabset}</span>
|
||
<span id="cb11-73"><a href="#cb11-73" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-74"><a href="#cb11-74" aria-hidden="true" tabindex="-1"></a><span class="fu">## Configuration Options</span></span>
|
||
<span id="cb11-75"><a href="#cb11-75" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-76"><a href="#cb11-76" aria-hidden="true" tabindex="-1"></a><span class="in">```{.yaml}</span></span>
|
||
<span id="cb11-77"><a href="#cb11-77" aria-hidden="true" tabindex="-1"></a><span class="in">gpu_memory_limit: 20GiB # Adjust based on your GPU</span></span>
|
||
<span id="cb11-78"><a href="#cb11-78" aria-hidden="true" tabindex="-1"></a><span class="in">lora_on_cpu: true # Process on CPU if needed</span></span>
|
||
<span id="cb11-79"><a href="#cb11-79" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
|
||
<span id="cb11-80"><a href="#cb11-80" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-81"><a href="#cb11-81" aria-hidden="true" tabindex="-1"></a><span class="fu">## Force CPU Merging</span></span>
|
||
<span id="cb11-82"><a href="#cb11-82" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-83"><a href="#cb11-83" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
|
||
<span id="cb11-84"><a href="#cb11-84" aria-hidden="true" tabindex="-1"></a><span class="in">CUDA_VISIBLE_DEVICES="" axolotl merge-lora ...</span></span>
|
||
<span id="cb11-85"><a href="#cb11-85" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
|
||
<span id="cb11-86"><a href="#cb11-86" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-87"><a href="#cb11-87" aria-hidden="true" tabindex="-1"></a>:::</span>
|
||
<span id="cb11-88"><a href="#cb11-88" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-89"><a href="#cb11-89" aria-hidden="true" tabindex="-1"></a><span class="fu">## Tokenization {#sec-tokenization}</span></span>
|
||
<span id="cb11-90"><a href="#cb11-90" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-91"><a href="#cb11-91" aria-hidden="true" tabindex="-1"></a><span class="fu">### Common Issues {#sec-tokenization-issues}</span></span>
|
||
<span id="cb11-92"><a href="#cb11-92" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-93"><a href="#cb11-93" aria-hidden="true" tabindex="-1"></a>::: {.callout-warning}</span>
|
||
<span id="cb11-94"><a href="#cb11-94" aria-hidden="true" tabindex="-1"></a>Tokenization mismatches between training and inference are a common source of problems.</span>
|
||
<span id="cb11-95"><a href="#cb11-95" aria-hidden="true" tabindex="-1"></a>:::</span>
|
||
<span id="cb11-96"><a href="#cb11-96" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-97"><a href="#cb11-97" aria-hidden="true" tabindex="-1"></a>To debug:</span>
|
||
<span id="cb11-98"><a href="#cb11-98" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-99"><a href="#cb11-99" aria-hidden="true" tabindex="-1"></a><span class="ss">1. </span>Check training tokenization:</span>
|
||
<span id="cb11-100"><a href="#cb11-100" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
|
||
<span id="cb11-101"><a href="#cb11-101" aria-hidden="true" tabindex="-1"></a><span class="in">axolotl preprocess your_config.yml --debug</span></span>
|
||
<span id="cb11-102"><a href="#cb11-102" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
|
||
<span id="cb11-103"><a href="#cb11-103" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-104"><a href="#cb11-104" aria-hidden="true" tabindex="-1"></a><span class="ss">2. </span>Verify inference tokenization by decoding tokens before model input</span>
|
||
<span id="cb11-105"><a href="#cb11-105" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-106"><a href="#cb11-106" aria-hidden="true" tabindex="-1"></a><span class="ss">3. </span>Compare token IDs between training and inference</span>
|
||
<span id="cb11-107"><a href="#cb11-107" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-108"><a href="#cb11-108" aria-hidden="true" tabindex="-1"></a><span class="fu">### Special Tokens {#sec-special-tokens}</span></span>
|
||
<span id="cb11-109"><a href="#cb11-109" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-110"><a href="#cb11-110" aria-hidden="true" tabindex="-1"></a>Configure special tokens in your YAML:</span>
|
||
<span id="cb11-111"><a href="#cb11-111" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-112"><a href="#cb11-112" aria-hidden="true" tabindex="-1"></a><span class="in">```{.yaml}</span></span>
|
||
<span id="cb11-113"><a href="#cb11-113" aria-hidden="true" tabindex="-1"></a><span class="in">special_tokens:</span></span>
|
||
<span id="cb11-114"><a href="#cb11-114" aria-hidden="true" tabindex="-1"></a><span class="in"> bos_token: "&lt;s&gt;"</span></span>
|
||
<span id="cb11-115"><a href="#cb11-115" aria-hidden="true" tabindex="-1"></a><span class="in"> eos_token: "&lt;/s&gt;"</span></span>
|
||
<span id="cb11-116"><a href="#cb11-116" aria-hidden="true" tabindex="-1"></a><span class="in"> unk_token: "&lt;unk&gt;"</span></span>
|
||
<span id="cb11-117"><a href="#cb11-117" aria-hidden="true" tabindex="-1"></a><span class="in">tokens:</span></span>
|
||
<span id="cb11-118"><a href="#cb11-118" aria-hidden="true" tabindex="-1"></a><span class="in"> - "&lt;|im_start|&gt;"</span></span>
|
||
<span id="cb11-119"><a href="#cb11-119" aria-hidden="true" tabindex="-1"></a><span class="in"> - "&lt;|im_end|&gt;"</span></span>
|
||
<span id="cb11-120"><a href="#cb11-120" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
|
||
<span id="cb11-121"><a href="#cb11-121" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-122"><a href="#cb11-122" aria-hidden="true" tabindex="-1"></a><span class="fu">## Troubleshooting {#sec-troubleshooting}</span></span>
|
||
<span id="cb11-123"><a href="#cb11-123" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-124"><a href="#cb11-124" aria-hidden="true" tabindex="-1"></a><span class="fu">### Common Problems {#sec-common-problems}</span></span>
|
||
<span id="cb11-125"><a href="#cb11-125" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-126"><a href="#cb11-126" aria-hidden="true" tabindex="-1"></a>::: {.panel-tabset}</span>
|
||
<span id="cb11-127"><a href="#cb11-127" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-128"><a href="#cb11-128" aria-hidden="true" tabindex="-1"></a><span class="fu">## Memory Issues</span></span>
|
||
<span id="cb11-129"><a href="#cb11-129" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-130"><a href="#cb11-130" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Use 8-bit loading</span>
|
||
<span id="cb11-131"><a href="#cb11-131" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Reduce batch sizes</span>
|
||
<span id="cb11-132"><a href="#cb11-132" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Try CPU offloading</span>
|
||
<span id="cb11-133"><a href="#cb11-133" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-134"><a href="#cb11-134" aria-hidden="true" tabindex="-1"></a><span class="fu">## Token Issues</span></span>
|
||
<span id="cb11-135"><a href="#cb11-135" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-136"><a href="#cb11-136" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Verify special tokens</span>
|
||
<span id="cb11-137"><a href="#cb11-137" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Check tokenizer settings</span>
|
||
<span id="cb11-138"><a href="#cb11-138" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Compare training and inference preprocessing</span>
|
||
<span id="cb11-139"><a href="#cb11-139" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-140"><a href="#cb11-140" aria-hidden="true" tabindex="-1"></a><span class="fu">## Performance Issues</span></span>
|
||
<span id="cb11-141"><a href="#cb11-141" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-142"><a href="#cb11-142" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Verify model loading</span>
|
||
<span id="cb11-143"><a href="#cb11-143" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Check prompt formatting</span>
|
||
<span id="cb11-144"><a href="#cb11-144" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Ensure temperature/sampling settings are appropriate</span>
|
||
<span id="cb11-145"><a href="#cb11-145" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-146"><a href="#cb11-146" aria-hidden="true" tabindex="-1"></a>:::</span>
|
||
<span id="cb11-147"><a href="#cb11-147" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb11-148"><a href="#cb11-148" aria-hidden="true" tabindex="-1"></a>For more details, see our <span class="co">[</span><span class="ot">debugging guide</span><span class="co">](debugging.qmd)</span>.</span></code><button title="Copy to Clipboard" class="code-copy-button" data-in-quarto-modal=""><i class="bi"></i></button></pre></div>
|
||
</div></div></div></div></div>
|
||
</div> <!-- /content -->
|
||
|
||
|
||
|
||
|
||
</body></html> |