Built site for gh-pages

This commit is contained in:
Quarto GHA Workflow Runner
2025-02-25 09:11:03 +00:00
parent 4c1ebd7402
commit efdea0e9ff
48 changed files with 10359 additions and 5198 deletions

View File

@@ -7,7 +7,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<title>Inference Guide Axolotl</title>
<title>Inference Axolotl</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
@@ -69,10 +69,10 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<script src="../site_libs/quarto-html/tippy.umd.min.js"></script>
<script src="../site_libs/quarto-html/anchor.min.js"></script>
<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet">
<link href="../site_libs/quarto-html/quarto-syntax-highlighting-2f5df379a58b258e96c21c0638c20c03.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-b53751a350365c71b6c909e95f209ed1.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="../site_libs/bootstrap/bootstrap.min.js"></script>
<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="../site_libs/bootstrap/bootstrap-141b2cdb37a94fcfd6825c1581ff795f.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
<link href="../site_libs/bootstrap/bootstrap-0cda210ced8960466d2ee7bf22d15016.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="dark">
<script id="quarto-search-options" type="application/json">{
"location": "navbar",
"copy-button": false,
@@ -112,8 +112,8 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<nav class="navbar navbar-expand " data-bs-theme="dark">
<div class="navbar-container container-fluid">
<div class="navbar-brand-container mx-auto">
<a class="navbar-brand" href="../index.html">
<span class="navbar-title">Axolotl</span>
<a href="../index.html" class="navbar-brand navbar-brand-logo">
<img src="../image/axolotl_logo_digital_white.svg" alt="" class="navbar-logo">
</a>
</div>
<div class="quarto-navbar-tools tools-wide tools-end">
@@ -129,7 +129,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
<i class="bi bi-layout-text-sidebar-reverse"></i>
</button>
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/getting-started.html">How-To Guides</a></li><li class="breadcrumb-item"><a href="../docs/inference.html">Inference Guide</a></li></ol></nav>
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/getting-started.html">Getting Started</a></li><li class="breadcrumb-item"><a href="../docs/inference.html">Inference</a></li></ol></nav>
<a class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
</a>
</div>
@@ -150,7 +150,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true">
<span class="menu-text">How-To Guides</span></a>
<span class="menu-text">Getting Started</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
@@ -159,91 +159,25 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/getting-started.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Getting Started with Axolotl</span></a>
<span class="menu-text">Quickstart</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/installation.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Installation Guide</span></a>
<span class="menu-text">Installation</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/debugging.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Debugging</span></a>
<a href="../docs/cli.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">CLI Reference</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/inference.html" class="sidebar-item-text sidebar-link active">
<span class="menu-text">Inference Guide</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/multipack.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Multipack (Sample Packing)</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">FDSP + QLoRA</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/input_output.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Template-free prompt construction</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/rlhf.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">RLHF (Beta)</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/nccl.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">NCCL</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/mac.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Mac M-series</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/multi-gpu.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Multi-GPU Training Guide</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/multi-node.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Multi Node</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/unsloth.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Unsloth</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/amd_hpc.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Training with AMD GPUs on HPC Systems</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/ray-integration.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Ray Train integration</span></a>
<span class="menu-text">Inference</span></a>
</div>
</li>
</ul>
@@ -298,7 +232,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="true">
<span class="menu-text">Reference</span></a>
<span class="menu-text">Deployments</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
@@ -306,18 +240,187 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/multi-gpu.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Multi-GPU</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/multi-node.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Multi Node</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/ray-integration.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Ray Train</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/amd_hpc.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">AMD GPUs on HPC Systems</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/mac.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Mac M-series</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-4" role="navigation" aria-expanded="true">
<span class="menu-text">How To Guides</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-4" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-4" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/multimodal.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">MultiModal / Vision Language Models (BETA)</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/rlhf.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">RLHF (Beta)</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/reward_modelling.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Reward Modelling</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/lr_groups.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Learning Rate Groups</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/lora_optims.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">LoRA Optimizations</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-5" role="navigation" aria-expanded="true">
<span class="menu-text">Core Concepts</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-5" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-5" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/batch_vs_grad.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Batch size vs Gradient accumulation</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/dataset_preprocessing.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Dataset Preprocessing</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/multipack.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Multipack (Sample Packing)</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-6" role="navigation" aria-expanded="true">
<span class="menu-text">Advanced Features</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-6" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-6" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">FDSP + QLoRA</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/unsloth.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Unsloth</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/torchao.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">PyTorch ao</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/custom_integrations.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Custom Integrations</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-7" role="navigation" aria-expanded="true">
<span class="menu-text">Troubleshooting</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-7" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-7" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/faq.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">FAQ</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/debugging.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Debugging</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/nccl.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">NCCL</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-8" role="navigation" aria-expanded="true">
<span class="menu-text">Reference</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-8" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-8" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/config.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Config options</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/faq.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">FAQ</span></a>
</div>
</li>
</ul>
</div>
</nav>
@@ -357,9 +460,9 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<!-- main -->
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/getting-started.html">How-To Guides</a></li><li class="breadcrumb-item"><a href="../docs/inference.html">Inference Guide</a></li></ol></nav>
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/getting-started.html">Getting Started</a></li><li class="breadcrumb-item"><a href="../docs/inference.html">Inference</a></li></ol></nav>
<div class="quarto-title">
<div class="quarto-title-block"><div><h1 class="title">Inference Guide</h1><button type="button" class="btn code-tools-button" id="quarto-code-tools-source"><i class="bi"></i> Code</button></div></div>
<h1 class="title">Inference</h1>
</div>
@@ -505,8 +608,6 @@ Warning
<p>For more details, see our <a href="../docs/debugging.html">debugging guide</a>.</p>
<!-- -->
</section>
</section>
@@ -598,57 +699,6 @@ window.document.addEventListener("DOMContentLoaded", function (event) {
container: window.document.getElementById('quarto-embedded-source-code-modal')
});
clipboardModal.on('success', onCopySuccess);
}
const viewSource = window.document.getElementById('quarto-view-source') ||
window.document.getElementById('quarto-code-tools-source');
if (viewSource) {
const sourceUrl = viewSource.getAttribute("data-quarto-source-url");
viewSource.addEventListener("click", function(e) {
if (sourceUrl) {
// rstudio viewer pane
if (/\bcapabilities=\b/.test(window.location)) {
window.open(sourceUrl);
} else {
window.location.href = sourceUrl;
}
} else {
const modal = new bootstrap.Modal(document.getElementById('quarto-embedded-source-code-modal'));
modal.show();
}
return false;
});
}
function toggleCodeHandler(show) {
return function(e) {
const detailsSrc = window.document.querySelectorAll(".cell > details > .sourceCode");
for (let i=0; i<detailsSrc.length; i++) {
const details = detailsSrc[i].parentElement;
if (show) {
details.open = true;
} else {
details.removeAttribute("open");
}
}
const cellCodeDivs = window.document.querySelectorAll(".cell > .sourceCode");
const fromCls = show ? "hidden" : "unhidden";
const toCls = show ? "unhidden" : "hidden";
for (let i=0; i<cellCodeDivs.length; i++) {
const codeDiv = cellCodeDivs[i];
if (codeDiv.classList.contains(fromCls)) {
codeDiv.classList.remove(fromCls);
codeDiv.classList.add(toCls);
}
}
return false;
}
}
const hideAllCode = window.document.getElementById("quarto-hide-all-code");
if (hideAllCode) {
hideAllCode.addEventListener("click", toggleCodeHandler(false));
}
const showAllCode = window.document.getElementById("quarto-show-all-code");
if (showAllCode) {
showAllCode.addEventListener("click", toggleCodeHandler(true));
}
var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
var mailtoRegex = new RegExp(/^mailto:/);
@@ -978,156 +1028,7 @@ window.document.addEventListener("DOMContentLoaded", function (event) {
}
}
});
</script><div class="modal fade" id="quarto-embedded-source-code-modal" tabindex="-1" aria-labelledby="quarto-embedded-source-code-modal-label" aria-hidden="true"><div class="modal-dialog modal-dialog-scrollable"><div class="modal-content"><div class="modal-header"><h5 class="modal-title" id="quarto-embedded-source-code-modal-label">Source Code</h5><button class="btn-close" data-bs-dismiss="modal"></button></div><div class="modal-body"><div class="">
<div class="sourceCode" id="cb11" data-shortcodes="false"><pre class="sourceCode markdown code-with-copy"><code class="sourceCode markdown"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="co">---</span></span>
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a><span class="an">title:</span><span class="co"> "Inference Guide"</span></span>
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a><span class="an">format:</span></span>
<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a><span class="co"> html:</span></span>
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a><span class="co"> toc: true</span></span>
<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a><span class="co"> toc-depth: 3</span></span>
<span id="cb11-7"><a href="#cb11-7" aria-hidden="true" tabindex="-1"></a><span class="co"> number-sections: true</span></span>
<span id="cb11-8"><a href="#cb11-8" aria-hidden="true" tabindex="-1"></a><span class="co"> code-tools: true</span></span>
<span id="cb11-9"><a href="#cb11-9" aria-hidden="true" tabindex="-1"></a><span class="an">execute:</span></span>
<span id="cb11-10"><a href="#cb11-10" aria-hidden="true" tabindex="-1"></a><span class="co"> enabled: false</span></span>
<span id="cb11-11"><a href="#cb11-11" aria-hidden="true" tabindex="-1"></a><span class="co">---</span></span>
<span id="cb11-12"><a href="#cb11-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-13"><a href="#cb11-13" aria-hidden="true" tabindex="-1"></a>This guide covers how to use your trained models for inference, including model loading, interactive testing, and common troubleshooting steps.</span>
<span id="cb11-14"><a href="#cb11-14" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-15"><a href="#cb11-15" aria-hidden="true" tabindex="-1"></a><span class="fu">## Quick Start {#sec-quickstart}</span></span>
<span id="cb11-16"><a href="#cb11-16" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-17"><a href="#cb11-17" aria-hidden="true" tabindex="-1"></a><span class="fu">### Basic Inference {#sec-basic}</span></span>
<span id="cb11-18"><a href="#cb11-18" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-19"><a href="#cb11-19" aria-hidden="true" tabindex="-1"></a>::: {.panel-tabset}</span>
<span id="cb11-20"><a href="#cb11-20" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-21"><a href="#cb11-21" aria-hidden="true" tabindex="-1"></a><span class="fu">## LoRA Models</span></span>
<span id="cb11-22"><a href="#cb11-22" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-23"><a href="#cb11-23" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
<span id="cb11-24"><a href="#cb11-24" aria-hidden="true" tabindex="-1"></a><span class="in">axolotl inference your_config.yml --lora-model-dir="./lora-output-dir"</span></span>
<span id="cb11-25"><a href="#cb11-25" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb11-26"><a href="#cb11-26" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-27"><a href="#cb11-27" aria-hidden="true" tabindex="-1"></a><span class="fu">## Full Fine-tuned Models</span></span>
<span id="cb11-28"><a href="#cb11-28" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-29"><a href="#cb11-29" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
<span id="cb11-30"><a href="#cb11-30" aria-hidden="true" tabindex="-1"></a><span class="in">axolotl inference your_config.yml --base-model="./completed-model"</span></span>
<span id="cb11-31"><a href="#cb11-31" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb11-32"><a href="#cb11-32" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-33"><a href="#cb11-33" aria-hidden="true" tabindex="-1"></a>:::</span>
<span id="cb11-34"><a href="#cb11-34" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-35"><a href="#cb11-35" aria-hidden="true" tabindex="-1"></a><span class="fu">## Advanced Usage {#sec-advanced}</span></span>
<span id="cb11-36"><a href="#cb11-36" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-37"><a href="#cb11-37" aria-hidden="true" tabindex="-1"></a><span class="fu">### Gradio Interface {#sec-gradio}</span></span>
<span id="cb11-38"><a href="#cb11-38" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-39"><a href="#cb11-39" aria-hidden="true" tabindex="-1"></a>Launch an interactive web interface:</span>
<span id="cb11-40"><a href="#cb11-40" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-41"><a href="#cb11-41" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
<span id="cb11-42"><a href="#cb11-42" aria-hidden="true" tabindex="-1"></a><span class="in">axolotl inference your_config.yml --gradio</span></span>
<span id="cb11-43"><a href="#cb11-43" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb11-44"><a href="#cb11-44" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-45"><a href="#cb11-45" aria-hidden="true" tabindex="-1"></a><span class="fu">### File-based Prompts {#sec-file-prompts}</span></span>
<span id="cb11-46"><a href="#cb11-46" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-47"><a href="#cb11-47" aria-hidden="true" tabindex="-1"></a>Process prompts from a text file:</span>
<span id="cb11-48"><a href="#cb11-48" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-49"><a href="#cb11-49" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
<span id="cb11-50"><a href="#cb11-50" aria-hidden="true" tabindex="-1"></a><span class="in">cat /tmp/prompt.txt | axolotl inference your_config.yml \</span></span>
<span id="cb11-51"><a href="#cb11-51" aria-hidden="true" tabindex="-1"></a><span class="in"> --base-model="./completed-model" --prompter=None</span></span>
<span id="cb11-52"><a href="#cb11-52" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb11-53"><a href="#cb11-53" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-54"><a href="#cb11-54" aria-hidden="true" tabindex="-1"></a><span class="fu">### Memory Optimization {#sec-memory}</span></span>
<span id="cb11-55"><a href="#cb11-55" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-56"><a href="#cb11-56" aria-hidden="true" tabindex="-1"></a>For large models or limited memory:</span>
<span id="cb11-57"><a href="#cb11-57" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-58"><a href="#cb11-58" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
<span id="cb11-59"><a href="#cb11-59" aria-hidden="true" tabindex="-1"></a><span class="in">axolotl inference your_config.yml --load-in-8bit=True</span></span>
<span id="cb11-60"><a href="#cb11-60" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb11-61"><a href="#cb11-61" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-62"><a href="#cb11-62" aria-hidden="true" tabindex="-1"></a><span class="fu">## Merging LoRA Weights {#sec-merging}</span></span>
<span id="cb11-63"><a href="#cb11-63" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-64"><a href="#cb11-64" aria-hidden="true" tabindex="-1"></a>Merge LoRA adapters with the base model:</span>
<span id="cb11-65"><a href="#cb11-65" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-66"><a href="#cb11-66" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
<span id="cb11-67"><a href="#cb11-67" aria-hidden="true" tabindex="-1"></a><span class="in">axolotl merge-lora your_config.yml --lora-model-dir="./completed-model"</span></span>
<span id="cb11-68"><a href="#cb11-68" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb11-69"><a href="#cb11-69" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-70"><a href="#cb11-70" aria-hidden="true" tabindex="-1"></a><span class="fu">### Memory Management for Merging {#sec-memory-management}</span></span>
<span id="cb11-71"><a href="#cb11-71" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-72"><a href="#cb11-72" aria-hidden="true" tabindex="-1"></a>::: {.panel-tabset}</span>
<span id="cb11-73"><a href="#cb11-73" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-74"><a href="#cb11-74" aria-hidden="true" tabindex="-1"></a><span class="fu">## Configuration Options</span></span>
<span id="cb11-75"><a href="#cb11-75" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-76"><a href="#cb11-76" aria-hidden="true" tabindex="-1"></a><span class="in">```{.yaml}</span></span>
<span id="cb11-77"><a href="#cb11-77" aria-hidden="true" tabindex="-1"></a><span class="in">gpu_memory_limit: 20GiB # Adjust based on your GPU</span></span>
<span id="cb11-78"><a href="#cb11-78" aria-hidden="true" tabindex="-1"></a><span class="in">lora_on_cpu: true # Process on CPU if needed</span></span>
<span id="cb11-79"><a href="#cb11-79" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb11-80"><a href="#cb11-80" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-81"><a href="#cb11-81" aria-hidden="true" tabindex="-1"></a><span class="fu">## Force CPU Merging</span></span>
<span id="cb11-82"><a href="#cb11-82" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-83"><a href="#cb11-83" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
<span id="cb11-84"><a href="#cb11-84" aria-hidden="true" tabindex="-1"></a><span class="in">CUDA_VISIBLE_DEVICES="" axolotl merge-lora ...</span></span>
<span id="cb11-85"><a href="#cb11-85" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb11-86"><a href="#cb11-86" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-87"><a href="#cb11-87" aria-hidden="true" tabindex="-1"></a>:::</span>
<span id="cb11-88"><a href="#cb11-88" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-89"><a href="#cb11-89" aria-hidden="true" tabindex="-1"></a><span class="fu">## Tokenization {#sec-tokenization}</span></span>
<span id="cb11-90"><a href="#cb11-90" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-91"><a href="#cb11-91" aria-hidden="true" tabindex="-1"></a><span class="fu">### Common Issues {#sec-tokenization-issues}</span></span>
<span id="cb11-92"><a href="#cb11-92" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-93"><a href="#cb11-93" aria-hidden="true" tabindex="-1"></a>::: {.callout-warning}</span>
<span id="cb11-94"><a href="#cb11-94" aria-hidden="true" tabindex="-1"></a>Tokenization mismatches between training and inference are a common source of problems.</span>
<span id="cb11-95"><a href="#cb11-95" aria-hidden="true" tabindex="-1"></a>:::</span>
<span id="cb11-96"><a href="#cb11-96" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-97"><a href="#cb11-97" aria-hidden="true" tabindex="-1"></a>To debug:</span>
<span id="cb11-98"><a href="#cb11-98" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-99"><a href="#cb11-99" aria-hidden="true" tabindex="-1"></a><span class="ss">1. </span>Check training tokenization:</span>
<span id="cb11-100"><a href="#cb11-100" aria-hidden="true" tabindex="-1"></a><span class="in">```{.bash}</span></span>
<span id="cb11-101"><a href="#cb11-101" aria-hidden="true" tabindex="-1"></a><span class="in">axolotl preprocess your_config.yml --debug</span></span>
<span id="cb11-102"><a href="#cb11-102" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb11-103"><a href="#cb11-103" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-104"><a href="#cb11-104" aria-hidden="true" tabindex="-1"></a><span class="ss">2. </span>Verify inference tokenization by decoding tokens before model input</span>
<span id="cb11-105"><a href="#cb11-105" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-106"><a href="#cb11-106" aria-hidden="true" tabindex="-1"></a><span class="ss">3. </span>Compare token IDs between training and inference</span>
<span id="cb11-107"><a href="#cb11-107" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-108"><a href="#cb11-108" aria-hidden="true" tabindex="-1"></a><span class="fu">### Special Tokens {#sec-special-tokens}</span></span>
<span id="cb11-109"><a href="#cb11-109" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-110"><a href="#cb11-110" aria-hidden="true" tabindex="-1"></a>Configure special tokens in your YAML:</span>
<span id="cb11-111"><a href="#cb11-111" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-112"><a href="#cb11-112" aria-hidden="true" tabindex="-1"></a><span class="in">```{.yaml}</span></span>
<span id="cb11-113"><a href="#cb11-113" aria-hidden="true" tabindex="-1"></a><span class="in">special_tokens:</span></span>
<span id="cb11-114"><a href="#cb11-114" aria-hidden="true" tabindex="-1"></a><span class="in"> bos_token: "&lt;s&gt;"</span></span>
<span id="cb11-115"><a href="#cb11-115" aria-hidden="true" tabindex="-1"></a><span class="in"> eos_token: "&lt;/s&gt;"</span></span>
<span id="cb11-116"><a href="#cb11-116" aria-hidden="true" tabindex="-1"></a><span class="in"> unk_token: "&lt;unk&gt;"</span></span>
<span id="cb11-117"><a href="#cb11-117" aria-hidden="true" tabindex="-1"></a><span class="in">tokens:</span></span>
<span id="cb11-118"><a href="#cb11-118" aria-hidden="true" tabindex="-1"></a><span class="in"> - "&lt;|im_start|&gt;"</span></span>
<span id="cb11-119"><a href="#cb11-119" aria-hidden="true" tabindex="-1"></a><span class="in"> - "&lt;|im_end|&gt;"</span></span>
<span id="cb11-120"><a href="#cb11-120" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb11-121"><a href="#cb11-121" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-122"><a href="#cb11-122" aria-hidden="true" tabindex="-1"></a><span class="fu">## Troubleshooting {#sec-troubleshooting}</span></span>
<span id="cb11-123"><a href="#cb11-123" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-124"><a href="#cb11-124" aria-hidden="true" tabindex="-1"></a><span class="fu">### Common Problems {#sec-common-problems}</span></span>
<span id="cb11-125"><a href="#cb11-125" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-126"><a href="#cb11-126" aria-hidden="true" tabindex="-1"></a>::: {.panel-tabset}</span>
<span id="cb11-127"><a href="#cb11-127" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-128"><a href="#cb11-128" aria-hidden="true" tabindex="-1"></a><span class="fu">## Memory Issues</span></span>
<span id="cb11-129"><a href="#cb11-129" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-130"><a href="#cb11-130" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Use 8-bit loading</span>
<span id="cb11-131"><a href="#cb11-131" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Reduce batch sizes</span>
<span id="cb11-132"><a href="#cb11-132" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Try CPU offloading</span>
<span id="cb11-133"><a href="#cb11-133" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-134"><a href="#cb11-134" aria-hidden="true" tabindex="-1"></a><span class="fu">## Token Issues</span></span>
<span id="cb11-135"><a href="#cb11-135" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-136"><a href="#cb11-136" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Verify special tokens</span>
<span id="cb11-137"><a href="#cb11-137" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Check tokenizer settings</span>
<span id="cb11-138"><a href="#cb11-138" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Compare training and inference preprocessing</span>
<span id="cb11-139"><a href="#cb11-139" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-140"><a href="#cb11-140" aria-hidden="true" tabindex="-1"></a><span class="fu">## Performance Issues</span></span>
<span id="cb11-141"><a href="#cb11-141" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-142"><a href="#cb11-142" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Verify model loading</span>
<span id="cb11-143"><a href="#cb11-143" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Check prompt formatting</span>
<span id="cb11-144"><a href="#cb11-144" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Ensure temperature/sampling settings</span>
<span id="cb11-145"><a href="#cb11-145" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-146"><a href="#cb11-146" aria-hidden="true" tabindex="-1"></a>:::</span>
<span id="cb11-147"><a href="#cb11-147" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-148"><a href="#cb11-148" aria-hidden="true" tabindex="-1"></a>For more details, see our <span class="co">[</span><span class="ot">debugging guide</span><span class="co">](debugging.qmd)</span>.</span></code><button title="Copy to Clipboard" class="code-copy-button" data-in-quarto-modal=""><i class="bi"></i></button></pre></div>
</div></div></div></div></div>
</script>
</div> <!-- /content -->