Files
axolotl/docs/api/index.html
Quarto GHA Workflow Runner 14b3af3330 Built site for gh-pages
2025-04-28 14:13:30 +00:00

1462 lines
63 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.7.29">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<title>index – Axolotl</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align: middle;
}
</style>
<script src="../../site_libs/quarto-nav/quarto-nav.js"></script>
<script src="../../site_libs/clipboard/clipboard.min.js"></script>
<script src="../../site_libs/quarto-search/autocomplete.umd.js"></script>
<script src="../../site_libs/quarto-search/fuse.min.js"></script>
<script src="../../site_libs/quarto-search/quarto-search.js"></script>
<meta name="quarto:offset" content="../../">
<link href="../../favicon.jpg" rel="icon" type="image/jpeg">
<script src="../../site_libs/quarto-html/quarto.js" type="module"></script>
<script src="../../site_libs/quarto-html/tabsets/tabsets.js" type="module"></script>
<script src="../../site_libs/quarto-html/popper.min.js"></script>
<script src="../../site_libs/quarto-html/tippy.umd.min.js"></script>
<script src="../../site_libs/quarto-html/anchor.min.js"></script>
<link href="../../site_libs/quarto-html/tippy.css" rel="stylesheet">
<link href="../../site_libs/quarto-html/quarto-syntax-highlighting-dark-748b535e376f14d4692bf2b2e5fd6380.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="../../site_libs/bootstrap/bootstrap.min.js"></script>
<link href="../../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="../../site_libs/bootstrap/bootstrap-653e373a27bf50c3d267316c2b2b59fb.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="dark">
<script id="quarto-search-options" type="application/json">{
"location": "navbar",
"copy-button": false,
"collapse-after": 3,
"panel-placement": "end",
"type": "overlay",
"limit": 50,
"keyboard-shortcut": [
"f",
"/",
"s"
],
"show-item-context": false,
"language": {
"search-no-results-text": "No results",
"search-matching-documents-text": "matching documents",
"search-copy-link-title": "Copy link to search",
"search-hide-matches-text": "Hide additional matches",
"search-more-match-text": "more match in this document",
"search-more-matches-text": "more matches in this document",
"search-clear-button-title": "Clear",
"search-text-placeholder": "",
"search-detached-cancel-button-title": "Cancel",
"search-submit-button-title": "Submit",
"search-label": "Search"
}
}</script>
<link rel="stylesheet" href="../../styles.css">
</head>
<body class="nav-sidebar docked nav-fixed quarto-light">
<div id="quarto-search-results"></div>
<header id="quarto-header" class="headroom fixed-top">
<!-- Top navbar: brand logo, external project links, search container. -->
<nav class="navbar navbar-expand " data-bs-theme="dark">
<div class="navbar-container container-fluid">
<div class="navbar-brand-container mx-auto">
<a href="../../index.html" class="navbar-brand navbar-brand-logo">
<!-- The logo is the only content of this link, so its alt text names the link destination. -->
<img src="../../image/axolotl_logo_digital_white.svg" alt="Axolotl home" class="navbar-logo">
</a>
</div>
<div class="quarto-navbar-tools tools-wide tools-end">
<!-- Icon-only links: aria-label supplies the accessible name; the glyph itself is decorative. -->
<a href="https://twitter.com/axolotl_ai" title="" class="quarto-navigation-tool px-1" aria-label="Axolotl on Twitter"><i class="bi bi-twitter" aria-hidden="true"></i></a>
<a href="https://github.com/axolotl-ai-cloud/axolotl/" title="" class="quarto-navigation-tool px-1" aria-label="Axolotl on GitHub"><i class="bi bi-github" aria-hidden="true"></i></a>
<a href="https://discord.gg/7m9sfhzaf3" title="" class="quarto-navigation-tool px-1" aria-label="Axolotl on Discord"><i class="bi bi-discord" aria-hidden="true"></i></a>
</div>
<div id="quarto-search" class="" title="Search"></div>
</div> <!-- /container-fluid -->
</nav>
<!-- Secondary bar: sidebar toggle button and page breadcrumbs. -->
<nav class="quarto-secondary-nav">
<div class="container-fluid d-flex">
<!-- Native <button> already exposes the button role, so no explicit role attribute is needed. -->
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
<i class="bi bi-layout-text-sidebar-reverse" aria-hidden="true"></i>
</button>
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"></ol></nav>
<a class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
</a>
</div>
</nav>
</header>
<!-- content -->
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
<!-- sidebar -->
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto">
<div class="sidebar-menu-container">
<ul class="list-unstyled mt-1">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../index.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Home</span></a>
</div>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true">
<span class="menu-text">Getting Started</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/getting-started.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Quickstart</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/installation.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Installation</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/inference.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Inference and Merging</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/cli.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Command Line Interface (CLI)</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/config.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Config Reference</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<!-- Explicit index.html matches the other sidebar links and avoids relying on
     directory-index resolution. aria-current marks this entry as the page
     being viewed (this document is the API Reference index). -->
<a href="../../docs/api/index.html" class="sidebar-item-text sidebar-link" aria-current="page">
<span class="menu-text">API Reference</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a href="../../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Dataset Formats</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Pre-training</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Instruction Tuning</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Conversation</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/dataset-formats/stepwise_supervised.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Stepwise Supervised Format</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Template-Free</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="true">
<span class="menu-text">Deployments</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/docker.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Docker</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/multi-gpu.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Multi-GPU</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/multi-node.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Multi Node</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/ray-integration.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Ray Train</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/amd_hpc.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">AMD GPUs on HPC Systems</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/mac.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Mac M-series</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-4" role="navigation" aria-expanded="true">
<span class="menu-text">How To Guides</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-4" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-4" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/multimodal.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">MultiModal / Vision Language Models (BETA)</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/rlhf.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">RLHF (Beta)</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/reward_modelling.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Reward Modelling</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/lr_groups.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Learning Rate Groups</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/lora_optims.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">LoRA Optimizations</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/dataset_loading.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Dataset Loading</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-5" role="navigation" aria-expanded="true">
<span class="menu-text">Core Concepts</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-5" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-5" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/batch_vs_grad.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Batch size vs Gradient accumulation</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/dataset_preprocessing.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Dataset Preprocessing</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/multipack.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Multipack (Sample Packing)</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-6" role="navigation" aria-expanded="true">
<span class="menu-text">Advanced Features</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-6" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-6" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">FSDP + QLoRA</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/unsloth.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Unsloth</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/torchao.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">PyTorch ao</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/custom_integrations.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Custom Integrations</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/sequence_parallelism.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Sequence Parallelism</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-7" role="navigation" aria-expanded="true">
<span class="menu-text">Troubleshooting</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-7" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-7" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/faq.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">FAQ</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/debugging.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Debugging</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../docs/nccl.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">NCCL</span></a>
</div>
</li>
</ul>
</li>
</ul>
</div>
</nav>
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
<!-- margin-sidebar -->
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
<nav id="TOC" role="doc-toc" class="toc-active">
<h2 id="toc-title">On this page</h2>
<ul>
<li><a href="#api-reference" id="toc-api-reference" class="nav-link active" data-scroll-target="#api-reference">API Reference</a>
<ul class="collapse">
<li><a href="#core" id="toc-core" class="nav-link" data-scroll-target="#core">Core</a></li>
<li><a href="#cli" id="toc-cli" class="nav-link" data-scroll-target="#cli">CLI</a></li>
<li><a href="#trainers" id="toc-trainers" class="nav-link" data-scroll-target="#trainers">Trainers</a></li>
<li><a href="#prompt-strategies" id="toc-prompt-strategies" class="nav-link" data-scroll-target="#prompt-strategies">Prompt Strategies</a></li>
<li><a href="#kernels" id="toc-kernels" class="nav-link" data-scroll-target="#kernels">Kernels</a></li>
<li><a href="#monkeypatches" id="toc-monkeypatches" class="nav-link" data-scroll-target="#monkeypatches">MonkeyPatches</a></li>
<li><a href="#utils" id="toc-utils" class="nav-link" data-scroll-target="#utils">Utils</a></li>
<li><a href="#schemas" id="toc-schemas" class="nav-link" data-scroll-target="#schemas">Schemas</a></li>
<li><a href="#integrations" id="toc-integrations" class="nav-link" data-scroll-target="#integrations">Integrations</a></li>
<li><a href="#common" id="toc-common" class="nav-link" data-scroll-target="#common">Common</a></li>
<li><a href="#models" id="toc-models" class="nav-link" data-scroll-target="#models">Models</a></li>
<li><a href="#data-processing" id="toc-data-processing" class="nav-link" data-scroll-target="#data-processing">Data Processing</a></li>
<li><a href="#callbacks" id="toc-callbacks" class="nav-link" data-scroll-target="#callbacks">Callbacks</a></li>
</ul></li>
</ul>
</nav>
</div>
<!-- main -->
<main class="content" id="quarto-document-content"><header id="title-block-header" class="quarto-title-block"></header>
<section id="api-reference" class="level1 doc doc-index">
<h1 class="doc doc-index">API Reference</h1>
<section id="core" class="level2">
<h2 class="anchored" data-anchor-id="core">Core</h2>
<p>Core functionality for training</p>
<table class="caption-top table">
<tbody>
<tr class="odd">
<td><a href="../../docs/api/train.html#axolotl.train">train</a></td>
<td>Prepare and train a model on a dataset. Can also infer from a model or merge LoRA.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/evaluate.html#axolotl.evaluate">evaluate</a></td>
<td>Module for evaluating models.</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/datasets.html#axolotl.datasets">datasets</a></td>
<td>Module containing Dataset functionality</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/convert.html#axolotl.convert">convert</a></td>
<td>Module containing File Reader, File Writer, Json Parser, and Jsonl Serializer classes</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/prompt_tokenizers.html#axolotl.prompt_tokenizers">prompt_tokenizers</a></td>
<td>Module containing PromptTokenizingStrategy and Prompter classes</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/logging_config.html#axolotl.logging_config">logging_config</a></td>
<td>Common logging module for axolotl</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/core.trainer_builder.html#axolotl.core.trainer_builder">core.trainer_builder</a></td>
<td>Builder for the training args and trainer</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/core.training_args.html#axolotl.core.training_args">core.training_args</a></td>
<td>Extra axolotl-specific training args</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/core.chat.messages.html#axolotl.core.chat.messages">core.chat.messages</a></td>
<td>internal message representations of chat messages</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/core.chat.format.chatml.html#axolotl.core.chat.format.chatml">core.chat.format.chatml</a></td>
<td>ChatML transformation functions for MessageContents</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/core.chat.format.llama3x.html#axolotl.core.chat.format.llama3x">core.chat.format.llama3x</a></td>
<td>Llama 3.x chat formatting functions for MessageContents</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/core.chat.format.shared.html#axolotl.core.chat.format.shared">core.chat.format.shared</a></td>
<td>shared functions for format transforms</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/core.datasets.chat.html#axolotl.core.datasets.chat">core.datasets.chat</a></td>
<td>chat dataset module</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/core.datasets.transforms.chat_builder.html#axolotl.core.datasets.transforms.chat_builder">core.datasets.transforms.chat_builder</a></td>
<td>This module contains a function that builds a transform that takes a row from the dataset and converts it to a Chat.</td>
</tr>
</tbody>
</table>
</section>
<section id="cli" class="level2">
<h2 class="anchored" data-anchor-id="cli">CLI</h2>
<p>Command-line interface</p>
<table class="caption-top table">
<tbody>
<tr class="odd">
<td><a href="../../docs/api/cli.main.html#axolotl.cli.main">cli.main</a></td>
<td>Click CLI definitions for various axolotl commands.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/cli.train.html#axolotl.cli.train">cli.train</a></td>
<td>CLI to run training on a model.</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/cli.evaluate.html#axolotl.cli.evaluate">cli.evaluate</a></td>
<td>CLI to run evaluation on a model.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/cli.args.html#axolotl.cli.args">cli.args</a></td>
<td>Module for axolotl CLI command arguments.</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/cli.checks.html#axolotl.cli.checks">cli.checks</a></td>
<td>Various checks for Axolotl CLI.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/cli.config.html#axolotl.cli.config">cli.config</a></td>
<td>Configuration loading and processing.</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/cli.inference.html#axolotl.cli.inference">cli.inference</a></td>
<td>CLI to run inference on a trained model.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/cli.merge_lora.html#axolotl.cli.merge_lora">cli.merge_lora</a></td>
<td>CLI to merge a trained LoRA into a base model.</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/cli.merge_sharded_fsdp_weights.html#axolotl.cli.merge_sharded_fsdp_weights">cli.merge_sharded_fsdp_weights</a></td>
<td>CLI to merge sharded FSDP model checkpoints into a single combined checkpoint.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/cli.preprocess.html#axolotl.cli.preprocess">cli.preprocess</a></td>
<td>CLI to run preprocessing of a dataset.</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/cli.sweeps.html#axolotl.cli.sweeps">cli.sweeps</a></td>
<td>Utilities for handling sweeps over configs for axolotl train CLI command</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/cli.utils.html#axolotl.cli.utils">cli.utils</a></td>
<td>Utility methods for axolotl CLI.</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/cli.vllm_serve.html#axolotl.cli.vllm_serve">cli.vllm_serve</a></td>
<td>CLI to start the vllm server for online RL</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/cli.cloud.base.html#axolotl.cli.cloud.base">cli.cloud.base</a></td>
<td>base class for cloud platforms from cli</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/cli.cloud.modal_.html#axolotl.cli.cloud.modal_">cli.cloud.modal_</a></td>
<td>Modal Cloud support from CLI</td>
</tr>
</tbody>
</table>
</section>
<section id="trainers" class="level2">
<h2 class="anchored" data-anchor-id="trainers">Trainers</h2>
<p>Training implementations</p>
<table class="caption-top table">
<tbody>
<tr class="odd">
<td><a href="../../docs/api/core.trainers.base.html#axolotl.core.trainers.base">core.trainers.base</a></td>
<td>Module for customized trainers</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/core.trainers.trl.html#axolotl.core.trainers.trl">core.trainers.trl</a></td>
<td>Module for TRL PPO trainer</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/core.trainers.dpo.trainer.html#axolotl.core.trainers.dpo.trainer">core.trainers.dpo.trainer</a></td>
<td>DPO trainer for axolotl</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/core.trainers.grpo.trainer.html#axolotl.core.trainers.grpo.trainer">core.trainers.grpo.trainer</a></td>
<td>Axolotl GRPO trainer</td>
</tr>
</tbody>
</table>
</section>
<section id="prompt-strategies" class="level2">
<h2 class="anchored" data-anchor-id="prompt-strategies">Prompt Strategies</h2>
<p>Prompt formatting strategies</p>
<table class="caption-top table">
<tbody>
<tr class="odd">
<td><a href="../../docs/api/prompt_strategies.base.html#axolotl.prompt_strategies.base">prompt_strategies.base</a></td>
<td>module for base dataset transform strategies</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/prompt_strategies.chat_template.html#axolotl.prompt_strategies.chat_template">prompt_strategies.chat_template</a></td>
<td>HF Chat Templates prompt strategy</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/prompt_strategies.alpaca_chat.html#axolotl.prompt_strategies.alpaca_chat">prompt_strategies.alpaca_chat</a></td>
<td>Module for Alpaca prompt strategy classes</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/prompt_strategies.alpaca_instruct.html#axolotl.prompt_strategies.alpaca_instruct">prompt_strategies.alpaca_instruct</a></td>
<td>Module loading the AlpacaInstructPromptTokenizingStrategy class</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/prompt_strategies.alpaca_w_system.html#axolotl.prompt_strategies.alpaca_w_system">prompt_strategies.alpaca_w_system</a></td>
<td>Prompt strategies loader for alpaca instruction datasets with system prompts</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/prompt_strategies.user_defined.html#axolotl.prompt_strategies.user_defined">prompt_strategies.user_defined</a></td>
<td>User Defined prompts with configuration from the YML config</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/prompt_strategies.llama2_chat.html#axolotl.prompt_strategies.llama2_chat">prompt_strategies.llama2_chat</a></td>
<td>Prompt Strategy for finetuning Llama2 chat models</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/prompt_strategies.completion.html#axolotl.prompt_strategies.completion">prompt_strategies.completion</a></td>
<td>Basic completion text</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/prompt_strategies.input_output.html#axolotl.prompt_strategies.input_output">prompt_strategies.input_output</a></td>
<td>Module for plain input/output prompt pairs</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/prompt_strategies.stepwise_supervised.html#axolotl.prompt_strategies.stepwise_supervised">prompt_strategies.stepwise_supervised</a></td>
<td>Module for stepwise datasets, typically including a prompt and reasoning traces.</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/prompt_strategies.metharme.html#axolotl.prompt_strategies.metharme">prompt_strategies.metharme</a></td>
<td>Module containing the MetharmePromptTokenizingStrategy and MetharmePrompter classes</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/prompt_strategies.orcamini.html#axolotl.prompt_strategies.orcamini">prompt_strategies.orcamini</a></td>
<td>Prompt Strategy for finetuning Orca Mini (v2) models</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/prompt_strategies.pygmalion.html#axolotl.prompt_strategies.pygmalion">prompt_strategies.pygmalion</a></td>
<td>Module containing the PygmalionPromptTokenizingStrategy and PygmalionPrompter classes</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/prompt_strategies.messages.chat.html#axolotl.prompt_strategies.messages.chat">prompt_strategies.messages.chat</a></td>
<td>Chat dataset wrapping strategy for new internal messages representations</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/prompt_strategies.dpo.chat_template.html#axolotl.prompt_strategies.dpo.chat_template">prompt_strategies.dpo.chat_template</a></td>
<td>DPO prompt strategies for using tokenizer chat templates.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/prompt_strategies.dpo.llama3.html#axolotl.prompt_strategies.dpo.llama3">prompt_strategies.dpo.llama3</a></td>
<td>DPO strategies for llama-3 chat template</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/prompt_strategies.dpo.chatml.html#axolotl.prompt_strategies.dpo.chatml">prompt_strategies.dpo.chatml</a></td>
<td>DPO strategies for chatml</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/prompt_strategies.dpo.zephyr.html#axolotl.prompt_strategies.dpo.zephyr">prompt_strategies.dpo.zephyr</a></td>
<td>DPO strategies for zephyr</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/prompt_strategies.dpo.user_defined.html#axolotl.prompt_strategies.dpo.user_defined">prompt_strategies.dpo.user_defined</a></td>
<td>User-defined DPO strategies</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/prompt_strategies.dpo.passthrough.html#axolotl.prompt_strategies.dpo.passthrough">prompt_strategies.dpo.passthrough</a></td>
<td>DPO prompt strategies passthrough/zero-processing strategy</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/prompt_strategies.kto.llama3.html#axolotl.prompt_strategies.kto.llama3">prompt_strategies.kto.llama3</a></td>
<td>KTO strategies for llama-3 chat template</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/prompt_strategies.kto.chatml.html#axolotl.prompt_strategies.kto.chatml">prompt_strategies.kto.chatml</a></td>
<td>KTO strategies for chatml</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/prompt_strategies.kto.user_defined.html#axolotl.prompt_strategies.kto.user_defined">prompt_strategies.kto.user_defined</a></td>
<td>User-defined KTO strategies</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/prompt_strategies.orpo.chat_template.html#axolotl.prompt_strategies.orpo.chat_template">prompt_strategies.orpo.chat_template</a></td>
<td>chatml prompt tokenization strategy for ORPO</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/prompt_strategies.bradley_terry.llama3.html#axolotl.prompt_strategies.bradley_terry.llama3">prompt_strategies.bradley_terry.llama3</a></td>
<td>chatml transforms for datasets with system, input, chosen, rejected to match llama3 chat template</td>
</tr>
</tbody>
</table>
</section>
<section id="kernels" class="level2">
<h2 class="anchored" data-anchor-id="kernels">Kernels</h2>
<p>Low-level performance optimizations</p>
<table class="caption-top table">
<tbody>
<tr class="odd">
<td><a href="../../docs/api/kernels.lora.html#axolotl.kernels.lora">kernels.lora</a></td>
<td>Module for definition of Low-Rank Adaptation (LoRA) Triton kernels.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/kernels.geglu.html#axolotl.kernels.geglu">kernels.geglu</a></td>
<td>Module for definition of GEGLU Triton kernels.</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/kernels.swiglu.html#axolotl.kernels.swiglu">kernels.swiglu</a></td>
<td>Module for definition of SwiGLU Triton kernels.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/kernels.quantize.html#axolotl.kernels.quantize">kernels.quantize</a></td>
<td>Dequantization utilities for <code>bitsandbytes</code> integration.</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/kernels.utils.html#axolotl.kernels.utils">kernels.utils</a></td>
<td>Utilities for <code>axolotl.kernels</code> submodules.</td>
</tr>
</tbody>
</table>
</section>
<section id="monkeypatches" class="level2">
<h2 class="anchored" data-anchor-id="monkeypatches">MonkeyPatches</h2>
<p>Runtime patches for model optimizations</p>
<table class="caption-top table">
<tbody>
<tr class="odd">
<td><a href="../../docs/api/monkeypatch.llama_attn_hijack_flash.html#axolotl.monkeypatch.llama_attn_hijack_flash">monkeypatch.llama_attn_hijack_flash</a></td>
<td>Flash attention monkey patch for llama model</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/monkeypatch.llama_attn_hijack_xformers.html#axolotl.monkeypatch.llama_attn_hijack_xformers">monkeypatch.llama_attn_hijack_xformers</a></td>
<td>Directly copied the code from https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/modules/llama_attn_hijack.py and made some adjustments</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/monkeypatch.mistral_attn_hijack_flash.html#axolotl.monkeypatch.mistral_attn_hijack_flash">monkeypatch.mistral_attn_hijack_flash</a></td>
<td>Flash attention monkey patch for mistral model</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/monkeypatch.multipack.html#axolotl.monkeypatch.multipack">monkeypatch.multipack</a></td>
<td>multipack patching for v2 of sample packing</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/monkeypatch.relora.html#axolotl.monkeypatch.relora">monkeypatch.relora</a></td>
<td>Implements the ReLoRA training procedure from https://arxiv.org/abs/2307.05695, minus the initial full fine-tune.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/monkeypatch.llama_expand_mask.html#axolotl.monkeypatch.llama_expand_mask">monkeypatch.llama_expand_mask</a></td>
<td>expands the binary attention mask per section 3.2.2 of https://arxiv.org/pdf/2107.02027.pdf</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/monkeypatch.lora_kernels.html#axolotl.monkeypatch.lora_kernels">monkeypatch.lora_kernels</a></td>
<td>Module for patching custom LoRA Triton kernels and <code>torch.autograd</code> functions.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/monkeypatch.utils.html#axolotl.monkeypatch.utils">monkeypatch.utils</a></td>
<td>Shared utils for the monkeypatches</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/monkeypatch.btlm_attn_hijack_flash.html#axolotl.monkeypatch.btlm_attn_hijack_flash">monkeypatch.btlm_attn_hijack_flash</a></td>
<td>Flash attention monkey patch for cerebras btlm model</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/monkeypatch.llama_patch_multipack.html#axolotl.monkeypatch.llama_patch_multipack">monkeypatch.llama_patch_multipack</a></td>
<td>Patched LlamaAttention to use torch.nn.functional.scaled_dot_product_attention</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/monkeypatch.stablelm_attn_hijack_flash.html#axolotl.monkeypatch.stablelm_attn_hijack_flash">monkeypatch.stablelm_attn_hijack_flash</a></td>
<td>PyTorch StableLM Epoch model.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/monkeypatch.trainer_fsdp_optim.html#axolotl.monkeypatch.trainer_fsdp_optim">monkeypatch.trainer_fsdp_optim</a></td>
<td>fix for FSDP optimizer save in trainer with 4.47.0</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/monkeypatch.transformers_fa_utils.html#axolotl.monkeypatch.transformers_fa_utils">monkeypatch.transformers_fa_utils</a></td>
<td>see https://github.com/huggingface/transformers/pull/35834</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/monkeypatch.unsloth_.html#axolotl.monkeypatch.unsloth_">monkeypatch.unsloth_</a></td>
<td>module for patching with unsloth optimizations</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/monkeypatch.attention.mllama.html#axolotl.monkeypatch.attention.mllama">monkeypatch.attention.mllama</a></td>
<td>Monkeypatch for Vision Llama for FA2 support</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/monkeypatch.data.batch_dataset_fetcher.html#axolotl.monkeypatch.data.batch_dataset_fetcher">monkeypatch.data.batch_dataset_fetcher</a></td>
<td>monkey patches for the dataset fetcher to handle batches of packed indexes</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/monkeypatch.mixtral.html#axolotl.monkeypatch.mixtral">monkeypatch.mixtral</a></td>
<td>Patches to support multipack for mixtral</td>
</tr>
</tbody>
</table>
</section>
<section id="utils" class="level2">
<h2 class="anchored" data-anchor-id="utils">Utils</h2>
<p>Utility functions</p>
<table class="caption-top table">
<tbody>
<tr class="odd">
<td><a href="../../docs/api/utils.models.html#axolotl.utils.models">utils.models</a></td>
<td>Module for models and model loading</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.tokenization.html#axolotl.utils.tokenization">utils.tokenization</a></td>
<td>Module for tokenization utilities</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.chat_templates.html#axolotl.utils.chat_templates">utils.chat_templates</a></td>
<td>This module provides functionality for selecting chat templates based on user choices.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.lora.html#axolotl.utils.lora">utils.lora</a></td>
<td>module to get the state dict of a merged lora model</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.lora_embeddings.html#axolotl.utils.lora_embeddings">utils.lora_embeddings</a></td>
<td>helpers for lora embeddings</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.model_shard_quant.html#axolotl.utils.model_shard_quant">utils.model_shard_quant</a></td>
<td>module to handle loading model on cpu/meta device for FSDP</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.bench.html#axolotl.utils.bench">utils.bench</a></td>
<td>Benchmarking and measurement utilities</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.freeze.html#axolotl.utils.freeze">utils.freeze</a></td>
<td>module to freeze/unfreeze parameters by name</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.trainer.html#axolotl.utils.trainer">utils.trainer</a></td>
<td>Module containing the Trainer class and related functions</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.schedulers.html#axolotl.utils.schedulers">utils.schedulers</a></td>
<td>Module for custom LRScheduler class</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.distributed.html#axolotl.utils.distributed">utils.distributed</a></td>
<td>utility helpers for distributed checks</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.dict.html#axolotl.utils.dict">utils.dict</a></td>
<td>Module containing the DictDefault class</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.optimizers.adopt.html#axolotl.utils.optimizers.adopt">utils.optimizers.adopt</a></td>
<td>Copied from https://github.com/iShohei220/adopt</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.data.pretraining.html#axolotl.utils.data.pretraining">utils.data.pretraining</a></td>
<td>data handling specific to pretraining</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.data.sft.html#axolotl.utils.data.sft">utils.data.sft</a></td>
<td>data handling specific to SFT</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.gradient_checkpointing.unsloth.html#axolotl.utils.gradient_checkpointing.unsloth">utils.gradient_checkpointing.unsloth</a></td>
<td>Unsloth checkpointing</td>
</tr>
</tbody>
</table>
</section>
<section id="schemas" class="level2">
<h2 class="anchored" data-anchor-id="schemas">Schemas</h2>
<p>Pydantic data models for Axolotl config</p>
<table class="caption-top table">
<tbody>
<tr class="odd">
<td><a href="../../docs/api/utils.schemas.config.html#axolotl.utils.schemas.config">utils.schemas.config</a></td>
<td>Module with Pydantic models for configuration.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.schemas.model.html#axolotl.utils.schemas.model">utils.schemas.model</a></td>
<td>Pydantic models for model input / output, etc. configuration</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.schemas.training.html#axolotl.utils.schemas.training">utils.schemas.training</a></td>
<td>Pydantic models for training hyperparameters</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.schemas.datasets.html#axolotl.utils.schemas.datasets">utils.schemas.datasets</a></td>
<td>Pydantic models for datasets-related configuration</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.schemas.peft.html#axolotl.utils.schemas.peft">utils.schemas.peft</a></td>
<td>Pydantic models for PEFT-related configuration</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.schemas.trl.html#axolotl.utils.schemas.trl">utils.schemas.trl</a></td>
<td>Pydantic models for TRL trainer configuration</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.schemas.multimodal.html#axolotl.utils.schemas.multimodal">utils.schemas.multimodal</a></td>
<td>Pydantic models for multimodal-related configuration</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.schemas.integrations.html#axolotl.utils.schemas.integrations">utils.schemas.integrations</a></td>
<td>Pydantic models for Axolotl integrations</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.schemas.enums.html#axolotl.utils.schemas.enums">utils.schemas.enums</a></td>
<td>Enums for Axolotl input config</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.schemas.utils.html#axolotl.utils.schemas.utils">utils.schemas.utils</a></td>
<td>Utilities for Axolotl Pydantic models</td>
</tr>
</tbody>
</table>
</section>
<section id="integrations" class="level2">
<h2 class="anchored" data-anchor-id="integrations">Integrations</h2>
<p>Third-party integrations and extensions</p>
<table class="caption-top table">
<tbody>
<tr class="odd">
<td><a href="../../docs/api/integrations.base.html#axolotl.integrations.base">integrations.base</a></td>
<td>Base class for all plugins.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/integrations.cut_cross_entropy.args.html#axolotl.integrations.cut_cross_entropy.args">integrations.cut_cross_entropy.args</a></td>
<td>Module for handling Cut Cross Entropy input arguments.</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/integrations.grokfast.optimizer.html#axolotl.integrations.grokfast.optimizer">integrations.grokfast.optimizer</a></td>
<td></td>
</tr>
<tr class="even">
<td><a href="../../docs/api/integrations.kd.trainer.html#axolotl.integrations.kd.trainer">integrations.kd.trainer</a></td>
<td>KD trainer</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/integrations.liger.args.html#axolotl.integrations.liger.args">integrations.liger.args</a></td>
<td>Module for handling LIGER input arguments.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/integrations.lm_eval.args.html#axolotl.integrations.lm_eval.args">integrations.lm_eval.args</a></td>
<td>Module for handling lm eval harness input arguments.</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/integrations.spectrum.args.html#axolotl.integrations.spectrum.args">integrations.spectrum.args</a></td>
<td>Module for handling Spectrum input arguments.</td>
</tr>
</tbody>
</table>
</section>
<section id="common" class="level2">
<h2 class="anchored" data-anchor-id="common">Common</h2>
<p>Common utilities and shared functionality</p>
<table class="caption-top table">
<tbody>
<tr class="odd">
<td><a href="../../docs/api/common.architectures.html#axolotl.common.architectures">common.architectures</a></td>
<td>Common architecture specific constants</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/common.const.html#axolotl.common.const">common.const</a></td>
<td>Various shared constants</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/common.datasets.html#axolotl.common.datasets">common.datasets</a></td>
<td>Dataset loading utilities.</td>
</tr>
</tbody>
</table>
</section>
<section id="models" class="level2">
<h2 class="anchored" data-anchor-id="models">Models</h2>
<p>Custom model implementations</p>
<table class="caption-top table">
<tbody>
<tr class="odd">
<td><a href="../../docs/api/models.mamba.modeling_mamba.html#axolotl.models.mamba.modeling_mamba">models.mamba.modeling_mamba</a></td>
<td></td>
</tr>
</tbody>
</table>
</section>
<section id="data-processing" class="level2">
<h2 class="anchored" data-anchor-id="data-processing">Data Processing</h2>
<p>Data processing utilities</p>
<table class="caption-top table">
<tbody>
<tr class="odd">
<td><a href="../../docs/api/utils.collators.core.html#axolotl.utils.collators.core">utils.collators.core</a></td>
<td>basic shared collator constants</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.collators.batching.html#axolotl.utils.collators.batching">utils.collators.batching</a></td>
<td>Data collators for axolotl to pad labels and position_ids for packed sequences</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.collators.mamba.html#axolotl.utils.collators.mamba">utils.collators.mamba</a></td>
<td>collators for Mamba</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.collators.mm_chat.html#axolotl.utils.collators.mm_chat">utils.collators.mm_chat</a></td>
<td>Collators for multi-modal chat messages and packing</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.samplers.multipack.html#axolotl.utils.samplers.multipack">utils.samplers.multipack</a></td>
<td>Multipack Batch Sampler</td>
</tr>
</tbody>
</table>
</section>
<section id="callbacks" class="level2">
<h2 class="anchored" data-anchor-id="callbacks">Callbacks</h2>
<p>Training callbacks</p>
<table class="caption-top table">
<tbody>
<tr class="odd">
<td><a href="../../docs/api/utils.callbacks.perplexity.html#axolotl.utils.callbacks.perplexity">utils.callbacks.perplexity</a></td>
<td>callback to calculate perplexity as an evaluation metric.</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.callbacks.profiler.html#axolotl.utils.callbacks.profiler">utils.callbacks.profiler</a></td>
<td>HF Trainer callback for creating pytorch profiling snapshots</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.callbacks.lisa.html#axolotl.utils.callbacks.lisa">utils.callbacks.lisa</a></td>
<td>module for LISA</td>
</tr>
<tr class="even">
<td><a href="../../docs/api/utils.callbacks.mlflow_.html#axolotl.utils.callbacks.mlflow_">utils.callbacks.mlflow_</a></td>
<td>MLFlow module for trainer callbacks</td>
</tr>
<tr class="odd">
<td><a href="../../docs/api/utils.callbacks.comet_.html#axolotl.utils.callbacks.comet_">utils.callbacks.comet_</a></td>
<td>Comet module for trainer callbacks</td>
</tr>
</tbody>
</table>
</section>
</section>
</main> <!-- /main -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
// Add hover anchor links to every element carrying the "anchored" class.
// The glyph is written as an explicit escape so it cannot be lost to tools
// that strip private-use characters (an empty icon renders no visible anchor).
// NOTE(review): U+E9CB is AnchorJS's documented default link glyph; the
// literal appeared empty in the reviewed copy — confirm against the generator.
const icon = "\uE9CB";
const anchorJS = new window.AnchorJS();
anchorJS.options = {
  placement: 'right',
  icon: icon
};
anchorJS.add('.anchored');
// True when any class on `el` begins with "code-annotation-".
const isCodeAnnotation = (el) => {
  return Array.from(el.classList).some((className) => className.startsWith('code-annotation-'));
}
// ClipboardJS "success" handler: briefly marks the copy button as done
// (checked class, "Copied!" title, optional Bootstrap tooltip) and puts
// everything back after one second.
const onCopySuccess = function(e) {
  const button = e.trigger;
  const tooltipAttrs = [
    ["data-bs-toggle", "tooltip"],
    ["data-bs-placement", "left"],
    ["data-bs-title", "Copied!"],
  ];

  // drop focus, then flash the "checked" state
  button.blur();
  button.classList.add('code-copy-button-checked');
  const previousTitle = button.getAttribute("title");
  button.setAttribute("title", "Copied!");

  // The tooltip is only shown when Bootstrap is available on the page.
  let tooltip;
  if (window.bootstrap) {
    tooltipAttrs.forEach(([name, value]) => button.setAttribute(name, value));
    tooltip = new bootstrap.Tooltip(button, {
      trigger: "manual",
      customClass: "code-copy-button-tooltip",
      offset: [0, -8],
    });
    tooltip.show();
  }

  const restoreButton = () => {
    if (tooltip) {
      tooltip.hide();
      for (const name of ["data-bs-title", "data-bs-toggle", "data-bs-placement"]) {
        button.removeAttribute(name);
      }
    }
    button.setAttribute("title", previousTitle);
    button.classList.remove('code-copy-button-checked');
  };
  setTimeout(restoreButton, 1000);

  // clear code selection
  e.clearSelection();
}
// Returns the text ClipboardJS should copy for a copy button: the inner text
// of the element immediately preceding `trigger`, with code-annotation child
// elements removed. Works on a deep clone, so the page itself is not touched.
const getTextToCopy = function(trigger) {
  const codeEl = trigger.previousElementSibling.cloneNode(true);
  // `children` is a live collection: removing a node while iterating it
  // directly makes the iterator skip the following sibling, so adjacent
  // annotation nodes would survive. Iterate over a static snapshot instead.
  for (const childEl of Array.from(codeEl.children)) {
    if (isCodeAnnotation(childEl)) {
      childEl.remove();
    }
  }
  return codeEl.innerText;
}
// Wire ClipboardJS to the page's copy buttons (those not inside the
// embedded-source modal).
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
  text: getTextToCopy
});
clipboard.on('success', onCopySuccess);

// When the embedded source-code modal exists, its copy buttons get their own
// ClipboardJS instance with the modal as container.
const embeddedSourceModal = window.document.getElementById('quarto-embedded-source-code-modal');
if (embeddedSourceModal) {
  const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
    text: getTextToCopy,
    container: embeddedSourceModal
  });
  clipboardModal.on('success', onCopySuccess);
}
// Patterns for hrefs that are treated as internal to the site.
// Regex literals are used so that escapes such as `\.` reach the regex engine;
// inside a string passed to `new RegExp(...)` the backslash is dropped, which
// turned every dot of the site URL into "match any character".
var localhostRegex = /^(?:http|https):\/\/localhost\:?[0-9]*\//;
var mailtoRegex = /^mailto:/;
var filterRegex = /https:\/\/docs\.axolotl\.ai/;
// Returns true when `href` points at the site itself, a localhost preview,
// or is a mailto: link.
var isInternal = (href) => {
  return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
}
// Walk every non-navigation link; for the external ones, restore the href
// that quarto-nav.js may have rewritten (kept in data-original-href).
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
for (const link of links) {
  if (isInternal(link.href)) {
    continue;
  }
  // undo the damage that might have been done by quarto-nav.js in the case of
  // links that we want to consider external
  if (link.dataset.originalHref !== undefined) {
    link.href = link.dataset.originalHref;
  }
}
// Attach a tippy tooltip to `el` with the shared Quarto settings.
// `contentFn`, `onTriggerFn` and `onUntriggerFn` are optional; each is only
// placed on the tippy config when a truthy value is supplied.
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  // The tooltip is appended to the parent of the element tippy passes in.
  const parentOf = (reference) => reference.parentElement;
  window.tippy(el, {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    appendTo: parentOf,
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
    ...(contentFn ? { content: contentFn } : {}),
    ...(onTriggerFn ? { onTrigger: onTriggerFn } : {}),
    ...(onUntriggerFn ? { onUntrigger: onUntriggerFn } : {}),
  });
}
// Footnote references: hovering shows the HTML of the referenced note.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
noterefs.forEach((ref) => {
  const footnoteHtml = () => {
    // use id or data attribute instead here
    let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    // Reduce an absolute URL to its fragment; anything URL() rejects is kept as is.
    try { href = new URL(href).hash; } catch {}
    const id = href.replace(/^#\/?/, "");
    const note = window.document.getElementById(id);
    return note ? note.innerHTML : "";
  };
  tippyHover(ref, footnoteHtml);
});
// All cross-reference links on the page (anchors with class "quarto-xref").
const xrefs = window.document.querySelectorAll('a.quarto-xref');
// Build the tooltip HTML for a cross-reference target.
//   id   - target element id, or null when the target is a whole page
//   note - the target element; it is modified in place (column classes are
//          stripped and, for non-section targets, the AnchorJS link removed)
// Section targets (null id or "sec-" prefix) with more than two children are
// reduced to the first child plus the first following child that is not an
// empty <p>; every other target returns its inner HTML (outer HTML for callouts).
const processXRef = (id, note) => {
  // Run Quarto's math typesetting hook on `el` when the page provides one.
  const typeset = (el) => {
    if (window.Quarto?.typesetMath) {
      window.Quarto.typesetMath(el);
    }
  };

  // Strip column container classes, recursively
  const stripColumnClz = (el) => {
    el.classList.remove("page-full", "page-columns");
    for (const child of el.children || []) {
      stripColumnClz(child);
    }
  };
  stripColumnClz(note);

  const isSection = id === null || id.startsWith('sec-');
  if (!isSection) {
    // Remove any anchor links if they are present
    const anchorLink = note.querySelector('a.anchorjs-link');
    if (anchorLink) {
      anchorLink.remove();
    }
    typeset(note);
    return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
  }

  // Special case sections, only their first couple elements
  const container = document.createElement("div");
  const kids = note.children;
  if (!(kids && kids.length > 2)) {
    typeset(note);
    return note.innerHTML;
  }

  container.appendChild(kids[0].cloneNode(true));
  const firstFilled = Array.from(kids)
    .slice(1)
    .find((child) => !(child.tagName === "P" && child.innerText === ""));
  if (firstFilled) {
    container.appendChild(firstFilled.cloneNode(true));
  }
  typeset(container);
  return container.innerHTML;
}
// Attach a lazily-filled hover tooltip to every cross-reference link.
// No content function is passed; the onTrigger callback disables the tippy
// instance, resolves the target's HTML (from this page, or by fetching the
// linked page), sets it as content, then re-enables and shows the instance.
for (var i=0; i<xrefs.length; i++) {
const xref = xrefs[i];
tippyHover(xref, undefined, function(instance) {
// Disabled here; each branch below re-enables and shows the instance.
instance.disable();
let url = xref.getAttribute('href');
let hash = undefined;
if (url.startsWith('#')) {
hash = url;
} else {
// If URL() rejects the href, the error is swallowed and hash stays undefined.
try { hash = new URL(url).hash; } catch {}
}
if (hash) {
// Drop the leading "#" (and an optional "/") to get the element id.
const id = hash.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
if (note !== null) {
try {
// processXRef receives a deep clone rather than the element in the page.
const html = processXRef(id, note.cloneNode(true));
instance.setContent(html);
} finally {
instance.enable();
instance.show();
}
} else {
// See if we can fetch this
fetch(url.split('#')[0])
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.getElementById(id);
if (note !== null) {
const html = processXRef(id, note);
instance.setContent(html);
}
}).finally(() => {
// Runs whether or not the fetch/parse succeeded.
instance.enable();
instance.show();
});
}
} else {
// See if we can fetch a full url (with no hash to target)
// This is a special case and we should probably do some content thinning / targeting
fetch(url)
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.querySelector('main.content');
if (note !== null) {
// This should only happen for chapter cross references
// (since there is no id in the URL)
// remove the first header
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
note.children[0].remove();
}
// A null id makes processXRef take its section branch.
const html = processXRef(null, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
}, function(instance) {
// onUntrigger: intentionally empty.
});
}
// The annotation <dt> whose code lines are currently highlighted, if any.
let selectedAnnoteEl;
// CSS selector for the <span> that carries both the given code-cell id and
// annotation number as data attributes.
const selectorForAnnotation = ( cell, annotation) => {
  return `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
}
// Highlight the code lines belonging to the annotation entry `annoteEl`.
// Reads the target cell/annotation from the entry's data attributes, looks up
// the matching annotation span to get its line list, and positions two
// absolutely-placed overlay divs (one over the code, one in the annotation
// gutter) spanning the first to the last listed line. The overlays are
// created on first use and reused afterwards. Records `annoteEl` as selected.
const selectCodeLines = (annoteEl) => {
  const doc = window.document;
  const targetCell = annoteEl.getAttribute("data-target-cell");
  const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
  const annoteSpan = doc.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
  const lineIds = annoteSpan
    .getAttribute("data-code-lines")
    .split(",")
    .map((line) => targetCell + "-" + line);
  if (lineIds.length === 0) {
    return;
  }

  // Vertical extent: from the top of the first line to the bottom of the last.
  const firstLine = doc.getElementById(lineIds[0]);
  const top = firstLine.offsetTop;
  let height = firstLine.offsetHeight;
  const parent = firstLine.parentElement.parentElement;
  if (lineIds.length > 1) {
    const lastLine = doc.getElementById(lineIds[lineIds.length - 1]);
    height = lastLine.offsetTop + lastLine.offsetHeight - top;
  }

  if (top !== null && height !== null && parent !== null) {
    // Fetch the overlay with the given id, creating it and attaching it to the
    // host returned by `hostFor` when it does not exist yet.
    const overlay = (id, hostFor) => {
      let box = doc.getElementById(id);
      if (box === null) {
        box = doc.createElement("div");
        box.setAttribute("id", id);
        box.style.position = 'absolute';
        hostFor().appendChild(box);
      }
      return box;
    };

    const div = overlay("code-annotation-line-highlight", () => parent);
    div.style.top = top - 2 + "px";
    div.style.height = height + 4 + "px";
    div.style.left = 0;

    const gutterDiv = overlay(
      "code-annotation-line-highlight-gutter",
      () => doc.getElementById(targetCell).querySelector('.code-annotation-gutter')
    );
    gutterDiv.style.top = top - 2 + "px";
    gutterDiv.style.height = height + 4 + "px";
  }
  selectedAnnoteEl = annoteEl;
};
// Remove both highlight overlays (if present) and clear the current selection.
const unselectCodeLines = () => {
  for (const overlayId of ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"]) {
    window.document.getElementById(overlayId)?.remove();
  }
  selectedAnnoteEl = undefined;
};
// Handle positioning of the toggle: on window resize (throttled), re-run the
// line selection so the highlight overlays follow the reflowed code.
window.addEventListener(
  "resize",
  throttle(() => {
    // Previously a bare `elRect = undefined`, i.e. an assignment to an
    // undeclared identifier (implicit global, ReferenceError in strict mode).
    // The write to the global is kept but made explicit.
    // NOTE(review): nothing in this script reads elRect — confirm whether the
    // reset can be dropped altogether.
    window.elRect = undefined;
    if (selectedAnnoteEl) {
      selectCodeLines(selectedAnnoteEl);
    }
  }, 10)
);
// Wrap `fn` so that the first call runs immediately and later calls are
// coalesced: each one replaces the pending timer, and only the last runs,
// `ms` milliseconds after it was made. Once that trailing call has run, the
// next call goes straight through again.
function throttle(fn, ms) {
  let engaged = false;
  let pending;
  return (...args) => {
    if (!engaged) {
      // first call gets through
      fn.apply(this, args);
      engaged = true;
      return;
    }
    // all the others get throttled: replace whatever was scheduled before
    if (pending) {
      clearTimeout(pending);
    }
    pending = setTimeout(() => {
      fn.apply(this, args);
      engaged = false;
      pending = false;
    }, ms);
  };
}
// Attach click handler to the DT: clicking an annotation entry highlights its
// code lines; clicking the already-selected entry clears the highlight.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) {
  annoteDlNode.addEventListener('click', () => {
    // Use the <dt> the listener is bound to rather than event.target: when the
    // click lands on a descendant, event.target is that descendant, which has
    // no data-target-* attributes and would break the line lookup.
    const clickedEl = annoteDlNode;
    if (clickedEl !== selectedAnnoteEl) {
      unselectCodeLines();
      const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
      if (activeEl) {
        activeEl.classList.remove('code-annotation-active');
      }
      selectCodeLines(clickedEl);
      clickedEl.classList.add('code-annotation-active');
    } else {
      // Unselect the line
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
    }
  });
}
// Walk up from `el` until an element whose parent carries a non-empty
// data-cites attribute is found. Returns that element together with the
// space-separated cite keys as an array, or undefined when no ancestor has one.
const findCites = (el) => {
  let current = el;
  while (current.parentElement) {
    const cites = current.parentElement.dataset.cites;
    if (cites) {
      return {
        el: current,
        cites: cites.split(' ')
      };
    }
    current = current.parentElement;
  }
  return undefined;
};
// Citation links: hovering shows the bibliography entries for every cite key
// found on the nearest ancestor with a data-cites attribute.
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (const ref of bibliorefs) {
  const citeInfo = findCites(ref);
  if (!citeInfo) {
    continue;
  }
  tippyHover(citeInfo.el, function() {
    const popup = window.document.createElement('div');
    for (const cite of citeInfo.cites) {
      const entry = window.document.createElement('div');
      entry.classList.add('hanging-indent', 'csl-entry');
      // Copy the entry from the page's reference list; stays empty if missing.
      const source = window.document.getElementById('ref-' + cite);
      if (source) {
        entry.innerHTML = source.innerHTML;
      }
      popup.appendChild(entry);
    }
    return popup.innerHTML;
  });
}
});
</script>
</div> <!-- /content -->
</body></html>