From 2fa856633397ee80b59402f4d86b9e8a55306969 Mon Sep 17 00:00:00 2001 From: Quarto GHA Workflow Runner Date: Tue, 17 Jun 2025 16:11:55 +0000 Subject: [PATCH] Built site for gh-pages --- .nojekyll | 2 +- FAQS.html | 4 +- TODO.html | 4 +- docs/amd_hpc.html | 4 +- docs/api/cli.args.html | 4 +- docs/api/cli.checks.html | 4 +- docs/api/cli.cloud.base.html | 4 +- docs/api/cli.cloud.modal_.html | 4 +- docs/api/cli.config.html | 4 +- docs/api/cli.evaluate.html | 4 +- docs/api/cli.inference.html | 4 +- docs/api/cli.main.html | 4 +- docs/api/cli.merge_lora.html | 4 +- docs/api/cli.merge_sharded_fsdp_weights.html | 4 +- docs/api/cli.preprocess.html | 4 +- docs/api/cli.quantize.html | 4 +- docs/api/cli.sweeps.html | 4 +- docs/api/cli.train.html | 4 +- docs/api/cli.utils.html | 4 +- docs/api/cli.vllm_serve.html | 4 +- docs/api/common.architectures.html | 4 +- docs/api/common.const.html | 4 +- docs/api/common.datasets.html | 4 +- docs/api/convert.html | 4 +- docs/api/core.builders.base.html | 4 +- docs/api/core.builders.causal.html | 4 +- docs/api/core.builders.rl.html | 4 +- docs/api/core.chat.format.chatml.html | 4 +- docs/api/core.chat.format.llama3x.html | 4 +- docs/api/core.chat.format.shared.html | 4 +- docs/api/core.chat.messages.html | 4 +- docs/api/core.datasets.chat.html | 4 +- ...core.datasets.transforms.chat_builder.html | 4 +- docs/api/core.trainers.base.html | 4 +- docs/api/core.trainers.dpo.trainer.html | 4 +- docs/api/core.trainers.grpo.sampler.html | 4 +- docs/api/core.trainers.grpo.trainer.html | 4 +- docs/api/core.trainers.mamba.html | 4 +- docs/api/core.trainers.mixins.optimizer.html | 4 +- ...core.trainers.mixins.rng_state_loader.html | 4 +- docs/api/core.trainers.mixins.scheduler.html | 4 +- docs/api/core.trainers.relora.html | 4 +- docs/api/core.trainers.trl.html | 4 +- docs/api/core.trainers.utils.html | 4 +- docs/api/core.training_args.html | 363 +-------- docs/api/datasets.html | 4 +- docs/api/evaluate.html | 4 +- docs/api/index.html | 4 +- docs/api/integrations.base.html | 695 +++++++++++------- .../integrations.cut_cross_entropy.args.html | 4 +- docs/api/integrations.grokfast.optimizer.html | 4 +- docs/api/integrations.kd.trainer.html | 12 +- docs/api/integrations.liger.args.html | 4 +- docs/api/integrations.lm_eval.args.html | 4 +- docs/api/integrations.spectrum.args.html | 4 +- docs/api/kernels.geglu.html | 4 +- docs/api/kernels.lora.html | 4 +- docs/api/kernels.quantize.html | 4 +- docs/api/kernels.swiglu.html | 4 +- docs/api/kernels.utils.html | 4 +- docs/api/loaders.adapter.html | 4 +- docs/api/loaders.constants.html | 4 +- docs/api/loaders.model.html | 4 +- docs/api/loaders.patch_manager.html | 4 +- docs/api/loaders.processor.html | 4 +- docs/api/loaders.tokenizer.html | 4 +- docs/api/logging_config.html | 4 +- docs/api/models.mamba.modeling_mamba.html | 4 +- .../monkeypatch.btlm_attn_hijack_flash.html | 4 +- ...onkeypatch.data.batch_dataset_fetcher.html | 4 +- ...ch.gradient_checkpointing.offload_cpu.html | 4 +- ...h.gradient_checkpointing.offload_disk.html | 4 +- .../monkeypatch.llama_attn_hijack_flash.html | 4 +- ...onkeypatch.llama_attn_hijack_xformers.html | 4 +- docs/api/monkeypatch.llama_expand_mask.html | 4 +- .../monkeypatch.llama_patch_multipack.html | 4 +- docs/api/monkeypatch.lora_kernels.html | 4 +- ...monkeypatch.mistral_attn_hijack_flash.html | 4 +- docs/api/monkeypatch.mixtral.html | 4 +- docs/api/monkeypatch.multipack.html | 4 +- docs/api/monkeypatch.relora.html | 4 +- ...onkeypatch.stablelm_attn_hijack_flash.html | 4 +- 
docs/api/monkeypatch.trainer_fsdp_optim.html | 4 +- .../monkeypatch.transformers_fa_utils.html | 4 +- docs/api/monkeypatch.unsloth_.html | 4 +- docs/api/monkeypatch.utils.html | 4 +- docs/api/prompt_strategies.alpaca_chat.html | 4 +- .../prompt_strategies.alpaca_instruct.html | 4 +- .../prompt_strategies.alpaca_w_system.html | 4 +- docs/api/prompt_strategies.base.html | 4 +- ...rompt_strategies.bradley_terry.llama3.html | 4 +- docs/api/prompt_strategies.chat_template.html | 4 +- docs/api/prompt_strategies.completion.html | 4 +- .../prompt_strategies.dpo.chat_template.html | 4 +- docs/api/prompt_strategies.dpo.chatml.html | 4 +- docs/api/prompt_strategies.dpo.llama3.html | 4 +- .../prompt_strategies.dpo.passthrough.html | 4 +- .../prompt_strategies.dpo.user_defined.html | 4 +- docs/api/prompt_strategies.dpo.zephyr.html | 4 +- docs/api/prompt_strategies.input_output.html | 4 +- docs/api/prompt_strategies.kto.chatml.html | 4 +- docs/api/prompt_strategies.kto.llama3.html | 4 +- .../prompt_strategies.kto.user_defined.html | 4 +- docs/api/prompt_strategies.llama2_chat.html | 4 +- docs/api/prompt_strategies.messages.chat.html | 4 +- docs/api/prompt_strategies.metharme.html | 4 +- docs/api/prompt_strategies.orcamini.html | 4 +- .../prompt_strategies.orpo.chat_template.html | 4 +- docs/api/prompt_strategies.pygmalion.html | 4 +- ...prompt_strategies.stepwise_supervised.html | 4 +- docs/api/prompt_strategies.user_defined.html | 4 +- docs/api/prompt_tokenizers.html | 4 +- docs/api/train.html | 10 +- docs/api/utils.bench.html | 4 +- docs/api/utils.callbacks.comet_.html | 4 +- docs/api/utils.callbacks.lisa.html | 4 +- docs/api/utils.callbacks.mlflow_.html | 4 +- docs/api/utils.callbacks.perplexity.html | 4 +- docs/api/utils.callbacks.profiler.html | 4 +- docs/api/utils.callbacks.qat.html | 4 +- docs/api/utils.chat_templates.html | 4 +- docs/api/utils.collators.batching.html | 4 +- docs/api/utils.collators.core.html | 4 +- docs/api/utils.collators.mamba.html | 4 +- docs/api/utils.collators.mm_chat.html | 4 +- .../utils.ctx_managers.sequence_parallel.html | 4 +- docs/api/utils.data.pretraining.html | 4 +- docs/api/utils.data.sft.html | 4 +- docs/api/utils.dict.html | 4 +- docs/api/utils.distributed.html | 4 +- docs/api/utils.freeze.html | 4 +- docs/api/utils.lora.html | 4 +- docs/api/utils.model_shard_quant.html | 4 +- docs/api/utils.optimizers.adopt.html | 4 +- docs/api/utils.quantization.html | 4 +- docs/api/utils.samplers.multipack.html | 6 +- docs/api/utils.schedulers.html | 4 +- docs/api/utils.schemas.config.html | 4 +- docs/api/utils.schemas.datasets.html | 4 +- docs/api/utils.schemas.enums.html | 4 +- docs/api/utils.schemas.integrations.html | 4 +- docs/api/utils.schemas.model.html | 4 +- docs/api/utils.schemas.multimodal.html | 4 +- docs/api/utils.schemas.peft.html | 4 +- docs/api/utils.schemas.training.html | 4 +- docs/api/utils.schemas.trl.html | 4 +- docs/api/utils.schemas.utils.html | 4 +- docs/api/utils.tokenization.html | 4 +- docs/api/utils.trainer.html | 4 +- docs/batch_vs_grad.html | 4 +- docs/cli.html | 4 +- docs/config.html | 4 +- docs/custom_integrations.html | 4 +- docs/dataset-formats/conversation.html | 4 +- docs/dataset-formats/index.html | 4 +- docs/dataset-formats/inst_tune.html | 4 +- docs/dataset-formats/pretraining.html | 4 +- docs/dataset-formats/stepwise_supervised.html | 4 +- docs/dataset-formats/template_free.html | 4 +- docs/dataset-formats/tokenized.html | 4 +- docs/dataset_loading.html | 4 +- docs/dataset_preprocessing.html | 4 +- docs/debugging.html | 4 +- 
docs/docker.html | 4 +- docs/faq.html | 4 +- docs/fsdp_qlora.html | 4 +- docs/getting-started.html | 4 +- docs/inference.html | 4 +- docs/input_output.html | 4 +- docs/installation.html | 4 +- docs/lora_optims.html | 4 +- docs/lr_groups.html | 4 +- docs/mac.html | 4 +- docs/multi-gpu.html | 4 +- docs/multi-node.html | 4 +- docs/multimodal.html | 4 +- docs/multipack.html | 4 +- docs/nccl.html | 4 +- docs/qat.html | 4 +- docs/quantize.html | 4 +- docs/ray-integration.html | 4 +- docs/reward_modelling.html | 4 +- docs/rlhf.html | 4 +- docs/sequence_parallelism.html | 4 +- docs/torchao.html | 4 +- docs/unsloth.html | 4 +- .../colab-axolotl-example.html | 4 +- index.html | 4 +- search.json | 20 +- ...dark-2fef5ea3f8957b3e4ecc936fc74692ca.css} | 2 +- sitemap.xml | 378 +++++----- src/axolotl/integrations/LICENSE.html | 4 +- .../cut_cross_entropy/ACKNOWLEDGEMENTS.html | 4 +- 193 files changed, 1017 insertions(+), 1207 deletions(-) rename site_libs/quarto-html/{quarto-syntax-highlighting-dark-8ef56b68f8fa1e9d2ba328e99e439f80.css => quarto-syntax-highlighting-dark-2fef5ea3f8957b3e4ecc936fc74692ca.css} (98%) diff --git a/.nojekyll b/.nojekyll index 8fc3cab38..b505fa798 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -ce7842d3 \ No newline at end of file +e3a927da \ No newline at end of file diff --git a/FAQS.html b/FAQS.html index f830a41e7..b8e57147a 100644 --- a/FAQS.html +++ b/FAQS.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/TODO.html b/TODO.html index fe087edb8..043afc751 100644 --- a/TODO.html +++ b/TODO.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/amd_hpc.html b/docs/amd_hpc.html index 43f28314c..6729e593b 100644 --- a/docs/amd_hpc.html +++ b/docs/amd_hpc.html @@ -2,7 +2,7 @@ - + @@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.args.html b/docs/api/cli.args.html index 5c7ccf150..45326f855 100644 --- a/docs/api/cli.args.html +++ b/docs/api/cli.args.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.checks.html b/docs/api/cli.checks.html index 3a399516b..81b7a2db1 100644 --- a/docs/api/cli.checks.html +++ b/docs/api/cli.checks.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.cloud.base.html b/docs/api/cli.cloud.base.html index d84d7361c..8c7084221 100644 --- a/docs/api/cli.cloud.base.html +++ b/docs/api/cli.cloud.base.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.cloud.modal_.html b/docs/api/cli.cloud.modal_.html index ef7344cde..70ada4697 100644 --- a/docs/api/cli.cloud.modal_.html +++ b/docs/api/cli.cloud.modal_.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.config.html b/docs/api/cli.config.html index 699a9c317..a99253e7e 100644 --- a/docs/api/cli.config.html +++ b/docs/api/cli.config.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.evaluate.html b/docs/api/cli.evaluate.html index bd7172685..4bcf4396f 100644 --- a/docs/api/cli.evaluate.html +++ b/docs/api/cli.evaluate.html @@ 
-2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.inference.html b/docs/api/cli.inference.html index 179b702fd..a2e44c3a9 100644 --- a/docs/api/cli.inference.html +++ b/docs/api/cli.inference.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.main.html b/docs/api/cli.main.html index ed8aaab61..ad902b8de 100644 --- a/docs/api/cli.main.html +++ b/docs/api/cli.main.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.merge_lora.html b/docs/api/cli.merge_lora.html index 2db0ca75e..64d3e26c8 100644 --- a/docs/api/cli.merge_lora.html +++ b/docs/api/cli.merge_lora.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.merge_sharded_fsdp_weights.html b/docs/api/cli.merge_sharded_fsdp_weights.html index 259201127..e30111985 100644 --- a/docs/api/cli.merge_sharded_fsdp_weights.html +++ b/docs/api/cli.merge_sharded_fsdp_weights.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.preprocess.html b/docs/api/cli.preprocess.html index 7be534fa3..ea78d1c3a 100644 --- a/docs/api/cli.preprocess.html +++ b/docs/api/cli.preprocess.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.quantize.html b/docs/api/cli.quantize.html index 19d629c57..ece804f0e 100644 --- a/docs/api/cli.quantize.html +++ b/docs/api/cli.quantize.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.sweeps.html b/docs/api/cli.sweeps.html index 85006fc13..7774d319f 100644 --- a/docs/api/cli.sweeps.html +++ b/docs/api/cli.sweeps.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.train.html b/docs/api/cli.train.html index 9aeca8838..696c29151 100644 --- a/docs/api/cli.train.html +++ b/docs/api/cli.train.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.utils.html b/docs/api/cli.utils.html index 8e465ee35..a1c20b2e0 100644 --- a/docs/api/cli.utils.html +++ b/docs/api/cli.utils.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.vllm_serve.html b/docs/api/cli.vllm_serve.html index 390efa71c..9b4756035 100644 --- a/docs/api/cli.vllm_serve.html +++ b/docs/api/cli.vllm_serve.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/common.architectures.html b/docs/api/common.architectures.html index 16f5c5968..2387cbdce 100644 --- a/docs/api/common.architectures.html +++ b/docs/api/common.architectures.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/common.const.html b/docs/api/common.const.html index abce5b674..26823cc80 100644 --- a/docs/api/common.const.html +++ b/docs/api/common.const.html @@ -2,7 
+2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/common.datasets.html b/docs/api/common.datasets.html index fcea0706f..f7348888d 100644 --- a/docs/api/common.datasets.html +++ b/docs/api/common.datasets.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/convert.html b/docs/api/convert.html index e12228a5b..3085bb207 100644 --- a/docs/api/convert.html +++ b/docs/api/convert.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.builders.base.html b/docs/api/core.builders.base.html index e8e66d364..887ef2037 100644 --- a/docs/api/core.builders.base.html +++ b/docs/api/core.builders.base.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.builders.causal.html b/docs/api/core.builders.causal.html index 2ed028e94..943afe52d 100644 --- a/docs/api/core.builders.causal.html +++ b/docs/api/core.builders.causal.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.builders.rl.html b/docs/api/core.builders.rl.html index 0057eae22..a4908556c 100644 --- a/docs/api/core.builders.rl.html +++ b/docs/api/core.builders.rl.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.chat.format.chatml.html b/docs/api/core.chat.format.chatml.html index 611ecd3da..3adbc037a 100644 --- a/docs/api/core.chat.format.chatml.html +++ b/docs/api/core.chat.format.chatml.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/core.chat.format.llama3x.html b/docs/api/core.chat.format.llama3x.html index 96dbb7a21..57a26f889 100644 --- a/docs/api/core.chat.format.llama3x.html +++ b/docs/api/core.chat.format.llama3x.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/core.chat.format.shared.html b/docs/api/core.chat.format.shared.html index b2853426b..bf92033e0 100644 --- a/docs/api/core.chat.format.shared.html +++ b/docs/api/core.chat.format.shared.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/core.chat.messages.html b/docs/api/core.chat.messages.html index 41c1796c3..e5fda6d2e 100644 --- a/docs/api/core.chat.messages.html +++ b/docs/api/core.chat.messages.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.datasets.chat.html b/docs/api/core.datasets.chat.html index e5adfdb9f..2a7bf4af9 100644 --- a/docs/api/core.datasets.chat.html +++ b/docs/api/core.datasets.chat.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.datasets.transforms.chat_builder.html b/docs/api/core.datasets.transforms.chat_builder.html index 938c416b8..0f5e8297e 100644 --- a/docs/api/core.datasets.transforms.chat_builder.html +++ b/docs/api/core.datasets.transforms.chat_builder.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git 
a/docs/api/core.trainers.base.html b/docs/api/core.trainers.base.html index d89770446..541c9fa51 100644 --- a/docs/api/core.trainers.base.html +++ b/docs/api/core.trainers.base.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.dpo.trainer.html b/docs/api/core.trainers.dpo.trainer.html index 1c6925099..514549c33 100644 --- a/docs/api/core.trainers.dpo.trainer.html +++ b/docs/api/core.trainers.dpo.trainer.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.grpo.sampler.html b/docs/api/core.trainers.grpo.sampler.html index c6f989026..103dc7b9b 100644 --- a/docs/api/core.trainers.grpo.sampler.html +++ b/docs/api/core.trainers.grpo.sampler.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.grpo.trainer.html b/docs/api/core.trainers.grpo.trainer.html index 38949c3ec..aa0bcecfc 100644 --- a/docs/api/core.trainers.grpo.trainer.html +++ b/docs/api/core.trainers.grpo.trainer.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.mamba.html b/docs/api/core.trainers.mamba.html index a6a9393d3..13a503d08 100644 --- a/docs/api/core.trainers.mamba.html +++ b/docs/api/core.trainers.mamba.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.mixins.optimizer.html b/docs/api/core.trainers.mixins.optimizer.html index 5a1e2d1e8..efdcaa905 100644 --- a/docs/api/core.trainers.mixins.optimizer.html +++ b/docs/api/core.trainers.mixins.optimizer.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.mixins.rng_state_loader.html b/docs/api/core.trainers.mixins.rng_state_loader.html index 12a6edceb..8e754b6ec 100644 --- a/docs/api/core.trainers.mixins.rng_state_loader.html +++ b/docs/api/core.trainers.mixins.rng_state_loader.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.mixins.scheduler.html b/docs/api/core.trainers.mixins.scheduler.html index 70fc85389..64b74ae7b 100644 --- a/docs/api/core.trainers.mixins.scheduler.html +++ b/docs/api/core.trainers.mixins.scheduler.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.relora.html b/docs/api/core.trainers.relora.html index 8ca9b12b2..bad4b4c7a 100644 --- a/docs/api/core.trainers.relora.html +++ b/docs/api/core.trainers.relora.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.trl.html b/docs/api/core.trainers.trl.html index 2f6145b8e..6ca87adf9 100644 --- a/docs/api/core.trainers.trl.html +++ b/docs/api/core.trainers.trl.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.utils.html b/docs/api/core.trainers.utils.html index e58ef1e3b..fbf337991 100644 --- 
a/docs/api/core.trainers.utils.html +++ b/docs/api/core.trainers.utils.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/core.training_args.html b/docs/api/core.training_args.html index f35ea760a..e2c507b72 100644 --- a/docs/api/core.training_args.html +++ b/docs/api/core.training_args.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + @@ -477,7 +477,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
  • AxolotlPRMConfig
  • AxolotlRewardConfig
  • AxolotlTrainingArguments
  • -
  • AxolotlTrainingMixins
  • @@ -527,385 +526,39 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); AxolotlTrainingArguments Training arguments for Causal trainer - -AxolotlTrainingMixins -Mixin class for the Axolotl training args. -

    AxolotlCPOConfig

    -
    core.training_args.AxolotlCPOConfig(
    -    model_type=None,
    -    lr_quadratic_warmup=False,
    -    pretraining=False,
    -    sample_packing=False,
    -    sample_packing_sequentially=False,
    -    multipack_real_batches=False,
    -    eval_sample_packing=None,
    -    sample_packing_efficiency=1.0,
    -    sample_packing_bin_size=200,
    -    sample_packing_group_size=100000,
    -    max_seq_length=2048,
    -    dataset_num_proc=None,
    -    relora_steps=None,
    -    relora_warmup_steps=None,
    -    relora_anneal_steps=None,
    -    relora_prune_ratio=0.9,
    -    bench_split='eval',
    -    bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
    -    do_bench_eval=False,
    -    do_causal_lm_eval=False,
    -    max_bench_samples=None,
    -    bench_source_max_len=2048,
    -    dataloader_prefetch_factor=None,
    -    cosine_min_lr_ratio=None,
    -    cosine_constant_lr_ratio=None,
    -    loraplus_lr_ratio=None,
    -    loraplus_lr_embedding=1e-06,
    -    embedding_lr_scale=None,
    -    lr_groups=None,
    -    embedding_lr=None,
    -    qlora=False,
    -    orpo_alpha=None,
    -    lisa_n_layers=None,
    -    lisa_step_interval=None,
    -    lisa_layers_attribute=None,
    -    curriculum_sampling=None,
    -    alternate_lr_scheduler_type=None,
    -    chat_template=None,
    -    kd_ce_alpha=None,
    -    kd_alpha=1.0,
    -    kd_temperature=1.0,
    -    kd_zscore_base_temp=None,
    -    kd_top_k_before_softmax=None,
    -    adam_beta3=None,
    -    adam_epsilon2=None,
    -    image_size=None,
    -    image_resize_algorithm=None,
    -    simpo_gamma=None,
    -)
    +
    core.training_args.AxolotlCPOConfig(simpo_gamma=None)

    CPO config for CPO training

    AxolotlKTOConfig

    -
    core.training_args.AxolotlKTOConfig(
    -    model_type=None,
    -    lr_quadratic_warmup=False,
    -    pretraining=False,
    -    sample_packing=False,
    -    sample_packing_sequentially=False,
    -    multipack_real_batches=False,
    -    eval_sample_packing=None,
    -    sample_packing_efficiency=1.0,
    -    sample_packing_bin_size=200,
    -    sample_packing_group_size=100000,
    -    max_seq_length=2048,
    -    dataset_num_proc=None,
    -    relora_steps=None,
    -    relora_warmup_steps=None,
    -    relora_anneal_steps=None,
    -    relora_prune_ratio=0.9,
    -    bench_split='eval',
    -    bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
    -    do_bench_eval=False,
    -    do_causal_lm_eval=False,
    -    max_bench_samples=None,
    -    bench_source_max_len=2048,
    -    dataloader_prefetch_factor=None,
    -    cosine_min_lr_ratio=None,
    -    cosine_constant_lr_ratio=None,
    -    loraplus_lr_ratio=None,
    -    loraplus_lr_embedding=1e-06,
    -    embedding_lr_scale=None,
    -    lr_groups=None,
    -    embedding_lr=None,
    -    qlora=False,
    -    orpo_alpha=None,
    -    lisa_n_layers=None,
    -    lisa_step_interval=None,
    -    lisa_layers_attribute=None,
    -    curriculum_sampling=None,
    -    alternate_lr_scheduler_type=None,
    -    chat_template=None,
    -    kd_ce_alpha=None,
    -    kd_alpha=1.0,
    -    kd_temperature=1.0,
    -    kd_zscore_base_temp=None,
    -    kd_top_k_before_softmax=None,
    -    adam_beta3=None,
    -    adam_epsilon2=None,
    -    image_size=None,
    -    image_resize_algorithm=None,
    -)
    +
    core.training_args.AxolotlKTOConfig()

    KTO config for KTO training

    AxolotlORPOConfig

    -
    core.training_args.AxolotlORPOConfig(
    -    model_type=None,
    -    lr_quadratic_warmup=False,
    -    pretraining=False,
    -    sample_packing=False,
    -    sample_packing_sequentially=False,
    -    multipack_real_batches=False,
    -    eval_sample_packing=None,
    -    sample_packing_efficiency=1.0,
    -    sample_packing_bin_size=200,
    -    sample_packing_group_size=100000,
    -    max_seq_length=2048,
    -    dataset_num_proc=None,
    -    relora_steps=None,
    -    relora_warmup_steps=None,
    -    relora_anneal_steps=None,
    -    relora_prune_ratio=0.9,
    -    bench_split='eval',
    -    bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
    -    do_bench_eval=False,
    -    do_causal_lm_eval=False,
    -    max_bench_samples=None,
    -    bench_source_max_len=2048,
    -    dataloader_prefetch_factor=None,
    -    cosine_min_lr_ratio=None,
    -    cosine_constant_lr_ratio=None,
    -    loraplus_lr_ratio=None,
    -    loraplus_lr_embedding=1e-06,
    -    embedding_lr_scale=None,
    -    lr_groups=None,
    -    embedding_lr=None,
    -    qlora=False,
    -    orpo_alpha=None,
    -    lisa_n_layers=None,
    -    lisa_step_interval=None,
    -    lisa_layers_attribute=None,
    -    curriculum_sampling=None,
    -    alternate_lr_scheduler_type=None,
    -    chat_template=None,
    -    kd_ce_alpha=None,
    -    kd_alpha=1.0,
    -    kd_temperature=1.0,
    -    kd_zscore_base_temp=None,
    -    kd_top_k_before_softmax=None,
    -    adam_beta3=None,
    -    adam_epsilon2=None,
    -    image_size=None,
    -    image_resize_algorithm=None,
    -)
    +
    core.training_args.AxolotlORPOConfig()

    ORPO config for ORPO training

    AxolotlPRMConfig

    -
    core.training_args.AxolotlPRMConfig(
    -    model_type=None,
    -    lr_quadratic_warmup=False,
    -    pretraining=False,
    -    sample_packing=False,
    -    sample_packing_sequentially=False,
    -    multipack_real_batches=False,
    -    eval_sample_packing=None,
    -    sample_packing_efficiency=1.0,
    -    sample_packing_bin_size=200,
    -    sample_packing_group_size=100000,
    -    max_seq_length=2048,
    -    dataset_num_proc=None,
    -    relora_steps=None,
    -    relora_warmup_steps=None,
    -    relora_anneal_steps=None,
    -    relora_prune_ratio=0.9,
    -    bench_split='eval',
    -    bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
    -    do_bench_eval=False,
    -    do_causal_lm_eval=False,
    -    max_bench_samples=None,
    -    bench_source_max_len=2048,
    -    dataloader_prefetch_factor=None,
    -    cosine_min_lr_ratio=None,
    -    cosine_constant_lr_ratio=None,
    -    loraplus_lr_ratio=None,
    -    loraplus_lr_embedding=1e-06,
    -    embedding_lr_scale=None,
    -    lr_groups=None,
    -    embedding_lr=None,
    -    qlora=False,
    -    orpo_alpha=None,
    -    lisa_n_layers=None,
    -    lisa_step_interval=None,
    -    lisa_layers_attribute=None,
    -    curriculum_sampling=None,
    -    alternate_lr_scheduler_type=None,
    -    chat_template=None,
    -    kd_ce_alpha=None,
    -    kd_alpha=1.0,
    -    kd_temperature=1.0,
    -    kd_zscore_base_temp=None,
    -    kd_top_k_before_softmax=None,
    -    adam_beta3=None,
    -    adam_epsilon2=None,
    -    image_size=None,
    -    image_resize_algorithm=None,
    -)
    +
    core.training_args.AxolotlPRMConfig()

    PRM config for PRM training

    AxolotlRewardConfig

    -
    core.training_args.AxolotlRewardConfig(
    -    model_type=None,
    -    lr_quadratic_warmup=False,
    -    pretraining=False,
    -    sample_packing=False,
    -    sample_packing_sequentially=False,
    -    multipack_real_batches=False,
    -    eval_sample_packing=None,
    -    sample_packing_efficiency=1.0,
    -    sample_packing_bin_size=200,
    -    sample_packing_group_size=100000,
    -    max_seq_length=2048,
    -    dataset_num_proc=None,
    -    relora_steps=None,
    -    relora_warmup_steps=None,
    -    relora_anneal_steps=None,
    -    relora_prune_ratio=0.9,
    -    bench_split='eval',
    -    bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
    -    do_bench_eval=False,
    -    do_causal_lm_eval=False,
    -    max_bench_samples=None,
    -    bench_source_max_len=2048,
    -    dataloader_prefetch_factor=None,
    -    cosine_min_lr_ratio=None,
    -    cosine_constant_lr_ratio=None,
    -    loraplus_lr_ratio=None,
    -    loraplus_lr_embedding=1e-06,
    -    embedding_lr_scale=None,
    -    lr_groups=None,
    -    embedding_lr=None,
    -    qlora=False,
    -    orpo_alpha=None,
    -    lisa_n_layers=None,
    -    lisa_step_interval=None,
    -    lisa_layers_attribute=None,
    -    curriculum_sampling=None,
    -    alternate_lr_scheduler_type=None,
    -    chat_template=None,
    -    kd_ce_alpha=None,
    -    kd_alpha=1.0,
    -    kd_temperature=1.0,
    -    kd_zscore_base_temp=None,
    -    kd_top_k_before_softmax=None,
    -    adam_beta3=None,
    -    adam_epsilon2=None,
    -    image_size=None,
    -    image_resize_algorithm=None,
    -)
    +
    core.training_args.AxolotlRewardConfig()

    Reward config for Reward training

    AxolotlTrainingArguments

    -
    core.training_args.AxolotlTrainingArguments(
    -    model_type=None,
    -    lr_quadratic_warmup=False,
    -    pretraining=False,
    -    sample_packing=False,
    -    sample_packing_sequentially=False,
    -    multipack_real_batches=False,
    -    eval_sample_packing=None,
    -    sample_packing_efficiency=1.0,
    -    sample_packing_bin_size=200,
    -    sample_packing_group_size=100000,
    -    max_seq_length=2048,
    -    dataset_num_proc=None,
    -    relora_steps=None,
    -    relora_warmup_steps=None,
    -    relora_anneal_steps=None,
    -    relora_prune_ratio=0.9,
    -    bench_split='eval',
    -    bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
    -    do_bench_eval=False,
    -    do_causal_lm_eval=False,
    -    max_bench_samples=None,
    -    bench_source_max_len=2048,
    -    dataloader_prefetch_factor=None,
    -    cosine_min_lr_ratio=None,
    -    cosine_constant_lr_ratio=None,
    -    loraplus_lr_ratio=None,
    -    loraplus_lr_embedding=1e-06,
    -    embedding_lr_scale=None,
    -    lr_groups=None,
    -    embedding_lr=None,
    -    qlora=False,
    -    orpo_alpha=None,
    -    lisa_n_layers=None,
    -    lisa_step_interval=None,
    -    lisa_layers_attribute=None,
    -    curriculum_sampling=None,
    -    alternate_lr_scheduler_type=None,
    -    chat_template=None,
    -    kd_ce_alpha=None,
    -    kd_alpha=1.0,
    -    kd_temperature=1.0,
    -    kd_zscore_base_temp=None,
    -    kd_top_k_before_softmax=None,
    -    adam_beta3=None,
    -    adam_epsilon2=None,
    -    image_size=None,
    -    image_resize_algorithm=None,
    -)
    +
    core.training_args.AxolotlTrainingArguments()

    Training arguments for Causal trainer

    This code is duplicated due to HF TrainingArguments not setting output_dir with a default value so it can’t be used as a mixin.
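As a rough illustration of why the duplication is needed — a minimal sketch of the underlying dataclass rule, using generic stand-in classes rather than the actual axolotl/transformers hierarchy: a required field with no default (like output_dir on HF TrainingArguments) cannot follow inherited fields that do have defaults.

    from dataclasses import dataclass

    @dataclass
    class Mixin:
        extra_flag: bool = False  # plugin-style field with a default

    try:
        @dataclass
        class Args(Mixin):
            output_dir: str  # required, no default -- mirrors HF TrainingArguments
    except TypeError as err:
        # raises: non-default argument 'output_dir' follows default argument
        print(err)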

    -
    -
    -

    AxolotlTrainingMixins

    -
    core.training_args.AxolotlTrainingMixins(
    -    model_type=None,
    -    lr_quadratic_warmup=False,
    -    pretraining=False,
    -    sample_packing=False,
    -    sample_packing_sequentially=False,
    -    multipack_real_batches=False,
    -    eval_sample_packing=None,
    -    sample_packing_efficiency=1.0,
    -    sample_packing_bin_size=200,
    -    sample_packing_group_size=100000,
    -    max_seq_length=2048,
    -    dataset_num_proc=None,
    -    relora_steps=None,
    -    relora_warmup_steps=None,
    -    relora_anneal_steps=None,
    -    relora_prune_ratio=0.9,
    -    bench_split='eval',
    -    bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
    -    do_bench_eval=False,
    -    do_causal_lm_eval=False,
    -    max_bench_samples=None,
    -    bench_source_max_len=2048,
    -    dataloader_prefetch_factor=None,
    -    cosine_min_lr_ratio=None,
    -    cosine_constant_lr_ratio=None,
    -    loraplus_lr_ratio=None,
    -    loraplus_lr_embedding=1e-06,
    -    embedding_lr_scale=None,
    -    lr_groups=None,
    -    embedding_lr=None,
    -    qlora=False,
    -    orpo_alpha=None,
    -    lisa_n_layers=None,
    -    lisa_step_interval=None,
    -    lisa_layers_attribute=None,
    -    curriculum_sampling=None,
    -    alternate_lr_scheduler_type=None,
    -    chat_template=None,
    -    kd_ce_alpha=None,
    -    kd_alpha=1.0,
    -    kd_temperature=1.0,
    -    kd_zscore_base_temp=None,
    -    kd_top_k_before_softmax=None,
    -    adam_beta3=None,
    -    adam_epsilon2=None,
    -    image_size=None,
    -    image_resize_algorithm=None,
    -)
    -

    Mixin class for the Axolotl training args.

    diff --git a/docs/api/datasets.html b/docs/api/datasets.html index 6047df4c5..e5ac8fc2f 100644 --- a/docs/api/datasets.html +++ b/docs/api/datasets.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/evaluate.html b/docs/api/evaluate.html index 670f8fcbb..d18334785 100644 --- a/docs/api/evaluate.html +++ b/docs/api/evaluate.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/index.html b/docs/api/index.html index dd9712d9c..0e8eb2f6e 100644 --- a/docs/api/index.html +++ b/docs/api/index.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/integrations.base.html b/docs/api/integrations.base.html index d176d3583..af130b9a5 100644 --- a/docs/api/integrations.base.html +++ b/docs/api/integrations.base.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + @@ -583,50 +583,62 @@ training.

    Creates and returns an optimizer for training. +get_collator_cls_and_kwargs +Returns a custom class for the collator. + + get_input_args Returns a pydantic model for the plugin’s input arguments. - + get_trainer_cls Returns a custom class for the trainer. + +get_training_args +Returns custom training arguments to set on TrainingArgs. + +get_training_args_mixin +Returns a dataclass model for the plugin’s training arguments. + + load_datasets Loads and preprocesses the dataset for training. - + post_lora_load Performs actions after LoRA weights are loaded. - + post_model_build Performs actions after the model is built/loaded, but before any adapters are applied. - + post_model_load Performs actions after the model is loaded. - + post_train Performs actions after training is complete. - + post_train_unload Performs actions after training is complete and the model is unloaded. - + post_trainer_create Performs actions after the trainer is created. - + pre_lora_load Performs actions before LoRA weights are loaded. - + pre_model_load Performs actions before the model is loaded. - + register Registers the plugin with the given configuration. @@ -883,17 +895,74 @@ callbacks that require access to the model or trainer.

    +
    +
    get_collator_cls_and_kwargs
    +
    integrations.base.BasePlugin.get_collator_cls_and_kwargs(cfg, is_eval=False)
    +

    Returns a custom class for the collator.

    +
    +
    Parameters
    + ++++++ + + + + + + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    cfgDictDefaultThe global axolotl configuration.required
    is_evalboolWhether this is an eval split.False
    +
    +
    +
    Returns
    + + + + + + + + + + + + + + + +
    NameTypeDescription
    classThe class for the collator.
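A rough sketch of how a plugin might implement this hook; the collator class and cfg flag below are hypothetical, and the exact return contract (class alone vs. a (class, kwargs) pair, as the method name suggests) should be checked against the source:

    from axolotl.integrations.base import BasePlugin

    class PaddingCollator:  # hypothetical stand-in for a real collator
        def __init__(self, pad_to_multiple_of=None):
            self.pad_to_multiple_of = pad_to_multiple_of

        def __call__(self, features):
            return features  # a real collator would batch and pad here

    class MyPlugin(BasePlugin):
        def get_collator_cls_and_kwargs(self, cfg, is_eval=False):
            if not cfg.get("use_padding_collator"):  # hypothetical cfg field
                return None  # defer to other plugins / the default collator
            return PaddingCollator, {"pad_to_multiple_of": None if is_eval else 8}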
    +
    +
    get_input_args
    -
    integrations.base.BasePlugin.get_input_args()
    +
    integrations.base.BasePlugin.get_input_args()

    Returns a pydantic model for the plugin’s input arguments.

    get_trainer_cls
    -
    integrations.base.BasePlugin.get_trainer_cls(cfg)
    +
    integrations.base.BasePlugin.get_trainer_cls(cfg)

    Returns a custom class for the trainer.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -919,8 +988,8 @@ callbacks that require access to the model or trainer.

    -
    -
    Returns
    +
    +
    Returns
    @@ -944,12 +1013,68 @@ callbacks that require access to the model or trainer.

    +
    +
    get_training_args
    +
    integrations.base.BasePlugin.get_training_args(cfg)
    +

    Returns custom training arguments to set on TrainingArgs.

    +
    +
    Parameters
    + ++++++ + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    cfgDictDefaultThe global axolotl configuration.required
    +
    +
    +
    Returns
    + + + + + + + + + + + + + + + +
    NameTypeDescription
    objectdict containing the training arguments.
    +
    +
    +
    +
    get_training_args_mixin
    +
    integrations.base.BasePlugin.get_training_args_mixin()
    +

    Returns a dataclass model for the plugin’s training arguments.
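A minimal sketch of how the two hooks above might pair up in a plugin — the mixin contributes new TrainingArgs fields and get_training_args supplies their values; the field name and cfg key here are hypothetical:

    from dataclasses import dataclass
    from axolotl.integrations.base import BasePlugin

    @dataclass
    class MyArgsMixin:  # fields mixed into the generated TrainingArgs class
        my_plugin_alpha: float = 1.0  # hypothetical plugin-specific field

    class MyPlugin(BasePlugin):
        def get_training_args_mixin(self):
            return MyArgsMixin

        def get_training_args(self, cfg):
            # dict of values for the mixin's fields, read from the axolotl config
            return {"my_plugin_alpha": cfg.get("my_plugin_alpha", 1.0)}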

    +
    load_datasets
    -
    integrations.base.BasePlugin.load_datasets(cfg, preprocess=False)
    +
    integrations.base.BasePlugin.load_datasets(cfg, preprocess=False)

    Loads and preprocesses the dataset for training.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -981,8 +1106,8 @@ callbacks that require access to the model or trainer.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1008,78 +1133,8 @@ callbacks that require access to the model or trainer.

    post_lora_load
    -
    integrations.base.BasePlugin.post_lora_load(cfg, model)
    +
    integrations.base.BasePlugin.post_lora_load(cfg, model)

    Performs actions after LoRA weights are loaded.

    -
    -
    Parameters
    -
    ------ - - - - - - - - - - - - - - - - - - - - - - -
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugin.required
    modelPreTrainedModel | PeftModelThe loaded model.required
    -
    -
    -
    -
    post_model_build
    -
    integrations.base.BasePlugin.post_model_build(cfg, model)
    -

    Performs actions after the model is built/loaded, but before any adapters are applied.

    -
    -
    Parameters
    - ------ - - - - - - - - - - - - - - - - -
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugin.required
    -
    -
    -
    -
    post_model_load
    -
    integrations.base.BasePlugin.post_model_load(cfg, model)
    -

    Performs actions after the model is loaded.

    Parameters
    @@ -1114,14 +1169,84 @@ callbacks that require access to the model or trainer.

    -
    -
    post_train
    -
    integrations.base.BasePlugin.post_train(cfg, model)
    -

    Performs actions after training is complete.

    +
    +
    post_model_build
    +
    integrations.base.BasePlugin.post_model_build(cfg, model)
    +

    Performs actions after the model is built/loaded, but before any adapters are applied.

    Parameters
    +++++ + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugin.required
    +
    +
    +
    +
    post_model_load
    +
    integrations.base.BasePlugin.post_model_load(cfg, model)
    +

    Performs actions after the model is loaded.

    +
    +
    Parameters
    + ++++++ + + + + + + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugin.required
    modelPreTrainedModel | PeftModelThe loaded model.required
    +
    +
    +
    +
    post_train
    +
    integrations.base.BasePlugin.post_train(cfg, model)
    +

    Performs actions after training is complete.

    +
    +
    Parameters
    + +@@ -1154,10 +1279,10 @@ callbacks that require access to the model or trainer.

    post_train_unload
    -
    integrations.base.BasePlugin.post_train_unload(cfg)
    +
    integrations.base.BasePlugin.post_train_unload(cfg)

    Performs actions after training is complete and the model is unloaded.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1186,10 +1311,10 @@ callbacks that require access to the model or trainer.

    post_trainer_create
    -
    integrations.base.BasePlugin.post_trainer_create(cfg, trainer)
    +
    integrations.base.BasePlugin.post_trainer_create(cfg, trainer)

    Performs actions after the trainer is created.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1224,10 +1349,10 @@ callbacks that require access to the model or trainer.

    pre_lora_load
    -
    integrations.base.BasePlugin.pre_lora_load(cfg, model)
    +
    integrations.base.BasePlugin.pre_lora_load(cfg, model)

    Performs actions before LoRA weights are loaded.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1262,10 +1387,10 @@ callbacks that require access to the model or trainer.

    pre_model_load
    -
    integrations.base.BasePlugin.pre_model_load(cfg)
    +
    integrations.base.BasePlugin.pre_model_load(cfg)

    Performs actions before the model is loaded.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1294,10 +1419,10 @@ callbacks that require access to the model or trainer.

    register
    -
    integrations.base.BasePlugin.register(cfg)
    +
    integrations.base.BasePlugin.register(cfg)

    Registers the plugin with the given configuration.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1328,7 +1453,7 @@ callbacks that require access to the model or trainer.

    PluginManager

    -
    integrations.base.PluginManager()
    +
    integrations.base.PluginManager()

    The PluginManager class is responsible for loading and managing plugins. It should be a singleton so it can be accessed from anywhere in the codebase.

    @@ -1384,54 +1509,66 @@ should be a singleton so it can be accessed from anywhere in the codebase.

    + + + + - + - + + + + + + + + + - + - + - + - + - + - + - + - + - + @@ -1439,10 +1576,10 @@ should be a singleton so it can be accessed from anywhere in the codebase.

Calls the create_optimizer method of all registered plugins and returns the first non-None optimizer.
get_collator_cls_and_kwargs Calls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.
get_input_args Returns a list of Pydantic classes for all registered plugins’ input arguments.
get_instance Returns the singleton instance of PluginManager. If the instance doesn’t exist, it creates a new one.
get_trainer_cls Calls the get_trainer_cls method of all registered plugins and returns the first non-None trainer class.
get_training_args Calls the get_training_args method of all registered plugins and returns the combined training arguments.
get_training_args_mixin Returns a list of dataclasses for all registered plugins’ training args mixins.
    load_datasets Calls the load_datasets method of each registered plugin.
    post_lora_load Calls the post_lora_load method of all registered plugins.
    post_model_build Calls the post_model_build method of all registered plugins after the
    post_model_load Calls the post_model_load method of all registered plugins after the model
    post_train Calls the post_train method of all registered plugins.
    post_train_unload Calls the post_train_unload method of all registered plugins.
    post_trainer_create Calls the post_trainer_create method of all registered plugins.
    pre_lora_load Calls the pre_lora_load method of all registered plugins.
    pre_model_load Calls the pre_model_load method of all registered plugins.
    register Registers a new plugin by its name.
    add_callbacks_post_trainer
    -
    integrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)
    +
    integrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)

    Calls the add_callbacks_post_trainer method of all registered plugins.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1474,8 +1611,8 @@ should be a singleton so it can be accessed from anywhere in the codebase.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1501,10 +1638,10 @@ should be a singleton so it can be accessed from anywhere in the codebase.

    add_callbacks_pre_trainer
    -
    integrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)
    +
    integrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)

    Calls the add_callbacks_pre_trainer method of all registered plugins.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1536,8 +1673,8 @@ should be a singleton so it can be accessed from anywhere in the codebase.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1563,15 +1700,15 @@ should be a singleton so it can be accessed from anywhere in the codebase.

    create_lr_scheduler
    -
    integrations.base.PluginManager.create_lr_scheduler(
    -    trainer,
    -    optimizer,
    -    num_training_steps,
    -)
    +
    integrations.base.PluginManager.create_lr_scheduler(
    +    trainer,
    +    optimizer,
    +    num_training_steps,
    +)

    Calls the create_lr_scheduler method of all registered plugins and returns the first non-None scheduler.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1603,8 +1740,8 @@ the first non-None scheduler.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1630,11 +1767,11 @@ the first non-None scheduler.

    create_optimizer
    -
    integrations.base.PluginManager.create_optimizer(trainer)
    +
    integrations.base.PluginManager.create_optimizer(trainer)

    Calls the create_optimizer method of all registered plugins and returns the first non-None optimizer.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1654,8 +1791,8 @@ the first non-None optimizer.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1679,12 +1816,22 @@ the first non-None optimizer.

    +
    +
    get_collator_cls_and_kwargs
    +
    integrations.base.PluginManager.get_collator_cls_and_kwargs(cfg, is_eval=False)
    +

    Calls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.

    +

    Parameters: +cfg (dict): The configuration for the plugins. +is_eval (bool): Whether this is an eval split.

    +

    Returns: +object: The collator class, or None if none was found.
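A sketch of the “first non-None wins” dispatch this describes (an illustration of the documented behavior, not the actual source; the internal plugins list is an assumption):

    def get_collator_cls_and_kwargs(self, cfg, is_eval=False):
        for plugin in self.plugins:  # assumed internal registry of plugins
            result = plugin.get_collator_cls_and_kwargs(cfg, is_eval=is_eval)
            if result is not None:
                return result  # first plugin that supplies a collator wins
        return None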

    +
    get_input_args
    -
    integrations.base.PluginManager.get_input_args()
    +
    integrations.base.PluginManager.get_input_args()

Returns a list of Pydantic classes for all registered plugins’ input arguments.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1710,17 +1857,17 @@ the first non-None optimizer.

    get_instance
    -
    integrations.base.PluginManager.get_instance()
    +
    integrations.base.PluginManager.get_instance()

    Returns the singleton instance of PluginManager. If the instance doesn’t exist, it creates a new one.
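Usage follows the standard singleton access pattern, for example:

    from axolotl.integrations.base import PluginManager

    manager = PluginManager.get_instance()          # created on first call
    assert manager is PluginManager.get_instance()  # same instance everywhere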

    get_trainer_cls
    -
    integrations.base.PluginManager.get_trainer_cls(cfg)
    +
    integrations.base.PluginManager.get_trainer_cls(cfg)

    Calls the get_trainer_cls method of all registered plugins and returns the first non-None trainer class.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1746,8 +1893,8 @@ first non-None trainer class.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1771,12 +1918,28 @@ first non-None trainer class.

    +
    +
    get_training_args
    +
    integrations.base.PluginManager.get_training_args(cfg)
    +

    Calls the get_training_args method of all registered plugins and returns the combined training arguments.

    +

    Parameters: +cfg (dict): The configuration for the plugins.

    +

Returns: +object: The training arguments.
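“Combined” here plausibly means a dict merge across plugins — a sketch under that assumption (the merge/override policy and the plugins attribute are not confirmed by this page):

    def get_training_args(self, cfg):
        combined = {}
        for plugin in self.plugins:  # assumed internal registry of plugins
            args = plugin.get_training_args(cfg)
            if args:
                combined.update(args)  # later plugins override earlier keys
        return combined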

    +
    +
    +
    get_training_args_mixin
    +
    integrations.base.PluginManager.get_training_args_mixin()
    +

Returns a list of dataclasses for all registered plugins’ training args mixins.

    +

Returns: +list[str]: A list of dataclasses

    +
    load_datasets
    -
    integrations.base.PluginManager.load_datasets(cfg, preprocess=False)
    +
    integrations.base.PluginManager.load_datasets(cfg, preprocess=False)

    Calls the load_datasets method of each registered plugin.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1808,8 +1971,8 @@ first non-None trainer class.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1835,86 +1998,8 @@ first non-None trainer class.

    post_lora_load
    -
    integrations.base.PluginManager.post_lora_load(cfg, model)
    +
    integrations.base.PluginManager.post_lora_load(cfg, model)

    Calls the post_lora_load method of all registered plugins.

    -
    -
    Parameters
    -
    ------ - - - - - - - - - - - - - - - - - - - - - - -
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugins.required
    modelPreTrainedModel | PeftModelThe loaded model.required
    -
    -
    -
    -
    post_model_build
    -
    integrations.base.PluginManager.post_model_build(cfg, model)
    -

    Calls the post_model_build method of all registered plugins after the -model has been built / loaded, but before any adapters have been applied.

    -
    -
    Parameters
    - ------ - - - - - - - - - - - - - - - - - - - - - - -
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugins.required
    modelPreTrainedModelThe loaded model.required
    -
    -
    -
    -
    post_model_load
    -
    integrations.base.PluginManager.post_model_load(cfg, model)
    -

    Calls the post_model_load method of all registered plugins after the model -has been loaded inclusive of any adapters.

    Parameters
    @@ -1949,14 +2034,92 @@ has been loaded inclusive of any adapters.

    -
    -
    post_train
    -
    integrations.base.PluginManager.post_train(cfg, model)
    -

    Calls the post_train method of all registered plugins.

    +
    +
    post_model_build
    +
    integrations.base.PluginManager.post_model_build(cfg, model)
    +

    Calls the post_model_build method of all registered plugins after the +model has been built / loaded, but before any adapters have been applied.

    Parameters
    +++++ + + + + + + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugins.required
    modelPreTrainedModelThe loaded model.required
    +
    +
    +
    +
    post_model_load
    +
    integrations.base.PluginManager.post_model_load(cfg, model)
    +

    Calls the post_model_load method of all registered plugins after the model +has been loaded inclusive of any adapters.

    +
    +
    Parameters
    + ++++++ + + + + + + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugins.required
    modelPreTrainedModel | PeftModelThe loaded model.required
    +
    +
    +
    +
    post_train
    +
    integrations.base.PluginManager.post_train(cfg, model)
    +

    Calls the post_train method of all registered plugins.

    +
    +
    Parameters
    + +@@ -1989,10 +2152,10 @@ has been loaded inclusive of any adapters.

    post_train_unload
    -
    integrations.base.PluginManager.post_train_unload(cfg)
    +
    integrations.base.PluginManager.post_train_unload(cfg)

    Calls the post_train_unload method of all registered plugins.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -2021,10 +2184,10 @@ has been loaded inclusive of any adapters.

    post_trainer_create
    -
    integrations.base.PluginManager.post_trainer_create(cfg, trainer)
    +
    integrations.base.PluginManager.post_trainer_create(cfg, trainer)

    Calls the post_trainer_create method of all registered plugins.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -2059,10 +2222,10 @@ has been loaded inclusive of any adapters.

    pre_lora_load
    -
    integrations.base.PluginManager.pre_lora_load(cfg, model)
    +
    integrations.base.PluginManager.pre_lora_load(cfg, model)

    Calls the pre_lora_load method of all registered plugins.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -2097,10 +2260,10 @@ has been loaded inclusive of any adapters.

    pre_model_load
    -
    integrations.base.PluginManager.pre_model_load(cfg)
    +
    integrations.base.PluginManager.pre_model_load(cfg)

    Calls the pre_model_load method of all registered plugins.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -2129,10 +2292,10 @@ has been loaded inclusive of any adapters.

    register
    -
    integrations.base.PluginManager.register(plugin_name)
    +
    integrations.base.PluginManager.register(plugin_name)

    Registers a new plugin by its name.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -2199,13 +2362,13 @@ has been loaded inclusive of any adapters.

    load_plugin

    -
    integrations.base.load_plugin(plugin_name)
    +
    integrations.base.load_plugin(plugin_name)

    Loads a plugin based on the given plugin name.

    The plugin name should be in the format “module_name.class_name”. This function splits the plugin name into module and class, imports the module, retrieves the class from the module, and creates an instance of the class.
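A sketch that re-implements just the steps described above (the real function lives in integrations.base; error handling omitted):

    import importlib

    def load_plugin(plugin_name: str):
        module_name, class_name = plugin_name.rsplit(".", 1)
        module = importlib.import_module(module_name)  # import the module
        plugin_cls = getattr(module, class_name)       # retrieve the class
        return plugin_cls()                            # create an instance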

    -
    -

    Parameters

    +
    +

    Parameters

    @@ -2231,8 +2394,8 @@ class from the module, and creates an instance of the class.

    -
    -

    Returns

    +
    +

    Returns

    diff --git a/docs/api/integrations.cut_cross_entropy.args.html b/docs/api/integrations.cut_cross_entropy.args.html index 591ecb279..0d9c196b1 100644 --- a/docs/api/integrations.cut_cross_entropy.args.html +++ b/docs/api/integrations.cut_cross_entropy.args.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/integrations.grokfast.optimizer.html b/docs/api/integrations.grokfast.optimizer.html index 58bdec4c3..0ce5db289 100644 --- a/docs/api/integrations.grokfast.optimizer.html +++ b/docs/api/integrations.grokfast.optimizer.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/integrations.kd.trainer.html b/docs/api/integrations.kd.trainer.html index 3c07400a4..a67cc634f 100644 --- a/docs/api/integrations.kd.trainer.html +++ b/docs/api/integrations.kd.trainer.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + @@ -505,13 +505,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});

    AxolotlKDTrainer

    -
    integrations.kd.trainer.AxolotlKDTrainer(
    -    *_args,
    -    bench_data_collator=None,
    -    eval_data_collator=None,
    -    dataset_tags=None,
    -    **kwargs,
    -)
    +
    integrations.kd.trainer.AxolotlKDTrainer(*args, **kwargs)

    Custom trainer subclass for Knowledge Distillation (KD)

    Methods

[Per-file hunks elided: the docs/api pages from integrations.liger.args.html through utils.quantization.html (integrations.*.args, kernels.*, loaders.*, logging_config, models.mamba.modeling_mamba, monkeypatch.*, prompt_strategies.*, prompt_tokenizers, train, utils.bench, utils.callbacks.*, utils.chat_templates, utils.collators.*, utils.ctx_managers.sequence_parallel, utils.data.*, utils.dict, utils.distributed, utils.freeze, utils.lora, utils.model_shard_quant, utils.optimizers.adopt, utils.quantization) each carry the same head-of-file hunks (@@ -2,7 +2,7 @@ plus @@ -36,7 +36,7 @@ or @@ -71,7 +71,7 @@); the changed lines themselves were lost in extraction, leaving only bare "- +" markers. docs/api/train.html additionally had two body hunks (@@ -958,8 +958,8 @@ and @@ -971,7 +971,7 @@), both with "trainer setup." context and likewise stripped.]

diff --git a/docs/api/utils.samplers.multipack.html b/docs/api/utils.samplers.multipack.html
index 75a23093a..5bfc6b943 100644
--- a/docs/api/utils.samplers.multipack.html
+++ b/docs/api/utils.samplers.multipack.html

[This page's two stripped head-of-file hunks (@@ -2,7 +2,7 @@ and @@ -71,7 +71,7 @@) elided.]

@@ -519,7 +519,7 @@ into fixed-capacity batches to optimize memory usage and training throughput.
     batch_max_len,
     lengths,
     packing_efficiency_estimate=1.0,
-    drop_last=False,
+    drop_last=True,
     num_count_samples=8,
     sequential=False,
     group_size=100000,
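The one substantive change above flips the multipack sampler's drop_last default from False to True. A minimal sketch of what that flag means for a length-based packing sampler that fills fixed-capacity bins (an illustration only, not axolotl's implementation, which also weighs packing_efficiency_estimate and group_size):

    def pack_sequences(lengths, batch_max_len, drop_last=True):
        # Greedy first-fit packing; assumes every sequence fits in batch_max_len.
        bins, current, used = [], [], 0
        for idx, length in enumerate(lengths):
            if used + length > batch_max_len and current:
                bins.append(current)      # bin is full: seal it, start fresh
                current, used = [], 0
            current.append(idx)
            used += length
        if current and not drop_last:
            bins.append(current)          # keep the trailing, under-filled bin
        return bins

    lengths = [900, 700, 600, 500, 300]
    print(pack_sequences(lengths, 2048, drop_last=False))  # [[0, 1], [2, 3, 4]]
    print(pack_sequences(lengths, 2048, drop_last=True))   # [[0, 1]]

Dropping the trailing bin sacrifices a few samples per epoch in exchange for uniformly full batches, which keeps step time and memory use predictable.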
[Per-file hunks elided: docs/api/utils.schedulers.html, docs/api/utils.schemas.*.html, docs/api/utils.tokenization.html, docs/api/utils.trainer.html, and the top-level docs pages (batch_vs_grad, cli, config, custom_integrations, dataset-formats/*, dataset_loading, dataset_preprocessing, debugging, docker, faq, fsdp_qlora, getting-started, inference, input_output, installation, lora_optims, lr_groups, mac, multi-gpu, multi-node, multimodal, multipack, nccl, qat, quantize, ray-integration, reward_modelling, rlhf, sequence_parallelism, torchao), all with the same stripped head-of-file hunks.]
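The search.json hunk below rewrites the core.training_args entries; the docstring it carries notes that AxolotlTrainingArguments duplicates code because HF's TrainingArguments leaves output_dir without a default and so "can't be used as a mixin". A hedged sketch of the dataclass rule behind that note (class names here are illustrative, not axolotl's):

    from dataclasses import dataclass

    @dataclass
    class PackingMixin:
        sample_packing: bool = False      # mixin fields all carry defaults

    @dataclass
    class RequiredOutputDir:
        output_dir: str                   # no default, like HF TrainingArguments

    # Fields are collected base-first, so the defaulted mixin fields land in
    # front of the default-less output_dir, which dataclasses rejects at
    # class-creation time:
    try:
        @dataclass
        class Broken(RequiredOutputDir, PackingMixin):
            pass
    except TypeError as err:
        print(err)  # non-default argument 'output_dir' follows default argument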
[Per-file hunks elided: docs/unsloth.html, examples/colab-notebooks/colab-axolotl-example.html, and index.html, all with the same stripped head-of-file hunks.]

diff --git a/search.json b/search.json
index 70ea3f187..7e66ac345 100644
--- a/search.json
+++ b/search.json
@@ -644,14 +644,14 @@
     "href": "docs/api/core.training_args.html",
     "title": "core.training_args",
     "section": "",
-    "text": "core.training_args\nextra axolotl specific training args\n\n\n\n\n\nName\nDescription\n\n\n\n\nAxolotlCPOConfig\nCPO config for CPO training\n\n\nAxolotlKTOConfig\nKTO config for KTO training\n\n\nAxolotlORPOConfig\nORPO config for ORPO training\n\n\nAxolotlPRMConfig\nPRM config for PRM training\n\n\nAxolotlRewardConfig\nReward config for Reward training\n\n\nAxolotlTrainingArguments\nTraining arguments for Causal trainer\n\n\nAxolotlTrainingMixins\nMixin class for the Axolotl training args.\n\n\n\n\n\ncore.training_args.AxolotlCPOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n simpo_gamma=None,\n)\nCPO config for CPO training\n\n\n\ncore.training_args.AxolotlKTOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n
bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nKTO config for KTO training\n\n\n\ncore.training_args.AxolotlORPOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nORPO config for ORPO training\n\n\n\ncore.training_args.AxolotlPRMConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nPRM config for PRM training\n\n\n\ncore.training_args.AxolotlRewardConfig(\n model_type=None,\n 
lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nReward config for Reward training\n\n\n\ncore.training_args.AxolotlTrainingArguments(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nTraining arguments for Causal trainer\nThis code is duplicated due to HF TrainingArguments not setting output_dir with a\ndefault value so it can’t be used as a mixin.\n\n\n\ncore.training_args.AxolotlTrainingMixins(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n 
loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nMixin class for the Axolotl training args." + "text": "core.training_args\nextra axolotl specific training args\n\n\n\n\n\nName\nDescription\n\n\n\n\nAxolotlCPOConfig\nCPO config for CPO training\n\n\nAxolotlKTOConfig\nKTO config for KTO training\n\n\nAxolotlORPOConfig\nORPO config for ORPO training\n\n\nAxolotlPRMConfig\nPRM config for PRM training\n\n\nAxolotlRewardConfig\nReward config for Reward training\n\n\nAxolotlTrainingArguments\nTraining arguments for Causal trainer\n\n\n\n\n\ncore.training_args.AxolotlCPOConfig(simpo_gamma=None)\nCPO config for CPO training\n\n\n\ncore.training_args.AxolotlKTOConfig()\nKTO config for KTO training\n\n\n\ncore.training_args.AxolotlORPOConfig()\nORPO config for ORPO training\n\n\n\ncore.training_args.AxolotlPRMConfig()\nPRM config for PRM training\n\n\n\ncore.training_args.AxolotlRewardConfig()\nReward config for Reward training\n\n\n\ncore.training_args.AxolotlTrainingArguments()\nTraining arguments for Causal trainer\nThis code is duplicated due to HF TrainingArguments not setting output_dir with a\ndefault value so it can’t be used as a mixin." }, { "objectID": "docs/api/core.training_args.html#classes", "href": "docs/api/core.training_args.html#classes", "title": "core.training_args", "section": "", - "text": "Name\nDescription\n\n\n\n\nAxolotlCPOConfig\nCPO config for CPO training\n\n\nAxolotlKTOConfig\nKTO config for KTO training\n\n\nAxolotlORPOConfig\nORPO config for ORPO training\n\n\nAxolotlPRMConfig\nPRM config for PRM training\n\n\nAxolotlRewardConfig\nReward config for Reward training\n\n\nAxolotlTrainingArguments\nTraining arguments for Causal trainer\n\n\nAxolotlTrainingMixins\nMixin class for the Axolotl training args.\n\n\n\n\n\ncore.training_args.AxolotlCPOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n simpo_gamma=None,\n)\nCPO config for CPO 
training\n\n\n\ncore.training_args.AxolotlKTOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nKTO config for KTO training\n\n\n\ncore.training_args.AxolotlORPOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nORPO config for ORPO training\n\n\n\ncore.training_args.AxolotlPRMConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n 
qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nPRM config for PRM training\n\n\n\ncore.training_args.AxolotlRewardConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nReward config for Reward training\n\n\n\ncore.training_args.AxolotlTrainingArguments(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nTraining arguments for Causal trainer\nThis code is duplicated due to HF TrainingArguments not setting output_dir with a\ndefault value so it can’t be used as a mixin.\n\n\n\ncore.training_args.AxolotlTrainingMixins(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n 
sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nMixin class for the Axolotl training args." + "text": "Name\nDescription\n\n\n\n\nAxolotlCPOConfig\nCPO config for CPO training\n\n\nAxolotlKTOConfig\nKTO config for KTO training\n\n\nAxolotlORPOConfig\nORPO config for ORPO training\n\n\nAxolotlPRMConfig\nPRM config for PRM training\n\n\nAxolotlRewardConfig\nReward config for Reward training\n\n\nAxolotlTrainingArguments\nTraining arguments for Causal trainer\n\n\n\n\n\ncore.training_args.AxolotlCPOConfig(simpo_gamma=None)\nCPO config for CPO training\n\n\n\ncore.training_args.AxolotlKTOConfig()\nKTO config for KTO training\n\n\n\ncore.training_args.AxolotlORPOConfig()\nORPO config for ORPO training\n\n\n\ncore.training_args.AxolotlPRMConfig()\nPRM config for PRM training\n\n\n\ncore.training_args.AxolotlRewardConfig()\nReward config for Reward training\n\n\n\ncore.training_args.AxolotlTrainingArguments()\nTraining arguments for Causal trainer\nThis code is duplicated due to HF TrainingArguments not setting output_dir with a\ndefault value so it can’t be used as a mixin." }, { "objectID": "docs/api/prompt_strategies.user_defined.html", @@ -1085,14 +1085,14 @@ "href": "docs/api/train.html", "title": "train", "section": "", - "text": "train\nPrepare and train a model on a dataset. Can also infer from a model or merge lora\n\n\n\n\n\nName\nDescription\n\n\n\n\ncreate_model_card\nCreate a model card for the trained model if needed.\n\n\ndetermine_resume_checkpoint\nDetermine the checkpoint to resume from based on configuration.\n\n\nexecute_training\nExecute the training process with appropriate SDP kernel configurations.\n\n\nhandle_untrained_tokens_fix\nApply fixes for untrained tokens if configured.\n\n\nsave_initial_configs\nSave initial configurations before training.\n\n\nsave_trained_model\nSave the trained model according to configuration and training setup.\n\n\nsetup_model_and_tokenizer\nLoad the tokenizer, processor (for multimodal models), and model based on\n\n\nsetup_model_and_trainer\nLoad model, tokenizer, trainer, etc. 
Helper function to encapsulate the full\n\n\nsetup_model_card\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\nsetup_reference_model\nSet up the reference model for RL training if needed.\n\n\nsetup_signal_handler\nSet up signal handler for graceful termination.\n\n\ntrain\nTrain a model on the given dataset.\n\n\n\n\n\ntrain.create_model_card(cfg, trainer)\nCreate a model card for the trained model if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object with model card creation capabilities.\nrequired\n\n\n\n\n\n\n\ntrain.determine_resume_checkpoint(cfg)\nDetermine the checkpoint to resume from based on configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nstr | None\nPath to the checkpoint to resume from, or None if not resuming.\n\n\n\n\n\n\n\ntrain.execute_training(cfg, trainer, resume_from_checkpoint)\nExecute the training process with appropriate SDP kernel configurations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe configured trainer object.\nrequired\n\n\nresume_from_checkpoint\nstr | None\nPath to checkpoint to resume from, if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.handle_untrained_tokens_fix(\n cfg,\n model,\n tokenizer,\n train_dataset,\n safe_serialization,\n)\nApply fixes for untrained tokens if configured.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to apply fixes to.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer for token identification.\nrequired\n\n\ntrain_dataset\nDataset\nThe training dataset to use.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving.\nrequired\n\n\n\n\n\n\n\ntrain.save_initial_configs(cfg, tokenizer, model, peft_config, processor)\nSave initial configurations before training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to save.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save configuration for.\nrequired\n\n\npeft_config\nPeftConfig | None\nThe PEFT configuration to save if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.save_trained_model(cfg, trainer, model, safe_serialization)\nSave the trained model according to configuration and training setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe trainer object.\nrequired\n\n\nmodel\nPreTrainedModel\nThe trained model to save.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization.\nrequired\n\n\n\n\n\n\n\ntrain.setup_model_and_tokenizer(cfg)\nLoad the tokenizer, processor (for multimodal models), and model based on\nconfiguration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PreTrainedModel, 
PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple containing model, tokenizer, peft_config (if LoRA / QLoRA, else None), and processor (if multimodal, else None).\n\n\n\n\n\n\n\ntrain.setup_model_and_trainer(cfg, dataset_meta)\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\ntrainer setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters.\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[HFRLTrainerBuilder | HFCausalTrainerBuilder, PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple of: - Trainer (Causal or RLHF) - Model - Tokenizer - PEFT config - Processor\n\n\n\n\n\n\n\ntrain.setup_model_card(cfg)\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\ntrain.setup_reference_model(cfg, tokenizer)\nSet up the reference model for RL training if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to use for the reference model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nPreTrainedModel | None\nReference model if needed for RL training, None otherwise.\n\n\n\n\n\n\n\ntrain.setup_signal_handler(cfg, model, safe_serialization)\nSet up signal handler for graceful termination.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save on termination\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving\nrequired\n\n\n\n\n\n\n\ntrain.train(cfg, dataset_meta)\nTrain a model on the given dataset.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PeftModel | PreTrainedModel, PreTrainedTokenizer, Trainer]\nTuple of (model, tokenizer) after training" + "text": "train\nPrepare and train a model on a dataset. Can also infer from a model or merge lora\n\n\n\n\n\nName\nDescription\n\n\n\n\ncreate_model_card\nCreate a model card for the trained model if needed.\n\n\ndetermine_resume_checkpoint\nDetermine the checkpoint to resume from based on configuration.\n\n\nexecute_training\nExecute the training process with appropriate SDP kernel configurations.\n\n\nhandle_untrained_tokens_fix\nApply fixes for untrained tokens if configured.\n\n\nsave_initial_configs\nSave initial configurations before training.\n\n\nsave_trained_model\nSave the trained model according to configuration and training setup.\n\n\nsetup_model_and_tokenizer\nLoad the tokenizer, processor (for multimodal models), and model based on\n\n\nsetup_model_and_trainer\nLoad model, tokenizer, trainer, etc. 
Helper function to encapsulate the full\n\n\nsetup_model_card\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\nsetup_reference_model\nSet up the reference model for RL training if needed.\n\n\nsetup_signal_handler\nSet up signal handler for graceful termination.\n\n\ntrain\nTrain a model on the given dataset.\n\n\n\n\n\ntrain.create_model_card(cfg, trainer)\nCreate a model card for the trained model if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object with model card creation capabilities.\nrequired\n\n\n\n\n\n\n\ntrain.determine_resume_checkpoint(cfg)\nDetermine the checkpoint to resume from based on configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nstr | None\nPath to the checkpoint to resume from, or None if not resuming.\n\n\n\n\n\n\n\ntrain.execute_training(cfg, trainer, resume_from_checkpoint)\nExecute the training process with appropriate SDP kernel configurations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe configured trainer object.\nrequired\n\n\nresume_from_checkpoint\nstr | None\nPath to checkpoint to resume from, if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.handle_untrained_tokens_fix(\n cfg,\n model,\n tokenizer,\n train_dataset,\n safe_serialization,\n)\nApply fixes for untrained tokens if configured.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to apply fixes to.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer for token identification.\nrequired\n\n\ntrain_dataset\nDataset\nThe training dataset to use.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving.\nrequired\n\n\n\n\n\n\n\ntrain.save_initial_configs(cfg, tokenizer, model, peft_config, processor)\nSave initial configurations before training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to save.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save configuration for.\nrequired\n\n\npeft_config\nPeftConfig | None\nThe PEFT configuration to save if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.save_trained_model(cfg, trainer, model, safe_serialization)\nSave the trained model according to configuration and training setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe trainer object.\nrequired\n\n\nmodel\nPreTrainedModel\nThe trained model to save.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization.\nrequired\n\n\n\n\n\n\n\ntrain.setup_model_and_tokenizer(cfg)\nLoad the tokenizer, processor (for multimodal models), and model based on\nconfiguration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PreTrainedModel, 
PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple containing model, tokenizer, peft_config (if LoRA / QLoRA, else None), and processor (if multimodal, else None).\n\n\n\n\n\n\n\ntrain.setup_model_and_trainer(cfg, dataset_meta)\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\ntrainer setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters.\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple['HFRLTrainerBuilder' | 'HFCausalTrainerBuilder', PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple of: - Trainer builder (Causal or RLHF) - Model - Tokenizer - PEFT config - Processor\n\n\n\n\n\n\n\ntrain.setup_model_card(cfg)\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\ntrain.setup_reference_model(cfg, tokenizer)\nSet up the reference model for RL training if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to use for the reference model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nPreTrainedModel | None\nReference model if needed for RL training, None otherwise.\n\n\n\n\n\n\n\ntrain.setup_signal_handler(cfg, model, safe_serialization)\nSet up signal handler for graceful termination.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save on termination\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving\nrequired\n\n\n\n\n\n\n\ntrain.train(cfg, dataset_meta)\nTrain a model on the given dataset.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PeftModel | PreTrainedModel, PreTrainedTokenizer, Trainer]\nTuple of (model, tokenizer, trainer) after training"
},
{
"objectID": "docs/api/train.html#functions",
"href": "docs/api/train.html#functions",
"title": "train",
"section": "",
- "text": "Name\nDescription\n\n\n\n\ncreate_model_card\nCreate a model card for the trained model if needed.\n\n\ndetermine_resume_checkpoint\nDetermine the checkpoint to resume from based on configuration.\n\n\nexecute_training\nExecute the training process with appropriate SDP kernel configurations.\n\n\nhandle_untrained_tokens_fix\nApply fixes for untrained tokens if configured.\n\n\nsave_initial_configs\nSave initial configurations before training.\n\n\nsave_trained_model\nSave the trained model according to configuration and training setup.\n\n\nsetup_model_and_tokenizer\nLoad the tokenizer, processor (for multimodal models), and model based on\n\n\nsetup_model_and_trainer\nLoad model, tokenizer, trainer, etc. 
Helper function to encapsulate the full\n\n\nsetup_model_card\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\nsetup_reference_model\nSet up the reference model for RL training if needed.\n\n\nsetup_signal_handler\nSet up signal handler for graceful termination.\n\n\ntrain\nTrain a model on the given dataset.\n\n\n\n\n\ntrain.create_model_card(cfg, trainer)\nCreate a model card for the trained model if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object with model card creation capabilities.\nrequired\n\n\n\n\n\n\n\ntrain.determine_resume_checkpoint(cfg)\nDetermine the checkpoint to resume from based on configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nstr | None\nPath to the checkpoint to resume from, or None if not resuming.\n\n\n\n\n\n\n\ntrain.execute_training(cfg, trainer, resume_from_checkpoint)\nExecute the training process with appropriate SDP kernel configurations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe configured trainer object.\nrequired\n\n\nresume_from_checkpoint\nstr | None\nPath to checkpoint to resume from, if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.handle_untrained_tokens_fix(\n cfg,\n model,\n tokenizer,\n train_dataset,\n safe_serialization,\n)\nApply fixes for untrained tokens if configured.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to apply fixes to.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer for token identification.\nrequired\n\n\ntrain_dataset\nDataset\nThe training dataset to use.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving.\nrequired\n\n\n\n\n\n\n\ntrain.save_initial_configs(cfg, tokenizer, model, peft_config, processor)\nSave initial configurations before training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to save.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save configuration for.\nrequired\n\n\npeft_config\nPeftConfig | None\nThe PEFT configuration to save if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.save_trained_model(cfg, trainer, model, safe_serialization)\nSave the trained model according to configuration and training setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe trainer object.\nrequired\n\n\nmodel\nPreTrainedModel\nThe trained model to save.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization.\nrequired\n\n\n\n\n\n\n\ntrain.setup_model_and_tokenizer(cfg)\nLoad the tokenizer, processor (for multimodal models), and model based on\nconfiguration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PreTrainedModel, 
PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple containing model, tokenizer, peft_config (if LoRA / QLoRA, else None), and processor (if multimodal, else None).\n\n\n\n\n\n\n\ntrain.setup_model_and_trainer(cfg, dataset_meta)\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\ntrainer setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters.\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[HFRLTrainerBuilder | HFCausalTrainerBuilder, PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple of: - Trainer (Causal or RLHF) - Model - Tokenizer - PEFT config - Processor\n\n\n\n\n\n\n\ntrain.setup_model_card(cfg)\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\ntrain.setup_reference_model(cfg, tokenizer)\nSet up the reference model for RL training if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to use for the reference model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nPreTrainedModel | None\nReference model if needed for RL training, None otherwise.\n\n\n\n\n\n\n\ntrain.setup_signal_handler(cfg, model, safe_serialization)\nSet up signal handler for graceful termination.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save on termination\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving\nrequired\n\n\n\n\n\n\n\ntrain.train(cfg, dataset_meta)\nTrain a model on the given dataset.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PeftModel | PreTrainedModel, PreTrainedTokenizer, Trainer]\nTuple of (model, tokenizer) after training" + "text": "Name\nDescription\n\n\n\n\ncreate_model_card\nCreate a model card for the trained model if needed.\n\n\ndetermine_resume_checkpoint\nDetermine the checkpoint to resume from based on configuration.\n\n\nexecute_training\nExecute the training process with appropriate SDP kernel configurations.\n\n\nhandle_untrained_tokens_fix\nApply fixes for untrained tokens if configured.\n\n\nsave_initial_configs\nSave initial configurations before training.\n\n\nsave_trained_model\nSave the trained model according to configuration and training setup.\n\n\nsetup_model_and_tokenizer\nLoad the tokenizer, processor (for multimodal models), and model based on\n\n\nsetup_model_and_trainer\nLoad model, tokenizer, trainer, etc. 
Helper function to encapsulate the full\n\n\nsetup_model_card\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\nsetup_reference_model\nSet up the reference model for RL training if needed.\n\n\nsetup_signal_handler\nSet up signal handler for graceful termination.\n\n\ntrain\nTrain a model on the given dataset.\n\n\n\n\n\ntrain.create_model_card(cfg, trainer)\nCreate a model card for the trained model if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object with model card creation capabilities.\nrequired\n\n\n\n\n\n\n\ntrain.determine_resume_checkpoint(cfg)\nDetermine the checkpoint to resume from based on configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nstr | None\nPath to the checkpoint to resume from, or None if not resuming.\n\n\n\n\n\n\n\ntrain.execute_training(cfg, trainer, resume_from_checkpoint)\nExecute the training process with appropriate SDP kernel configurations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe configured trainer object.\nrequired\n\n\nresume_from_checkpoint\nstr | None\nPath to checkpoint to resume from, if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.handle_untrained_tokens_fix(\n cfg,\n model,\n tokenizer,\n train_dataset,\n safe_serialization,\n)\nApply fixes for untrained tokens if configured.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to apply fixes to.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer for token identification.\nrequired\n\n\ntrain_dataset\nDataset\nThe training dataset to use.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving.\nrequired\n\n\n\n\n\n\n\ntrain.save_initial_configs(cfg, tokenizer, model, peft_config, processor)\nSave initial configurations before training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to save.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save configuration for.\nrequired\n\n\npeft_config\nPeftConfig | None\nThe PEFT configuration to save if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.save_trained_model(cfg, trainer, model, safe_serialization)\nSave the trained model according to configuration and training setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe trainer object.\nrequired\n\n\nmodel\nPreTrainedModel\nThe trained model to save.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization.\nrequired\n\n\n\n\n\n\n\ntrain.setup_model_and_tokenizer(cfg)\nLoad the tokenizer, processor (for multimodal models), and model based on\nconfiguration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PreTrainedModel, 
PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple containing model, tokenizer, peft_config (if LoRA / QLoRA, else None), and processor (if multimodal, else None).\n\n\n\n\n\n\n\ntrain.setup_model_and_trainer(cfg, dataset_meta)\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\ntrainer setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters.\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple['HFRLTrainerBuilder' | 'HFCausalTrainerBuilder', PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple of: - Trainer builder (Causal or RLHF) - Model - Tokenizer - PEFT config - Processor\n\n\n\n\n\n\n\ntrain.setup_model_card(cfg)\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\ntrain.setup_reference_model(cfg, tokenizer)\nSet up the reference model for RL training if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to use for the reference model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nPreTrainedModel | None\nReference model if needed for RL training, None otherwise.\n\n\n\n\n\n\n\ntrain.setup_signal_handler(cfg, model, safe_serialization)\nSet up signal handler for graceful termination.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save on termination\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving\nrequired\n\n\n\n\n\n\n\ntrain.train(cfg, dataset_meta)\nTrain a model on the given dataset.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PeftModel | PreTrainedModel, PreTrainedTokenizer, Trainer]\nTuple of (model, tokenizer, trainer) after training"
},
{
"objectID": "docs/api/monkeypatch.mixtral.html",
@@ -1127,14 +1127,14 @@
"href": "docs/api/utils.samplers.multipack.html",
"title": "utils.samplers.multipack",
"section": "",
- "text": "utils.samplers.multipack\nMultipack Batch Sampler - An efficient batch sampler for packing variable-length sequences\ninto fixed-capacity batches to optimize memory usage and training throughput.\n\n\n\n\n\nName\nDescription\n\n\n\n\nMultipackBatchSampler\nBatch sampler class for efficient packing of variable-length sequences\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler(\n    sampler,\n    batch_size,\n    batch_max_len,\n    lengths,\n    packing_efficiency_estimate=1.0,\n    drop_last=False,\n    num_count_samples=8,\n    sequential=False,\n    group_size=100000,\n    bin_size=200,\n    num_processes=None,\n    safe_mode=True,\n    **kwargs,\n)\nBatch sampler class for efficient packing of variable-length sequences\nThis sampler packs sequences into fixed-capacity bins (batches) to 
maximize\nGPU memory utilization and training throughput by reducing padding.\nIt supports both parallel packing (using FFD algorithm) and\nsequential packing (preserving original sequence order).\n\n\n\n\n\nName\nDescription\n\n\n\n\nefficiency\nCalculate the packing efficiency (ratio of tokens used to total token slots).\n\n\ngather_efficiency\nGather and synchronize packing efficiency estimates across all distributed\n\n\ngather_len_batches\nGather and synchronize batch counts across all distributed ranks. Returns\n\n\ngenerate_batches\nGenerate packed batches for training.\n\n\nset_epoch\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.efficiency()\nCalculate the packing efficiency (ratio of tokens used to total token slots).\nHigher is better - 1.0 would mean perfect packing with no wasted space.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_efficiency()\nGather and synchronize packing efficiency estimates across all distributed\nranks.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nfloat\nA conservative efficiency estimate based on the measurements.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_len_batches(num)\nGather and synchronize batch counts across all distributed ranks. Returns\nthe minimum number of batches available on any rank.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.generate_batches(set_stats=False)\nGenerate packed batches for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nset_stats\nbool\nWhether to update efficiency statistics.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[list[int]]]\nList of batches, where each batch contains multiple bins, and each bin contains multiple sequence indices.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.set_epoch(epoch)\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nallocate_sequentially\nSequential allocator that preserves example order.\n\n\nffd_check\nFirst-fit-decreasing bin packing algorithm check.\n\n\npack_group\nPack a group of sequences into bins using First-Fit Decreasing algorithm.\n\n\npack_parallel\nPack sequences into bins using parallel processing.\n\n\n\n\n\nutils.samplers.multipack.allocate_sequentially(\n sequence_lengths,\n rank,\n bin_capacity,\n num_ranks,\n)\nSequential allocator that preserves example order.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nThe lengths of all examples.\nrequired\n\n\nrank\nint\nThe current rank (for distributed training).\nrequired\n\n\nbin_capacity\nint\nThe capacity of each bin (maximum sequence length).\nrequired\n\n\nnum_ranks\nint\nNumber of ranks (processes / GPUs).\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nrank_batches\nlist[list[int]]\nList of batches for the current rank.\n\n\ntotal_tokens_used\nint\nNumber of actual example tokens.\n\n\ntotal_token_slots\nint\nMaximum theoretical number of example tokens (number of bins * bin capacity).\n\n\n\n\n\n\n\nutils.samplers.multipack.ffd_check(sequence_lengths, bin_capacity, num_bins)\nFirst-fit-decreasing bin packing algorithm check.\nChecks if sequences with the given lengths could fit in the specified number of\nbins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence 
lengths.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin.\nrequired\n\n\nnum_bins\nint\nNumber of bins available.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nbool\nTrue if all sequences can be packed, False otherwise.\n\n\n\n\n\n\n\nutils.samplers.multipack.pack_group(\n sequence_lengths,\n group_offset,\n bin_capacity,\n max_bins,\n bin_size,\n safe_mode=True,\n)\nPack a group of sequences into bins using First-Fit Decreasing algorithm.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\ngroup_offset\nint\nOffset to apply to indices when returning results.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin.\nrequired\n\n\nmax_bins\nint\nMaximum number of bins to use.\nrequired\n\n\nbin_size\nint\nMaximum number of sequences per bin.\nrequired\n\n\nsafe_mode\nbool\nIf True, use a more conservative packing approach.\nTrue\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[int]]\nList of bins, where each bin contains indices of sequences assigned to it.\n\n\n\n\n\n\n\nutils.samplers.multipack.pack_parallel(\n sequence_lengths,\n bin_capacity,\n group_size,\n bin_size,\n num_processes=None,\n safe_mode=True,\n mp_start_method='spawn',\n)\nPack sequences into bins using parallel processing.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin as total number of tokens.\nrequired\n\n\ngroup_size\nint\nNumber of sequences to process in each group.\nrequired\n\n\nbin_size\nint\nMaximum number of bins to use.\nrequired\n\n\nnum_processes\nint | None\nNumber of parallel processes to use.\nNone\n\n\nsafe_mode\nbool\nIf True, use a more conservative packing approach.\nTrue\n\n\nmp_start_method\nstr | None\nMultiprocessing start method (‘fork’, ‘spawn’, ‘forkserver’). ‘spawn’ is often safer with Numba/PyTorch. Set to None to use system default.\n'spawn'\n\n\n\nReturns:\nList of bins, where each bin contains indices of sequences assigned to it." + "text": "utils.samplers.multipack\nMultipack Batch Sampler - An efficient batch sampler for packing variable-length sequences\ninto fixed-capacity batches to optimize memory usage and training throughput.\n\n\n\n\n\nName\nDescription\n\n\n\n\nMultipackBatchSampler\nBatch sampler class for efficient packing of variable-length sequences\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler(\n sampler,\n batch_size,\n batch_max_len,\n lengths,\n packing_efficiency_estimate=1.0,\n drop_last=True,\n num_count_samples=8,\n sequential=False,\n group_size=100000,\n bin_size=200,\n num_processes=None,\n safe_mode=True,\n **kwargs,\n)\nBatch sampler class for efficient packing of variable-length sequences\nThis sampler packs sequences into fixed-capacity bins (batches) to maximize\nGPU memory utilization and training throughput by reducing padding.\nIt supports both parallel packing (using FFD algorithm) and\nsequential packing (preserving original sequence order).\n\n\n\n\n\nName\nDescription\n\n\n\n\nefficiency\nCalculate the packing efficiency (ratio of tokens used to total token slots).\n\n\ngather_efficiency\nGather and synchronize packing efficiency estimates across all distributed\n\n\ngather_len_batches\nGather and synchronize batch counts across all distributed ranks. 
Returns\n\n\ngenerate_batches\nGenerate packed batches for training.\n\n\nset_epoch\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.efficiency()\nCalculate the packing efficiency (ratio of tokens used to total token slots).\nHigher is better - 1.0 would mean perfect packing with no wasted space.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_efficiency()\nGather and synchronize packing efficiency estimates across all distributed\nranks.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nfloat\nA conservative efficiency estimate based on the measurements.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_len_batches(num)\nGather and synchronize batch counts across all distributed ranks. Returns\nthe minimum number of batches available on any rank.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.generate_batches(set_stats=False)\nGenerate packed batches for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nset_stats\nbool\nWhether to update efficiency statistics.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[list[int]]]\nList of batches, where each batch contains multiple bins, and each bin contains multiple sequence indices.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.set_epoch(epoch)\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nallocate_sequentially\nSequential allocator that preserves example order.\n\n\nffd_check\nFirst-fit-decreasing bin packing algorithm check.\n\n\npack_group\nPack a group of sequences into bins using First-Fit Decreasing algorithm.\n\n\npack_parallel\nPack sequences into bins using parallel processing.\n\n\n\n\n\nutils.samplers.multipack.allocate_sequentially(\n sequence_lengths,\n rank,\n bin_capacity,\n num_ranks,\n)\nSequential allocator that preserves example order.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nThe lengths of all examples.\nrequired\n\n\nrank\nint\nThe current rank (for distributed training).\nrequired\n\n\nbin_capacity\nint\nThe capacity of each bin (maximum sequence length).\nrequired\n\n\nnum_ranks\nint\nNumber of ranks (processes / GPUs).\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nrank_batches\nlist[list[int]]\nList of batches for the current rank.\n\n\ntotal_tokens_used\nint\nNumber of actual example tokens.\n\n\ntotal_token_slots\nint\nMaximum theoretical number of example tokens (number of bins * bin capacity).\n\n\n\n\n\n\n\nutils.samplers.multipack.ffd_check(sequence_lengths, bin_capacity, num_bins)\nFirst-fit-decreasing bin packing algorithm check.\nChecks if sequences with the given lengths could fit in the specified number of\nbins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin.\nrequired\n\n\nnum_bins\nint\nNumber of bins available.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nbool\nTrue if all sequences can be packed, False otherwise.\n\n\n\n\n\n\n\nutils.samplers.multipack.pack_group(\n sequence_lengths,\n group_offset,\n bin_capacity,\n max_bins,\n bin_size,\n safe_mode=True,\n)\nPack a group of sequences into bins using First-Fit Decreasing 
algorithm.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\ngroup_offset\nint\nOffset to apply to indices when returning results.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin.\nrequired\n\n\nmax_bins\nint\nMaximum number of bins to use.\nrequired\n\n\nbin_size\nint\nMaximum number of sequences per bin.\nrequired\n\n\nsafe_mode\nbool\nIf True, use a more conservative packing approach.\nTrue\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[int]]\nList of bins, where each bin contains indices of sequences assigned to it.\n\n\n\n\n\n\n\nutils.samplers.multipack.pack_parallel(\n    sequence_lengths,\n    bin_capacity,\n    group_size,\n    bin_size,\n    num_processes=None,\n    safe_mode=True,\n    mp_start_method='spawn',\n)\nPack sequences into bins using parallel processing.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin as total number of tokens.\nrequired\n\n\ngroup_size\nint\nNumber of sequences to process in each group.\nrequired\n\n\nbin_size\nint\nMaximum number of sequences per bin.\nrequired\n\n\nnum_processes\nint | None\nNumber of parallel processes to use.\nNone\n\n\nsafe_mode\nbool\nIf True, use a more conservative packing approach.\nTrue\n\n\nmp_start_method\nstr | None\nMultiprocessing start method (‘fork’, ‘spawn’, ‘forkserver’). ‘spawn’ is often safer with Numba/PyTorch. Set to None to use system default.\n'spawn'\n\n\n\nReturns:\nList of bins, where each bin contains indices of sequences assigned to it."
},
{
"objectID": "docs/api/utils.samplers.multipack.html#classes",
"href": "docs/api/utils.samplers.multipack.html#classes",
"title": "utils.samplers.multipack",
"section": "",
- "text": "Name\nDescription\n\n\n\n\nMultipackBatchSampler\nBatch sampler class for efficient packing of variable-length sequences\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler(\n    sampler,\n    batch_size,\n    batch_max_len,\n    lengths,\n    packing_efficiency_estimate=1.0,\n    drop_last=False,\n    num_count_samples=8,\n    sequential=False,\n    group_size=100000,\n    bin_size=200,\n    num_processes=None,\n    safe_mode=True,\n    **kwargs,\n)\nBatch sampler class for efficient packing of variable-length sequences\nThis sampler packs sequences into fixed-capacity bins (batches) to maximize\nGPU memory utilization and training throughput by reducing padding.\nIt supports both parallel packing (using FFD algorithm) and\nsequential packing (preserving original sequence order).\n\n\n\n\n\nName\nDescription\n\n\n\n\nefficiency\nCalculate the packing efficiency (ratio of tokens used to total token slots).\n\n\ngather_efficiency\nGather and synchronize packing efficiency estimates across all distributed\n\n\ngather_len_batches\nGather and synchronize batch counts across all distributed ranks. 
Returns\n\n\ngenerate_batches\nGenerate packed batches for training.\n\n\nset_epoch\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.efficiency()\nCalculate the packing efficiency (ratio of tokens used to total token slots).\nHigher is better - 1.0 would mean perfect packing with no wasted space.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_efficiency()\nGather and synchronize packing efficiency estimates across all distributed\nranks.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nfloat\nA conservative efficiency estimate based on the measurements.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_len_batches(num)\nGather and synchronize batch counts across all distributed ranks. Returns\nthe minimum number of batches available on any rank.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.generate_batches(set_stats=False)\nGenerate packed batches for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nset_stats\nbool\nWhether to update efficiency statistics.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[list[int]]]\nList of batches, where each batch contains multiple bins, and each bin contains multiple sequence indices.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.set_epoch(epoch)\nSet the epoch number, used for reproducible shuffling across epochs" + "text": "Name\nDescription\n\n\n\n\nMultipackBatchSampler\nBatch sampler class for efficient packing of variable-length sequences\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler(\n sampler,\n batch_size,\n batch_max_len,\n lengths,\n packing_efficiency_estimate=1.0,\n drop_last=True,\n num_count_samples=8,\n sequential=False,\n group_size=100000,\n bin_size=200,\n num_processes=None,\n safe_mode=True,\n **kwargs,\n)\nBatch sampler class for efficient packing of variable-length sequences\nThis sampler packs sequences into fixed-capacity bins (batches) to maximize\nGPU memory utilization and training throughput by reducing padding.\nIt supports both parallel packing (using FFD algorithm) and\nsequential packing (preserving original sequence order).\n\n\n\n\n\nName\nDescription\n\n\n\n\nefficiency\nCalculate the packing efficiency (ratio of tokens used to total token slots).\n\n\ngather_efficiency\nGather and synchronize packing efficiency estimates across all distributed\n\n\ngather_len_batches\nGather and synchronize batch counts across all distributed ranks. Returns\n\n\ngenerate_batches\nGenerate packed batches for training.\n\n\nset_epoch\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.efficiency()\nCalculate the packing efficiency (ratio of tokens used to total token slots).\nHigher is better - 1.0 would mean perfect packing with no wasted space.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_efficiency()\nGather and synchronize packing efficiency estimates across all distributed\nranks.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nfloat\nA conservative efficiency estimate based on the measurements.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_len_batches(num)\nGather and synchronize batch counts across all distributed ranks. 
Returns\nthe minimum number of batches available on any rank.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.generate_batches(set_stats=False)\nGenerate packed batches for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nset_stats\nbool\nWhether to update efficiency statistics.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[list[int]]]\nList of batches, where each batch contains multiple bins, and each bin contains multiple sequence indices.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.set_epoch(epoch)\nSet the epoch number, used for reproducible shuffling across epochs" }, { "objectID": "docs/api/utils.samplers.multipack.html#functions", @@ -1428,14 +1428,14 @@ "href": "docs/api/integrations.kd.trainer.html", "title": "integrations.kd.trainer", "section": "", - "text": "integrations.kd.trainer\nKD trainer\n\n\n\n\n\nName\nDescription\n\n\n\n\nAxolotlKDTrainer\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer(\n *_args,\n bench_data_collator=None,\n eval_data_collator=None,\n dataset_tags=None,\n **kwargs,\n)\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nName\nDescription\n\n\n\n\ncompute_loss\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer.compute_loss(\n model,\n inputs,\n return_outputs=False,\n num_items_in_batch=None,\n)\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\nSubclass and override for custom behavior." + "text": "integrations.kd.trainer\nKD trainer\n\n\n\n\n\nName\nDescription\n\n\n\n\nAxolotlKDTrainer\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer(*args, **kwargs)\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nName\nDescription\n\n\n\n\ncompute_loss\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer.compute_loss(\n model,\n inputs,\n return_outputs=False,\n num_items_in_batch=None,\n)\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\nSubclass and override for custom behavior." }, { "objectID": "docs/api/integrations.kd.trainer.html#classes", "href": "docs/api/integrations.kd.trainer.html#classes", "title": "integrations.kd.trainer", "section": "", - "text": "Name\nDescription\n\n\n\n\nAxolotlKDTrainer\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer(\n *_args,\n bench_data_collator=None,\n eval_data_collator=None,\n dataset_tags=None,\n **kwargs,\n)\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nName\nDescription\n\n\n\n\ncompute_loss\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer.compute_loss(\n model,\n inputs,\n return_outputs=False,\n num_items_in_batch=None,\n)\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\nSubclass and override for custom behavior." 
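Editor's note on the AxolotlKDTrainer entry below: the compute_loss hook is where a KD trainer combines the student's standard loss with a distillation term. The snippet that follows is a minimal, illustrative sketch of that pattern on a stock Hugging Face Trainer, not the actual AxolotlKDTrainer implementation; the `teacher_logits` batch field is a hypothetical name, and `kd_alpha` / `kd_temperature` are modeled on the training arguments of the same names documented earlier on this page.

```python
# Illustrative sketch only -- assumes the data collator attaches a
# (hypothetical) `teacher_logits` tensor to every batch.
import torch.nn.functional as F
from transformers import Trainer


class SketchKDTrainer(Trainer):
    """Toy distillation trainer mixing cross-entropy and KL losses."""

    def __init__(self, *args, kd_alpha=1.0, kd_temperature=1.0, **kwargs):
        super().__init__(*args, **kwargs)
        self.kd_alpha = kd_alpha               # weight on the distillation term
        self.kd_temperature = kd_temperature   # softmax temperature

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        teacher_logits = inputs.pop("teacher_logits")  # hypothetical batch field
        outputs = model(**inputs)   # assumes `labels` are present in `inputs`
        ce_loss = outputs.loss      # standard next-token cross-entropy

        t = self.kd_temperature
        student_logp = F.log_softmax(outputs.logits / t, dim=-1)
        teacher_p = F.softmax(teacher_logits / t, dim=-1)
        # KL(teacher || student); the t**2 factor keeps gradient magnitudes
        # comparable across temperatures (Hinton et al., 2015).
        kd_loss = F.kl_div(student_logp, teacher_p, reduction="batchmean") * (t * t)

        loss = ce_loss + self.kd_alpha * kd_loss
        return (loss, outputs) if return_outputs else loss
```

Overriding compute_loss rather than wrapping the training loop is what "Subclass and override for custom behavior" means in the docstring: the Trainer machinery for gradient accumulation, logging, and distributed reduction keeps working unchanged.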
+ "text": "Name\nDescription\n\n\n\n\nAxolotlKDTrainer\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer(*args, **kwargs)\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nName\nDescription\n\n\n\n\ncompute_loss\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer.compute_loss(\n model,\n inputs,\n return_outputs=False,\n num_items_in_batch=None,\n)\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\nSubclass and override for custom behavior." }, { "objectID": "docs/api/utils.schemas.enums.html", @@ -2720,14 +2720,14 @@ "href": "docs/api/integrations.base.html", "title": "integrations.base", "section": "", - "text": "integrations.base\nBase class for all plugins.\nA plugin is a reusable, modular, and self-contained piece of code that extends the functionality of Axolotl.\nPlugins can be used to integrate third-party models, modify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and implement the required methods.\n\n\n\n\n\nName\nDescription\n\n\n\n\nBaseOptimizerFactory\nBase class for factories to create custom optimizers\n\n\nBasePlugin\nBase class for all plugins. Defines the interface for plugin methods.\n\n\nPluginManager\nThe PluginManager class is responsible for loading and managing plugins. It\n\n\n\n\n\nintegrations.base.BaseOptimizerFactory()\nBase class for factories to create custom optimizers\n\n\n\nintegrations.base.BasePlugin()\nBase class for all plugins. Defines the interface for plugin methods.\nA plugin is a reusable, modular, and self-contained piece of code that extends\nthe functionality of Axolotl. Plugins can be used to integrate third-party models,\nmodify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and\nimplement the required methods.\n\n\nPlugin methods include:\n- register(cfg): Registers the plugin with the given configuration.\n- load_datasets(cfg): Loads and preprocesses the dataset for training.\n- pre_model_load(cfg): Performs actions before the model is loaded.\n- post_model_build(cfg, model): Performs actions after the model is loaded, but\nbefore LoRA adapters are applied.\n- pre_lora_load(cfg, model): Performs actions before LoRA weights are loaded.\n- post_lora_load(cfg, model): Performs actions after LoRA weights are loaded.\n- post_model_load(cfg, model): Performs actions after the model is loaded,\ninclusive of any adapters.\n- post_trainer_create(cfg, trainer): Performs actions after the trainer is\ncreated.\n- create_optimizer(cfg, trainer): Creates and returns an optimizer for training.\n- create_lr_scheduler(cfg, trainer, optimizer, num_training_steps): Creates and\nreturns a learning rate scheduler.\n- add_callbacks_pre_trainer(cfg, model): Adds callbacks to the trainer before\ntraining.\n- add_callbacks_post_trainer(cfg, trainer): Adds callbacks to the trainer after\ntraining.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nAdds callbacks to the trainer after creating the trainer. 
This is useful for\n\n\nadd_callbacks_pre_trainer\nSet up callbacks before creating the trainer.\n\n\ncreate_lr_scheduler\nCreates and returns a learning rate scheduler.\n\n\ncreate_optimizer\nCreates and returns an optimizer for training.\n\n\nget_input_args\nReturns a pydantic model for the plugin’s input arguments.\n\n\nget_trainer_cls\nReturns a custom class for the trainer.\n\n\nload_datasets\nLoads and preprocesses the dataset for training.\n\n\npost_lora_load\nPerforms actions after LoRA weights are loaded.\n\n\npost_model_build\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\npost_model_load\nPerforms actions after the model is loaded.\n\n\npost_train\nPerforms actions after training is complete.\n\n\npost_train_unload\nPerforms actions after training is complete and the model is unloaded.\n\n\npost_trainer_create\nPerforms actions after the trainer is created.\n\n\npre_lora_load\nPerforms actions before LoRA weights are loaded.\n\n\npre_model_load\nPerforms actions before the model is loaded.\n\n\nregister\nRegisters the plugin with the given configuration.\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_post_trainer(cfg, trainer)\nAdds callbacks to the trainer after creating the trainer. This is useful for\ncallbacks that require access to the model or trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_pre_trainer(cfg, model)\nSet up callbacks before creating the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_lr_scheduler(\n cfg,\n trainer,\n optimizer,\n num_training_steps,\n)\nCreates and returns a learning rate scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\nnum_training_steps\nint\nTotal number of training steps\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_optimizer(cfg, trainer)\nCreates and returns an optimizer for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_input_args()\nReturns a pydantic model for the plugin’s input arguments.\n\n\n\nintegrations.base.BasePlugin.get_trainer_cls(cfg)\nReturns a custom class for the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe 
first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.load_datasets(cfg, preprocess=False)\nLoads and preprocesses the dataset for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\npreprocess\nbool\nWhether this is the preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\ndataset_meta\nUnion['TrainDatasetMeta', None]\nThe metadata for the training dataset.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_lora_load(cfg, model)\nPerforms actions after LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_build(cfg, model)\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_load(cfg, model)\nPerforms actions after the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train(cfg, model)\nPerforms actions after training is complete.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe axolotl configuration.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train_unload(cfg)\nPerforms actions after training is complete and the model is unloaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_trainer_create(cfg, trainer)\nPerforms actions after the trainer is created.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_lora_load(cfg, model)\nPerforms actions before LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_model_load(cfg)\nPerforms actions before the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.register(cfg)\nRegisters the plugin with the given configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\n\n\nintegrations.base.PluginManager()\nThe PluginManager class is responsible for loading and managing plugins. 
It\nshould be a singleton so it can be accessed from anywhere in the codebase.\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nplugins\nOrderedDict[str, BasePlugin]\nA list of loaded plugins.\n\n\n\n\n\n\nKey methods include:\n- get_instance(): Static method to get the singleton instance of PluginManager.\n- register(plugin_name: str): Registers a new plugin by its name.\n- pre_model_load(cfg): Calls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\nadd_callbacks_pre_trainer\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\ncreate_lr_scheduler\nCalls the create_lr_scheduler method of all registered plugins and returns\n\n\ncreate_optimizer\nCalls the create_optimizer method of all registered plugins and returns\n\n\nget_input_args\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\nget_instance\nReturns the singleton instance of PluginManager. If the instance doesn’t\n\n\nget_trainer_cls\nCalls the get_trainer_cls method of all registered plugins and returns the\n\n\nload_datasets\nCalls the load_datasets method of each registered plugin.\n\n\npost_lora_load\nCalls the post_lora_load method of all registered plugins.\n\n\npost_model_build\nCalls the post_model_build method of all registered plugins after the\n\n\npost_model_load\nCalls the post_model_load method of all registered plugins after the model\n\n\npost_train\nCalls the post_train method of all registered plugins.\n\n\npost_train_unload\nCalls the post_train_unload method of all registered plugins.\n\n\npost_trainer_create\nCalls the post_trainer_create method of all registered plugins.\n\n\npre_lora_load\nCalls the pre_lora_load method of all registered plugins.\n\n\npre_model_load\nCalls the pre_model_load method of all registered plugins.\n\n\nregister\nRegisters a new plugin by its name.\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_lr_scheduler(\n trainer,\n optimizer,\n num_training_steps,\n)\nCalls the create_lr_scheduler method of all registered plugins and returns\nthe first non-None scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler, or None if not 
found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_optimizer(trainer)\nCalls the create_optimizer method of all registered plugins and returns\nthe first non-None optimizer.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer, or None if none was found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_input_args()\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[str]\nA list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_instance()\nReturns the singleton instance of PluginManager. If the instance doesn’t\nexist, it creates a new one.\n\n\n\nintegrations.base.PluginManager.get_trainer_cls(cfg)\nCalls the get_trainer_cls method of all registered plugins and returns the\nfirst non-None trainer class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.load_datasets(cfg, preprocess=False)\nCalls the load_datasets method of each registered plugin.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\npreprocess\nbool\nWhether this is preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nUnion['TrainDatasetMeta', None]\nThe dataset metadata loaded from all registered plugins.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_lora_load(cfg, model)\nCalls the post_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_build(cfg, model)\nCalls the post_model_build method of all registered plugins after the\nmodel has been built / loaded, but before any adapters have been applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_load(cfg, model)\nCalls the post_model_load method of all registered plugins after the model\nhas been loaded inclusive of any adapters.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train(cfg, model)\nCalls the post_train method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train_unload(cfg)\nCalls the post_train_unload method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the 
plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_trainer_create(cfg, trainer)\nCalls the post_trainer_create method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_lora_load(cfg, model)\nCalls the pre_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_model_load(cfg)\nCalls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.register(plugin_name)\nRegisters a new plugin by its name.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be registered.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported.\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nload_plugin\nLoads a plugin based on the given plugin name.\n\n\n\n\n\nintegrations.base.load_plugin(plugin_name)\nLoads a plugin based on the given plugin name.\nThe plugin name should be in the format “module_name.class_name”. This function\nsplits the plugin name into module and class, imports the module, retrieves the\nclass from the module, and creates an instance of the class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be loaded. The name should be in the format “module_name.class_name”.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nBasePlugin\nAn instance of the loaded plugin.\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported." + "text": "integrations.base\nBase class for all plugins.\nA plugin is a reusable, modular, and self-contained piece of code that extends the functionality of Axolotl.\nPlugins can be used to integrate third-party models, modify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and implement the required methods.\n\n\n\n\n\nName\nDescription\n\n\n\n\nBaseOptimizerFactory\nBase class for factories to create custom optimizers\n\n\nBasePlugin\nBase class for all plugins. Defines the interface for plugin methods.\n\n\nPluginManager\nThe PluginManager class is responsible for loading and managing plugins. It\n\n\n\n\n\nintegrations.base.BaseOptimizerFactory()\nBase class for factories to create custom optimizers\n\n\n\nintegrations.base.BasePlugin()\nBase class for all plugins. Defines the interface for plugin methods.\nA plugin is a reusable, modular, and self-contained piece of code that extends\nthe functionality of Axolotl. 
Plugins can be used to integrate third-party models,\nmodify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and\nimplement the required methods.\n\n\nPlugin methods include:\n- register(cfg): Registers the plugin with the given configuration.\n- load_datasets(cfg): Loads and preprocesses the dataset for training.\n- pre_model_load(cfg): Performs actions before the model is loaded.\n- post_model_build(cfg, model): Performs actions after the model is loaded, but\nbefore LoRA adapters are applied.\n- pre_lora_load(cfg, model): Performs actions before LoRA weights are loaded.\n- post_lora_load(cfg, model): Performs actions after LoRA weights are loaded.\n- post_model_load(cfg, model): Performs actions after the model is loaded,\ninclusive of any adapters.\n- post_trainer_create(cfg, trainer): Performs actions after the trainer is\ncreated.\n- create_optimizer(cfg, trainer): Creates and returns an optimizer for training.\n- create_lr_scheduler(cfg, trainer, optimizer, num_training_steps): Creates and\nreturns a learning rate scheduler.\n- add_callbacks_pre_trainer(cfg, model): Adds callbacks to the trainer before\ntraining.\n- add_callbacks_post_trainer(cfg, trainer): Adds callbacks to the trainer after\ntraining.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nAdds callbacks to the trainer after creating the trainer. This is useful for\n\n\nadd_callbacks_pre_trainer\nSet up callbacks before creating the trainer.\n\n\ncreate_lr_scheduler\nCreates and returns a learning rate scheduler.\n\n\ncreate_optimizer\nCreates and returns an optimizer for training.\n\n\nget_collator_cls_and_kwargs\nReturns a custom class for the collator.\n\n\nget_input_args\nReturns a pydantic model for the plugin’s input arguments.\n\n\nget_trainer_cls\nReturns a custom class for the trainer.\n\n\nget_training_args\nReturns custom training arguments to set on TrainingArgs.\n\n\nget_training_args_mixin\nReturns a dataclass model for the plugin’s training arguments.\n\n\nload_datasets\nLoads and preprocesses the dataset for training.\n\n\npost_lora_load\nPerforms actions after LoRA weights are loaded.\n\n\npost_model_build\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\npost_model_load\nPerforms actions after the model is loaded.\n\n\npost_train\nPerforms actions after training is complete.\n\n\npost_train_unload\nPerforms actions after training is complete and the model is unloaded.\n\n\npost_trainer_create\nPerforms actions after the trainer is created.\n\n\npre_lora_load\nPerforms actions before LoRA weights are loaded.\n\n\npre_model_load\nPerforms actions before the model is loaded.\n\n\nregister\nRegisters the plugin with the given configuration.\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_post_trainer(cfg, trainer)\nAdds callbacks to the trainer after creating the trainer. 
This is useful for\ncallbacks that require access to the model or trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_pre_trainer(cfg, model)\nSet up callbacks before creating the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_lr_scheduler(\n cfg,\n trainer,\n optimizer,\n num_training_steps,\n)\nCreates and returns a learning rate scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\nnum_training_steps\nint\nTotal number of training steps\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_optimizer(cfg, trainer)\nCreates and returns an optimizer for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_collator_cls_and_kwargs(cfg, is_eval=False)\nReturns a custom class for the collator.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\nis_eval\nbool\nWhether this is an eval split.\nFalse\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nclass\n\nThe class for the collator.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_input_args()\nReturns a pydantic model for the plugin’s input arguments.\n\n\n\nintegrations.base.BasePlugin.get_trainer_cls(cfg)\nReturns a custom class for the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_training_args(cfg)\nReturns custom training arguments to set on TrainingArgs.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nobject\n\ndict containing the training arguments.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_training_args_mixin()\nReturns a dataclass model for the plugin’s training arguments.\n\n\n\nintegrations.base.BasePlugin.load_datasets(cfg, preprocess=False)\nLoads and preprocesses the dataset for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\npreprocess\nbool\nWhether this is the preprocess step of the 
datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\ndataset_meta\nUnion['TrainDatasetMeta', None]\nThe metadata for the training dataset.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_lora_load(cfg, model)\nPerforms actions after LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_build(cfg, model)\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_load(cfg, model)\nPerforms actions after the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train(cfg, model)\nPerforms actions after training is complete.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe axolotl configuration.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train_unload(cfg)\nPerforms actions after training is complete and the model is unloaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_trainer_create(cfg, trainer)\nPerforms actions after the trainer is created.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_lora_load(cfg, model)\nPerforms actions before LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_model_load(cfg)\nPerforms actions before the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.register(cfg)\nRegisters the plugin with the given configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\n\n\nintegrations.base.PluginManager()\nThe PluginManager class is responsible for loading and managing plugins. 
It\nshould be a singleton so it can be accessed from anywhere in the codebase.\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nplugins\nOrderedDict[str, BasePlugin]\nAn ordered mapping of loaded plugins.\n\n\n\n\n\n\nKey methods include:\n- get_instance(): Static method to get the singleton instance of PluginManager.\n- register(plugin_name: str): Registers a new plugin by its name.\n- pre_model_load(cfg): Calls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\nadd_callbacks_pre_trainer\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\ncreate_lr_scheduler\nCalls the create_lr_scheduler method of all registered plugins and returns\n\n\ncreate_optimizer\nCalls the create_optimizer method of all registered plugins and returns\n\n\nget_collator_cls_and_kwargs\nCalls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.\n\n\nget_input_args\nReturns a list of Pydantic classes for all registered plugins’ input arguments.\n\n\nget_instance\nReturns the singleton instance of PluginManager. If the instance doesn’t\n\n\nget_trainer_cls\nCalls the get_trainer_cls method of all registered plugins and returns the\n\n\nget_training_args\nCalls the get_training_args method of all registered plugins and returns the combined training arguments.\n\n\nget_training_args_mixin\nReturns a list of dataclasses for all registered plugins’ training args mixins.\n\n\nload_datasets\nCalls the load_datasets method of each registered plugin.\n\n\npost_lora_load\nCalls the post_lora_load method of all registered plugins.\n\n\npost_model_build\nCalls the post_model_build method of all registered plugins after the\n\n\npost_model_load\nCalls the post_model_load method of all registered plugins after the model\n\n\npost_train\nCalls the post_train method of all registered plugins.\n\n\npost_train_unload\nCalls the post_train_unload method of all registered plugins.\n\n\npost_trainer_create\nCalls the post_trainer_create method of all registered plugins.\n\n\npre_lora_load\nCalls the pre_lora_load method of all registered plugins.\n\n\npre_model_load\nCalls the pre_model_load method of all registered plugins.\n\n\nregister\nRegisters a new plugin by its name.\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_lr_scheduler(\n trainer,\n optimizer,\n num_training_steps,\n)\nCalls the create_lr_scheduler method of all registered plugins and returns\nthe 
first non-None scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler, or None if not found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_optimizer(trainer)\nCalls the create_optimizer method of all registered plugins and returns\nthe first non-None optimizer.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer, or None if none was found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_collator_cls_and_kwargs(cfg, is_eval=False)\nCalls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.\nParameters:\ncfg (dict): The configuration for the plugins.\nis_eval (bool): Whether this is an eval split.\nReturns:\nobject: The collator class, or None if none was found.\n\n\n\nintegrations.base.PluginManager.get_input_args()\nReturns a list of Pydantic classes for all registered plugins’ input arguments.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[str]\nA list of Pydantic classes for all registered plugins’ input arguments.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_instance()\nReturns the singleton instance of PluginManager. If the instance doesn’t\nexist, it creates a new one.\n\n\n\nintegrations.base.PluginManager.get_trainer_cls(cfg)\nCalls the get_trainer_cls method of all registered plugins and returns the\nfirst non-None trainer class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_training_args(cfg)\nCalls the get_training_args method of all registered plugins and returns the combined training arguments.\nParameters:\ncfg (dict): The configuration for the plugins.\nReturns:\nobject: The training arguments.\n\n\n\nintegrations.base.PluginManager.get_training_args_mixin()\nReturns a list of dataclasses for all registered plugins’ training args mixins.\nReturns:\nlist[str]: A list of dataclasses.\n\n\n\nintegrations.base.PluginManager.load_datasets(cfg, preprocess=False)\nCalls the load_datasets method of each registered plugin.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\npreprocess\nbool\nWhether this is the preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nUnion['TrainDatasetMeta', None]\nThe dataset metadata loaded from all registered plugins.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_lora_load(cfg, model)\nCalls the post_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_build(cfg, model)\nCalls the post_model_build method of all registered plugins after the\nmodel has been built / loaded, but before any adapters have been 
applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_load(cfg, model)\nCalls the post_model_load method of all registered plugins after the model\nhas been loaded inclusive of any adapters.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train(cfg, model)\nCalls the post_train method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train_unload(cfg)\nCalls the post_train_unload method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_trainer_create(cfg, trainer)\nCalls the post_trainer_create method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_lora_load(cfg, model)\nCalls the pre_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_model_load(cfg)\nCalls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.register(plugin_name)\nRegisters a new plugin by its name.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be registered.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported.\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nload_plugin\nLoads a plugin based on the given plugin name.\n\n\n\n\n\nintegrations.base.load_plugin(plugin_name)\nLoads a plugin based on the given plugin name.\nThe plugin name should be in the format “module_name.class_name”. This function\nsplits the plugin name into module and class, imports the module, retrieves the\nclass from the module, and creates an instance of the class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be loaded. The name should be in the format “module_name.class_name”.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nBasePlugin\nAn instance of the loaded plugin.\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported." 
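Taken together, the entries above describe a hook-based extension API: subclass BasePlugin, override only the hooks you need (the base class supplies no-op defaults), and register the subclass by its dotted "module_name.class_name" path so the singleton PluginManager can invoke each hook at the matching point in the training lifecycle. Below is a minimal sketch under those assumptions; the hook names and the registration format come from the reference above, while MyCallback, my_package.my_module, and the my_plugin_message config key are hypothetical.

```python
# Minimal plugin sketch against the BasePlugin interface documented above.
# Assumes the axolotl.integrations.base import path shown in these docs;
# MyCallback and the `my_plugin_message` config key are made up for the demo.
from transformers import TrainerCallback

from axolotl.integrations.base import BasePlugin, PluginManager


class MyCallback(TrainerCallback):
    """Hypothetical callback that logs when training begins."""

    def on_train_begin(self, args, state, control, **kwargs):
        print("MyPlugin: training started")


class MyPlugin(BasePlugin):
    """Example plugin that overrides only two of the optional hooks."""

    def pre_model_load(self, cfg):
        # Runs before the base model is loaded; `cfg` is the axolotl
        # configuration (a DictDefault, i.e. a dict subclass).
        print(cfg.get("my_plugin_message", "pre_model_load hook fired"))

    def add_callbacks_post_trainer(self, cfg, trainer):
        # Callback hooks return a list, per the signatures above.
        return [MyCallback()]


# Registration goes through the PluginManager singleton, using the
# "module_name.class_name" format that load_plugin expects.
plugin_manager = PluginManager.get_instance()
plugin_manager.register("my_package.my_module.MyPlugin")
```

Note that register() imports the module named in the dotted path (raising ImportError on failure, per the docs above), so this sketch only resolves if MyPlugin actually lives in an importable my_package.my_module.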
}, { "objectID": "docs/api/integrations.base.html#classes", "href": "docs/api/integrations.base.html#classes", "title": "integrations.base", "section": "", - "text": "Name\nDescription\n\n\n\n\nBaseOptimizerFactory\nBase class for factories to create custom optimizers\n\n\nBasePlugin\nBase class for all plugins. Defines the interface for plugin methods.\n\n\nPluginManager\nThe PluginManager class is responsible for loading and managing plugins. It\n\n\n\n\n\nintegrations.base.BaseOptimizerFactory()\nBase class for factories to create custom optimizers\n\n\n\nintegrations.base.BasePlugin()\nBase class for all plugins. Defines the interface for plugin methods.\nA plugin is a reusable, modular, and self-contained piece of code that extends\nthe functionality of Axolotl. Plugins can be used to integrate third-party models,\nmodify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and\nimplement the required methods.\n\n\nPlugin methods include:\n- register(cfg): Registers the plugin with the given configuration.\n- load_datasets(cfg): Loads and preprocesses the dataset for training.\n- pre_model_load(cfg): Performs actions before the model is loaded.\n- post_model_build(cfg, model): Performs actions after the model is loaded, but\nbefore LoRA adapters are applied.\n- pre_lora_load(cfg, model): Performs actions before LoRA weights are loaded.\n- post_lora_load(cfg, model): Performs actions after LoRA weights are loaded.\n- post_model_load(cfg, model): Performs actions after the model is loaded,\ninclusive of any adapters.\n- post_trainer_create(cfg, trainer): Performs actions after the trainer is\ncreated.\n- create_optimizer(cfg, trainer): Creates and returns an optimizer for training.\n- create_lr_scheduler(cfg, trainer, optimizer, num_training_steps): Creates and\nreturns a learning rate scheduler.\n- add_callbacks_pre_trainer(cfg, model): Adds callbacks to the trainer before\ntraining.\n- add_callbacks_post_trainer(cfg, trainer): Adds callbacks to the trainer after\ntraining.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nAdds callbacks to the trainer after creating the trainer. This is useful for\n\n\nadd_callbacks_pre_trainer\nSet up callbacks before creating the trainer.\n\n\ncreate_lr_scheduler\nCreates and returns a learning rate scheduler.\n\n\ncreate_optimizer\nCreates and returns an optimizer for training.\n\n\nget_input_args\nReturns a pydantic model for the plugin’s input arguments.\n\n\nget_trainer_cls\nReturns a custom class for the trainer.\n\n\nload_datasets\nLoads and preprocesses the dataset for training.\n\n\npost_lora_load\nPerforms actions after LoRA weights are loaded.\n\n\npost_model_build\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\npost_model_load\nPerforms actions after the model is loaded.\n\n\npost_train\nPerforms actions after training is complete.\n\n\npost_train_unload\nPerforms actions after training is complete and the model is unloaded.\n\n\npost_trainer_create\nPerforms actions after the trainer is created.\n\n\npre_lora_load\nPerforms actions before LoRA weights are loaded.\n\n\npre_model_load\nPerforms actions before the model is loaded.\n\n\nregister\nRegisters the plugin with the given configuration.\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_post_trainer(cfg, trainer)\nAdds callbacks to the trainer after creating the trainer. 
This is useful for\ncallbacks that require access to the model or trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_pre_trainer(cfg, model)\nSet up callbacks before creating the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_lr_scheduler(\n cfg,\n trainer,\n optimizer,\n num_training_steps,\n)\nCreates and returns a learning rate scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\nnum_training_steps\nint\nTotal number of training steps\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_optimizer(cfg, trainer)\nCreates and returns an optimizer for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_input_args()\nReturns a pydantic model for the plugin’s input arguments.\n\n\n\nintegrations.base.BasePlugin.get_trainer_cls(cfg)\nReturns a custom class for the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.load_datasets(cfg, preprocess=False)\nLoads and preprocesses the dataset for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\npreprocess\nbool\nWhether this is the preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\ndataset_meta\nUnion['TrainDatasetMeta', None]\nThe metadata for the training dataset.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_lora_load(cfg, model)\nPerforms actions after LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_build(cfg, model)\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_load(cfg, model)\nPerforms actions after the model is 
loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train(cfg, model)\nPerforms actions after training is complete.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe axolotl configuration.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train_unload(cfg)\nPerforms actions after training is complete and the model is unloaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_trainer_create(cfg, trainer)\nPerforms actions after the trainer is created.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_lora_load(cfg, model)\nPerforms actions before LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_model_load(cfg)\nPerforms actions before the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.register(cfg)\nRegisters the plugin with the given configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\n\n\nintegrations.base.PluginManager()\nThe PluginManager class is responsible for loading and managing plugins. It\nshould be a singleton so it can be accessed from anywhere in the codebase.\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nplugins\nOrderedDict[str, BasePlugin]\nA list of loaded plugins.\n\n\n\n\n\n\nKey methods include:\n- get_instance(): Static method to get the singleton instance of PluginManager.\n- register(plugin_name: str): Registers a new plugin by its name.\n- pre_model_load(cfg): Calls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\nadd_callbacks_pre_trainer\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\ncreate_lr_scheduler\nCalls the create_lr_scheduler method of all registered plugins and returns\n\n\ncreate_optimizer\nCalls the create_optimizer method of all registered plugins and returns\n\n\nget_input_args\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\nget_instance\nReturns the singleton instance of PluginManager. 
If the instance doesn’t\n\n\nget_trainer_cls\nCalls the get_trainer_cls method of all registered plugins and returns the\n\n\nload_datasets\nCalls the load_datasets method of each registered plugin.\n\n\npost_lora_load\nCalls the post_lora_load method of all registered plugins.\n\n\npost_model_build\nCalls the post_model_build method of all registered plugins after the\n\n\npost_model_load\nCalls the post_model_load method of all registered plugins after the model\n\n\npost_train\nCalls the post_train method of all registered plugins.\n\n\npost_train_unload\nCalls the post_train_unload method of all registered plugins.\n\n\npost_trainer_create\nCalls the post_trainer_create method of all registered plugins.\n\n\npre_lora_load\nCalls the pre_lora_load method of all registered plugins.\n\n\npre_model_load\nCalls the pre_model_load method of all registered plugins.\n\n\nregister\nRegisters a new plugin by its name.\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_lr_scheduler(\n trainer,\n optimizer,\n num_training_steps,\n)\nCalls the create_lr_scheduler method of all registered plugins and returns\nthe first non-None scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler, or None if not found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_optimizer(trainer)\nCalls the create_optimizer method of all registered plugins and returns\nthe first non-None optimizer.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer, or None if none was found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_input_args()\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[str]\nA list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_instance()\nReturns the singleton instance of PluginManager. 
If the instance doesn’t\nexist, it creates a new one.\n\n\n\nintegrations.base.PluginManager.get_trainer_cls(cfg)\nCalls the get_trainer_cls method of all registered plugins and returns the\nfirst non-None trainer class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.load_datasets(cfg, preprocess=False)\nCalls the load_datasets method of each registered plugin.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\npreprocess\nbool\nWhether this is preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nUnion['TrainDatasetMeta', None]\nThe dataset metadata loaded from all registered plugins.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_lora_load(cfg, model)\nCalls the post_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_build(cfg, model)\nCalls the post_model_build method of all registered plugins after the\nmodel has been built / loaded, but before any adapters have been applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_load(cfg, model)\nCalls the post_model_load method of all registered plugins after the model\nhas been loaded inclusive of any adapters.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train(cfg, model)\nCalls the post_train method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train_unload(cfg)\nCalls the post_train_unload method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_trainer_create(cfg, trainer)\nCalls the post_trainer_create method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_lora_load(cfg, model)\nCalls the pre_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_model_load(cfg)\nCalls the pre_model_load method of all registered 
plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.register(plugin_name)\nRegisters a new plugin by its name.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be registered.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported." + "text": "Name\nDescription\n\n\n\n\nBaseOptimizerFactory\nBase class for factories to create custom optimizers\n\n\nBasePlugin\nBase class for all plugins. Defines the interface for plugin methods.\n\n\nPluginManager\nThe PluginManager class is responsible for loading and managing plugins. It\n\n\n\n\n\nintegrations.base.BaseOptimizerFactory()\nBase class for factories to create custom optimizers\n\n\n\nintegrations.base.BasePlugin()\nBase class for all plugins. Defines the interface for plugin methods.\nA plugin is a reusable, modular, and self-contained piece of code that extends\nthe functionality of Axolotl. Plugins can be used to integrate third-party models,\nmodify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and\nimplement the required methods.\n\n\nPlugin methods include:\n- register(cfg): Registers the plugin with the given configuration.\n- load_datasets(cfg): Loads and preprocesses the dataset for training.\n- pre_model_load(cfg): Performs actions before the model is loaded.\n- post_model_build(cfg, model): Performs actions after the model is loaded, but\nbefore LoRA adapters are applied.\n- pre_lora_load(cfg, model): Performs actions before LoRA weights are loaded.\n- post_lora_load(cfg, model): Performs actions after LoRA weights are loaded.\n- post_model_load(cfg, model): Performs actions after the model is loaded,\ninclusive of any adapters.\n- post_trainer_create(cfg, trainer): Performs actions after the trainer is\ncreated.\n- create_optimizer(cfg, trainer): Creates and returns an optimizer for training.\n- create_lr_scheduler(cfg, trainer, optimizer, num_training_steps): Creates and\nreturns a learning rate scheduler.\n- add_callbacks_pre_trainer(cfg, model): Adds callbacks to the trainer before\ntraining.\n- add_callbacks_post_trainer(cfg, trainer): Adds callbacks to the trainer after\ntraining.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nAdds callbacks to the trainer after creating the trainer. 
This is useful for\n\n\nadd_callbacks_pre_trainer\nSet up callbacks before creating the trainer.\n\n\ncreate_lr_scheduler\nCreates and returns a learning rate scheduler.\n\n\ncreate_optimizer\nCreates and returns an optimizer for training.\n\n\nget_collator_cls_and_kwargs\nReturns a custom class for the collator.\n\n\nget_input_args\nReturns a pydantic model for the plugin’s input arguments.\n\n\nget_trainer_cls\nReturns a custom class for the trainer.\n\n\nget_training_args\nReturns custom training arguments to set on TrainingArgs.\n\n\nget_training_args_mixin\nReturns a dataclass model for the plugin’s training arguments.\n\n\nload_datasets\nLoads and preprocesses the dataset for training.\n\n\npost_lora_load\nPerforms actions after LoRA weights are loaded.\n\n\npost_model_build\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\npost_model_load\nPerforms actions after the model is loaded.\n\n\npost_train\nPerforms actions after training is complete.\n\n\npost_train_unload\nPerforms actions after training is complete and the model is unloaded.\n\n\npost_trainer_create\nPerforms actions after the trainer is created.\n\n\npre_lora_load\nPerforms actions before LoRA weights are loaded.\n\n\npre_model_load\nPerforms actions before the model is loaded.\n\n\nregister\nRegisters the plugin with the given configuration.\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_post_trainer(cfg, trainer)\nAdds callbacks to the trainer after creating the trainer. This is useful for\ncallbacks that require access to the model or trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_pre_trainer(cfg, model)\nSet up callbacks before creating the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_lr_scheduler(\n cfg,\n trainer,\n optimizer,\n num_training_steps,\n)\nCreates and returns a learning rate scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\nnum_training_steps\nint\nTotal number of training steps\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_optimizer(cfg, trainer)\nCreates and returns an optimizer for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_collator_cls_and_kwargs(cfg, is_eval=False)\nReturns a custom class for the 
collator.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\nis_eval\nbool\nWhether this is an eval split.\nFalse\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nclass\n\nThe class for the collator.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_input_args()\nReturns a pydantic model for the plugin’s input arguments.\n\n\n\nintegrations.base.BasePlugin.get_trainer_cls(cfg)\nReturns a custom class for the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_training_args(cfg)\nReturns custom training arguments to set on TrainingArgs.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nobject\n\ndict containing the training arguments.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_training_args_mixin()\nReturns a dataclass model for the plugin’s training arguments.\n\n\n\nintegrations.base.BasePlugin.load_datasets(cfg, preprocess=False)\nLoads and preprocesses the dataset for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\npreprocess\nbool\nWhether this is the preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\ndataset_meta\nUnion['TrainDatasetMeta', None]\nThe metadata for the training dataset.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_lora_load(cfg, model)\nPerforms actions after LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_build(cfg, model)\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_load(cfg, model)\nPerforms actions after the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train(cfg, model)\nPerforms actions after training is complete.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe axolotl configuration.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train_unload(cfg)\nPerforms actions after training is complete and the model is unloaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_trainer_create(cfg, trainer)\nPerforms actions after the trainer is created.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for 
integrations.base.BasePlugin.get_trainer_cls(cfg)

Returns a custom class for the trainer.

Parameters:
- cfg (DictDefault): The global axolotl configuration. (required)

Returns:
- Trainer | None: The custom trainer class, or None if the plugin does not provide one.

integrations.base.BasePlugin.get_training_args(cfg)

Returns custom training arguments to set on TrainingArgs.

Parameters:
- cfg (DictDefault): The global axolotl configuration. (required)

Returns:
- dict: A dict containing the training arguments.

integrations.base.BasePlugin.get_training_args_mixin()

Returns a dataclass model for the plugin's training arguments.

integrations.base.BasePlugin.load_datasets(cfg, preprocess=False)

Loads and preprocesses the dataset for training.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)
- preprocess (bool): Whether this is the dataset preprocessing step. (default: False)

Returns:
- dataset_meta (TrainDatasetMeta | None): The metadata for the training dataset.

integrations.base.BasePlugin.post_lora_load(cfg, model)

Performs actions after LoRA weights are loaded.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)
- model (PreTrainedModel | PeftModel): The loaded model. (required)

integrations.base.BasePlugin.post_model_build(cfg, model)

Performs actions after the model is built/loaded, but before any adapters are applied.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)
- model (PreTrainedModel): The built model. (required)

integrations.base.BasePlugin.post_model_load(cfg, model)

Performs actions after the model is loaded.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)
- model (PreTrainedModel | PeftModel): The loaded model. (required)

integrations.base.BasePlugin.post_train(cfg, model)

Performs actions after training is complete.

Parameters:
- cfg (DictDefault): The axolotl configuration. (required)
- model (PreTrainedModel | PeftModel): The loaded model. (required)

integrations.base.BasePlugin.post_train_unload(cfg)

Performs actions after training is complete and the model is unloaded.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)

integrations.base.BasePlugin.post_trainer_create(cfg, trainer)

Performs actions after the trainer is created.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)
- trainer (Trainer): The trainer object for training. (required)

integrations.base.BasePlugin.pre_lora_load(cfg, model)

Performs actions before LoRA weights are loaded.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)
- model (PreTrainedModel): The loaded model. (required)

integrations.base.BasePlugin.pre_model_load(cfg)

Performs actions before the model is loaded.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)

integrations.base.BasePlugin.register(cfg)

Registers the plugin with the given configuration.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)
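get_training_args_mixin pairs with get_training_args: the mixin contributes fields to the generated training-arguments dataclass, and get_training_args supplies values to set on them. A sketch with invented field names:

```python
from dataclasses import dataclass, field

from axolotl.integrations.base import BasePlugin


@dataclass
class MyPluginTrainingArgsMixin:
    """Hypothetical fields to be mixed into the generated TrainingArgs."""

    my_plugin_log_every: int = field(
        default=0,
        metadata={"help": "Log plugin stats every N steps (0 disables logging)."},
    )


class MyPlugin(BasePlugin):
    def get_training_args_mixin(self):
        # Dataclass whose fields are merged into the training arguments.
        return MyPluginTrainingArgsMixin

    def get_training_args(self, cfg):
        # Values to set on the TrainingArgs built from the mixin above.
        return {"my_plugin_log_every": cfg.get("my_plugin_log_every", 0)}
```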
integrations.base.PluginManager()

The PluginManager class is responsible for loading and managing plugins. It is a singleton so it can be accessed from anywhere in the codebase.

Attributes:
- plugins (OrderedDict[str, BasePlugin]): The loaded plugins, keyed by name.

Key methods include:
- get_instance(): Static method to get the singleton instance of PluginManager.
- register(plugin_name: str): Registers a new plugin by its name.
- pre_model_load(cfg): Calls the pre_model_load method of all registered plugins.

Methods:
- add_callbacks_post_trainer: Calls the add_callbacks_post_trainer method of all registered plugins.
- add_callbacks_pre_trainer: Calls the add_callbacks_pre_trainer method of all registered plugins.
- create_lr_scheduler: Calls the create_lr_scheduler method of all registered plugins and returns the first non-None scheduler.
- create_optimizer: Calls the create_optimizer method of all registered plugins and returns the first non-None optimizer.
- get_collator_cls_and_kwargs: Calls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.
- get_input_args: Returns a list of Pydantic classes for all registered plugins' input arguments.
- get_instance: Returns the singleton instance of PluginManager. If the instance doesn't exist, it creates a new one.
- get_trainer_cls: Calls the get_trainer_cls method of all registered plugins and returns the first non-None trainer class.
- get_training_args: Calls the get_training_args method of all registered plugins and returns the combined training arguments.
- get_training_args_mixin: Returns a list of dataclasses for all registered plugins' training args mixins.
- load_datasets: Calls the load_datasets method of each registered plugin.
- post_lora_load: Calls the post_lora_load method of all registered plugins.
- post_model_build: Calls the post_model_build method of all registered plugins after the model has been built/loaded, but before any adapters have been applied.
- post_model_load: Calls the post_model_load method of all registered plugins after the model has been loaded.
- post_train: Calls the post_train method of all registered plugins.
- post_train_unload: Calls the post_train_unload method of all registered plugins.
- post_trainer_create: Calls the post_trainer_create method of all registered plugins.
- pre_lora_load: Calls the pre_lora_load method of all registered plugins.
- pre_model_load: Calls the pre_model_load method of all registered plugins.
- register: Registers a new plugin by its name.

integrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)

Calls the add_callbacks_post_trainer method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- trainer (Trainer): The trainer object for training. (required)

Returns:
- list[Callable]: A list of callback functions to be added.

integrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)

Calls the add_callbacks_pre_trainer method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- model (PreTrainedModel): The loaded model. (required)

Returns:
- list[Callable]: A list of callback functions to be added to the TrainingArgs.

integrations.base.PluginManager.create_lr_scheduler(trainer, optimizer, num_training_steps)

Calls the create_lr_scheduler method of all registered plugins and returns the first non-None scheduler.

Parameters:
- trainer (Trainer): The trainer object for training. (required)
- optimizer (Optimizer): The optimizer for training. (required)
- num_training_steps (int): The total number of training steps. (required)

Returns:
- LRScheduler | None: The created learning rate scheduler, or None if no plugin provides one.

integrations.base.PluginManager.create_optimizer(trainer)

Calls the create_optimizer method of all registered plugins and returns the first non-None optimizer.

Parameters:
- trainer (Trainer): The trainer object for training. (required)

Returns:
- Optimizer | None: The created optimizer, or None if none was found.

integrations.base.PluginManager.get_collator_cls_and_kwargs(cfg, is_eval=False)

Calls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- is_eval (bool): Whether this is an eval split. (default: False)

Returns:
- class: The collator class, or None if none was found.
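Several PluginManager methods (create_lr_scheduler and create_optimizer above, get_trainer_cls and get_collator_cls_and_kwargs as well) share a "first non-None wins" contract. A minimal sketch of that dispatch pattern, illustrative rather than the actual implementation:

```python
def first_non_none(plugins, method_name, *args, **kwargs):
    """Return the first non-None result of calling method_name on each plugin.

    `plugins` mirrors PluginManager.plugins: an OrderedDict[str, BasePlugin],
    so plugins are consulted in registration order.
    """
    for plugin in plugins.values():
        result = getattr(plugin, method_name)(*args, **kwargs)
        if result is not None:
            return result
    return None
```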
integrations.base.PluginManager.get_input_args()

Returns a list of Pydantic classes for all registered plugins' input arguments.

Returns:
- list[str]: A list of Pydantic classes for all registered plugins' input arguments.

integrations.base.PluginManager.get_instance()

Returns the singleton instance of PluginManager. If the instance doesn't exist, it creates a new one.

integrations.base.PluginManager.get_trainer_cls(cfg)

Calls the get_trainer_cls method of all registered plugins and returns the first non-None trainer class.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)

Returns:
- Trainer | None: The first non-None trainer class returned by a plugin.

integrations.base.PluginManager.get_training_args(cfg)

Calls the get_training_args method of all registered plugins and returns the combined training arguments.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)

Returns:
- dict: The combined training arguments.

integrations.base.PluginManager.get_training_args_mixin()

Returns a list of dataclasses for all registered plugins' training args mixins.

Returns:
- list: A list of dataclasses.

integrations.base.PluginManager.load_datasets(cfg, preprocess=False)

Calls the load_datasets method of each registered plugin.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- preprocess (bool): Whether this is the dataset preprocessing step. (default: False)

Returns:
- TrainDatasetMeta | None: The dataset metadata loaded from all registered plugins.

integrations.base.PluginManager.post_lora_load(cfg, model)

Calls the post_lora_load method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- model (PreTrainedModel | PeftModel): The loaded model. (required)

integrations.base.PluginManager.post_model_build(cfg, model)

Calls the post_model_build method of all registered plugins after the model has been built/loaded, but before any adapters have been applied.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- model (PreTrainedModel): The loaded model. (required)

integrations.base.PluginManager.post_model_load(cfg, model)

Calls the post_model_load method of all registered plugins after the model has been loaded, inclusive of any adapters.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- model (PreTrainedModel | PeftModel): The loaded model. (required)

integrations.base.PluginManager.post_train(cfg, model)

Calls the post_train method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- model (PreTrainedModel | PeftModel): The loaded model. (required)
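Putting get_instance and register together, a short usage sketch; the plugin path is hypothetical, the config is a minimal illustrative DictDefault, and register(plugin_name) is detailed below:

```python
from axolotl.integrations.base import PluginManager
from axolotl.utils.dict import DictDefault

# Illustrative config; a real axolotl config carries many more keys.
cfg = DictDefault({"base_model": "meta-llama/Llama-3.2-1B"})

manager = PluginManager.get_instance()             # singleton accessor
manager.register("my_package.my_plugin.MyPlugin")  # dotted path; may raise ImportError
manager.pre_model_load(cfg)                        # fans out to every registered plugin
```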
integrations.base.PluginManager.post_train_unload(cfg)

Calls the post_train_unload method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)

integrations.base.PluginManager.post_trainer_create(cfg, trainer)

Calls the post_trainer_create method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- trainer (Trainer): The trainer object for training. (required)

integrations.base.PluginManager.pre_lora_load(cfg, model)

Calls the pre_lora_load method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- model (PreTrainedModel): The loaded model. (required)

integrations.base.PluginManager.pre_model_load(cfg)

Calls the pre_model_load method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)

integrations.base.PluginManager.register(plugin_name)

Registers a new plugin by its name.

Parameters:
- plugin_name (str): The name of the plugin to be registered. (required)

Raises:
- ImportError: If the plugin module cannot be imported.
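Read together, the hook names imply a rough per-run lifecycle. The ordering below is inferred from the hook descriptions in this document (for example, post_model_build runs before adapters and post_model_load after), not taken from the implementation:

```python
# Inferred hook order for a single training run; illustrative only.
PLUGIN_LIFECYCLE = [
    "register",                    # plugin is loaded and registered
    "pre_model_load",              # before the model is loaded
    "post_model_build",            # model built, adapters not yet applied
    "pre_lora_load",               # before LoRA weights are loaded
    "post_lora_load",              # after LoRA weights are loaded
    "post_model_load",             # model fully loaded, adapters included
    "add_callbacks_pre_trainer",   # callbacks gathered before trainer creation
    "post_trainer_create",         # trainer exists
    "add_callbacks_post_trainer",  # callbacks that need the trainer
    "post_train",                  # training finished, model still loaded
    "post_train_unload",           # model unloaded
]
```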
Returns:
- tuple['HFRLTrainerBuilder' | 'HFCausalTrainerBuilder', PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]: Tuple of:
  - Trainer builder (causal or RLHF)
  - Model
  - Tokenizer
  - PEFT config
  - Processor