From 2fa856633397ee80b59402f4d86b9e8a55306969 Mon Sep 17 00:00:00 2001 From: Quarto GHA Workflow Runner Date: Tue, 17 Jun 2025 16:11:55 +0000 Subject: [PATCH] Built site for gh-pages --- .nojekyll | 2 +- FAQS.html | 4 +- TODO.html | 4 +- docs/amd_hpc.html | 4 +- docs/api/cli.args.html | 4 +- docs/api/cli.checks.html | 4 +- docs/api/cli.cloud.base.html | 4 +- docs/api/cli.cloud.modal_.html | 4 +- docs/api/cli.config.html | 4 +- docs/api/cli.evaluate.html | 4 +- docs/api/cli.inference.html | 4 +- docs/api/cli.main.html | 4 +- docs/api/cli.merge_lora.html | 4 +- docs/api/cli.merge_sharded_fsdp_weights.html | 4 +- docs/api/cli.preprocess.html | 4 +- docs/api/cli.quantize.html | 4 +- docs/api/cli.sweeps.html | 4 +- docs/api/cli.train.html | 4 +- docs/api/cli.utils.html | 4 +- docs/api/cli.vllm_serve.html | 4 +- docs/api/common.architectures.html | 4 +- docs/api/common.const.html | 4 +- docs/api/common.datasets.html | 4 +- docs/api/convert.html | 4 +- docs/api/core.builders.base.html | 4 +- docs/api/core.builders.causal.html | 4 +- docs/api/core.builders.rl.html | 4 +- docs/api/core.chat.format.chatml.html | 4 +- docs/api/core.chat.format.llama3x.html | 4 +- docs/api/core.chat.format.shared.html | 4 +- docs/api/core.chat.messages.html | 4 +- docs/api/core.datasets.chat.html | 4 +- ...core.datasets.transforms.chat_builder.html | 4 +- docs/api/core.trainers.base.html | 4 +- docs/api/core.trainers.dpo.trainer.html | 4 +- docs/api/core.trainers.grpo.sampler.html | 4 +- docs/api/core.trainers.grpo.trainer.html | 4 +- docs/api/core.trainers.mamba.html | 4 +- docs/api/core.trainers.mixins.optimizer.html | 4 +- ...core.trainers.mixins.rng_state_loader.html | 4 +- docs/api/core.trainers.mixins.scheduler.html | 4 +- docs/api/core.trainers.relora.html | 4 +- docs/api/core.trainers.trl.html | 4 +- docs/api/core.trainers.utils.html | 4 +- docs/api/core.training_args.html | 363 +-------- docs/api/datasets.html | 4 +- docs/api/evaluate.html | 4 +- docs/api/index.html | 4 +- docs/api/integrations.base.html | 695 +++++++++++------- .../integrations.cut_cross_entropy.args.html | 4 +- docs/api/integrations.grokfast.optimizer.html | 4 +- docs/api/integrations.kd.trainer.html | 12 +- docs/api/integrations.liger.args.html | 4 +- docs/api/integrations.lm_eval.args.html | 4 +- docs/api/integrations.spectrum.args.html | 4 +- docs/api/kernels.geglu.html | 4 +- docs/api/kernels.lora.html | 4 +- docs/api/kernels.quantize.html | 4 +- docs/api/kernels.swiglu.html | 4 +- docs/api/kernels.utils.html | 4 +- docs/api/loaders.adapter.html | 4 +- docs/api/loaders.constants.html | 4 +- docs/api/loaders.model.html | 4 +- docs/api/loaders.patch_manager.html | 4 +- docs/api/loaders.processor.html | 4 +- docs/api/loaders.tokenizer.html | 4 +- docs/api/logging_config.html | 4 +- docs/api/models.mamba.modeling_mamba.html | 4 +- .../monkeypatch.btlm_attn_hijack_flash.html | 4 +- ...onkeypatch.data.batch_dataset_fetcher.html | 4 +- ...ch.gradient_checkpointing.offload_cpu.html | 4 +- ...h.gradient_checkpointing.offload_disk.html | 4 +- .../monkeypatch.llama_attn_hijack_flash.html | 4 +- ...onkeypatch.llama_attn_hijack_xformers.html | 4 +- docs/api/monkeypatch.llama_expand_mask.html | 4 +- .../monkeypatch.llama_patch_multipack.html | 4 +- docs/api/monkeypatch.lora_kernels.html | 4 +- ...monkeypatch.mistral_attn_hijack_flash.html | 4 +- docs/api/monkeypatch.mixtral.html | 4 +- docs/api/monkeypatch.multipack.html | 4 +- docs/api/monkeypatch.relora.html | 4 +- ...onkeypatch.stablelm_attn_hijack_flash.html | 4 +- 
docs/api/monkeypatch.trainer_fsdp_optim.html | 4 +- .../monkeypatch.transformers_fa_utils.html | 4 +- docs/api/monkeypatch.unsloth_.html | 4 +- docs/api/monkeypatch.utils.html | 4 +- docs/api/prompt_strategies.alpaca_chat.html | 4 +- .../prompt_strategies.alpaca_instruct.html | 4 +- .../prompt_strategies.alpaca_w_system.html | 4 +- docs/api/prompt_strategies.base.html | 4 +- ...rompt_strategies.bradley_terry.llama3.html | 4 +- docs/api/prompt_strategies.chat_template.html | 4 +- docs/api/prompt_strategies.completion.html | 4 +- .../prompt_strategies.dpo.chat_template.html | 4 +- docs/api/prompt_strategies.dpo.chatml.html | 4 +- docs/api/prompt_strategies.dpo.llama3.html | 4 +- .../prompt_strategies.dpo.passthrough.html | 4 +- .../prompt_strategies.dpo.user_defined.html | 4 +- docs/api/prompt_strategies.dpo.zephyr.html | 4 +- docs/api/prompt_strategies.input_output.html | 4 +- docs/api/prompt_strategies.kto.chatml.html | 4 +- docs/api/prompt_strategies.kto.llama3.html | 4 +- .../prompt_strategies.kto.user_defined.html | 4 +- docs/api/prompt_strategies.llama2_chat.html | 4 +- docs/api/prompt_strategies.messages.chat.html | 4 +- docs/api/prompt_strategies.metharme.html | 4 +- docs/api/prompt_strategies.orcamini.html | 4 +- .../prompt_strategies.orpo.chat_template.html | 4 +- docs/api/prompt_strategies.pygmalion.html | 4 +- ...prompt_strategies.stepwise_supervised.html | 4 +- docs/api/prompt_strategies.user_defined.html | 4 +- docs/api/prompt_tokenizers.html | 4 +- docs/api/train.html | 10 +- docs/api/utils.bench.html | 4 +- docs/api/utils.callbacks.comet_.html | 4 +- docs/api/utils.callbacks.lisa.html | 4 +- docs/api/utils.callbacks.mlflow_.html | 4 +- docs/api/utils.callbacks.perplexity.html | 4 +- docs/api/utils.callbacks.profiler.html | 4 +- docs/api/utils.callbacks.qat.html | 4 +- docs/api/utils.chat_templates.html | 4 +- docs/api/utils.collators.batching.html | 4 +- docs/api/utils.collators.core.html | 4 +- docs/api/utils.collators.mamba.html | 4 +- docs/api/utils.collators.mm_chat.html | 4 +- .../utils.ctx_managers.sequence_parallel.html | 4 +- docs/api/utils.data.pretraining.html | 4 +- docs/api/utils.data.sft.html | 4 +- docs/api/utils.dict.html | 4 +- docs/api/utils.distributed.html | 4 +- docs/api/utils.freeze.html | 4 +- docs/api/utils.lora.html | 4 +- docs/api/utils.model_shard_quant.html | 4 +- docs/api/utils.optimizers.adopt.html | 4 +- docs/api/utils.quantization.html | 4 +- docs/api/utils.samplers.multipack.html | 6 +- docs/api/utils.schedulers.html | 4 +- docs/api/utils.schemas.config.html | 4 +- docs/api/utils.schemas.datasets.html | 4 +- docs/api/utils.schemas.enums.html | 4 +- docs/api/utils.schemas.integrations.html | 4 +- docs/api/utils.schemas.model.html | 4 +- docs/api/utils.schemas.multimodal.html | 4 +- docs/api/utils.schemas.peft.html | 4 +- docs/api/utils.schemas.training.html | 4 +- docs/api/utils.schemas.trl.html | 4 +- docs/api/utils.schemas.utils.html | 4 +- docs/api/utils.tokenization.html | 4 +- docs/api/utils.trainer.html | 4 +- docs/batch_vs_grad.html | 4 +- docs/cli.html | 4 +- docs/config.html | 4 +- docs/custom_integrations.html | 4 +- docs/dataset-formats/conversation.html | 4 +- docs/dataset-formats/index.html | 4 +- docs/dataset-formats/inst_tune.html | 4 +- docs/dataset-formats/pretraining.html | 4 +- docs/dataset-formats/stepwise_supervised.html | 4 +- docs/dataset-formats/template_free.html | 4 +- docs/dataset-formats/tokenized.html | 4 +- docs/dataset_loading.html | 4 +- docs/dataset_preprocessing.html | 4 +- docs/debugging.html | 4 +- 
docs/docker.html | 4 +- docs/faq.html | 4 +- docs/fsdp_qlora.html | 4 +- docs/getting-started.html | 4 +- docs/inference.html | 4 +- docs/input_output.html | 4 +- docs/installation.html | 4 +- docs/lora_optims.html | 4 +- docs/lr_groups.html | 4 +- docs/mac.html | 4 +- docs/multi-gpu.html | 4 +- docs/multi-node.html | 4 +- docs/multimodal.html | 4 +- docs/multipack.html | 4 +- docs/nccl.html | 4 +- docs/qat.html | 4 +- docs/quantize.html | 4 +- docs/ray-integration.html | 4 +- docs/reward_modelling.html | 4 +- docs/rlhf.html | 4 +- docs/sequence_parallelism.html | 4 +- docs/torchao.html | 4 +- docs/unsloth.html | 4 +- .../colab-axolotl-example.html | 4 +- index.html | 4 +- search.json | 20 +- ...dark-2fef5ea3f8957b3e4ecc936fc74692ca.css} | 2 +- sitemap.xml | 378 +++++----- src/axolotl/integrations/LICENSE.html | 4 +- .../cut_cross_entropy/ACKNOWLEDGEMENTS.html | 4 +- 193 files changed, 1017 insertions(+), 1207 deletions(-) rename site_libs/quarto-html/{quarto-syntax-highlighting-dark-8ef56b68f8fa1e9d2ba328e99e439f80.css => quarto-syntax-highlighting-dark-2fef5ea3f8957b3e4ecc936fc74692ca.css} (98%) diff --git a/.nojekyll b/.nojekyll index 8fc3cab38..b505fa798 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -ce7842d3 \ No newline at end of file +e3a927da \ No newline at end of file diff --git a/FAQS.html b/FAQS.html index f830a41e7..b8e57147a 100644 --- a/FAQS.html +++ b/FAQS.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/TODO.html b/TODO.html index fe087edb8..043afc751 100644 --- a/TODO.html +++ b/TODO.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/amd_hpc.html b/docs/amd_hpc.html index 43f28314c..6729e593b 100644 --- a/docs/amd_hpc.html +++ b/docs/amd_hpc.html @@ -2,7 +2,7 @@ - + @@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.args.html b/docs/api/cli.args.html index 5c7ccf150..45326f855 100644 --- a/docs/api/cli.args.html +++ b/docs/api/cli.args.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.checks.html b/docs/api/cli.checks.html index 3a399516b..81b7a2db1 100644 --- a/docs/api/cli.checks.html +++ b/docs/api/cli.checks.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.cloud.base.html b/docs/api/cli.cloud.base.html index d84d7361c..8c7084221 100644 --- a/docs/api/cli.cloud.base.html +++ b/docs/api/cli.cloud.base.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.cloud.modal_.html b/docs/api/cli.cloud.modal_.html index ef7344cde..70ada4697 100644 --- a/docs/api/cli.cloud.modal_.html +++ b/docs/api/cli.cloud.modal_.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.config.html b/docs/api/cli.config.html index 699a9c317..a99253e7e 100644 --- a/docs/api/cli.config.html +++ b/docs/api/cli.config.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.evaluate.html b/docs/api/cli.evaluate.html index bd7172685..4bcf4396f 100644 --- a/docs/api/cli.evaluate.html +++ b/docs/api/cli.evaluate.html @@ 
-2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.inference.html b/docs/api/cli.inference.html index 179b702fd..a2e44c3a9 100644 --- a/docs/api/cli.inference.html +++ b/docs/api/cli.inference.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.main.html b/docs/api/cli.main.html index ed8aaab61..ad902b8de 100644 --- a/docs/api/cli.main.html +++ b/docs/api/cli.main.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.merge_lora.html b/docs/api/cli.merge_lora.html index 2db0ca75e..64d3e26c8 100644 --- a/docs/api/cli.merge_lora.html +++ b/docs/api/cli.merge_lora.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.merge_sharded_fsdp_weights.html b/docs/api/cli.merge_sharded_fsdp_weights.html index 259201127..e30111985 100644 --- a/docs/api/cli.merge_sharded_fsdp_weights.html +++ b/docs/api/cli.merge_sharded_fsdp_weights.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.preprocess.html b/docs/api/cli.preprocess.html index 7be534fa3..ea78d1c3a 100644 --- a/docs/api/cli.preprocess.html +++ b/docs/api/cli.preprocess.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.quantize.html b/docs/api/cli.quantize.html index 19d629c57..ece804f0e 100644 --- a/docs/api/cli.quantize.html +++ b/docs/api/cli.quantize.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.sweeps.html b/docs/api/cli.sweeps.html index 85006fc13..7774d319f 100644 --- a/docs/api/cli.sweeps.html +++ b/docs/api/cli.sweeps.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.train.html b/docs/api/cli.train.html index 9aeca8838..696c29151 100644 --- a/docs/api/cli.train.html +++ b/docs/api/cli.train.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.utils.html b/docs/api/cli.utils.html index 8e465ee35..a1c20b2e0 100644 --- a/docs/api/cli.utils.html +++ b/docs/api/cli.utils.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/cli.vllm_serve.html b/docs/api/cli.vllm_serve.html index 390efa71c..9b4756035 100644 --- a/docs/api/cli.vllm_serve.html +++ b/docs/api/cli.vllm_serve.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/common.architectures.html b/docs/api/common.architectures.html index 16f5c5968..2387cbdce 100644 --- a/docs/api/common.architectures.html +++ b/docs/api/common.architectures.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/common.const.html b/docs/api/common.const.html index abce5b674..26823cc80 100644 --- a/docs/api/common.const.html +++ b/docs/api/common.const.html @@ -2,7 
+2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/common.datasets.html b/docs/api/common.datasets.html index fcea0706f..f7348888d 100644 --- a/docs/api/common.datasets.html +++ b/docs/api/common.datasets.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/convert.html b/docs/api/convert.html index e12228a5b..3085bb207 100644 --- a/docs/api/convert.html +++ b/docs/api/convert.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.builders.base.html b/docs/api/core.builders.base.html index e8e66d364..887ef2037 100644 --- a/docs/api/core.builders.base.html +++ b/docs/api/core.builders.base.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.builders.causal.html b/docs/api/core.builders.causal.html index 2ed028e94..943afe52d 100644 --- a/docs/api/core.builders.causal.html +++ b/docs/api/core.builders.causal.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.builders.rl.html b/docs/api/core.builders.rl.html index 0057eae22..a4908556c 100644 --- a/docs/api/core.builders.rl.html +++ b/docs/api/core.builders.rl.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.chat.format.chatml.html b/docs/api/core.chat.format.chatml.html index 611ecd3da..3adbc037a 100644 --- a/docs/api/core.chat.format.chatml.html +++ b/docs/api/core.chat.format.chatml.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/core.chat.format.llama3x.html b/docs/api/core.chat.format.llama3x.html index 96dbb7a21..57a26f889 100644 --- a/docs/api/core.chat.format.llama3x.html +++ b/docs/api/core.chat.format.llama3x.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/core.chat.format.shared.html b/docs/api/core.chat.format.shared.html index b2853426b..bf92033e0 100644 --- a/docs/api/core.chat.format.shared.html +++ b/docs/api/core.chat.format.shared.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/core.chat.messages.html b/docs/api/core.chat.messages.html index 41c1796c3..e5fda6d2e 100644 --- a/docs/api/core.chat.messages.html +++ b/docs/api/core.chat.messages.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.datasets.chat.html b/docs/api/core.datasets.chat.html index e5adfdb9f..2a7bf4af9 100644 --- a/docs/api/core.datasets.chat.html +++ b/docs/api/core.datasets.chat.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.datasets.transforms.chat_builder.html b/docs/api/core.datasets.transforms.chat_builder.html index 938c416b8..0f5e8297e 100644 --- a/docs/api/core.datasets.transforms.chat_builder.html +++ b/docs/api/core.datasets.transforms.chat_builder.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git 
a/docs/api/core.trainers.base.html b/docs/api/core.trainers.base.html index d89770446..541c9fa51 100644 --- a/docs/api/core.trainers.base.html +++ b/docs/api/core.trainers.base.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.dpo.trainer.html b/docs/api/core.trainers.dpo.trainer.html index 1c6925099..514549c33 100644 --- a/docs/api/core.trainers.dpo.trainer.html +++ b/docs/api/core.trainers.dpo.trainer.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.grpo.sampler.html b/docs/api/core.trainers.grpo.sampler.html index c6f989026..103dc7b9b 100644 --- a/docs/api/core.trainers.grpo.sampler.html +++ b/docs/api/core.trainers.grpo.sampler.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.grpo.trainer.html b/docs/api/core.trainers.grpo.trainer.html index 38949c3ec..aa0bcecfc 100644 --- a/docs/api/core.trainers.grpo.trainer.html +++ b/docs/api/core.trainers.grpo.trainer.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.mamba.html b/docs/api/core.trainers.mamba.html index a6a9393d3..13a503d08 100644 --- a/docs/api/core.trainers.mamba.html +++ b/docs/api/core.trainers.mamba.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.mixins.optimizer.html b/docs/api/core.trainers.mixins.optimizer.html index 5a1e2d1e8..efdcaa905 100644 --- a/docs/api/core.trainers.mixins.optimizer.html +++ b/docs/api/core.trainers.mixins.optimizer.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.mixins.rng_state_loader.html b/docs/api/core.trainers.mixins.rng_state_loader.html index 12a6edceb..8e754b6ec 100644 --- a/docs/api/core.trainers.mixins.rng_state_loader.html +++ b/docs/api/core.trainers.mixins.rng_state_loader.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.mixins.scheduler.html b/docs/api/core.trainers.mixins.scheduler.html index 70fc85389..64b74ae7b 100644 --- a/docs/api/core.trainers.mixins.scheduler.html +++ b/docs/api/core.trainers.mixins.scheduler.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.relora.html b/docs/api/core.trainers.relora.html index 8ca9b12b2..bad4b4c7a 100644 --- a/docs/api/core.trainers.relora.html +++ b/docs/api/core.trainers.relora.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.trl.html b/docs/api/core.trainers.trl.html index 2f6145b8e..6ca87adf9 100644 --- a/docs/api/core.trainers.trl.html +++ b/docs/api/core.trainers.trl.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/core.trainers.utils.html b/docs/api/core.trainers.utils.html index e58ef1e3b..fbf337991 100644 --- 
a/docs/api/core.trainers.utils.html +++ b/docs/api/core.trainers.utils.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/core.training_args.html b/docs/api/core.training_args.html index f35ea760a..e2c507b72 100644 --- a/docs/api/core.training_args.html +++ b/docs/api/core.training_args.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + @@ -477,7 +477,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
  • AxolotlPRMConfig
  • AxolotlRewardConfig
  • AxolotlTrainingArguments
  • -
  • AxolotlTrainingMixins
  • @@ -527,385 +526,39 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); AxolotlTrainingArguments Training arguments for Causal trainer - -AxolotlTrainingMixins -Mixin class for the Axolotl training args. -

    AxolotlCPOConfig

    -
    core.training_args.AxolotlCPOConfig(
    -    model_type=None,
    -    lr_quadratic_warmup=False,
    -    pretraining=False,
    -    sample_packing=False,
    -    sample_packing_sequentially=False,
    -    multipack_real_batches=False,
    -    eval_sample_packing=None,
    -    sample_packing_efficiency=1.0,
    -    sample_packing_bin_size=200,
    -    sample_packing_group_size=100000,
    -    max_seq_length=2048,
    -    dataset_num_proc=None,
    -    relora_steps=None,
    -    relora_warmup_steps=None,
    -    relora_anneal_steps=None,
    -    relora_prune_ratio=0.9,
    -    bench_split='eval',
    -    bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
    -    do_bench_eval=False,
    -    do_causal_lm_eval=False,
    -    max_bench_samples=None,
    -    bench_source_max_len=2048,
    -    dataloader_prefetch_factor=None,
    -    cosine_min_lr_ratio=None,
    -    cosine_constant_lr_ratio=None,
    -    loraplus_lr_ratio=None,
    -    loraplus_lr_embedding=1e-06,
    -    embedding_lr_scale=None,
    -    lr_groups=None,
    -    embedding_lr=None,
    -    qlora=False,
    -    orpo_alpha=None,
    -    lisa_n_layers=None,
    -    lisa_step_interval=None,
    -    lisa_layers_attribute=None,
    -    curriculum_sampling=None,
    -    alternate_lr_scheduler_type=None,
    -    chat_template=None,
    -    kd_ce_alpha=None,
    -    kd_alpha=1.0,
    -    kd_temperature=1.0,
    -    kd_zscore_base_temp=None,
    -    kd_top_k_before_softmax=None,
    -    adam_beta3=None,
    -    adam_epsilon2=None,
    -    image_size=None,
    -    image_resize_algorithm=None,
    -    simpo_gamma=None,
    -)
    +
    core.training_args.AxolotlCPOConfig(simpo_gamma=None)

    CPO config for CPO training

    AxolotlKTOConfig

    -
    core.training_args.AxolotlKTOConfig(
    -    model_type=None,
    -    lr_quadratic_warmup=False,
    -    pretraining=False,
    -    sample_packing=False,
    -    sample_packing_sequentially=False,
    -    multipack_real_batches=False,
    -    eval_sample_packing=None,
    -    sample_packing_efficiency=1.0,
    -    sample_packing_bin_size=200,
    -    sample_packing_group_size=100000,
    -    max_seq_length=2048,
    -    dataset_num_proc=None,
    -    relora_steps=None,
    -    relora_warmup_steps=None,
    -    relora_anneal_steps=None,
    -    relora_prune_ratio=0.9,
    -    bench_split='eval',
    -    bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
    -    do_bench_eval=False,
    -    do_causal_lm_eval=False,
    -    max_bench_samples=None,
    -    bench_source_max_len=2048,
    -    dataloader_prefetch_factor=None,
    -    cosine_min_lr_ratio=None,
    -    cosine_constant_lr_ratio=None,
    -    loraplus_lr_ratio=None,
    -    loraplus_lr_embedding=1e-06,
    -    embedding_lr_scale=None,
    -    lr_groups=None,
    -    embedding_lr=None,
    -    qlora=False,
    -    orpo_alpha=None,
    -    lisa_n_layers=None,
    -    lisa_step_interval=None,
    -    lisa_layers_attribute=None,
    -    curriculum_sampling=None,
    -    alternate_lr_scheduler_type=None,
    -    chat_template=None,
    -    kd_ce_alpha=None,
    -    kd_alpha=1.0,
    -    kd_temperature=1.0,
    -    kd_zscore_base_temp=None,
    -    kd_top_k_before_softmax=None,
    -    adam_beta3=None,
    -    adam_epsilon2=None,
    -    image_size=None,
    -    image_resize_algorithm=None,
    -)
    +
    core.training_args.AxolotlKTOConfig()

    KTO config for KTO training

    AxolotlORPOConfig

    -
    core.training_args.AxolotlORPOConfig(
    -    model_type=None,
    -    lr_quadratic_warmup=False,
    -    pretraining=False,
    -    sample_packing=False,
    -    sample_packing_sequentially=False,
    -    multipack_real_batches=False,
    -    eval_sample_packing=None,
    -    sample_packing_efficiency=1.0,
    -    sample_packing_bin_size=200,
    -    sample_packing_group_size=100000,
    -    max_seq_length=2048,
    -    dataset_num_proc=None,
    -    relora_steps=None,
    -    relora_warmup_steps=None,
    -    relora_anneal_steps=None,
    -    relora_prune_ratio=0.9,
    -    bench_split='eval',
    -    bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
    -    do_bench_eval=False,
    -    do_causal_lm_eval=False,
    -    max_bench_samples=None,
    -    bench_source_max_len=2048,
    -    dataloader_prefetch_factor=None,
    -    cosine_min_lr_ratio=None,
    -    cosine_constant_lr_ratio=None,
    -    loraplus_lr_ratio=None,
    -    loraplus_lr_embedding=1e-06,
    -    embedding_lr_scale=None,
    -    lr_groups=None,
    -    embedding_lr=None,
    -    qlora=False,
    -    orpo_alpha=None,
    -    lisa_n_layers=None,
    -    lisa_step_interval=None,
    -    lisa_layers_attribute=None,
    -    curriculum_sampling=None,
    -    alternate_lr_scheduler_type=None,
    -    chat_template=None,
    -    kd_ce_alpha=None,
    -    kd_alpha=1.0,
    -    kd_temperature=1.0,
    -    kd_zscore_base_temp=None,
    -    kd_top_k_before_softmax=None,
    -    adam_beta3=None,
    -    adam_epsilon2=None,
    -    image_size=None,
    -    image_resize_algorithm=None,
    -)
    +
    core.training_args.AxolotlORPOConfig()

    ORPO config for ORPO training

    AxolotlPRMConfig

    -
    core.training_args.AxolotlPRMConfig(
    -    model_type=None,
    -    lr_quadratic_warmup=False,
    -    pretraining=False,
    -    sample_packing=False,
    -    sample_packing_sequentially=False,
    -    multipack_real_batches=False,
    -    eval_sample_packing=None,
    -    sample_packing_efficiency=1.0,
    -    sample_packing_bin_size=200,
    -    sample_packing_group_size=100000,
    -    max_seq_length=2048,
    -    dataset_num_proc=None,
    -    relora_steps=None,
    -    relora_warmup_steps=None,
    -    relora_anneal_steps=None,
    -    relora_prune_ratio=0.9,
    -    bench_split='eval',
    -    bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
    -    do_bench_eval=False,
    -    do_causal_lm_eval=False,
    -    max_bench_samples=None,
    -    bench_source_max_len=2048,
    -    dataloader_prefetch_factor=None,
    -    cosine_min_lr_ratio=None,
    -    cosine_constant_lr_ratio=None,
    -    loraplus_lr_ratio=None,
    -    loraplus_lr_embedding=1e-06,
    -    embedding_lr_scale=None,
    -    lr_groups=None,
    -    embedding_lr=None,
    -    qlora=False,
    -    orpo_alpha=None,
    -    lisa_n_layers=None,
    -    lisa_step_interval=None,
    -    lisa_layers_attribute=None,
    -    curriculum_sampling=None,
    -    alternate_lr_scheduler_type=None,
    -    chat_template=None,
    -    kd_ce_alpha=None,
    -    kd_alpha=1.0,
    -    kd_temperature=1.0,
    -    kd_zscore_base_temp=None,
    -    kd_top_k_before_softmax=None,
    -    adam_beta3=None,
    -    adam_epsilon2=None,
    -    image_size=None,
    -    image_resize_algorithm=None,
    -)
    +
    core.training_args.AxolotlPRMConfig()

    PRM config for PRM training

    AxolotlRewardConfig

    -
    core.training_args.AxolotlRewardConfig(
    -    model_type=None,
    -    lr_quadratic_warmup=False,
    -    pretraining=False,
    -    sample_packing=False,
    -    sample_packing_sequentially=False,
    -    multipack_real_batches=False,
    -    eval_sample_packing=None,
    -    sample_packing_efficiency=1.0,
    -    sample_packing_bin_size=200,
    -    sample_packing_group_size=100000,
    -    max_seq_length=2048,
    -    dataset_num_proc=None,
    -    relora_steps=None,
    -    relora_warmup_steps=None,
    -    relora_anneal_steps=None,
    -    relora_prune_ratio=0.9,
    -    bench_split='eval',
    -    bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
    -    do_bench_eval=False,
    -    do_causal_lm_eval=False,
    -    max_bench_samples=None,
    -    bench_source_max_len=2048,
    -    dataloader_prefetch_factor=None,
    -    cosine_min_lr_ratio=None,
    -    cosine_constant_lr_ratio=None,
    -    loraplus_lr_ratio=None,
    -    loraplus_lr_embedding=1e-06,
    -    embedding_lr_scale=None,
    -    lr_groups=None,
    -    embedding_lr=None,
    -    qlora=False,
    -    orpo_alpha=None,
    -    lisa_n_layers=None,
    -    lisa_step_interval=None,
    -    lisa_layers_attribute=None,
    -    curriculum_sampling=None,
    -    alternate_lr_scheduler_type=None,
    -    chat_template=None,
    -    kd_ce_alpha=None,
    -    kd_alpha=1.0,
    -    kd_temperature=1.0,
    -    kd_zscore_base_temp=None,
    -    kd_top_k_before_softmax=None,
    -    adam_beta3=None,
    -    adam_epsilon2=None,
    -    image_size=None,
    -    image_resize_algorithm=None,
    -)
    +
    core.training_args.AxolotlRewardConfig()

    Reward config for Reward training

    AxolotlTrainingArguments

    -
    core.training_args.AxolotlTrainingArguments(
    -    model_type=None,
    -    lr_quadratic_warmup=False,
    -    pretraining=False,
    -    sample_packing=False,
    -    sample_packing_sequentially=False,
    -    multipack_real_batches=False,
    -    eval_sample_packing=None,
    -    sample_packing_efficiency=1.0,
    -    sample_packing_bin_size=200,
    -    sample_packing_group_size=100000,
    -    max_seq_length=2048,
    -    dataset_num_proc=None,
    -    relora_steps=None,
    -    relora_warmup_steps=None,
    -    relora_anneal_steps=None,
    -    relora_prune_ratio=0.9,
    -    bench_split='eval',
    -    bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
    -    do_bench_eval=False,
    -    do_causal_lm_eval=False,
    -    max_bench_samples=None,
    -    bench_source_max_len=2048,
    -    dataloader_prefetch_factor=None,
    -    cosine_min_lr_ratio=None,
    -    cosine_constant_lr_ratio=None,
    -    loraplus_lr_ratio=None,
    -    loraplus_lr_embedding=1e-06,
    -    embedding_lr_scale=None,
    -    lr_groups=None,
    -    embedding_lr=None,
    -    qlora=False,
    -    orpo_alpha=None,
    -    lisa_n_layers=None,
    -    lisa_step_interval=None,
    -    lisa_layers_attribute=None,
    -    curriculum_sampling=None,
    -    alternate_lr_scheduler_type=None,
    -    chat_template=None,
    -    kd_ce_alpha=None,
    -    kd_alpha=1.0,
    -    kd_temperature=1.0,
    -    kd_zscore_base_temp=None,
    -    kd_top_k_before_softmax=None,
    -    adam_beta3=None,
    -    adam_epsilon2=None,
    -    image_size=None,
    -    image_resize_algorithm=None,
    -)
    +
    core.training_args.AxolotlTrainingArguments()

    Training arguments for Causal trainer

    This code is duplicated due to HF TrainingArguments not setting output_dir with a default value so it can’t be used as a mixin.
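As a rough illustration of why the duplication is needed — a minimal sketch of the underlying dataclass rule, using generic stand-in classes rather than the actual axolotl/transformers hierarchy: a required field with no default (like output_dir on HF TrainingArguments) cannot follow inherited fields that do have defaults.

    from dataclasses import dataclass

    @dataclass
    class Mixin:
        extra_flag: bool = False  # plugin-style field with a default

    try:
        @dataclass
        class Args(Mixin):
            output_dir: str  # required, no default -- mirrors HF TrainingArguments
    except TypeError as err:
        # raises: non-default argument 'output_dir' follows default argument
        print(err)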

    -
    -
    -

    AxolotlTrainingMixins

    -
    core.training_args.AxolotlTrainingMixins(
    -    model_type=None,
    -    lr_quadratic_warmup=False,
    -    pretraining=False,
    -    sample_packing=False,
    -    sample_packing_sequentially=False,
    -    multipack_real_batches=False,
    -    eval_sample_packing=None,
    -    sample_packing_efficiency=1.0,
    -    sample_packing_bin_size=200,
    -    sample_packing_group_size=100000,
    -    max_seq_length=2048,
    -    dataset_num_proc=None,
    -    relora_steps=None,
    -    relora_warmup_steps=None,
    -    relora_anneal_steps=None,
    -    relora_prune_ratio=0.9,
    -    bench_split='eval',
    -    bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',
    -    do_bench_eval=False,
    -    do_causal_lm_eval=False,
    -    max_bench_samples=None,
    -    bench_source_max_len=2048,
    -    dataloader_prefetch_factor=None,
    -    cosine_min_lr_ratio=None,
    -    cosine_constant_lr_ratio=None,
    -    loraplus_lr_ratio=None,
    -    loraplus_lr_embedding=1e-06,
    -    embedding_lr_scale=None,
    -    lr_groups=None,
    -    embedding_lr=None,
    -    qlora=False,
    -    orpo_alpha=None,
    -    lisa_n_layers=None,
    -    lisa_step_interval=None,
    -    lisa_layers_attribute=None,
    -    curriculum_sampling=None,
    -    alternate_lr_scheduler_type=None,
    -    chat_template=None,
    -    kd_ce_alpha=None,
    -    kd_alpha=1.0,
    -    kd_temperature=1.0,
    -    kd_zscore_base_temp=None,
    -    kd_top_k_before_softmax=None,
    -    adam_beta3=None,
    -    adam_epsilon2=None,
    -    image_size=None,
    -    image_resize_algorithm=None,
    -)
    -

    Mixin class for the Axolotl training args.

    diff --git a/docs/api/datasets.html b/docs/api/datasets.html index 6047df4c5..e5ac8fc2f 100644 --- a/docs/api/datasets.html +++ b/docs/api/datasets.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/evaluate.html b/docs/api/evaluate.html index 670f8fcbb..d18334785 100644 --- a/docs/api/evaluate.html +++ b/docs/api/evaluate.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/index.html b/docs/api/index.html index dd9712d9c..0e8eb2f6e 100644 --- a/docs/api/index.html +++ b/docs/api/index.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/integrations.base.html b/docs/api/integrations.base.html index d176d3583..af130b9a5 100644 --- a/docs/api/integrations.base.html +++ b/docs/api/integrations.base.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + @@ -583,50 +583,62 @@ training.

    Creates and returns an optimizer for training. +get_collator_cls_and_kwargs +Returns a custom class for the collator. + + get_input_args Returns a pydantic model for the plugin’s input arguments. - + get_trainer_cls Returns a custom class for the trainer. + +get_training_args +Returns custom training arguments to set on TrainingArgs. + +get_training_args_mixin +Returns a dataclass model for the plugin’s training arguments. + + load_datasets Loads and preprocesses the dataset for training. - + post_lora_load Performs actions after LoRA weights are loaded. - + post_model_build Performs actions after the model is built/loaded, but before any adapters are applied. - + post_model_load Performs actions after the model is loaded. - + post_train Performs actions after training is complete. - + post_train_unload Performs actions after training is complete and the model is unloaded. - + post_trainer_create Performs actions after the trainer is created. - + pre_lora_load Performs actions before LoRA weights are loaded. - + pre_model_load Performs actions before the model is loaded. - + register Registers the plugin with the given configuration. @@ -883,17 +895,74 @@ callbacks that require access to the model or trainer.

    +
    +
    get_collator_cls_and_kwargs
    +
    integrations.base.BasePlugin.get_collator_cls_and_kwargs(cfg, is_eval=False)
    +

    Returns a custom class for the collator.

    +
    +
    Parameters
    + ++++++ + + + + + + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    cfgDictDefaultThe global axolotl configuration.required
    is_evalboolWhether this is an eval split.False
    +
    +
    +
    Returns
    + + + + + + + + + + + + + + + +
    NameTypeDescription
    classThe class for the collator.
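A rough sketch of how a plugin might implement this hook; the collator class and cfg flag below are hypothetical, and the exact return contract (class alone vs. a (class, kwargs) pair, as the method name suggests) should be checked against the source:

    from axolotl.integrations.base import BasePlugin

    class PaddingCollator:  # hypothetical stand-in for a real collator
        def __init__(self, pad_to_multiple_of=None):
            self.pad_to_multiple_of = pad_to_multiple_of

        def __call__(self, features):
            return features  # a real collator would batch and pad here

    class MyPlugin(BasePlugin):
        def get_collator_cls_and_kwargs(self, cfg, is_eval=False):
            if not cfg.get("use_padding_collator"):  # hypothetical cfg field
                return None  # defer to other plugins / the default collator
            return PaddingCollator, {"pad_to_multiple_of": None if is_eval else 8}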
    +
    +
    get_input_args
    -
    integrations.base.BasePlugin.get_input_args()
    +
    integrations.base.BasePlugin.get_input_args()

    Returns a pydantic model for the plugin’s input arguments.

    get_trainer_cls
    -
    integrations.base.BasePlugin.get_trainer_cls(cfg)
    +
    integrations.base.BasePlugin.get_trainer_cls(cfg)

    Returns a custom class for the trainer.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -919,8 +988,8 @@ callbacks that require access to the model or trainer.

    -
    -
    Returns
    +
    +
    Returns
    @@ -944,12 +1013,68 @@ callbacks that require access to the model or trainer.

    +
    +
    get_training_args
    +
    integrations.base.BasePlugin.get_training_args(cfg)
    +

    Returns custom training arguments to set on TrainingArgs.

    +
    +
    Parameters
    + ++++++ + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    cfgDictDefaultThe global axolotl configuration.required
    +
    +
    +
    Returns
    + + + + + + + + + + + + + + + +
    NameTypeDescription
    objectdict containing the training arguments.
    +
    +
    +
    +
    get_training_args_mixin
    +
    integrations.base.BasePlugin.get_training_args_mixin()
    +

    Returns a dataclass model for the plugin’s training arguments.
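A minimal sketch of how the two hooks above might pair up in a plugin — the mixin contributes new TrainingArgs fields and get_training_args supplies their values; the field name and cfg key here are hypothetical:

    from dataclasses import dataclass
    from axolotl.integrations.base import BasePlugin

    @dataclass
    class MyArgsMixin:  # fields mixed into the generated TrainingArgs class
        my_plugin_alpha: float = 1.0  # hypothetical plugin-specific field

    class MyPlugin(BasePlugin):
        def get_training_args_mixin(self):
            return MyArgsMixin

        def get_training_args(self, cfg):
            # dict of values for the mixin's fields, read from the axolotl config
            return {"my_plugin_alpha": cfg.get("my_plugin_alpha", 1.0)}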

    +
    load_datasets
    -
    integrations.base.BasePlugin.load_datasets(cfg, preprocess=False)
    +
    integrations.base.BasePlugin.load_datasets(cfg, preprocess=False)

    Loads and preprocesses the dataset for training.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -981,8 +1106,8 @@ callbacks that require access to the model or trainer.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1008,78 +1133,8 @@ callbacks that require access to the model or trainer.

    post_lora_load
    -
    integrations.base.BasePlugin.post_lora_load(cfg, model)
    +
    integrations.base.BasePlugin.post_lora_load(cfg, model)

    Performs actions after LoRA weights are loaded.

    -
    -
    Parameters
    -
    ------ - - - - - - - - - - - - - - - - - - - - - - -
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugin.required
    modelPreTrainedModel | PeftModelThe loaded model.required
    -
    -
    -
    -
    post_model_build
    -
    integrations.base.BasePlugin.post_model_build(cfg, model)
    -

    Performs actions after the model is built/loaded, but before any adapters are applied.

    -
    -
    Parameters
    - ------ - - - - - - - - - - - - - - - - -
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugin.required
    -
    -
    -
    -
    post_model_load
    -
    integrations.base.BasePlugin.post_model_load(cfg, model)
    -

    Performs actions after the model is loaded.

    Parameters
    @@ -1114,14 +1169,84 @@ callbacks that require access to the model or trainer.

    -
    -
    post_train
    -
    integrations.base.BasePlugin.post_train(cfg, model)
    -

    Performs actions after training is complete.

    +
    +
    post_model_build
    +
    integrations.base.BasePlugin.post_model_build(cfg, model)
    +

    Performs actions after the model is built/loaded, but before any adapters are applied.

    Parameters
    +++++ + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugin.required
    +
    +
    +
    +
    post_model_load
    +
    integrations.base.BasePlugin.post_model_load(cfg, model)
    +

    Performs actions after the model is loaded.

    +
    +
    Parameters
    + ++++++ + + + + + + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugin.required
    modelPreTrainedModel | PeftModelThe loaded model.required
    +
    +
    +
    +
    post_train
    +
    integrations.base.BasePlugin.post_train(cfg, model)
    +

    Performs actions after training is complete.

    +
    +
    Parameters
    + +@@ -1154,10 +1279,10 @@ callbacks that require access to the model or trainer.

    post_train_unload
    -
    integrations.base.BasePlugin.post_train_unload(cfg)
    +
    integrations.base.BasePlugin.post_train_unload(cfg)

    Performs actions after training is complete and the model is unloaded.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1186,10 +1311,10 @@ callbacks that require access to the model or trainer.

    post_trainer_create
    -
    integrations.base.BasePlugin.post_trainer_create(cfg, trainer)
    +
    integrations.base.BasePlugin.post_trainer_create(cfg, trainer)

    Performs actions after the trainer is created.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1224,10 +1349,10 @@ callbacks that require access to the model or trainer.

    pre_lora_load
    -
    integrations.base.BasePlugin.pre_lora_load(cfg, model)
    +
    integrations.base.BasePlugin.pre_lora_load(cfg, model)

    Performs actions before LoRA weights are loaded.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1262,10 +1387,10 @@ callbacks that require access to the model or trainer.

    pre_model_load
    -
    integrations.base.BasePlugin.pre_model_load(cfg)
    +
    integrations.base.BasePlugin.pre_model_load(cfg)

    Performs actions before the model is loaded.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1294,10 +1419,10 @@ callbacks that require access to the model or trainer.

    register
    -
    integrations.base.BasePlugin.register(cfg)
    +
    integrations.base.BasePlugin.register(cfg)

    Registers the plugin with the given configuration.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1328,7 +1453,7 @@ callbacks that require access to the model or trainer.

    PluginManager

    -
    integrations.base.PluginManager()
    +
    integrations.base.PluginManager()

    The PluginManager class is responsible for loading and managing plugins. It should be a singleton so it can be accessed from anywhere in the codebase.

    @@ -1384,54 +1509,66 @@ should be a singleton so it can be accessed from anywhere in the codebase.

    + + + + - + - + + + + + + + + + - + - + - + - + - + - + - + - + - + @@ -1439,10 +1576,10 @@ should be a singleton so it can be accessed from anywhere in the codebase.

Calls the create_optimizer method of all registered plugins and returns the first non-None optimizer.
get_collator_cls_and_kwargs Calls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.
get_input_args Returns a list of Pydantic classes for all registered plugins’ input arguments.
get_instance Returns the singleton instance of PluginManager. If the instance doesn’t exist, it creates a new one.
get_trainer_cls Calls the get_trainer_cls method of all registered plugins and returns the first non-None trainer class.
get_training_args Calls the get_training_args method of all registered plugins and returns the combined training arguments.
get_training_args_mixin Returns a list of dataclasses for all registered plugins’ training args mixins.
    load_datasets Calls the load_datasets method of each registered plugin.
    post_lora_load Calls the post_lora_load method of all registered plugins.
    post_model_build Calls the post_model_build method of all registered plugins after the
    post_model_load Calls the post_model_load method of all registered plugins after the model
    post_train Calls the post_train method of all registered plugins.
    post_train_unload Calls the post_train_unload method of all registered plugins.
    post_trainer_create Calls the post_trainer_create method of all registered plugins.
    pre_lora_load Calls the pre_lora_load method of all registered plugins.
    pre_model_load Calls the pre_model_load method of all registered plugins.
    register Registers a new plugin by its name.
    add_callbacks_post_trainer
    -
    integrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)
    +
    integrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)

    Calls the add_callbacks_post_trainer method of all registered plugins.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1474,8 +1611,8 @@ should be a singleton so it can be accessed from anywhere in the codebase.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1501,10 +1638,10 @@ should be a singleton so it can be accessed from anywhere in the codebase.

    add_callbacks_pre_trainer
    -
    integrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)
    +
    integrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)

    Calls the add_callbacks_pre_trainer method of all registered plugins.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1536,8 +1673,8 @@ should be a singleton so it can be accessed from anywhere in the codebase.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1563,15 +1700,15 @@ should be a singleton so it can be accessed from anywhere in the codebase.

    create_lr_scheduler
    -
    integrations.base.PluginManager.create_lr_scheduler(
    -    trainer,
    -    optimizer,
    -    num_training_steps,
    -)
    +
    integrations.base.PluginManager.create_lr_scheduler(
    +    trainer,
    +    optimizer,
    +    num_training_steps,
    +)

    Calls the create_lr_scheduler method of all registered plugins and returns the first non-None scheduler.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1603,8 +1740,8 @@ the first non-None scheduler.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1630,11 +1767,11 @@ the first non-None scheduler.

    create_optimizer
    -
    integrations.base.PluginManager.create_optimizer(trainer)
    +
    integrations.base.PluginManager.create_optimizer(trainer)

    Calls the create_optimizer method of all registered plugins and returns the first non-None optimizer.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1654,8 +1791,8 @@ the first non-None optimizer.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1679,12 +1816,22 @@ the first non-None optimizer.

    +
    +
    get_collator_cls_and_kwargs
    +
    integrations.base.PluginManager.get_collator_cls_and_kwargs(cfg, is_eval=False)
    +

    Calls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.

    +

    Parameters: +cfg (dict): The configuration for the plugins. +is_eval (bool): Whether this is an eval split.

    +

    Returns: +object: The collator class, or None if none was found.
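A sketch of the “first non-None wins” dispatch this describes (an illustration of the documented behavior, not the actual source; the internal plugins list is an assumption):

    def get_collator_cls_and_kwargs(self, cfg, is_eval=False):
        for plugin in self.plugins:  # assumed internal registry of plugins
            result = plugin.get_collator_cls_and_kwargs(cfg, is_eval=is_eval)
            if result is not None:
                return result  # first plugin that supplies a collator wins
        return None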

    +
    get_input_args
    -
    integrations.base.PluginManager.get_input_args()
    +
    integrations.base.PluginManager.get_input_args()

Returns a list of Pydantic classes for all registered plugins’ input arguments.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1710,17 +1857,17 @@ the first non-None optimizer.

    get_instance
    -
    integrations.base.PluginManager.get_instance()
    +
    integrations.base.PluginManager.get_instance()

    Returns the singleton instance of PluginManager. If the instance doesn’t exist, it creates a new one.
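Usage follows the standard singleton access pattern, for example:

    from axolotl.integrations.base import PluginManager

    manager = PluginManager.get_instance()          # created on first call
    assert manager is PluginManager.get_instance()  # same instance everywhere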

    get_trainer_cls
    -
    integrations.base.PluginManager.get_trainer_cls(cfg)
    +
    integrations.base.PluginManager.get_trainer_cls(cfg)

    Calls the get_trainer_cls method of all registered plugins and returns the first non-None trainer class.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1746,8 +1893,8 @@ first non-None trainer class.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1771,12 +1918,28 @@ first non-None trainer class.

    +
    +
    get_training_args
    +
    integrations.base.PluginManager.get_training_args(cfg)
    +

    Calls the get_training_args method of all registered plugins and returns the combined training arguments.

    +

    Parameters: +cfg (dict): The configuration for the plugins.

    +

Returns: +object: The training arguments.
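“Combined” here plausibly means a dict merge across plugins — a sketch under that assumption (the merge/override policy and the plugins attribute are not confirmed by this page):

    def get_training_args(self, cfg):
        combined = {}
        for plugin in self.plugins:  # assumed internal registry of plugins
            args = plugin.get_training_args(cfg)
            if args:
                combined.update(args)  # later plugins override earlier keys
        return combined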

    +
    +
    +
    get_training_args_mixin
    +
    integrations.base.PluginManager.get_training_args_mixin()
    +

Returns a list of dataclasses for all registered plugins’ training args mixins.

    +

Returns: +list[str]: A list of dataclasses

    +
    load_datasets
    -
    integrations.base.PluginManager.load_datasets(cfg, preprocess=False)
    +
    integrations.base.PluginManager.load_datasets(cfg, preprocess=False)

    Calls the load_datasets method of each registered plugin.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -1808,8 +1971,8 @@ first non-None trainer class.

    -
    -
    Returns
    +
    +
    Returns
    @@ -1835,86 +1998,8 @@ first non-None trainer class.

    post_lora_load
    -
    integrations.base.PluginManager.post_lora_load(cfg, model)
    +
    integrations.base.PluginManager.post_lora_load(cfg, model)

    Calls the post_lora_load method of all registered plugins.

    -
    -
    Parameters
    -
    ------ - - - - - - - - - - - - - - - - - - - - - - -
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugins.required
    modelPreTrainedModel | PeftModelThe loaded model.required
    -
    -
    -
    -
    post_model_build
    -
    integrations.base.PluginManager.post_model_build(cfg, model)
    -

    Calls the post_model_build method of all registered plugins after the -model has been built / loaded, but before any adapters have been applied.

    -
    -
    Parameters
    - ------ - - - - - - - - - - - - - - - - - - - - - - -
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugins.required
    modelPreTrainedModelThe loaded model.required
    -
    -
    -
    -
    post_model_load
    -
    integrations.base.PluginManager.post_model_load(cfg, model)
    -

    Calls the post_model_load method of all registered plugins after the model -has been loaded inclusive of any adapters.

    Parameters
    @@ -1949,14 +2034,92 @@ has been loaded inclusive of any adapters.

    -
    -
    post_train
    -
    integrations.base.PluginManager.post_train(cfg, model)
    -

    Calls the post_train method of all registered plugins.

    +
    +
    post_model_build
    +
    integrations.base.PluginManager.post_model_build(cfg, model)
    +

    Calls the post_model_build method of all registered plugins after the +model has been built / loaded, but before any adapters have been applied.

    Parameters
    +++++ + + + + + + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugins.required
    modelPreTrainedModelThe loaded model.required
    +
    +
    +
    +
    post_model_load
    +
    integrations.base.PluginManager.post_model_load(cfg, model)
    +

    Calls the post_model_load method of all registered plugins after the model +has been loaded inclusive of any adapters.

    +
    +
    Parameters
    + ++++++ + + + + + + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    cfgDictDefaultThe configuration for the plugins.required
    modelPreTrainedModel | PeftModelThe loaded model.required
    +
    +
    +
    +
    post_train
    +
    integrations.base.PluginManager.post_train(cfg, model)
    +

    Calls the post_train method of all registered plugins.

    +
    +
    Parameters
    + +@@ -1989,10 +2152,10 @@ has been loaded inclusive of any adapters.

    post_train_unload
    -
    integrations.base.PluginManager.post_train_unload(cfg)
    +
    integrations.base.PluginManager.post_train_unload(cfg)

    Calls the post_train_unload method of all registered plugins.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -2021,10 +2184,10 @@ has been loaded inclusive of any adapters.

    post_trainer_create
    -
    integrations.base.PluginManager.post_trainer_create(cfg, trainer)
    +
    integrations.base.PluginManager.post_trainer_create(cfg, trainer)

    Calls the post_trainer_create method of all registered plugins.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -2059,10 +2222,10 @@ has been loaded inclusive of any adapters.

    pre_lora_load
    -
    integrations.base.PluginManager.pre_lora_load(cfg, model)
    +
    integrations.base.PluginManager.pre_lora_load(cfg, model)

    Calls the pre_lora_load method of all registered plugins.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -2097,10 +2260,10 @@ has been loaded inclusive of any adapters.

    pre_model_load
    -
    integrations.base.PluginManager.pre_model_load(cfg)
    +
    integrations.base.PluginManager.pre_model_load(cfg)

    Calls the pre_model_load method of all registered plugins.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -2129,10 +2292,10 @@ has been loaded inclusive of any adapters.

    register
    -
    integrations.base.PluginManager.register(plugin_name)
    +
    integrations.base.PluginManager.register(plugin_name)

    Registers a new plugin by its name.

    -
    -
    Parameters
    +
    +
    Parameters
    @@ -2199,13 +2362,13 @@ has been loaded inclusive of any adapters.

    load_plugin

    -
    integrations.base.load_plugin(plugin_name)
    +
    integrations.base.load_plugin(plugin_name)

    Loads a plugin based on the given plugin name.

    The plugin name should be in the format “module_name.class_name”. This function splits the plugin name into module and class, imports the module, retrieves the class from the module, and creates an instance of the class.
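A sketch that re-implements just the steps described above (the real function lives in integrations.base; error handling omitted):

    import importlib

    def load_plugin(plugin_name: str):
        module_name, class_name = plugin_name.rsplit(".", 1)
        module = importlib.import_module(module_name)  # import the module
        plugin_cls = getattr(module, class_name)       # retrieve the class
        return plugin_cls()                            # create an instance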

    -
    -

    Parameters

    +
    +

    Parameters

    @@ -2231,8 +2394,8 @@ class from the module, and creates an instance of the class.

    -
    -

    Returns

    +
    +

    Returns

    diff --git a/docs/api/integrations.cut_cross_entropy.args.html b/docs/api/integrations.cut_cross_entropy.args.html index 591ecb279..0d9c196b1 100644 --- a/docs/api/integrations.cut_cross_entropy.args.html +++ b/docs/api/integrations.cut_cross_entropy.args.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + diff --git a/docs/api/integrations.grokfast.optimizer.html b/docs/api/integrations.grokfast.optimizer.html index 58bdec4c3..0ce5db289 100644 --- a/docs/api/integrations.grokfast.optimizer.html +++ b/docs/api/integrations.grokfast.optimizer.html @@ -2,7 +2,7 @@ - + @@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] { - + diff --git a/docs/api/integrations.kd.trainer.html b/docs/api/integrations.kd.trainer.html index 3c07400a4..a67cc634f 100644 --- a/docs/api/integrations.kd.trainer.html +++ b/docs/api/integrations.kd.trainer.html @@ -2,7 +2,7 @@ - + @@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin - + @@ -505,13 +505,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});

    AxolotlKDTrainer

    -
    integrations.kd.trainer.AxolotlKDTrainer(
    -    *_args,
    -    bench_data_collator=None,
    -    eval_data_collator=None,
    -    dataset_tags=None,
    -    **kwargs,
    -)
    +
    integrations.kd.trainer.AxolotlKDTrainer(*args, **kwargs)

    Custom trainer subclass for Knowledge Distillation (KD)

    Methods

[Per-file hunks elided: the docs/api pages from integrations.liger.args.html through utils.quantization.html (integrations.*.args, kernels.*, loaders.*, logging_config, models.mamba.modeling_mamba, monkeypatch.*, prompt_strategies.*, prompt_tokenizers, train, utils.bench, utils.callbacks.*, utils.chat_templates, utils.collators.*, utils.ctx_managers.sequence_parallel, utils.data.*, utils.dict, utils.distributed, utils.freeze, utils.lora, utils.model_shard_quant, utils.optimizers.adopt, utils.quantization) each carry the same head-of-file hunks (@@ -2,7 +2,7 @@ plus @@ -36,7 +36,7 @@ or @@ -71,7 +71,7 @@); the changed lines themselves were lost in extraction, leaving only bare "- +" markers. docs/api/train.html additionally had two body hunks (@@ -958,8 +958,8 @@ and @@ -971,7 +971,7 @@), both with "trainer setup." context and likewise stripped.]

diff --git a/docs/api/utils.samplers.multipack.html b/docs/api/utils.samplers.multipack.html
index 75a23093a..5bfc6b943 100644
--- a/docs/api/utils.samplers.multipack.html
+++ b/docs/api/utils.samplers.multipack.html

[This page's two stripped head-of-file hunks (@@ -2,7 +2,7 @@ and @@ -71,7 +71,7 @@) elided.]

@@ -519,7 +519,7 @@ into fixed-capacity batches to optimize memory usage and training throughput.
     batch_max_len,
     lengths,
     packing_efficiency_estimate=1.0,
-    drop_last=False,
+    drop_last=True,
     num_count_samples=8,
     sequential=False,
     group_size=100000,
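The one substantive change above flips the multipack sampler's drop_last default from False to True. A minimal sketch of what that flag means for a length-based packing sampler that fills fixed-capacity bins (an illustration only, not axolotl's implementation, which also weighs packing_efficiency_estimate and group_size):

    def pack_sequences(lengths, batch_max_len, drop_last=True):
        # Greedy first-fit packing; assumes every sequence fits in batch_max_len.
        bins, current, used = [], [], 0
        for idx, length in enumerate(lengths):
            if used + length > batch_max_len and current:
                bins.append(current)      # bin is full: seal it, start fresh
                current, used = [], 0
            current.append(idx)
            used += length
        if current and not drop_last:
            bins.append(current)          # keep the trailing, under-filled bin
        return bins

    lengths = [900, 700, 600, 500, 300]
    print(pack_sequences(lengths, 2048, drop_last=False))  # [[0, 1], [2, 3, 4]]
    print(pack_sequences(lengths, 2048, drop_last=True))   # [[0, 1]]

Dropping the trailing bin sacrifices a few samples per epoch in exchange for uniformly full batches, which keeps step time and memory use predictable.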
[Per-file hunks elided: docs/api/utils.schedulers.html, docs/api/utils.schemas.*.html, docs/api/utils.tokenization.html, docs/api/utils.trainer.html, and the top-level docs pages (batch_vs_grad, cli, config, custom_integrations, dataset-formats/*, dataset_loading, dataset_preprocessing, debugging, docker, faq, fsdp_qlora, getting-started, inference, input_output, installation, lora_optims, lr_groups, mac, multi-gpu, multi-node, multimodal, multipack, nccl, qat, quantize, ray-integration, reward_modelling, rlhf, sequence_parallelism, torchao), all with the same stripped head-of-file hunks.]
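The search.json hunk below rewrites the core.training_args entries; the docstring it carries notes that AxolotlTrainingArguments duplicates code because HF's TrainingArguments leaves output_dir without a default and so "can't be used as a mixin". A hedged sketch of the dataclass rule behind that note (class names here are illustrative, not axolotl's):

    from dataclasses import dataclass

    @dataclass
    class PackingMixin:
        sample_packing: bool = False      # mixin fields all carry defaults

    @dataclass
    class RequiredOutputDir:
        output_dir: str                   # no default, like HF TrainingArguments

    # Fields are collected base-first, so the defaulted mixin fields land in
    # front of the default-less output_dir, which dataclasses rejects at
    # class-creation time:
    try:
        @dataclass
        class Broken(RequiredOutputDir, PackingMixin):
            pass
    except TypeError as err:
        print(err)  # non-default argument 'output_dir' follows default argument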
[Per-file hunks elided: docs/unsloth.html, examples/colab-notebooks/colab-axolotl-example.html, and index.html, all with the same stripped head-of-file hunks.]

diff --git a/search.json b/search.json
index 70ea3f187..7e66ac345 100644
--- a/search.json
+++ b/search.json
@@ -644,14 +644,14 @@
     "href": "docs/api/core.training_args.html",
     "title": "core.training_args",
     "section": "",
-    "text": "core.training_args\nextra axolotl specific training args\n\n\n\n\n\nName\nDescription\n\n\n\n\nAxolotlCPOConfig\nCPO config for CPO training\n\n\nAxolotlKTOConfig\nKTO config for KTO training\n\n\nAxolotlORPOConfig\nORPO config for ORPO training\n\n\nAxolotlPRMConfig\nPRM config for PRM training\n\n\nAxolotlRewardConfig\nReward config for Reward training\n\n\nAxolotlTrainingArguments\nTraining arguments for Causal trainer\n\n\nAxolotlTrainingMixins\nMixin class for the Axolotl training args.\n\n\n\n\n\ncore.training_args.AxolotlCPOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n simpo_gamma=None,\n)\nCPO config for CPO training\n\n\n\ncore.training_args.AxolotlKTOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n
bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nKTO config for KTO training\n\n\n\ncore.training_args.AxolotlORPOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nORPO config for ORPO training\n\n\n\ncore.training_args.AxolotlPRMConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nPRM config for PRM training\n\n\n\ncore.training_args.AxolotlRewardConfig(\n model_type=None,\n 
lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nReward config for Reward training\n\n\n\ncore.training_args.AxolotlTrainingArguments(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nTraining arguments for Causal trainer\nThis code is duplicated due to HF TrainingArguments not setting output_dir with a\ndefault value so it can’t be used as a mixin.\n\n\n\ncore.training_args.AxolotlTrainingMixins(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n 
loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nMixin class for the Axolotl training args." + "text": "core.training_args\nextra axolotl specific training args\n\n\n\n\n\nName\nDescription\n\n\n\n\nAxolotlCPOConfig\nCPO config for CPO training\n\n\nAxolotlKTOConfig\nKTO config for KTO training\n\n\nAxolotlORPOConfig\nORPO config for ORPO training\n\n\nAxolotlPRMConfig\nPRM config for PRM training\n\n\nAxolotlRewardConfig\nReward config for Reward training\n\n\nAxolotlTrainingArguments\nTraining arguments for Causal trainer\n\n\n\n\n\ncore.training_args.AxolotlCPOConfig(simpo_gamma=None)\nCPO config for CPO training\n\n\n\ncore.training_args.AxolotlKTOConfig()\nKTO config for KTO training\n\n\n\ncore.training_args.AxolotlORPOConfig()\nORPO config for ORPO training\n\n\n\ncore.training_args.AxolotlPRMConfig()\nPRM config for PRM training\n\n\n\ncore.training_args.AxolotlRewardConfig()\nReward config for Reward training\n\n\n\ncore.training_args.AxolotlTrainingArguments()\nTraining arguments for Causal trainer\nThis code is duplicated due to HF TrainingArguments not setting output_dir with a\ndefault value so it can’t be used as a mixin." }, { "objectID": "docs/api/core.training_args.html#classes", "href": "docs/api/core.training_args.html#classes", "title": "core.training_args", "section": "", - "text": "Name\nDescription\n\n\n\n\nAxolotlCPOConfig\nCPO config for CPO training\n\n\nAxolotlKTOConfig\nKTO config for KTO training\n\n\nAxolotlORPOConfig\nORPO config for ORPO training\n\n\nAxolotlPRMConfig\nPRM config for PRM training\n\n\nAxolotlRewardConfig\nReward config for Reward training\n\n\nAxolotlTrainingArguments\nTraining arguments for Causal trainer\n\n\nAxolotlTrainingMixins\nMixin class for the Axolotl training args.\n\n\n\n\n\ncore.training_args.AxolotlCPOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n simpo_gamma=None,\n)\nCPO config for CPO 
training\n\n\n\ncore.training_args.AxolotlKTOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nKTO config for KTO training\n\n\n\ncore.training_args.AxolotlORPOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nORPO config for ORPO training\n\n\n\ncore.training_args.AxolotlPRMConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n 
qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nPRM config for PRM training\n\n\n\ncore.training_args.AxolotlRewardConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nReward config for Reward training\n\n\n\ncore.training_args.AxolotlTrainingArguments(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nTraining arguments for Causal trainer\nThis code is duplicated due to HF TrainingArguments not setting output_dir with a\ndefault value so it can’t be used as a mixin.\n\n\n\ncore.training_args.AxolotlTrainingMixins(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n 
sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nMixin class for the Axolotl training args." + "text": "Name\nDescription\n\n\n\n\nAxolotlCPOConfig\nCPO config for CPO training\n\n\nAxolotlKTOConfig\nKTO config for KTO training\n\n\nAxolotlORPOConfig\nORPO config for ORPO training\n\n\nAxolotlPRMConfig\nPRM config for PRM training\n\n\nAxolotlRewardConfig\nReward config for Reward training\n\n\nAxolotlTrainingArguments\nTraining arguments for Causal trainer\n\n\n\n\n\ncore.training_args.AxolotlCPOConfig(simpo_gamma=None)\nCPO config for CPO training\n\n\n\ncore.training_args.AxolotlKTOConfig()\nKTO config for KTO training\n\n\n\ncore.training_args.AxolotlORPOConfig()\nORPO config for ORPO training\n\n\n\ncore.training_args.AxolotlPRMConfig()\nPRM config for PRM training\n\n\n\ncore.training_args.AxolotlRewardConfig()\nReward config for Reward training\n\n\n\ncore.training_args.AxolotlTrainingArguments()\nTraining arguments for Causal trainer\nThis code is duplicated due to HF TrainingArguments not setting output_dir with a\ndefault value so it can’t be used as a mixin." }, { "objectID": "docs/api/prompt_strategies.user_defined.html", @@ -1085,14 +1085,14 @@ "href": "docs/api/train.html", "title": "train", "section": "", - "text": "train\nPrepare and train a model on a dataset. Can also infer from a model or merge lora\n\n\n\n\n\nName\nDescription\n\n\n\n\ncreate_model_card\nCreate a model card for the trained model if needed.\n\n\ndetermine_resume_checkpoint\nDetermine the checkpoint to resume from based on configuration.\n\n\nexecute_training\nExecute the training process with appropriate SDP kernel configurations.\n\n\nhandle_untrained_tokens_fix\nApply fixes for untrained tokens if configured.\n\n\nsave_initial_configs\nSave initial configurations before training.\n\n\nsave_trained_model\nSave the trained model according to configuration and training setup.\n\n\nsetup_model_and_tokenizer\nLoad the tokenizer, processor (for multimodal models), and model based on\n\n\nsetup_model_and_trainer\nLoad model, tokenizer, trainer, etc. 
Helper function to encapsulate the full\n\n\nsetup_model_card\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\nsetup_reference_model\nSet up the reference model for RL training if needed.\n\n\nsetup_signal_handler\nSet up signal handler for graceful termination.\n\n\ntrain\nTrain a model on the given dataset.\n\n\n\n\n\ntrain.create_model_card(cfg, trainer)\nCreate a model card for the trained model if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object with model card creation capabilities.\nrequired\n\n\n\n\n\n\n\ntrain.determine_resume_checkpoint(cfg)\nDetermine the checkpoint to resume from based on configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nstr | None\nPath to the checkpoint to resume from, or None if not resuming.\n\n\n\n\n\n\n\ntrain.execute_training(cfg, trainer, resume_from_checkpoint)\nExecute the training process with appropriate SDP kernel configurations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe configured trainer object.\nrequired\n\n\nresume_from_checkpoint\nstr | None\nPath to checkpoint to resume from, if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.handle_untrained_tokens_fix(\n cfg,\n model,\n tokenizer,\n train_dataset,\n safe_serialization,\n)\nApply fixes for untrained tokens if configured.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to apply fixes to.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer for token identification.\nrequired\n\n\ntrain_dataset\nDataset\nThe training dataset to use.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving.\nrequired\n\n\n\n\n\n\n\ntrain.save_initial_configs(cfg, tokenizer, model, peft_config, processor)\nSave initial configurations before training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to save.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save configuration for.\nrequired\n\n\npeft_config\nPeftConfig | None\nThe PEFT configuration to save if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.save_trained_model(cfg, trainer, model, safe_serialization)\nSave the trained model according to configuration and training setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe trainer object.\nrequired\n\n\nmodel\nPreTrainedModel\nThe trained model to save.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization.\nrequired\n\n\n\n\n\n\n\ntrain.setup_model_and_tokenizer(cfg)\nLoad the tokenizer, processor (for multimodal models), and model based on\nconfiguration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PreTrainedModel, 
PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple containing model, tokenizer, peft_config (if LoRA / QLoRA, else None), and processor (if multimodal, else None).\n\n\n\n\n\n\n\ntrain.setup_model_and_trainer(cfg, dataset_meta)\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\ntrainer setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters.\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[HFRLTrainerBuilder | HFCausalTrainerBuilder, PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple of: - Trainer (Causal or RLHF) - Model - Tokenizer - PEFT config - Processor\n\n\n\n\n\n\n\ntrain.setup_model_card(cfg)\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\ntrain.setup_reference_model(cfg, tokenizer)\nSet up the reference model for RL training if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to use for the reference model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nPreTrainedModel | None\nReference model if needed for RL training, None otherwise.\n\n\n\n\n\n\n\ntrain.setup_signal_handler(cfg, model, safe_serialization)\nSet up signal handler for graceful termination.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save on termination\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving\nrequired\n\n\n\n\n\n\n\ntrain.train(cfg, dataset_meta)\nTrain a model on the given dataset.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PeftModel | PreTrainedModel, PreTrainedTokenizer, Trainer]\nTuple of (model, tokenizer) after training" + "text": "train\nPrepare and train a model on a dataset. Can also infer from a model or merge lora\n\n\n\n\n\nName\nDescription\n\n\n\n\ncreate_model_card\nCreate a model card for the trained model if needed.\n\n\ndetermine_resume_checkpoint\nDetermine the checkpoint to resume from based on configuration.\n\n\nexecute_training\nExecute the training process with appropriate SDP kernel configurations.\n\n\nhandle_untrained_tokens_fix\nApply fixes for untrained tokens if configured.\n\n\nsave_initial_configs\nSave initial configurations before training.\n\n\nsave_trained_model\nSave the trained model according to configuration and training setup.\n\n\nsetup_model_and_tokenizer\nLoad the tokenizer, processor (for multimodal models), and model based on\n\n\nsetup_model_and_trainer\nLoad model, tokenizer, trainer, etc. 
Helper function to encapsulate the full\n\n\nsetup_model_card\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\nsetup_reference_model\nSet up the reference model for RL training if needed.\n\n\nsetup_signal_handler\nSet up signal handler for graceful termination.\n\n\ntrain\nTrain a model on the given dataset.\n\n\n\n\n\ntrain.create_model_card(cfg, trainer)\nCreate a model card for the trained model if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object with model card creation capabilities.\nrequired\n\n\n\n\n\n\n\ntrain.determine_resume_checkpoint(cfg)\nDetermine the checkpoint to resume from based on configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nstr | None\nPath to the checkpoint to resume from, or None if not resuming.\n\n\n\n\n\n\n\ntrain.execute_training(cfg, trainer, resume_from_checkpoint)\nExecute the training process with appropriate SDP kernel configurations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe configured trainer object.\nrequired\n\n\nresume_from_checkpoint\nstr | None\nPath to checkpoint to resume from, if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.handle_untrained_tokens_fix(\n cfg,\n model,\n tokenizer,\n train_dataset,\n safe_serialization,\n)\nApply fixes for untrained tokens if configured.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to apply fixes to.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer for token identification.\nrequired\n\n\ntrain_dataset\nDataset\nThe training dataset to use.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving.\nrequired\n\n\n\n\n\n\n\ntrain.save_initial_configs(cfg, tokenizer, model, peft_config, processor)\nSave initial configurations before training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to save.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save configuration for.\nrequired\n\n\npeft_config\nPeftConfig | None\nThe PEFT configuration to save if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.save_trained_model(cfg, trainer, model, safe_serialization)\nSave the trained model according to configuration and training setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe trainer object.\nrequired\n\n\nmodel\nPreTrainedModel\nThe trained model to save.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization.\nrequired\n\n\n\n\n\n\n\ntrain.setup_model_and_tokenizer(cfg)\nLoad the tokenizer, processor (for multimodal models), and model based on\nconfiguration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PreTrainedModel, 
PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple containing model, tokenizer, peft_config (if LoRA / QLoRA, else None), and processor (if multimodal, else None).\n\n\n\n\n\n\n\ntrain.setup_model_and_trainer(cfg, dataset_meta)\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\ntrainer setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters.\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple['HFRLTrainerBuilder' | 'HFCausalTrainerBuilder', PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple of: - Trainer builder (Causal or RLHF) - Model - Tokenizer - PEFT config - Processor\n\n\n\n\n\n\n\ntrain.setup_model_card(cfg)\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\ntrain.setup_reference_model(cfg, tokenizer)\nSet up the reference model for RL training if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to use for the reference model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nPreTrainedModel | None\nReference model if needed for RL training, None otherwise.\n\n\n\n\n\n\n\ntrain.setup_signal_handler(cfg, model, safe_serialization)\nSet up signal handler for graceful termination.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save on termination\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving\nrequired\n\n\n\n\n\n\n\ntrain.train(cfg, dataset_meta)\nTrain a model on the given dataset.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PeftModel | PreTrainedModel, PreTrainedTokenizer, Trainer]\nTuple of (model, tokenizer, trainer) after training"
},
{
"objectID": "docs/api/train.html#functions",
"href": "docs/api/train.html#functions",
"title": "train",
"section": "",
- "text": "Name\nDescription\n\n\n\n\ncreate_model_card\nCreate a model card for the trained model if needed.\n\n\ndetermine_resume_checkpoint\nDetermine the checkpoint to resume from based on configuration.\n\n\nexecute_training\nExecute the training process with appropriate SDP kernel configurations.\n\n\nhandle_untrained_tokens_fix\nApply fixes for untrained tokens if configured.\n\n\nsave_initial_configs\nSave initial configurations before training.\n\n\nsave_trained_model\nSave the trained model according to configuration and training setup.\n\n\nsetup_model_and_tokenizer\nLoad the tokenizer, processor (for multimodal models), and model based on\n\n\nsetup_model_and_trainer\nLoad model, tokenizer, trainer, etc. 
Helper function to encapsulate the full\n\n\nsetup_model_card\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\nsetup_reference_model\nSet up the reference model for RL training if needed.\n\n\nsetup_signal_handler\nSet up signal handler for graceful termination.\n\n\ntrain\nTrain a model on the given dataset.\n\n\n\n\n\ntrain.create_model_card(cfg, trainer)\nCreate a model card for the trained model if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object with model card creation capabilities.\nrequired\n\n\n\n\n\n\n\ntrain.determine_resume_checkpoint(cfg)\nDetermine the checkpoint to resume from based on configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nstr | None\nPath to the checkpoint to resume from, or None if not resuming.\n\n\n\n\n\n\n\ntrain.execute_training(cfg, trainer, resume_from_checkpoint)\nExecute the training process with appropriate SDP kernel configurations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe configured trainer object.\nrequired\n\n\nresume_from_checkpoint\nstr | None\nPath to checkpoint to resume from, if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.handle_untrained_tokens_fix(\n cfg,\n model,\n tokenizer,\n train_dataset,\n safe_serialization,\n)\nApply fixes for untrained tokens if configured.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to apply fixes to.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer for token identification.\nrequired\n\n\ntrain_dataset\nDataset\nThe training dataset to use.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving.\nrequired\n\n\n\n\n\n\n\ntrain.save_initial_configs(cfg, tokenizer, model, peft_config, processor)\nSave initial configurations before training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to save.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save configuration for.\nrequired\n\n\npeft_config\nPeftConfig | None\nThe PEFT configuration to save if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.save_trained_model(cfg, trainer, model, safe_serialization)\nSave the trained model according to configuration and training setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe trainer object.\nrequired\n\n\nmodel\nPreTrainedModel\nThe trained model to save.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization.\nrequired\n\n\n\n\n\n\n\ntrain.setup_model_and_tokenizer(cfg)\nLoad the tokenizer, processor (for multimodal models), and model based on\nconfiguration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PreTrainedModel, 
PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple containing model, tokenizer, peft_config (if LoRA / QLoRA, else None), and processor (if multimodal, else None).\n\n\n\n\n\n\n\ntrain.setup_model_and_trainer(cfg, dataset_meta)\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\ntrainer setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters.\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[HFRLTrainerBuilder | HFCausalTrainerBuilder, PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple of: - Trainer (Causal or RLHF) - Model - Tokenizer - PEFT config - Processor\n\n\n\n\n\n\n\ntrain.setup_model_card(cfg)\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\ntrain.setup_reference_model(cfg, tokenizer)\nSet up the reference model for RL training if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to use for the reference model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nPreTrainedModel | None\nReference model if needed for RL training, None otherwise.\n\n\n\n\n\n\n\ntrain.setup_signal_handler(cfg, model, safe_serialization)\nSet up signal handler for graceful termination.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save on termination\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving\nrequired\n\n\n\n\n\n\n\ntrain.train(cfg, dataset_meta)\nTrain a model on the given dataset.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PeftModel | PreTrainedModel, PreTrainedTokenizer, Trainer]\nTuple of (model, tokenizer) after training" + "text": "Name\nDescription\n\n\n\n\ncreate_model_card\nCreate a model card for the trained model if needed.\n\n\ndetermine_resume_checkpoint\nDetermine the checkpoint to resume from based on configuration.\n\n\nexecute_training\nExecute the training process with appropriate SDP kernel configurations.\n\n\nhandle_untrained_tokens_fix\nApply fixes for untrained tokens if configured.\n\n\nsave_initial_configs\nSave initial configurations before training.\n\n\nsave_trained_model\nSave the trained model according to configuration and training setup.\n\n\nsetup_model_and_tokenizer\nLoad the tokenizer, processor (for multimodal models), and model based on\n\n\nsetup_model_and_trainer\nLoad model, tokenizer, trainer, etc. 
Helper function to encapsulate the full\n\n\nsetup_model_card\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\nsetup_reference_model\nSet up the reference model for RL training if needed.\n\n\nsetup_signal_handler\nSet up signal handler for graceful termination.\n\n\ntrain\nTrain a model on the given dataset.\n\n\n\n\n\ntrain.create_model_card(cfg, trainer)\nCreate a model card for the trained model if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object with model card creation capabilities.\nrequired\n\n\n\n\n\n\n\ntrain.determine_resume_checkpoint(cfg)\nDetermine the checkpoint to resume from based on configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nstr | None\nPath to the checkpoint to resume from, or None if not resuming.\n\n\n\n\n\n\n\ntrain.execute_training(cfg, trainer, resume_from_checkpoint)\nExecute the training process with appropriate SDP kernel configurations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe configured trainer object.\nrequired\n\n\nresume_from_checkpoint\nstr | None\nPath to checkpoint to resume from, if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.handle_untrained_tokens_fix(\n cfg,\n model,\n tokenizer,\n train_dataset,\n safe_serialization,\n)\nApply fixes for untrained tokens if configured.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to apply fixes to.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer for token identification.\nrequired\n\n\ntrain_dataset\nDataset\nThe training dataset to use.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving.\nrequired\n\n\n\n\n\n\n\ntrain.save_initial_configs(cfg, tokenizer, model, peft_config, processor)\nSave initial configurations before training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to save.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save configuration for.\nrequired\n\n\npeft_config\nPeftConfig | None\nThe PEFT configuration to save if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.save_trained_model(cfg, trainer, model, safe_serialization)\nSave the trained model according to configuration and training setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe trainer object.\nrequired\n\n\nmodel\nPreTrainedModel\nThe trained model to save.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization.\nrequired\n\n\n\n\n\n\n\ntrain.setup_model_and_tokenizer(cfg)\nLoad the tokenizer, processor (for multimodal models), and model based on\nconfiguration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PreTrainedModel, 
PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple containing model, tokenizer, peft_config (if LoRA / QLoRA, else None), and processor (if multimodal, else None).\n\n\n\n\n\n\n\ntrain.setup_model_and_trainer(cfg, dataset_meta)\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\ntrainer setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters.\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple['HFRLTrainerBuilder' | 'HFCausalTrainerBuilder', PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple of: - Trainer builder (Causal or RLHF) - Model - Tokenizer - PEFT config - Processor\n\n\n\n\n\n\n\ntrain.setup_model_card(cfg)\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\ntrain.setup_reference_model(cfg, tokenizer)\nSet up the reference model for RL training if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to use for the reference model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nPreTrainedModel | None\nReference model if needed for RL training, None otherwise.\n\n\n\n\n\n\n\ntrain.setup_signal_handler(cfg, model, safe_serialization)\nSet up signal handler for graceful termination.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save on termination\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving\nrequired\n\n\n\n\n\n\n\ntrain.train(cfg, dataset_meta)\nTrain a model on the given dataset.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PeftModel | PreTrainedModel, PreTrainedTokenizer, Trainer]\nTuple of (model, tokenizer, trainer) after training"
},
{
"objectID": "docs/api/monkeypatch.mixtral.html",
@@ -1127,14 +1127,14 @@
"href": "docs/api/utils.samplers.multipack.html",
"title": "utils.samplers.multipack",
"section": "",
- "text": "utils.samplers.multipack\nMultipack Batch Sampler - An efficient batch sampler for packing variable-length sequences\ninto fixed-capacity batches to optimize memory usage and training throughput.\n\n\n\n\n\nName\nDescription\n\n\n\n\nMultipackBatchSampler\nBatch sampler class for efficient packing of variable-length sequences\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler(\n    sampler,\n    batch_size,\n    batch_max_len,\n    lengths,\n    packing_efficiency_estimate=1.0,\n    drop_last=False,\n    num_count_samples=8,\n    sequential=False,\n    group_size=100000,\n    bin_size=200,\n    num_processes=None,\n    safe_mode=True,\n    **kwargs,\n)\nBatch sampler class for efficient packing of variable-length sequences\nThis sampler packs sequences into fixed-capacity bins (batches) to 
maximize\nGPU memory utilization and training throughput by reducing padding.\nIt supports both parallel packing (using FFD algorithm) and\nsequential packing (preserving original sequence order).\n\n\n\n\n\nName\nDescription\n\n\n\n\nefficiency\nCalculate the packing efficiency (ratio of tokens used to total token slots).\n\n\ngather_efficiency\nGather and synchronize packing efficiency estimates across all distributed\n\n\ngather_len_batches\nGather and synchronize batch counts across all distributed ranks. Returns\n\n\ngenerate_batches\nGenerate packed batches for training.\n\n\nset_epoch\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.efficiency()\nCalculate the packing efficiency (ratio of tokens used to total token slots).\nHigher is better - 1.0 would mean perfect packing with no wasted space.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_efficiency()\nGather and synchronize packing efficiency estimates across all distributed\nranks.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nfloat\nA conservative efficiency estimate based on the measurements.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_len_batches(num)\nGather and synchronize batch counts across all distributed ranks. Returns\nthe minimum number of batches available on any rank.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.generate_batches(set_stats=False)\nGenerate packed batches for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nset_stats\nbool\nWhether to update efficiency statistics.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[list[int]]]\nList of batches, where each batch contains multiple bins, and each bin contains multiple sequence indices.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.set_epoch(epoch)\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nallocate_sequentially\nSequential allocator that preserves example order.\n\n\nffd_check\nFirst-fit-decreasing bin packing algorithm check.\n\n\npack_group\nPack a group of sequences into bins using First-Fit Decreasing algorithm.\n\n\npack_parallel\nPack sequences into bins using parallel processing.\n\n\n\n\n\nutils.samplers.multipack.allocate_sequentially(\n sequence_lengths,\n rank,\n bin_capacity,\n num_ranks,\n)\nSequential allocator that preserves example order.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nThe lengths of all examples.\nrequired\n\n\nrank\nint\nThe current rank (for distributed training).\nrequired\n\n\nbin_capacity\nint\nThe capacity of each bin (maximum sequence length).\nrequired\n\n\nnum_ranks\nint\nNumber of ranks (processes / GPUs).\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nrank_batches\nlist[list[int]]\nList of batches for the current rank.\n\n\ntotal_tokens_used\nint\nNumber of actual example tokens.\n\n\ntotal_token_slots\nint\nMaximum theoretical number of example tokens (number of bins * bin capacity).\n\n\n\n\n\n\n\nutils.samplers.multipack.ffd_check(sequence_lengths, bin_capacity, num_bins)\nFirst-fit-decreasing bin packing algorithm check.\nChecks if sequences with the given lengths could fit in the specified number of\nbins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence 
lengths.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin.\nrequired\n\n\nnum_bins\nint\nNumber of bins available.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nbool\nTrue if all sequences can be packed, False otherwise.\n\n\n\n\n\n\n\nutils.samplers.multipack.pack_group(\n sequence_lengths,\n group_offset,\n bin_capacity,\n max_bins,\n bin_size,\n safe_mode=True,\n)\nPack a group of sequences into bins using First-Fit Decreasing algorithm.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\ngroup_offset\nint\nOffset to apply to indices when returning results.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin.\nrequired\n\n\nmax_bins\nint\nMaximum number of bins to use.\nrequired\n\n\nbin_size\nint\nMaximum number of sequences per bin.\nrequired\n\n\nsafe_mode\nbool\nIf True, use a more conservative packing approach.\nTrue\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[int]]\nList of bins, where each bin contains indices of sequences assigned to it.\n\n\n\n\n\n\n\nutils.samplers.multipack.pack_parallel(\n sequence_lengths,\n bin_capacity,\n group_size,\n bin_size,\n num_processes=None,\n safe_mode=True,\n mp_start_method='spawn',\n)\nPack sequences into bins using parallel processing.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin as total number of tokens.\nrequired\n\n\ngroup_size\nint\nNumber of sequences to process in each group.\nrequired\n\n\nbin_size\nint\nMaximum number of bins to use.\nrequired\n\n\nnum_processes\nint | None\nNumber of parallel processes to use.\nNone\n\n\nsafe_mode\nbool\nIf True, use a more conservative packing approach.\nTrue\n\n\nmp_start_method\nstr | None\nMultiprocessing start method (‘fork’, ‘spawn’, ‘forkserver’). ‘spawn’ is often safer with Numba/PyTorch. Set to None to use system default.\n'spawn'\n\n\n\nReturns:\nList of bins, where each bin contains indices of sequences assigned to it." + "text": "utils.samplers.multipack\nMultipack Batch Sampler - An efficient batch sampler for packing variable-length sequences\ninto fixed-capacity batches to optimize memory usage and training throughput.\n\n\n\n\n\nName\nDescription\n\n\n\n\nMultipackBatchSampler\nBatch sampler class for efficient packing of variable-length sequences\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler(\n sampler,\n batch_size,\n batch_max_len,\n lengths,\n packing_efficiency_estimate=1.0,\n drop_last=True,\n num_count_samples=8,\n sequential=False,\n group_size=100000,\n bin_size=200,\n num_processes=None,\n safe_mode=True,\n **kwargs,\n)\nBatch sampler class for efficient packing of variable-length sequences\nThis sampler packs sequences into fixed-capacity bins (batches) to maximize\nGPU memory utilization and training throughput by reducing padding.\nIt supports both parallel packing (using FFD algorithm) and\nsequential packing (preserving original sequence order).\n\n\n\n\n\nName\nDescription\n\n\n\n\nefficiency\nCalculate the packing efficiency (ratio of tokens used to total token slots).\n\n\ngather_efficiency\nGather and synchronize packing efficiency estimates across all distributed\n\n\ngather_len_batches\nGather and synchronize batch counts across all distributed ranks. 
Returns\n\n\ngenerate_batches\nGenerate packed batches for training.\n\n\nset_epoch\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.efficiency()\nCalculate the packing efficiency (ratio of tokens used to total token slots).\nHigher is better - 1.0 would mean perfect packing with no wasted space.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_efficiency()\nGather and synchronize packing efficiency estimates across all distributed\nranks.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nfloat\nA conservative efficiency estimate based on the measurements.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_len_batches(num)\nGather and synchronize batch counts across all distributed ranks. Returns\nthe minimum number of batches available on any rank.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.generate_batches(set_stats=False)\nGenerate packed batches for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nset_stats\nbool\nWhether to update efficiency statistics.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[list[int]]]\nList of batches, where each batch contains multiple bins, and each bin contains multiple sequence indices.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.set_epoch(epoch)\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nallocate_sequentially\nSequential allocator that preserves example order.\n\n\nffd_check\nFirst-fit-decreasing bin packing algorithm check.\n\n\npack_group\nPack a group of sequences into bins using First-Fit Decreasing algorithm.\n\n\npack_parallel\nPack sequences into bins using parallel processing.\n\n\n\n\n\nutils.samplers.multipack.allocate_sequentially(\n sequence_lengths,\n rank,\n bin_capacity,\n num_ranks,\n)\nSequential allocator that preserves example order.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nThe lengths of all examples.\nrequired\n\n\nrank\nint\nThe current rank (for distributed training).\nrequired\n\n\nbin_capacity\nint\nThe capacity of each bin (maximum sequence length).\nrequired\n\n\nnum_ranks\nint\nNumber of ranks (processes / GPUs).\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nrank_batches\nlist[list[int]]\nList of batches for the current rank.\n\n\ntotal_tokens_used\nint\nNumber of actual example tokens.\n\n\ntotal_token_slots\nint\nMaximum theoretical number of example tokens (number of bins * bin capacity).\n\n\n\n\n\n\n\nutils.samplers.multipack.ffd_check(sequence_lengths, bin_capacity, num_bins)\nFirst-fit-decreasing bin packing algorithm check.\nChecks if sequences with the given lengths could fit in the specified number of\nbins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin.\nrequired\n\n\nnum_bins\nint\nNumber of bins available.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nbool\nTrue if all sequences can be packed, False otherwise.\n\n\n\n\n\n\n\nutils.samplers.multipack.pack_group(\n sequence_lengths,\n group_offset,\n bin_capacity,\n max_bins,\n bin_size,\n safe_mode=True,\n)\nPack a group of sequences into bins using First-Fit Decreasing 
algorithm.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\ngroup_offset\nint\nOffset to apply to indices when returning results.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin.\nrequired\n\n\nmax_bins\nint\nMaximum number of bins to use.\nrequired\n\n\nbin_size\nint\nMaximum number of sequences per bin.\nrequired\n\n\nsafe_mode\nbool\nIf True, use a more conservative packing approach.\nTrue\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[int]]\nList of bins, where each bin contains indices of sequences assigned to it.\n\n\n\n\n\n\n\nutils.samplers.multipack.pack_parallel(\n    sequence_lengths,\n    bin_capacity,\n    group_size,\n    bin_size,\n    num_processes=None,\n    safe_mode=True,\n    mp_start_method='spawn',\n)\nPack sequences into bins using parallel processing.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin as total number of tokens.\nrequired\n\n\ngroup_size\nint\nNumber of sequences to process in each group.\nrequired\n\n\nbin_size\nint\nMaximum number of sequences per bin.\nrequired\n\n\nnum_processes\nint | None\nNumber of parallel processes to use.\nNone\n\n\nsafe_mode\nbool\nIf True, use a more conservative packing approach.\nTrue\n\n\nmp_start_method\nstr | None\nMultiprocessing start method (‘fork’, ‘spawn’, ‘forkserver’). ‘spawn’ is often safer with Numba/PyTorch. Set to None to use system default.\n'spawn'\n\n\n\nReturns:\nList of bins, where each bin contains indices of sequences assigned to it."
},
{
"objectID": "docs/api/utils.samplers.multipack.html#classes",
"href": "docs/api/utils.samplers.multipack.html#classes",
"title": "utils.samplers.multipack",
"section": "",
- "text": "Name\nDescription\n\n\n\n\nMultipackBatchSampler\nBatch sampler class for efficient packing of variable-length sequences\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler(\n    sampler,\n    batch_size,\n    batch_max_len,\n    lengths,\n    packing_efficiency_estimate=1.0,\n    drop_last=False,\n    num_count_samples=8,\n    sequential=False,\n    group_size=100000,\n    bin_size=200,\n    num_processes=None,\n    safe_mode=True,\n    **kwargs,\n)\nBatch sampler class for efficient packing of variable-length sequences\nThis sampler packs sequences into fixed-capacity bins (batches) to maximize\nGPU memory utilization and training throughput by reducing padding.\nIt supports both parallel packing (using FFD algorithm) and\nsequential packing (preserving original sequence order).\n\n\n\n\n\nName\nDescription\n\n\n\n\nefficiency\nCalculate the packing efficiency (ratio of tokens used to total token slots).\n\n\ngather_efficiency\nGather and synchronize packing efficiency estimates across all distributed\n\n\ngather_len_batches\nGather and synchronize batch counts across all distributed ranks. 
Returns\n\n\ngenerate_batches\nGenerate packed batches for training.\n\n\nset_epoch\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.efficiency()\nCalculate the packing efficiency (ratio of tokens used to total token slots).\nHigher is better - 1.0 would mean perfect packing with no wasted space.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_efficiency()\nGather and synchronize packing efficiency estimates across all distributed\nranks.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nfloat\nA conservative efficiency estimate based on the measurements.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_len_batches(num)\nGather and synchronize batch counts across all distributed ranks. Returns\nthe minimum number of batches available on any rank.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.generate_batches(set_stats=False)\nGenerate packed batches for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nset_stats\nbool\nWhether to update efficiency statistics.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[list[int]]]\nList of batches, where each batch contains multiple bins, and each bin contains multiple sequence indices.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.set_epoch(epoch)\nSet the epoch number, used for reproducible shuffling across epochs" + "text": "Name\nDescription\n\n\n\n\nMultipackBatchSampler\nBatch sampler class for efficient packing of variable-length sequences\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler(\n sampler,\n batch_size,\n batch_max_len,\n lengths,\n packing_efficiency_estimate=1.0,\n drop_last=True,\n num_count_samples=8,\n sequential=False,\n group_size=100000,\n bin_size=200,\n num_processes=None,\n safe_mode=True,\n **kwargs,\n)\nBatch sampler class for efficient packing of variable-length sequences\nThis sampler packs sequences into fixed-capacity bins (batches) to maximize\nGPU memory utilization and training throughput by reducing padding.\nIt supports both parallel packing (using FFD algorithm) and\nsequential packing (preserving original sequence order).\n\n\n\n\n\nName\nDescription\n\n\n\n\nefficiency\nCalculate the packing efficiency (ratio of tokens used to total token slots).\n\n\ngather_efficiency\nGather and synchronize packing efficiency estimates across all distributed\n\n\ngather_len_batches\nGather and synchronize batch counts across all distributed ranks. Returns\n\n\ngenerate_batches\nGenerate packed batches for training.\n\n\nset_epoch\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.efficiency()\nCalculate the packing efficiency (ratio of tokens used to total token slots).\nHigher is better - 1.0 would mean perfect packing with no wasted space.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_efficiency()\nGather and synchronize packing efficiency estimates across all distributed\nranks.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nfloat\nA conservative efficiency estimate based on the measurements.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_len_batches(num)\nGather and synchronize batch counts across all distributed ranks. 
Returns\nthe minimum number of batches available on any rank.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.generate_batches(set_stats=False)\nGenerate packed batches for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nset_stats\nbool\nWhether to update efficiency statistics.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[list[int]]]\nList of batches, where each batch contains multiple bins, and each bin contains multiple sequence indices.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.set_epoch(epoch)\nSet the epoch number, used for reproducible shuffling across epochs" }, { "objectID": "docs/api/utils.samplers.multipack.html#functions", @@ -1428,14 +1428,14 @@ "href": "docs/api/integrations.kd.trainer.html", "title": "integrations.kd.trainer", "section": "", - "text": "integrations.kd.trainer\nKD trainer\n\n\n\n\n\nName\nDescription\n\n\n\n\nAxolotlKDTrainer\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer(\n *_args,\n bench_data_collator=None,\n eval_data_collator=None,\n dataset_tags=None,\n **kwargs,\n)\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nName\nDescription\n\n\n\n\ncompute_loss\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer.compute_loss(\n model,\n inputs,\n return_outputs=False,\n num_items_in_batch=None,\n)\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\nSubclass and override for custom behavior." + "text": "integrations.kd.trainer\nKD trainer\n\n\n\n\n\nName\nDescription\n\n\n\n\nAxolotlKDTrainer\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer(*args, **kwargs)\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nName\nDescription\n\n\n\n\ncompute_loss\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer.compute_loss(\n model,\n inputs,\n return_outputs=False,\n num_items_in_batch=None,\n)\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\nSubclass and override for custom behavior." }, { "objectID": "docs/api/integrations.kd.trainer.html#classes", "href": "docs/api/integrations.kd.trainer.html#classes", "title": "integrations.kd.trainer", "section": "", - "text": "Name\nDescription\n\n\n\n\nAxolotlKDTrainer\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer(\n *_args,\n bench_data_collator=None,\n eval_data_collator=None,\n dataset_tags=None,\n **kwargs,\n)\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nName\nDescription\n\n\n\n\ncompute_loss\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer.compute_loss(\n model,\n inputs,\n return_outputs=False,\n num_items_in_batch=None,\n)\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\nSubclass and override for custom behavior." 
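Editor's note on the AxolotlKDTrainer entry below: the compute_loss hook is where a KD trainer combines the student's standard loss with a distillation term. The snippet that follows is a minimal, illustrative sketch of that pattern on a stock Hugging Face Trainer, not the actual AxolotlKDTrainer implementation; the `teacher_logits` batch field is a hypothetical name, and `kd_alpha` / `kd_temperature` are modeled on the training arguments of the same names documented earlier on this page.

```python
# Illustrative sketch only -- assumes the data collator attaches a
# (hypothetical) `teacher_logits` tensor to every batch.
import torch.nn.functional as F
from transformers import Trainer


class SketchKDTrainer(Trainer):
    """Toy distillation trainer mixing cross-entropy and KL losses."""

    def __init__(self, *args, kd_alpha=1.0, kd_temperature=1.0, **kwargs):
        super().__init__(*args, **kwargs)
        self.kd_alpha = kd_alpha               # weight on the distillation term
        self.kd_temperature = kd_temperature   # softmax temperature

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        teacher_logits = inputs.pop("teacher_logits")  # hypothetical batch field
        outputs = model(**inputs)   # assumes `labels` are present in `inputs`
        ce_loss = outputs.loss      # standard next-token cross-entropy

        t = self.kd_temperature
        student_logp = F.log_softmax(outputs.logits / t, dim=-1)
        teacher_p = F.softmax(teacher_logits / t, dim=-1)
        # KL(teacher || student); the t**2 factor keeps gradient magnitudes
        # comparable across temperatures (Hinton et al., 2015).
        kd_loss = F.kl_div(student_logp, teacher_p, reduction="batchmean") * (t * t)

        loss = ce_loss + self.kd_alpha * kd_loss
        return (loss, outputs) if return_outputs else loss
```

Overriding compute_loss rather than wrapping the training loop is what "Subclass and override for custom behavior" means in the docstring: the Trainer machinery for gradient accumulation, logging, and distributed reduction keeps working unchanged.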
+ "text": "Name\nDescription\n\n\n\n\nAxolotlKDTrainer\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer(*args, **kwargs)\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nName\nDescription\n\n\n\n\ncompute_loss\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer.compute_loss(\n model,\n inputs,\n return_outputs=False,\n num_items_in_batch=None,\n)\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\nSubclass and override for custom behavior." }, { "objectID": "docs/api/utils.schemas.enums.html", @@ -2720,14 +2720,14 @@ "href": "docs/api/integrations.base.html", "title": "integrations.base", "section": "", - "text": "integrations.base\nBase class for all plugins.\nA plugin is a reusable, modular, and self-contained piece of code that extends the functionality of Axolotl.\nPlugins can be used to integrate third-party models, modify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and implement the required methods.\n\n\n\n\n\nName\nDescription\n\n\n\n\nBaseOptimizerFactory\nBase class for factories to create custom optimizers\n\n\nBasePlugin\nBase class for all plugins. Defines the interface for plugin methods.\n\n\nPluginManager\nThe PluginManager class is responsible for loading and managing plugins. It\n\n\n\n\n\nintegrations.base.BaseOptimizerFactory()\nBase class for factories to create custom optimizers\n\n\n\nintegrations.base.BasePlugin()\nBase class for all plugins. Defines the interface for plugin methods.\nA plugin is a reusable, modular, and self-contained piece of code that extends\nthe functionality of Axolotl. Plugins can be used to integrate third-party models,\nmodify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and\nimplement the required methods.\n\n\nPlugin methods include:\n- register(cfg): Registers the plugin with the given configuration.\n- load_datasets(cfg): Loads and preprocesses the dataset for training.\n- pre_model_load(cfg): Performs actions before the model is loaded.\n- post_model_build(cfg, model): Performs actions after the model is loaded, but\nbefore LoRA adapters are applied.\n- pre_lora_load(cfg, model): Performs actions before LoRA weights are loaded.\n- post_lora_load(cfg, model): Performs actions after LoRA weights are loaded.\n- post_model_load(cfg, model): Performs actions after the model is loaded,\ninclusive of any adapters.\n- post_trainer_create(cfg, trainer): Performs actions after the trainer is\ncreated.\n- create_optimizer(cfg, trainer): Creates and returns an optimizer for training.\n- create_lr_scheduler(cfg, trainer, optimizer, num_training_steps): Creates and\nreturns a learning rate scheduler.\n- add_callbacks_pre_trainer(cfg, model): Adds callbacks to the trainer before\ntraining.\n- add_callbacks_post_trainer(cfg, trainer): Adds callbacks to the trainer after\ntraining.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nAdds callbacks to the trainer after creating the trainer. 
This is useful for\n\n\nadd_callbacks_pre_trainer\nSet up callbacks before creating the trainer.\n\n\ncreate_lr_scheduler\nCreates and returns a learning rate scheduler.\n\n\ncreate_optimizer\nCreates and returns an optimizer for training.\n\n\nget_input_args\nReturns a pydantic model for the plugin’s input arguments.\n\n\nget_trainer_cls\nReturns a custom class for the trainer.\n\n\nload_datasets\nLoads and preprocesses the dataset for training.\n\n\npost_lora_load\nPerforms actions after LoRA weights are loaded.\n\n\npost_model_build\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\npost_model_load\nPerforms actions after the model is loaded.\n\n\npost_train\nPerforms actions after training is complete.\n\n\npost_train_unload\nPerforms actions after training is complete and the model is unloaded.\n\n\npost_trainer_create\nPerforms actions after the trainer is created.\n\n\npre_lora_load\nPerforms actions before LoRA weights are loaded.\n\n\npre_model_load\nPerforms actions before the model is loaded.\n\n\nregister\nRegisters the plugin with the given configuration.\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_post_trainer(cfg, trainer)\nAdds callbacks to the trainer after creating the trainer. This is useful for\ncallbacks that require access to the model or trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_pre_trainer(cfg, model)\nSet up callbacks before creating the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_lr_scheduler(\n cfg,\n trainer,\n optimizer,\n num_training_steps,\n)\nCreates and returns a learning rate scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\nnum_training_steps\nint\nTotal number of training steps\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_optimizer(cfg, trainer)\nCreates and returns an optimizer for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_input_args()\nReturns a pydantic model for the plugin’s input arguments.\n\n\n\nintegrations.base.BasePlugin.get_trainer_cls(cfg)\nReturns a custom class for the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe 
first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.load_datasets(cfg, preprocess=False)\nLoads and preprocesses the dataset for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\npreprocess\nbool\nWhether this is the preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\ndataset_meta\nUnion['TrainDatasetMeta', None]\nThe metadata for the training dataset.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_lora_load(cfg, model)\nPerforms actions after LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_build(cfg, model)\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_load(cfg, model)\nPerforms actions after the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train(cfg, model)\nPerforms actions after training is complete.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe axolotl configuration.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train_unload(cfg)\nPerforms actions after training is complete and the model is unloaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_trainer_create(cfg, trainer)\nPerforms actions after the trainer is created.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_lora_load(cfg, model)\nPerforms actions before LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_model_load(cfg)\nPerforms actions before the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.register(cfg)\nRegisters the plugin with the given configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\n\n\nintegrations.base.PluginManager()\nThe PluginManager class is responsible for loading and managing plugins. 
It\nshould be a singleton so it can be accessed from anywhere in the codebase.\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nplugins\nOrderedDict[str, BasePlugin]\nA list of loaded plugins.\n\n\n\n\n\n\nKey methods include:\n- get_instance(): Static method to get the singleton instance of PluginManager.\n- register(plugin_name: str): Registers a new plugin by its name.\n- pre_model_load(cfg): Calls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\nadd_callbacks_pre_trainer\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\ncreate_lr_scheduler\nCalls the create_lr_scheduler method of all registered plugins and returns\n\n\ncreate_optimizer\nCalls the create_optimizer method of all registered plugins and returns\n\n\nget_input_args\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\nget_instance\nReturns the singleton instance of PluginManager. If the instance doesn’t\n\n\nget_trainer_cls\nCalls the get_trainer_cls method of all registered plugins and returns the\n\n\nload_datasets\nCalls the load_datasets method of each registered plugin.\n\n\npost_lora_load\nCalls the post_lora_load method of all registered plugins.\n\n\npost_model_build\nCalls the post_model_build method of all registered plugins after the\n\n\npost_model_load\nCalls the post_model_load method of all registered plugins after the model\n\n\npost_train\nCalls the post_train method of all registered plugins.\n\n\npost_train_unload\nCalls the post_train_unload method of all registered plugins.\n\n\npost_trainer_create\nCalls the post_trainer_create method of all registered plugins.\n\n\npre_lora_load\nCalls the pre_lora_load method of all registered plugins.\n\n\npre_model_load\nCalls the pre_model_load method of all registered plugins.\n\n\nregister\nRegisters a new plugin by its name.\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_lr_scheduler(\n trainer,\n optimizer,\n num_training_steps,\n)\nCalls the create_lr_scheduler method of all registered plugins and returns\nthe first non-None scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler, or None if not 
found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_optimizer(trainer)\nCalls the create_optimizer method of all registered plugins and returns\nthe first non-None optimizer.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer, or None if none was found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_input_args()\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[str]\nA list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_instance()\nReturns the singleton instance of PluginManager. If the instance doesn’t\nexist, it creates a new one.\n\n\n\nintegrations.base.PluginManager.get_trainer_cls(cfg)\nCalls the get_trainer_cls method of all registered plugins and returns the\nfirst non-None trainer class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.load_datasets(cfg, preprocess=False)\nCalls the load_datasets method of each registered plugin.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\npreprocess\nbool\nWhether this is preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nUnion['TrainDatasetMeta', None]\nThe dataset metadata loaded from all registered plugins.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_lora_load(cfg, model)\nCalls the post_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_build(cfg, model)\nCalls the post_model_build method of all registered plugins after the\nmodel has been built / loaded, but before any adapters have been applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_load(cfg, model)\nCalls the post_model_load method of all registered plugins after the model\nhas been loaded inclusive of any adapters.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train(cfg, model)\nCalls the post_train method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train_unload(cfg)\nCalls the post_train_unload method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the 
plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_trainer_create(cfg, trainer)\nCalls the post_trainer_create method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_lora_load(cfg, model)\nCalls the pre_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_model_load(cfg)\nCalls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.register(plugin_name)\nRegisters a new plugin by its name.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be registered.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported.\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nload_plugin\nLoads a plugin based on the given plugin name.\n\n\n\n\n\nintegrations.base.load_plugin(plugin_name)\nLoads a plugin based on the given plugin name.\nThe plugin name should be in the format “module_name.class_name”. This function\nsplits the plugin name into module and class, imports the module, retrieves the\nclass from the module, and creates an instance of the class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be loaded. The name should be in the format “module_name.class_name”.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nBasePlugin\nAn instance of the loaded plugin.\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported." + "text": "integrations.base\nBase class for all plugins.\nA plugin is a reusable, modular, and self-contained piece of code that extends the functionality of Axolotl.\nPlugins can be used to integrate third-party models, modify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and implement the required methods.\n\n\n\n\n\nName\nDescription\n\n\n\n\nBaseOptimizerFactory\nBase class for factories to create custom optimizers\n\n\nBasePlugin\nBase class for all plugins. Defines the interface for plugin methods.\n\n\nPluginManager\nThe PluginManager class is responsible for loading and managing plugins. It\n\n\n\n\n\nintegrations.base.BaseOptimizerFactory()\nBase class for factories to create custom optimizers\n\n\n\nintegrations.base.BasePlugin()\nBase class for all plugins. Defines the interface for plugin methods.\nA plugin is a reusable, modular, and self-contained piece of code that extends\nthe functionality of Axolotl. 
Plugins can be used to integrate third-party models,\nmodify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and\nimplement the required methods.\n\n\nPlugin methods include:\n- register(cfg): Registers the plugin with the given configuration.\n- load_datasets(cfg): Loads and preprocesses the dataset for training.\n- pre_model_load(cfg): Performs actions before the model is loaded.\n- post_model_build(cfg, model): Performs actions after the model is loaded, but\nbefore LoRA adapters are applied.\n- pre_lora_load(cfg, model): Performs actions before LoRA weights are loaded.\n- post_lora_load(cfg, model): Performs actions after LoRA weights are loaded.\n- post_model_load(cfg, model): Performs actions after the model is loaded,\ninclusive of any adapters.\n- post_trainer_create(cfg, trainer): Performs actions after the trainer is\ncreated.\n- create_optimizer(cfg, trainer): Creates and returns an optimizer for training.\n- create_lr_scheduler(cfg, trainer, optimizer, num_training_steps): Creates and\nreturns a learning rate scheduler.\n- add_callbacks_pre_trainer(cfg, model): Adds callbacks to the trainer before\ntraining.\n- add_callbacks_post_trainer(cfg, trainer): Adds callbacks to the trainer after\ntraining.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nAdds callbacks to the trainer after creating the trainer. This is useful for\n\n\nadd_callbacks_pre_trainer\nSet up callbacks before creating the trainer.\n\n\ncreate_lr_scheduler\nCreates and returns a learning rate scheduler.\n\n\ncreate_optimizer\nCreates and returns an optimizer for training.\n\n\nget_collator_cls_and_kwargs\nReturns a custom class for the collator.\n\n\nget_input_args\nReturns a pydantic model for the plugin’s input arguments.\n\n\nget_trainer_cls\nReturns a custom class for the trainer.\n\n\nget_training_args\nReturns custom training arguments to set on TrainingArgs.\n\n\nget_training_args_mixin\nReturns a dataclass model for the plugin’s training arguments.\n\n\nload_datasets\nLoads and preprocesses the dataset for training.\n\n\npost_lora_load\nPerforms actions after LoRA weights are loaded.\n\n\npost_model_build\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\npost_model_load\nPerforms actions after the model is loaded.\n\n\npost_train\nPerforms actions after training is complete.\n\n\npost_train_unload\nPerforms actions after training is complete and the model is unloaded.\n\n\npost_trainer_create\nPerforms actions after the trainer is created.\n\n\npre_lora_load\nPerforms actions before LoRA weights are loaded.\n\n\npre_model_load\nPerforms actions before the model is loaded.\n\n\nregister\nRegisters the plugin with the given configuration.\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_post_trainer(cfg, trainer)\nAdds callbacks to the trainer after creating the trainer. 
This is useful for\ncallbacks that require access to the model or trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_pre_trainer(cfg, model)\nSet up callbacks before creating the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_lr_scheduler(\n cfg,\n trainer,\n optimizer,\n num_training_steps,\n)\nCreates and returns a learning rate scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\nnum_training_steps\nint\nTotal number of training steps\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_optimizer(cfg, trainer)\nCreates and returns an optimizer for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_collator_cls_and_kwargs(cfg, is_eval=False)\nReturns a custom class for the collator.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\nis_eval\nbool\nWhether this is an eval split.\nFalse\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nclass\n\nThe class for the collator.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_input_args()\nReturns a pydantic model for the plugin’s input arguments.\n\n\n\nintegrations.base.BasePlugin.get_trainer_cls(cfg)\nReturns a custom class for the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_training_args(cfg)\nReturns custom training arguments to set on TrainingArgs.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nobject\n\ndict containing the training arguments.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_training_args_mixin()\nReturns a dataclass model for the plugin’s training arguments.\n\n\n\nintegrations.base.BasePlugin.load_datasets(cfg, preprocess=False)\nLoads and preprocesses the dataset for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\npreprocess\nbool\nWhether this is the preprocess step of the 
datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\ndataset_meta\nUnion['TrainDatasetMeta', None]\nThe metadata for the training dataset.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_lora_load(cfg, model)\nPerforms actions after LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_build(cfg, model)\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_load(cfg, model)\nPerforms actions after the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train(cfg, model)\nPerforms actions after training is complete.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe axolotl configuration.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train_unload(cfg)\nPerforms actions after training is complete and the model is unloaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_trainer_create(cfg, trainer)\nPerforms actions after the trainer is created.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_lora_load(cfg, model)\nPerforms actions before LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_model_load(cfg)\nPerforms actions before the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.register(cfg)\nRegisters the plugin with the given configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\n\n\nintegrations.base.PluginManager()\nThe PluginManager class is responsible for loading and managing plugins. 
It\nshould be a singleton so it can be accessed from anywhere in the codebase.\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nplugins\nOrderedDict[str, BasePlugin]\nAn ordered mapping of loaded plugins.\n\n\n\n\n\n\nKey methods include:\n- get_instance(): Static method to get the singleton instance of PluginManager.\n- register(plugin_name: str): Registers a new plugin by its name.\n- pre_model_load(cfg): Calls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\nadd_callbacks_pre_trainer\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\ncreate_lr_scheduler\nCalls the create_lr_scheduler method of all registered plugins and returns\n\n\ncreate_optimizer\nCalls the create_optimizer method of all registered plugins and returns\n\n\nget_collator_cls_and_kwargs\nCalls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.\n\n\nget_input_args\nReturns a list of Pydantic classes for all registered plugins’ input arguments.\n\n\nget_instance\nReturns the singleton instance of PluginManager. If the instance doesn’t\n\n\nget_trainer_cls\nCalls the get_trainer_cls method of all registered plugins and returns the\n\n\nget_training_args\nCalls the get_training_args method of all registered plugins and returns the combined training arguments.\n\n\nget_training_args_mixin\nReturns a list of dataclasses for all registered plugins’ training args mixins.\n\n\nload_datasets\nCalls the load_datasets method of each registered plugin.\n\n\npost_lora_load\nCalls the post_lora_load method of all registered plugins.\n\n\npost_model_build\nCalls the post_model_build method of all registered plugins after the\n\n\npost_model_load\nCalls the post_model_load method of all registered plugins after the model\n\n\npost_train\nCalls the post_train method of all registered plugins.\n\n\npost_train_unload\nCalls the post_train_unload method of all registered plugins.\n\n\npost_trainer_create\nCalls the post_trainer_create method of all registered plugins.\n\n\npre_lora_load\nCalls the pre_lora_load method of all registered plugins.\n\n\npre_model_load\nCalls the pre_model_load method of all registered plugins.\n\n\nregister\nRegisters a new plugin by its name.\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_lr_scheduler(\n trainer,\n optimizer,\n num_training_steps,\n)\nCalls the create_lr_scheduler method of all registered plugins and returns\nthe 
first non-None scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler, or None if not found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_optimizer(trainer)\nCalls the create_optimizer method of all registered plugins and returns\nthe first non-None optimizer.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer, or None if none was found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_collator_cls_and_kwargs(cfg, is_eval=False)\nCalls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.\nParameters:\ncfg (dict): The configuration for the plugins.\nis_eval (bool): Whether this is an eval split.\nReturns:\nobject: The collator class, or None if none was found.\n\n\n\nintegrations.base.PluginManager.get_input_args()\nReturns a list of Pydantic classes for all registered plugins’ input arguments.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[str]\nA list of Pydantic classes for all registered plugins’ input arguments.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_instance()\nReturns the singleton instance of PluginManager. If the instance doesn’t\nexist, it creates a new one.\n\n\n\nintegrations.base.PluginManager.get_trainer_cls(cfg)\nCalls the get_trainer_cls method of all registered plugins and returns the\nfirst non-None trainer class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_training_args(cfg)\nCalls the get_training_args method of all registered plugins and returns the combined training arguments.\nParameters:\ncfg (dict): The configuration for the plugins.\nReturns:\nobject: The training arguments.\n\n\n\nintegrations.base.PluginManager.get_training_args_mixin()\nReturns a list of dataclasses for all registered plugins’ training args mixins.\nReturns:\nlist[str]: A list of dataclasses.\n\n\n\nintegrations.base.PluginManager.load_datasets(cfg, preprocess=False)\nCalls the load_datasets method of each registered plugin.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\npreprocess\nbool\nWhether this is the preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nUnion['TrainDatasetMeta', None]\nThe dataset metadata loaded from all registered plugins.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_lora_load(cfg, model)\nCalls the post_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_build(cfg, model)\nCalls the post_model_build method of all registered plugins after the\nmodel has been built / loaded, but before any adapters have been 
applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_load(cfg, model)\nCalls the post_model_load method of all registered plugins after the model\nhas been loaded inclusive of any adapters.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train(cfg, model)\nCalls the post_train method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train_unload(cfg)\nCalls the post_train_unload method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_trainer_create(cfg, trainer)\nCalls the post_trainer_create method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_lora_load(cfg, model)\nCalls the pre_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_model_load(cfg)\nCalls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.register(plugin_name)\nRegisters a new plugin by its name.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be registered.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported.\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nload_plugin\nLoads a plugin based on the given plugin name.\n\n\n\n\n\nintegrations.base.load_plugin(plugin_name)\nLoads a plugin based on the given plugin name.\nThe plugin name should be in the format “module_name.class_name”. This function\nsplits the plugin name into module and class, imports the module, retrieves the\nclass from the module, and creates an instance of the class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be loaded. The name should be in the format “module_name.class_name”.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nBasePlugin\nAn instance of the loaded plugin.\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported." 
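Taken together, the entries above describe a hook-based extension API: subclass BasePlugin, override only the hooks you need (the base class supplies no-op defaults), and register the subclass by its dotted "module_name.class_name" path so the singleton PluginManager can invoke each hook at the matching point in the training lifecycle. Below is a minimal sketch under those assumptions; the hook names and the registration format come from the reference above, while MyCallback, my_package.my_module, and the my_plugin_message config key are hypothetical.

```python
# Minimal plugin sketch against the BasePlugin interface documented above.
# Assumes the axolotl.integrations.base import path shown in these docs;
# MyCallback and the `my_plugin_message` config key are made up for the demo.
from transformers import TrainerCallback

from axolotl.integrations.base import BasePlugin, PluginManager


class MyCallback(TrainerCallback):
    """Hypothetical callback that logs when training begins."""

    def on_train_begin(self, args, state, control, **kwargs):
        print("MyPlugin: training started")


class MyPlugin(BasePlugin):
    """Example plugin that overrides only two of the optional hooks."""

    def pre_model_load(self, cfg):
        # Runs before the base model is loaded; `cfg` is the axolotl
        # configuration (a DictDefault, i.e. a dict subclass).
        print(cfg.get("my_plugin_message", "pre_model_load hook fired"))

    def add_callbacks_post_trainer(self, cfg, trainer):
        # Callback hooks return a list, per the signatures above.
        return [MyCallback()]


# Registration goes through the PluginManager singleton, using the
# "module_name.class_name" format that load_plugin expects.
plugin_manager = PluginManager.get_instance()
plugin_manager.register("my_package.my_module.MyPlugin")
```

Note that register() imports the module named in the dotted path (raising ImportError on failure, per the docs above), so this sketch only resolves if MyPlugin actually lives in an importable my_package.my_module.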
}, { "objectID": "docs/api/integrations.base.html#classes", "href": "docs/api/integrations.base.html#classes", "title": "integrations.base", "section": "", - "text": "Name\nDescription\n\n\n\n\nBaseOptimizerFactory\nBase class for factories to create custom optimizers\n\n\nBasePlugin\nBase class for all plugins. Defines the interface for plugin methods.\n\n\nPluginManager\nThe PluginManager class is responsible for loading and managing plugins. It\n\n\n\n\n\nintegrations.base.BaseOptimizerFactory()\nBase class for factories to create custom optimizers\n\n\n\nintegrations.base.BasePlugin()\nBase class for all plugins. Defines the interface for plugin methods.\nA plugin is a reusable, modular, and self-contained piece of code that extends\nthe functionality of Axolotl. Plugins can be used to integrate third-party models,\nmodify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and\nimplement the required methods.\n\n\nPlugin methods include:\n- register(cfg): Registers the plugin with the given configuration.\n- load_datasets(cfg): Loads and preprocesses the dataset for training.\n- pre_model_load(cfg): Performs actions before the model is loaded.\n- post_model_build(cfg, model): Performs actions after the model is loaded, but\nbefore LoRA adapters are applied.\n- pre_lora_load(cfg, model): Performs actions before LoRA weights are loaded.\n- post_lora_load(cfg, model): Performs actions after LoRA weights are loaded.\n- post_model_load(cfg, model): Performs actions after the model is loaded,\ninclusive of any adapters.\n- post_trainer_create(cfg, trainer): Performs actions after the trainer is\ncreated.\n- create_optimizer(cfg, trainer): Creates and returns an optimizer for training.\n- create_lr_scheduler(cfg, trainer, optimizer, num_training_steps): Creates and\nreturns a learning rate scheduler.\n- add_callbacks_pre_trainer(cfg, model): Adds callbacks to the trainer before\ntraining.\n- add_callbacks_post_trainer(cfg, trainer): Adds callbacks to the trainer after\ntraining.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nAdds callbacks to the trainer after creating the trainer. This is useful for\n\n\nadd_callbacks_pre_trainer\nSet up callbacks before creating the trainer.\n\n\ncreate_lr_scheduler\nCreates and returns a learning rate scheduler.\n\n\ncreate_optimizer\nCreates and returns an optimizer for training.\n\n\nget_input_args\nReturns a pydantic model for the plugin’s input arguments.\n\n\nget_trainer_cls\nReturns a custom class for the trainer.\n\n\nload_datasets\nLoads and preprocesses the dataset for training.\n\n\npost_lora_load\nPerforms actions after LoRA weights are loaded.\n\n\npost_model_build\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\npost_model_load\nPerforms actions after the model is loaded.\n\n\npost_train\nPerforms actions after training is complete.\n\n\npost_train_unload\nPerforms actions after training is complete and the model is unloaded.\n\n\npost_trainer_create\nPerforms actions after the trainer is created.\n\n\npre_lora_load\nPerforms actions before LoRA weights are loaded.\n\n\npre_model_load\nPerforms actions before the model is loaded.\n\n\nregister\nRegisters the plugin with the given configuration.\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_post_trainer(cfg, trainer)\nAdds callbacks to the trainer after creating the trainer. 
This is useful for\ncallbacks that require access to the model or trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_pre_trainer(cfg, model)\nSet up callbacks before creating the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_lr_scheduler(\n cfg,\n trainer,\n optimizer,\n num_training_steps,\n)\nCreates and returns a learning rate scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\nnum_training_steps\nint\nTotal number of training steps\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_optimizer(cfg, trainer)\nCreates and returns an optimizer for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_input_args()\nReturns a pydantic model for the plugin’s input arguments.\n\n\n\nintegrations.base.BasePlugin.get_trainer_cls(cfg)\nReturns a custom class for the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.load_datasets(cfg, preprocess=False)\nLoads and preprocesses the dataset for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\npreprocess\nbool\nWhether this is the preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\ndataset_meta\nUnion['TrainDatasetMeta', None]\nThe metadata for the training dataset.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_lora_load(cfg, model)\nPerforms actions after LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_build(cfg, model)\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_load(cfg, model)\nPerforms actions after the model is 
loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train(cfg, model)\nPerforms actions after training is complete.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe axolotl configuration.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train_unload(cfg)\nPerforms actions after training is complete and the model is unloaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_trainer_create(cfg, trainer)\nPerforms actions after the trainer is created.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_lora_load(cfg, model)\nPerforms actions before LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_model_load(cfg)\nPerforms actions before the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.register(cfg)\nRegisters the plugin with the given configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\n\n\nintegrations.base.PluginManager()\nThe PluginManager class is responsible for loading and managing plugins. It\nshould be a singleton so it can be accessed from anywhere in the codebase.\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nplugins\nOrderedDict[str, BasePlugin]\nA list of loaded plugins.\n\n\n\n\n\n\nKey methods include:\n- get_instance(): Static method to get the singleton instance of PluginManager.\n- register(plugin_name: str): Registers a new plugin by its name.\n- pre_model_load(cfg): Calls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\nadd_callbacks_pre_trainer\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\ncreate_lr_scheduler\nCalls the create_lr_scheduler method of all registered plugins and returns\n\n\ncreate_optimizer\nCalls the create_optimizer method of all registered plugins and returns\n\n\nget_input_args\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\nget_instance\nReturns the singleton instance of PluginManager. 
If the instance doesn’t\n\n\nget_trainer_cls\nCalls the get_trainer_cls method of all registered plugins and returns the\n\n\nload_datasets\nCalls the load_datasets method of each registered plugin.\n\n\npost_lora_load\nCalls the post_lora_load method of all registered plugins.\n\n\npost_model_build\nCalls the post_model_build method of all registered plugins after the\n\n\npost_model_load\nCalls the post_model_load method of all registered plugins after the model\n\n\npost_train\nCalls the post_train method of all registered plugins.\n\n\npost_train_unload\nCalls the post_train_unload method of all registered plugins.\n\n\npost_trainer_create\nCalls the post_trainer_create method of all registered plugins.\n\n\npre_lora_load\nCalls the pre_lora_load method of all registered plugins.\n\n\npre_model_load\nCalls the pre_model_load method of all registered plugins.\n\n\nregister\nRegisters a new plugin by its name.\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_lr_scheduler(\n trainer,\n optimizer,\n num_training_steps,\n)\nCalls the create_lr_scheduler method of all registered plugins and returns\nthe first non-None scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler, or None if not found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_optimizer(trainer)\nCalls the create_optimizer method of all registered plugins and returns\nthe first non-None optimizer.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer, or None if none was found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_input_args()\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[str]\nA list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_instance()\nReturns the singleton instance of PluginManager. 
If the instance doesn’t\nexist, it creates a new one.\n\n\n\nintegrations.base.PluginManager.get_trainer_cls(cfg)\nCalls the get_trainer_cls method of all registered plugins and returns the\nfirst non-None trainer class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.load_datasets(cfg, preprocess=False)\nCalls the load_datasets method of each registered plugin.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\npreprocess\nbool\nWhether this is preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nUnion['TrainDatasetMeta', None]\nThe dataset metadata loaded from all registered plugins.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_lora_load(cfg, model)\nCalls the post_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_build(cfg, model)\nCalls the post_model_build method of all registered plugins after the\nmodel has been built / loaded, but before any adapters have been applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_load(cfg, model)\nCalls the post_model_load method of all registered plugins after the model\nhas been loaded inclusive of any adapters.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train(cfg, model)\nCalls the post_train method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train_unload(cfg)\nCalls the post_train_unload method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_trainer_create(cfg, trainer)\nCalls the post_trainer_create method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_lora_load(cfg, model)\nCalls the pre_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_model_load(cfg)\nCalls the pre_model_load method of all registered 
plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.register(plugin_name)\nRegisters a new plugin by its name.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be registered.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported." + "text": "Name\nDescription\n\n\n\n\nBaseOptimizerFactory\nBase class for factories to create custom optimizers\n\n\nBasePlugin\nBase class for all plugins. Defines the interface for plugin methods.\n\n\nPluginManager\nThe PluginManager class is responsible for loading and managing plugins. It\n\n\n\n\n\nintegrations.base.BaseOptimizerFactory()\nBase class for factories to create custom optimizers\n\n\n\nintegrations.base.BasePlugin()\nBase class for all plugins. Defines the interface for plugin methods.\nA plugin is a reusable, modular, and self-contained piece of code that extends\nthe functionality of Axolotl. Plugins can be used to integrate third-party models,\nmodify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and\nimplement the required methods.\n\n\nPlugin methods include:\n- register(cfg): Registers the plugin with the given configuration.\n- load_datasets(cfg): Loads and preprocesses the dataset for training.\n- pre_model_load(cfg): Performs actions before the model is loaded.\n- post_model_build(cfg, model): Performs actions after the model is loaded, but\nbefore LoRA adapters are applied.\n- pre_lora_load(cfg, model): Performs actions before LoRA weights are loaded.\n- post_lora_load(cfg, model): Performs actions after LoRA weights are loaded.\n- post_model_load(cfg, model): Performs actions after the model is loaded,\ninclusive of any adapters.\n- post_trainer_create(cfg, trainer): Performs actions after the trainer is\ncreated.\n- create_optimizer(cfg, trainer): Creates and returns an optimizer for training.\n- create_lr_scheduler(cfg, trainer, optimizer, num_training_steps): Creates and\nreturns a learning rate scheduler.\n- add_callbacks_pre_trainer(cfg, model): Adds callbacks to the trainer before\ntraining.\n- add_callbacks_post_trainer(cfg, trainer): Adds callbacks to the trainer after\ntraining.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nAdds callbacks to the trainer after creating the trainer. 
This is useful for\n\n\nadd_callbacks_pre_trainer\nSet up callbacks before creating the trainer.\n\n\ncreate_lr_scheduler\nCreates and returns a learning rate scheduler.\n\n\ncreate_optimizer\nCreates and returns an optimizer for training.\n\n\nget_collator_cls_and_kwargs\nReturns a custom class for the collator.\n\n\nget_input_args\nReturns a pydantic model for the plugin’s input arguments.\n\n\nget_trainer_cls\nReturns a custom class for the trainer.\n\n\nget_training_args\nReturns custom training arguments to set on TrainingArgs.\n\n\nget_training_args_mixin\nReturns a dataclass model for the plugin’s training arguments.\n\n\nload_datasets\nLoads and preprocesses the dataset for training.\n\n\npost_lora_load\nPerforms actions after LoRA weights are loaded.\n\n\npost_model_build\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\npost_model_load\nPerforms actions after the model is loaded.\n\n\npost_train\nPerforms actions after training is complete.\n\n\npost_train_unload\nPerforms actions after training is complete and the model is unloaded.\n\n\npost_trainer_create\nPerforms actions after the trainer is created.\n\n\npre_lora_load\nPerforms actions before LoRA weights are loaded.\n\n\npre_model_load\nPerforms actions before the model is loaded.\n\n\nregister\nRegisters the plugin with the given configuration.\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_post_trainer(cfg, trainer)\nAdds callbacks to the trainer after creating the trainer. This is useful for\ncallbacks that require access to the model or trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_pre_trainer(cfg, model)\nSet up callbacks before creating the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_lr_scheduler(\n cfg,\n trainer,\n optimizer,\n num_training_steps,\n)\nCreates and returns a learning rate scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\nnum_training_steps\nint\nTotal number of training steps\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_optimizer(cfg, trainer)\nCreates and returns an optimizer for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_collator_cls_and_kwargs(cfg, is_eval=False)\nReturns a custom class for the 
collator.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\nis_eval\nbool\nWhether this is an eval split.\nFalse\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nclass\n\nThe class for the collator.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_input_args()\nReturns a pydantic model for the plugin’s input arguments.\n\n\n\nintegrations.base.BasePlugin.get_trainer_cls(cfg)\nReturns a custom class for the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_training_args(cfg)\nReturns custom training arguments to set on TrainingArgs.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nobject\n\ndict containing the training arguments.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_training_args_mixin()\nReturns a dataclass model for the plugin’s training arguments.\n\n\n\nintegrations.base.BasePlugin.load_datasets(cfg, preprocess=False)\nLoads and preprocesses the dataset for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\npreprocess\nbool\nWhether this is the preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\ndataset_meta\nUnion['TrainDatasetMeta', None]\nThe metadata for the training dataset.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_lora_load(cfg, model)\nPerforms actions after LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_build(cfg, model)\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_load(cfg, model)\nPerforms actions after the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train(cfg, model)\nPerforms actions after training is complete.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe axolotl configuration.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train_unload(cfg)\nPerforms actions after training is complete and the model is unloaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_trainer_create(cfg, trainer)\nPerforms actions after the trainer is created.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for 
integrations.base.BasePlugin.get_trainer_cls(cfg)

Returns a custom class for the trainer.

Parameters:
- cfg (DictDefault): The global axolotl configuration. (required)

Returns:
- Trainer | None: The custom trainer class, or None if the plugin does not provide one.

integrations.base.BasePlugin.get_training_args(cfg)

Returns custom training arguments to set on TrainingArgs.

Parameters:
- cfg (DictDefault): The global axolotl configuration. (required)

Returns:
- dict: A dict containing the training arguments.

integrations.base.BasePlugin.get_training_args_mixin()

Returns a dataclass model for the plugin's training arguments.

integrations.base.BasePlugin.load_datasets(cfg, preprocess=False)

Loads and preprocesses the dataset for training.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)
- preprocess (bool): Whether this is the dataset preprocessing step. (default: False)

Returns:
- dataset_meta (TrainDatasetMeta | None): The metadata for the training dataset.

integrations.base.BasePlugin.post_lora_load(cfg, model)

Performs actions after LoRA weights are loaded.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)
- model (PreTrainedModel | PeftModel): The loaded model. (required)

integrations.base.BasePlugin.post_model_build(cfg, model)

Performs actions after the model is built/loaded, but before any adapters are applied.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)
- model (PreTrainedModel): The built model. (required)

integrations.base.BasePlugin.post_model_load(cfg, model)

Performs actions after the model is loaded.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)
- model (PreTrainedModel | PeftModel): The loaded model. (required)

integrations.base.BasePlugin.post_train(cfg, model)

Performs actions after training is complete.

Parameters:
- cfg (DictDefault): The axolotl configuration. (required)
- model (PreTrainedModel | PeftModel): The loaded model. (required)

integrations.base.BasePlugin.post_train_unload(cfg)

Performs actions after training is complete and the model is unloaded.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)

integrations.base.BasePlugin.post_trainer_create(cfg, trainer)

Performs actions after the trainer is created.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)
- trainer (Trainer): The trainer object for training. (required)

integrations.base.BasePlugin.pre_lora_load(cfg, model)

Performs actions before LoRA weights are loaded.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)
- model (PreTrainedModel): The loaded model. (required)

integrations.base.BasePlugin.pre_model_load(cfg)

Performs actions before the model is loaded.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)

integrations.base.BasePlugin.register(cfg)

Registers the plugin with the given configuration.

Parameters:
- cfg (DictDefault): The configuration for the plugin. (required)
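get_training_args_mixin pairs with get_training_args: the mixin contributes fields to the generated training-arguments dataclass, and get_training_args supplies values to set on them. A sketch with invented field names:

```python
from dataclasses import dataclass, field

from axolotl.integrations.base import BasePlugin


@dataclass
class MyPluginTrainingArgsMixin:
    """Hypothetical fields to be mixed into the generated TrainingArgs."""

    my_plugin_log_every: int = field(
        default=0,
        metadata={"help": "Log plugin stats every N steps (0 disables logging)."},
    )


class MyPlugin(BasePlugin):
    def get_training_args_mixin(self):
        # Dataclass whose fields are merged into the training arguments.
        return MyPluginTrainingArgsMixin

    def get_training_args(self, cfg):
        # Values to set on the TrainingArgs built from the mixin above.
        return {"my_plugin_log_every": cfg.get("my_plugin_log_every", 0)}
```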
integrations.base.PluginManager()

The PluginManager class is responsible for loading and managing plugins. It is a singleton so it can be accessed from anywhere in the codebase.

Attributes:
- plugins (OrderedDict[str, BasePlugin]): The loaded plugins, keyed by name.

Key methods include:
- get_instance(): Static method to get the singleton instance of PluginManager.
- register(plugin_name: str): Registers a new plugin by its name.
- pre_model_load(cfg): Calls the pre_model_load method of all registered plugins.

Methods:
- add_callbacks_post_trainer: Calls the add_callbacks_post_trainer method of all registered plugins.
- add_callbacks_pre_trainer: Calls the add_callbacks_pre_trainer method of all registered plugins.
- create_lr_scheduler: Calls the create_lr_scheduler method of all registered plugins and returns the first non-None scheduler.
- create_optimizer: Calls the create_optimizer method of all registered plugins and returns the first non-None optimizer.
- get_collator_cls_and_kwargs: Calls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.
- get_input_args: Returns a list of Pydantic classes for all registered plugins' input arguments.
- get_instance: Returns the singleton instance of PluginManager. If the instance doesn't exist, it creates a new one.
- get_trainer_cls: Calls the get_trainer_cls method of all registered plugins and returns the first non-None trainer class.
- get_training_args: Calls the get_training_args method of all registered plugins and returns the combined training arguments.
- get_training_args_mixin: Returns a list of dataclasses for all registered plugins' training args mixins.
- load_datasets: Calls the load_datasets method of each registered plugin.
- post_lora_load: Calls the post_lora_load method of all registered plugins.
- post_model_build: Calls the post_model_build method of all registered plugins after the model has been built/loaded, but before any adapters have been applied.
- post_model_load: Calls the post_model_load method of all registered plugins after the model has been loaded.
- post_train: Calls the post_train method of all registered plugins.
- post_train_unload: Calls the post_train_unload method of all registered plugins.
- post_trainer_create: Calls the post_trainer_create method of all registered plugins.
- pre_lora_load: Calls the pre_lora_load method of all registered plugins.
- pre_model_load: Calls the pre_model_load method of all registered plugins.
- register: Registers a new plugin by its name.

integrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)

Calls the add_callbacks_post_trainer method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- trainer (Trainer): The trainer object for training. (required)

Returns:
- list[Callable]: A list of callback functions to be added.

integrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)

Calls the add_callbacks_pre_trainer method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- model (PreTrainedModel): The loaded model. (required)

Returns:
- list[Callable]: A list of callback functions to be added to the TrainingArgs.

integrations.base.PluginManager.create_lr_scheduler(trainer, optimizer, num_training_steps)

Calls the create_lr_scheduler method of all registered plugins and returns the first non-None scheduler.

Parameters:
- trainer (Trainer): The trainer object for training. (required)
- optimizer (Optimizer): The optimizer for training. (required)
- num_training_steps (int): The total number of training steps. (required)

Returns:
- LRScheduler | None: The created learning rate scheduler, or None if no plugin provides one.

integrations.base.PluginManager.create_optimizer(trainer)

Calls the create_optimizer method of all registered plugins and returns the first non-None optimizer.

Parameters:
- trainer (Trainer): The trainer object for training. (required)

Returns:
- Optimizer | None: The created optimizer, or None if none was found.

integrations.base.PluginManager.get_collator_cls_and_kwargs(cfg, is_eval=False)

Calls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- is_eval (bool): Whether this is an eval split. (default: False)

Returns:
- class: The collator class, or None if none was found.
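Several PluginManager methods (create_lr_scheduler and create_optimizer above, get_trainer_cls and get_collator_cls_and_kwargs as well) share a "first non-None wins" contract. A minimal sketch of that dispatch pattern, illustrative rather than the actual implementation:

```python
def first_non_none(plugins, method_name, *args, **kwargs):
    """Return the first non-None result of calling method_name on each plugin.

    `plugins` mirrors PluginManager.plugins: an OrderedDict[str, BasePlugin],
    so plugins are consulted in registration order.
    """
    for plugin in plugins.values():
        result = getattr(plugin, method_name)(*args, **kwargs)
        if result is not None:
            return result
    return None
```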
integrations.base.PluginManager.get_input_args()

Returns a list of Pydantic classes for all registered plugins' input arguments.

Returns:
- list[str]: A list of Pydantic classes for all registered plugins' input arguments.

integrations.base.PluginManager.get_instance()

Returns the singleton instance of PluginManager. If the instance doesn't exist, it creates a new one.

integrations.base.PluginManager.get_trainer_cls(cfg)

Calls the get_trainer_cls method of all registered plugins and returns the first non-None trainer class.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)

Returns:
- Trainer | None: The first non-None trainer class returned by a plugin.

integrations.base.PluginManager.get_training_args(cfg)

Calls the get_training_args method of all registered plugins and returns the combined training arguments.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)

Returns:
- dict: The combined training arguments.

integrations.base.PluginManager.get_training_args_mixin()

Returns a list of dataclasses for all registered plugins' training args mixins.

Returns:
- list: A list of dataclasses.

integrations.base.PluginManager.load_datasets(cfg, preprocess=False)

Calls the load_datasets method of each registered plugin.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- preprocess (bool): Whether this is the dataset preprocessing step. (default: False)

Returns:
- TrainDatasetMeta | None: The dataset metadata loaded from all registered plugins.

integrations.base.PluginManager.post_lora_load(cfg, model)

Calls the post_lora_load method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- model (PreTrainedModel | PeftModel): The loaded model. (required)

integrations.base.PluginManager.post_model_build(cfg, model)

Calls the post_model_build method of all registered plugins after the model has been built/loaded, but before any adapters have been applied.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- model (PreTrainedModel): The loaded model. (required)

integrations.base.PluginManager.post_model_load(cfg, model)

Calls the post_model_load method of all registered plugins after the model has been loaded, inclusive of any adapters.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- model (PreTrainedModel | PeftModel): The loaded model. (required)

integrations.base.PluginManager.post_train(cfg, model)

Calls the post_train method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- model (PreTrainedModel | PeftModel): The loaded model. (required)
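Putting get_instance and register together, a short usage sketch; the plugin path is hypothetical, the config is a minimal illustrative DictDefault, and register(plugin_name) is detailed below:

```python
from axolotl.integrations.base import PluginManager
from axolotl.utils.dict import DictDefault

# Illustrative config; a real axolotl config carries many more keys.
cfg = DictDefault({"base_model": "meta-llama/Llama-3.2-1B"})

manager = PluginManager.get_instance()             # singleton accessor
manager.register("my_package.my_plugin.MyPlugin")  # dotted path; may raise ImportError
manager.pre_model_load(cfg)                        # fans out to every registered plugin
```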
integrations.base.PluginManager.post_train_unload(cfg)

Calls the post_train_unload method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)

integrations.base.PluginManager.post_trainer_create(cfg, trainer)

Calls the post_trainer_create method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- trainer (Trainer): The trainer object for training. (required)

integrations.base.PluginManager.pre_lora_load(cfg, model)

Calls the pre_lora_load method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)
- model (PreTrainedModel): The loaded model. (required)

integrations.base.PluginManager.pre_model_load(cfg)

Calls the pre_model_load method of all registered plugins.

Parameters:
- cfg (DictDefault): The configuration for the plugins. (required)

integrations.base.PluginManager.register(plugin_name)

Registers a new plugin by its name.

Parameters:
- plugin_name (str): The name of the plugin to be registered. (required)

Raises:
- ImportError: If the plugin module cannot be imported.
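Read together, the hook names imply a rough per-run lifecycle. The ordering below is inferred from the hook descriptions in this document (for example, post_model_build runs before adapters and post_model_load after), not taken from the implementation:

```python
# Inferred hook order for a single training run; illustrative only.
PLUGIN_LIFECYCLE = [
    "register",                    # plugin is loaded and registered
    "pre_model_load",              # before the model is loaded
    "post_model_build",            # model built, adapters not yet applied
    "pre_lora_load",               # before LoRA weights are loaded
    "post_lora_load",              # after LoRA weights are loaded
    "post_model_load",             # model fully loaded, adapters included
    "add_callbacks_pre_trainer",   # callbacks gathered before trainer creation
    "post_trainer_create",         # trainer exists
    "add_callbacks_post_trainer",  # callbacks that need the trainer
    "post_train",                  # training finished, model still loaded
    "post_train_unload",           # model unloaded
]
```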
Returns:
- tuple['HFRLTrainerBuilder' | 'HFCausalTrainerBuilder', PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]: Tuple of:
  - Trainer builder (causal or RLHF)
  - Model
  - Tokenizer
  - PEFT config
  - Processor