@@ -2199,13 +2362,13 @@ has been loaded inclusive of any adapters.
load_plugin
-
integrations.base.load_plugin(plugin_name)
+
integrations.base.load_plugin(plugin_name)
Loads a plugin based on the given plugin name.
The plugin name should be in the format “module_name.class_name”. This function
splits the plugin name into module and class, imports the module, retrieves the
class from the module, and creates an instance of the class.
-
-
Parameters
+
+
Parameters
@@ -2231,8 +2394,8 @@ class from the module, and creates an instance of the class.
Tuple of: - Trainer (Causal or RLHF) - Model - Tokenizer - PEFT config - Processor
diff --git a/docs/api/utils.bench.html b/docs/api/utils.bench.html
index d6c3c9374..4ca5c00ea 100644
--- a/docs/api/utils.bench.html
+++ b/docs/api/utils.bench.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.callbacks.comet_.html b/docs/api/utils.callbacks.comet_.html
index c15a1bb43..89a30e784 100644
--- a/docs/api/utils.callbacks.comet_.html
+++ b/docs/api/utils.callbacks.comet_.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.callbacks.lisa.html b/docs/api/utils.callbacks.lisa.html
index 59292b946..416a23d89 100644
--- a/docs/api/utils.callbacks.lisa.html
+++ b/docs/api/utils.callbacks.lisa.html
@@ -2,7 +2,7 @@
-
+
@@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] {
-
+
diff --git a/docs/api/utils.callbacks.mlflow_.html b/docs/api/utils.callbacks.mlflow_.html
index bf403287b..0c0bb2cdc 100644
--- a/docs/api/utils.callbacks.mlflow_.html
+++ b/docs/api/utils.callbacks.mlflow_.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.callbacks.perplexity.html b/docs/api/utils.callbacks.perplexity.html
index 6d1f8f899..699622984 100644
--- a/docs/api/utils.callbacks.perplexity.html
+++ b/docs/api/utils.callbacks.perplexity.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.callbacks.profiler.html b/docs/api/utils.callbacks.profiler.html
index 824d1dcde..084f7a6f0 100644
--- a/docs/api/utils.callbacks.profiler.html
+++ b/docs/api/utils.callbacks.profiler.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.callbacks.qat.html b/docs/api/utils.callbacks.qat.html
index 30ea386fa..6b16a4c99 100644
--- a/docs/api/utils.callbacks.qat.html
+++ b/docs/api/utils.callbacks.qat.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.chat_templates.html b/docs/api/utils.chat_templates.html
index 92c0816f6..49a544319 100644
--- a/docs/api/utils.chat_templates.html
+++ b/docs/api/utils.chat_templates.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.collators.batching.html b/docs/api/utils.collators.batching.html
index 47b7388d1..b0760a962 100644
--- a/docs/api/utils.collators.batching.html
+++ b/docs/api/utils.collators.batching.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.collators.core.html b/docs/api/utils.collators.core.html
index a7b4d7461..c9719b121 100644
--- a/docs/api/utils.collators.core.html
+++ b/docs/api/utils.collators.core.html
@@ -2,7 +2,7 @@
-
+
@@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] {
-
+
diff --git a/docs/api/utils.collators.mamba.html b/docs/api/utils.collators.mamba.html
index a960b8c19..ba734d472 100644
--- a/docs/api/utils.collators.mamba.html
+++ b/docs/api/utils.collators.mamba.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.collators.mm_chat.html b/docs/api/utils.collators.mm_chat.html
index 4988bac19..e14fac627 100644
--- a/docs/api/utils.collators.mm_chat.html
+++ b/docs/api/utils.collators.mm_chat.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.ctx_managers.sequence_parallel.html b/docs/api/utils.ctx_managers.sequence_parallel.html
index 4991af2fb..926b2ce62 100644
--- a/docs/api/utils.ctx_managers.sequence_parallel.html
+++ b/docs/api/utils.ctx_managers.sequence_parallel.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.data.pretraining.html b/docs/api/utils.data.pretraining.html
index 402ae4732..8b2cb83c7 100644
--- a/docs/api/utils.data.pretraining.html
+++ b/docs/api/utils.data.pretraining.html
@@ -2,7 +2,7 @@
-
+
@@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] {
-
+
diff --git a/docs/api/utils.data.sft.html b/docs/api/utils.data.sft.html
index 6fff26a34..1c8ba124f 100644
--- a/docs/api/utils.data.sft.html
+++ b/docs/api/utils.data.sft.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.dict.html b/docs/api/utils.dict.html
index 995837ca1..0fa828e67 100644
--- a/docs/api/utils.dict.html
+++ b/docs/api/utils.dict.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.distributed.html b/docs/api/utils.distributed.html
index 4fd370fcd..57ba7da1f 100644
--- a/docs/api/utils.distributed.html
+++ b/docs/api/utils.distributed.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.freeze.html b/docs/api/utils.freeze.html
index 073e0d0a6..7c4f6ca36 100644
--- a/docs/api/utils.freeze.html
+++ b/docs/api/utils.freeze.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.lora.html b/docs/api/utils.lora.html
index 1a648ffa2..ae2351e56 100644
--- a/docs/api/utils.lora.html
+++ b/docs/api/utils.lora.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.model_shard_quant.html b/docs/api/utils.model_shard_quant.html
index c8d6c3800..2f481d570 100644
--- a/docs/api/utils.model_shard_quant.html
+++ b/docs/api/utils.model_shard_quant.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.optimizers.adopt.html b/docs/api/utils.optimizers.adopt.html
index abface679..686e1fb5a 100644
--- a/docs/api/utils.optimizers.adopt.html
+++ b/docs/api/utils.optimizers.adopt.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.quantization.html b/docs/api/utils.quantization.html
index b39c84ba2..5e5227176 100644
--- a/docs/api/utils.quantization.html
+++ b/docs/api/utils.quantization.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.samplers.multipack.html b/docs/api/utils.samplers.multipack.html
index 75a23093a..5bfc6b943 100644
--- a/docs/api/utils.samplers.multipack.html
+++ b/docs/api/utils.samplers.multipack.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
@@ -519,7 +519,7 @@ into fixed-capacity batches to optimize memory usage and training throughput. batch_max_len,
lengths, packing_efficiency_estimate=1.0,
- drop_last=False,
+ drop_last=True, num_count_samples=8, sequential=False, group_size=100000,
diff --git a/docs/api/utils.schedulers.html b/docs/api/utils.schedulers.html
index 933b8a66b..cf276638f 100644
--- a/docs/api/utils.schedulers.html
+++ b/docs/api/utils.schedulers.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.schemas.config.html b/docs/api/utils.schemas.config.html
index 544ca7182..2f51a2a5e 100644
--- a/docs/api/utils.schemas.config.html
+++ b/docs/api/utils.schemas.config.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.schemas.datasets.html b/docs/api/utils.schemas.datasets.html
index ad2cf1d3a..86c609a39 100644
--- a/docs/api/utils.schemas.datasets.html
+++ b/docs/api/utils.schemas.datasets.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.schemas.enums.html b/docs/api/utils.schemas.enums.html
index bed4f1650..ca834d521 100644
--- a/docs/api/utils.schemas.enums.html
+++ b/docs/api/utils.schemas.enums.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.schemas.integrations.html b/docs/api/utils.schemas.integrations.html
index 9045eda30..02104fd3b 100644
--- a/docs/api/utils.schemas.integrations.html
+++ b/docs/api/utils.schemas.integrations.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.schemas.model.html b/docs/api/utils.schemas.model.html
index e7d4e3261..d04c16aad 100644
--- a/docs/api/utils.schemas.model.html
+++ b/docs/api/utils.schemas.model.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.schemas.multimodal.html b/docs/api/utils.schemas.multimodal.html
index c81509a29..ae93a5193 100644
--- a/docs/api/utils.schemas.multimodal.html
+++ b/docs/api/utils.schemas.multimodal.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.schemas.peft.html b/docs/api/utils.schemas.peft.html
index 0ed7da969..5776ee34e 100644
--- a/docs/api/utils.schemas.peft.html
+++ b/docs/api/utils.schemas.peft.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.schemas.training.html b/docs/api/utils.schemas.training.html
index ee9e20530..2f6044285 100644
--- a/docs/api/utils.schemas.training.html
+++ b/docs/api/utils.schemas.training.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.schemas.trl.html b/docs/api/utils.schemas.trl.html
index ace507cb4..701353f6e 100644
--- a/docs/api/utils.schemas.trl.html
+++ b/docs/api/utils.schemas.trl.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.schemas.utils.html b/docs/api/utils.schemas.utils.html
index bedcd58db..ea584823a 100644
--- a/docs/api/utils.schemas.utils.html
+++ b/docs/api/utils.schemas.utils.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.tokenization.html b/docs/api/utils.tokenization.html
index 34ff65a62..914885b79 100644
--- a/docs/api/utils.tokenization.html
+++ b/docs/api/utils.tokenization.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/api/utils.trainer.html b/docs/api/utils.trainer.html
index 27d912099..c56877014 100644
--- a/docs/api/utils.trainer.html
+++ b/docs/api/utils.trainer.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/batch_vs_grad.html b/docs/batch_vs_grad.html
index 0bb231e05..fab84b01c 100644
--- a/docs/batch_vs_grad.html
+++ b/docs/batch_vs_grad.html
@@ -2,7 +2,7 @@
-
+
@@ -37,7 +37,7 @@ ul.task-list li input[type="checkbox"] {
-
+
diff --git a/docs/cli.html b/docs/cli.html
index b4ac80ccd..9186c8c89 100644
--- a/docs/cli.html
+++ b/docs/cli.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/config.html b/docs/config.html
index 20b491caa..84e5fb6c6 100644
--- a/docs/config.html
+++ b/docs/config.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/custom_integrations.html b/docs/custom_integrations.html
index c02b213bd..cd72e6f71 100644
--- a/docs/custom_integrations.html
+++ b/docs/custom_integrations.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/dataset-formats/conversation.html b/docs/dataset-formats/conversation.html
index b3f68d3e1..c03cfcc80 100644
--- a/docs/dataset-formats/conversation.html
+++ b/docs/dataset-formats/conversation.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/dataset-formats/index.html b/docs/dataset-formats/index.html
index 86fc9726d..a534c4200 100644
--- a/docs/dataset-formats/index.html
+++ b/docs/dataset-formats/index.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/dataset-formats/inst_tune.html b/docs/dataset-formats/inst_tune.html
index bb3102eb7..f7aa3d294 100644
--- a/docs/dataset-formats/inst_tune.html
+++ b/docs/dataset-formats/inst_tune.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/dataset-formats/pretraining.html b/docs/dataset-formats/pretraining.html
index f80701f5b..3cbc523da 100644
--- a/docs/dataset-formats/pretraining.html
+++ b/docs/dataset-formats/pretraining.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/dataset-formats/stepwise_supervised.html b/docs/dataset-formats/stepwise_supervised.html
index 923d0e539..c1959429b 100644
--- a/docs/dataset-formats/stepwise_supervised.html
+++ b/docs/dataset-formats/stepwise_supervised.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/dataset-formats/template_free.html b/docs/dataset-formats/template_free.html
index bda64fd55..479ca2d50 100644
--- a/docs/dataset-formats/template_free.html
+++ b/docs/dataset-formats/template_free.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/dataset-formats/tokenized.html b/docs/dataset-formats/tokenized.html
index cc45dd48c..8b7530290 100644
--- a/docs/dataset-formats/tokenized.html
+++ b/docs/dataset-formats/tokenized.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/dataset_loading.html b/docs/dataset_loading.html
index 8d7fd93da..a9a80bc2d 100644
--- a/docs/dataset_loading.html
+++ b/docs/dataset_loading.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/dataset_preprocessing.html b/docs/dataset_preprocessing.html
index 3989b8f67..d4f38dff9 100644
--- a/docs/dataset_preprocessing.html
+++ b/docs/dataset_preprocessing.html
@@ -2,7 +2,7 @@
-
+
@@ -37,7 +37,7 @@ ul.task-list li input[type="checkbox"] {
-
+
diff --git a/docs/debugging.html b/docs/debugging.html
index fbf57b1ce..387537a40 100644
--- a/docs/debugging.html
+++ b/docs/debugging.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/docker.html b/docs/docker.html
index 5c9b4c4a3..a2f2dfcc8 100644
--- a/docs/docker.html
+++ b/docs/docker.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/faq.html b/docs/faq.html
index bc369cec5..c45204292 100644
--- a/docs/faq.html
+++ b/docs/faq.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/fsdp_qlora.html b/docs/fsdp_qlora.html
index 8534386c6..44a58694a 100644
--- a/docs/fsdp_qlora.html
+++ b/docs/fsdp_qlora.html
@@ -2,7 +2,7 @@
-
+
@@ -37,7 +37,7 @@ ul.task-list li input[type="checkbox"] {
-
+
diff --git a/docs/getting-started.html b/docs/getting-started.html
index d11a07ffe..6ed7e8cd4 100644
--- a/docs/getting-started.html
+++ b/docs/getting-started.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/inference.html b/docs/inference.html
index e5453dd41..6b722a2c0 100644
--- a/docs/inference.html
+++ b/docs/inference.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/input_output.html b/docs/input_output.html
index 6c451de3f..8a4f02837 100644
--- a/docs/input_output.html
+++ b/docs/input_output.html
@@ -2,7 +2,7 @@
-
+
@@ -37,7 +37,7 @@ ul.task-list li input[type="checkbox"] {
-
+
diff --git a/docs/installation.html b/docs/installation.html
index 344a41f2b..b48621d30 100644
--- a/docs/installation.html
+++ b/docs/installation.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/lora_optims.html b/docs/lora_optims.html
index 50ff20bb4..6051a3f8b 100644
--- a/docs/lora_optims.html
+++ b/docs/lora_optims.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/lr_groups.html b/docs/lr_groups.html
index c4233d6ef..e5027f04e 100644
--- a/docs/lr_groups.html
+++ b/docs/lr_groups.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/mac.html b/docs/mac.html
index 1a2372cb5..1d456b738 100644
--- a/docs/mac.html
+++ b/docs/mac.html
@@ -2,7 +2,7 @@
-
+
@@ -37,7 +37,7 @@ ul.task-list li input[type="checkbox"] {
-
+
diff --git a/docs/multi-gpu.html b/docs/multi-gpu.html
index f108b5438..c27d38361 100644
--- a/docs/multi-gpu.html
+++ b/docs/multi-gpu.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/multi-node.html b/docs/multi-node.html
index 75e4dd3bb..7719c0af1 100644
--- a/docs/multi-node.html
+++ b/docs/multi-node.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/multimodal.html b/docs/multimodal.html
index 40d016b0d..f609b2e6d 100644
--- a/docs/multimodal.html
+++ b/docs/multimodal.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/multipack.html b/docs/multipack.html
index 00c774d3a..fc8d958c5 100644
--- a/docs/multipack.html
+++ b/docs/multipack.html
@@ -2,7 +2,7 @@
-
+
@@ -37,7 +37,7 @@ ul.task-list li input[type="checkbox"] {
-
+
diff --git a/docs/nccl.html b/docs/nccl.html
index 1cbbe00c5..202e69448 100644
--- a/docs/nccl.html
+++ b/docs/nccl.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/qat.html b/docs/qat.html
index fff59d03e..ea5e15216 100644
--- a/docs/qat.html
+++ b/docs/qat.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/quantize.html b/docs/quantize.html
index df02b4a41..0e6f97bd9 100644
--- a/docs/quantize.html
+++ b/docs/quantize.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/ray-integration.html b/docs/ray-integration.html
index 66ce8d648..ebc818736 100644
--- a/docs/ray-integration.html
+++ b/docs/ray-integration.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/reward_modelling.html b/docs/reward_modelling.html
index 213c7b371..5fd93a079 100644
--- a/docs/reward_modelling.html
+++ b/docs/reward_modelling.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/rlhf.html b/docs/rlhf.html
index 61a7fec9a..710441105 100644
--- a/docs/rlhf.html
+++ b/docs/rlhf.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/sequence_parallelism.html b/docs/sequence_parallelism.html
index f175c4a46..8c1d23e9d 100644
--- a/docs/sequence_parallelism.html
+++ b/docs/sequence_parallelism.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/torchao.html b/docs/torchao.html
index ca6ee1d91..d36819861 100644
--- a/docs/torchao.html
+++ b/docs/torchao.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/docs/unsloth.html b/docs/unsloth.html
index 4b00088e0..ce1b4e94f 100644
--- a/docs/unsloth.html
+++ b/docs/unsloth.html
@@ -2,7 +2,7 @@
-
+
@@ -72,7 +72,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/examples/colab-notebooks/colab-axolotl-example.html b/examples/colab-notebooks/colab-axolotl-example.html
index 954f57fd6..15ec4064f 100644
--- a/examples/colab-notebooks/colab-axolotl-example.html
+++ b/examples/colab-notebooks/colab-axolotl-example.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/index.html b/index.html
index 19401ce86..4612eed0e 100644
--- a/index.html
+++ b/index.html
@@ -2,7 +2,7 @@
-
+
@@ -71,7 +71,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
-
+
diff --git a/search.json b/search.json
index 70ea3f187..7e66ac345 100644
--- a/search.json
+++ b/search.json
@@ -644,14 +644,14 @@
"href": "docs/api/core.training_args.html",
"title": "core.training_args",
"section": "",
- "text": "core.training_args\nextra axolotl specific training args\n\n\n\n\n\nName\nDescription\n\n\n\n\nAxolotlCPOConfig\nCPO config for CPO training\n\n\nAxolotlKTOConfig\nKTO config for KTO training\n\n\nAxolotlORPOConfig\nORPO config for ORPO training\n\n\nAxolotlPRMConfig\nPRM config for PRM training\n\n\nAxolotlRewardConfig\nReward config for Reward training\n\n\nAxolotlTrainingArguments\nTraining arguments for Causal trainer\n\n\nAxolotlTrainingMixins\nMixin class for the Axolotl training args.\n\n\n\n\n\ncore.training_args.AxolotlCPOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n simpo_gamma=None,\n)\nCPO config for CPO training\n\n\n\ncore.training_args.AxolotlKTOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nKTO config for KTO training\n\n\n\ncore.training_args.AxolotlORPOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nORPO config for ORPO training\n\n\n\ncore.training_args.AxolotlPRMConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nPRM config for PRM training\n\n\n\ncore.training_args.AxolotlRewardConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nReward config for Reward training\n\n\n\ncore.training_args.AxolotlTrainingArguments(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nTraining arguments for Causal trainer\nThis code is duplicated due to HF TrainingArguments not setting output_dir with a\ndefault value so it can’t be used as a mixin.\n\n\n\ncore.training_args.AxolotlTrainingMixins(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nMixin class for the Axolotl training args."
+ "text": "core.training_args\nextra axolotl specific training args\n\n\n\n\n\nName\nDescription\n\n\n\n\nAxolotlCPOConfig\nCPO config for CPO training\n\n\nAxolotlKTOConfig\nKTO config for KTO training\n\n\nAxolotlORPOConfig\nORPO config for ORPO training\n\n\nAxolotlPRMConfig\nPRM config for PRM training\n\n\nAxolotlRewardConfig\nReward config for Reward training\n\n\nAxolotlTrainingArguments\nTraining arguments for Causal trainer\n\n\n\n\n\ncore.training_args.AxolotlCPOConfig(simpo_gamma=None)\nCPO config for CPO training\n\n\n\ncore.training_args.AxolotlKTOConfig()\nKTO config for KTO training\n\n\n\ncore.training_args.AxolotlORPOConfig()\nORPO config for ORPO training\n\n\n\ncore.training_args.AxolotlPRMConfig()\nPRM config for PRM training\n\n\n\ncore.training_args.AxolotlRewardConfig()\nReward config for Reward training\n\n\n\ncore.training_args.AxolotlTrainingArguments()\nTraining arguments for Causal trainer\nThis code is duplicated due to HF TrainingArguments not setting output_dir with a\ndefault value so it can’t be used as a mixin."
},
{
"objectID": "docs/api/core.training_args.html#classes",
"href": "docs/api/core.training_args.html#classes",
"title": "core.training_args",
"section": "",
- "text": "Name\nDescription\n\n\n\n\nAxolotlCPOConfig\nCPO config for CPO training\n\n\nAxolotlKTOConfig\nKTO config for KTO training\n\n\nAxolotlORPOConfig\nORPO config for ORPO training\n\n\nAxolotlPRMConfig\nPRM config for PRM training\n\n\nAxolotlRewardConfig\nReward config for Reward training\n\n\nAxolotlTrainingArguments\nTraining arguments for Causal trainer\n\n\nAxolotlTrainingMixins\nMixin class for the Axolotl training args.\n\n\n\n\n\ncore.training_args.AxolotlCPOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n simpo_gamma=None,\n)\nCPO config for CPO training\n\n\n\ncore.training_args.AxolotlKTOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nKTO config for KTO training\n\n\n\ncore.training_args.AxolotlORPOConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nORPO config for ORPO training\n\n\n\ncore.training_args.AxolotlPRMConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nPRM config for PRM training\n\n\n\ncore.training_args.AxolotlRewardConfig(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nReward config for Reward training\n\n\n\ncore.training_args.AxolotlTrainingArguments(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nTraining arguments for Causal trainer\nThis code is duplicated due to HF TrainingArguments not setting output_dir with a\ndefault value so it can’t be used as a mixin.\n\n\n\ncore.training_args.AxolotlTrainingMixins(\n model_type=None,\n lr_quadratic_warmup=False,\n pretraining=False,\n sample_packing=False,\n sample_packing_sequentially=False,\n multipack_real_batches=False,\n eval_sample_packing=None,\n sample_packing_efficiency=1.0,\n sample_packing_bin_size=200,\n sample_packing_group_size=100000,\n max_seq_length=2048,\n dataset_num_proc=None,\n relora_steps=None,\n relora_warmup_steps=None,\n relora_anneal_steps=None,\n relora_prune_ratio=0.9,\n bench_split='eval',\n bench_dataset='pharaouk/dharma-1/dharma_1_mini.json',\n do_bench_eval=False,\n do_causal_lm_eval=False,\n max_bench_samples=None,\n bench_source_max_len=2048,\n dataloader_prefetch_factor=None,\n cosine_min_lr_ratio=None,\n cosine_constant_lr_ratio=None,\n loraplus_lr_ratio=None,\n loraplus_lr_embedding=1e-06,\n embedding_lr_scale=None,\n lr_groups=None,\n embedding_lr=None,\n qlora=False,\n orpo_alpha=None,\n lisa_n_layers=None,\n lisa_step_interval=None,\n lisa_layers_attribute=None,\n curriculum_sampling=None,\n alternate_lr_scheduler_type=None,\n chat_template=None,\n kd_ce_alpha=None,\n kd_alpha=1.0,\n kd_temperature=1.0,\n kd_zscore_base_temp=None,\n kd_top_k_before_softmax=None,\n adam_beta3=None,\n adam_epsilon2=None,\n image_size=None,\n image_resize_algorithm=None,\n)\nMixin class for the Axolotl training args."
+ "text": "Name\nDescription\n\n\n\n\nAxolotlCPOConfig\nCPO config for CPO training\n\n\nAxolotlKTOConfig\nKTO config for KTO training\n\n\nAxolotlORPOConfig\nORPO config for ORPO training\n\n\nAxolotlPRMConfig\nPRM config for PRM training\n\n\nAxolotlRewardConfig\nReward config for Reward training\n\n\nAxolotlTrainingArguments\nTraining arguments for Causal trainer\n\n\n\n\n\ncore.training_args.AxolotlCPOConfig(simpo_gamma=None)\nCPO config for CPO training\n\n\n\ncore.training_args.AxolotlKTOConfig()\nKTO config for KTO training\n\n\n\ncore.training_args.AxolotlORPOConfig()\nORPO config for ORPO training\n\n\n\ncore.training_args.AxolotlPRMConfig()\nPRM config for PRM training\n\n\n\ncore.training_args.AxolotlRewardConfig()\nReward config for Reward training\n\n\n\ncore.training_args.AxolotlTrainingArguments()\nTraining arguments for Causal trainer\nThis code is duplicated due to HF TrainingArguments not setting output_dir with a\ndefault value so it can’t be used as a mixin."
},
{
"objectID": "docs/api/prompt_strategies.user_defined.html",
@@ -1085,14 +1085,14 @@
"href": "docs/api/train.html",
"title": "train",
"section": "",
- "text": "train\nPrepare and train a model on a dataset. Can also infer from a model or merge lora\n\n\n\n\n\nName\nDescription\n\n\n\n\ncreate_model_card\nCreate a model card for the trained model if needed.\n\n\ndetermine_resume_checkpoint\nDetermine the checkpoint to resume from based on configuration.\n\n\nexecute_training\nExecute the training process with appropriate SDP kernel configurations.\n\n\nhandle_untrained_tokens_fix\nApply fixes for untrained tokens if configured.\n\n\nsave_initial_configs\nSave initial configurations before training.\n\n\nsave_trained_model\nSave the trained model according to configuration and training setup.\n\n\nsetup_model_and_tokenizer\nLoad the tokenizer, processor (for multimodal models), and model based on\n\n\nsetup_model_and_trainer\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\n\n\nsetup_model_card\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\nsetup_reference_model\nSet up the reference model for RL training if needed.\n\n\nsetup_signal_handler\nSet up signal handler for graceful termination.\n\n\ntrain\nTrain a model on the given dataset.\n\n\n\n\n\ntrain.create_model_card(cfg, trainer)\nCreate a model card for the trained model if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object with model card creation capabilities.\nrequired\n\n\n\n\n\n\n\ntrain.determine_resume_checkpoint(cfg)\nDetermine the checkpoint to resume from based on configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nstr | None\nPath to the checkpoint to resume from, or None if not resuming.\n\n\n\n\n\n\n\ntrain.execute_training(cfg, trainer, resume_from_checkpoint)\nExecute the training process with appropriate SDP kernel configurations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe configured trainer object.\nrequired\n\n\nresume_from_checkpoint\nstr | None\nPath to checkpoint to resume from, if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.handle_untrained_tokens_fix(\n cfg,\n model,\n tokenizer,\n train_dataset,\n safe_serialization,\n)\nApply fixes for untrained tokens if configured.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to apply fixes to.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer for token identification.\nrequired\n\n\ntrain_dataset\nDataset\nThe training dataset to use.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving.\nrequired\n\n\n\n\n\n\n\ntrain.save_initial_configs(cfg, tokenizer, model, peft_config, processor)\nSave initial configurations before training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to save.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save configuration for.\nrequired\n\n\npeft_config\nPeftConfig | None\nThe PEFT configuration to save if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.save_trained_model(cfg, trainer, model, safe_serialization)\nSave the trained model according to configuration and training setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe trainer object.\nrequired\n\n\nmodel\nPreTrainedModel\nThe trained model to save.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization.\nrequired\n\n\n\n\n\n\n\ntrain.setup_model_and_tokenizer(cfg)\nLoad the tokenizer, processor (for multimodal models), and model based on\nconfiguration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple containing model, tokenizer, peft_config (if LoRA / QLoRA, else None), and processor (if multimodal, else None).\n\n\n\n\n\n\n\ntrain.setup_model_and_trainer(cfg, dataset_meta)\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\ntrainer setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters.\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[HFRLTrainerBuilder | HFCausalTrainerBuilder, PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple of: - Trainer (Causal or RLHF) - Model - Tokenizer - PEFT config - Processor\n\n\n\n\n\n\n\ntrain.setup_model_card(cfg)\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\ntrain.setup_reference_model(cfg, tokenizer)\nSet up the reference model for RL training if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to use for the reference model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nPreTrainedModel | None\nReference model if needed for RL training, None otherwise.\n\n\n\n\n\n\n\ntrain.setup_signal_handler(cfg, model, safe_serialization)\nSet up signal handler for graceful termination.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save on termination\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving\nrequired\n\n\n\n\n\n\n\ntrain.train(cfg, dataset_meta)\nTrain a model on the given dataset.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PeftModel | PreTrainedModel, PreTrainedTokenizer, Trainer]\nTuple of (model, tokenizer) after training"
+ "text": "train\nPrepare and train a model on a dataset. Can also infer from a model or merge lora\n\n\n\n\n\nName\nDescription\n\n\n\n\ncreate_model_card\nCreate a model card for the trained model if needed.\n\n\ndetermine_resume_checkpoint\nDetermine the checkpoint to resume from based on configuration.\n\n\nexecute_training\nExecute the training process with appropriate SDP kernel configurations.\n\n\nhandle_untrained_tokens_fix\nApply fixes for untrained tokens if configured.\n\n\nsave_initial_configs\nSave initial configurations before training.\n\n\nsave_trained_model\nSave the trained model according to configuration and training setup.\n\n\nsetup_model_and_tokenizer\nLoad the tokenizer, processor (for multimodal models), and model based on\n\n\nsetup_model_and_trainer\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\n\n\nsetup_model_card\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\nsetup_reference_model\nSet up the reference model for RL training if needed.\n\n\nsetup_signal_handler\nSet up signal handler for graceful termination.\n\n\ntrain\nTrain a model on the given dataset.\n\n\n\n\n\ntrain.create_model_card(cfg, trainer)\nCreate a model card for the trained model if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object with model card creation capabilities.\nrequired\n\n\n\n\n\n\n\ntrain.determine_resume_checkpoint(cfg)\nDetermine the checkpoint to resume from based on configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nstr | None\nPath to the checkpoint to resume from, or None if not resuming.\n\n\n\n\n\n\n\ntrain.execute_training(cfg, trainer, resume_from_checkpoint)\nExecute the training process with appropriate SDP kernel configurations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe configured trainer object.\nrequired\n\n\nresume_from_checkpoint\nstr | None\nPath to checkpoint to resume from, if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.handle_untrained_tokens_fix(\n cfg,\n model,\n tokenizer,\n train_dataset,\n safe_serialization,\n)\nApply fixes for untrained tokens if configured.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to apply fixes to.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer for token identification.\nrequired\n\n\ntrain_dataset\nDataset\nThe training dataset to use.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving.\nrequired\n\n\n\n\n\n\n\ntrain.save_initial_configs(cfg, tokenizer, model, peft_config, processor)\nSave initial configurations before training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to save.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save configuration for.\nrequired\n\n\npeft_config\nPeftConfig | None\nThe PEFT configuration to save if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.save_trained_model(cfg, trainer, model, safe_serialization)\nSave the trained model according to configuration and training setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe trainer object.\nrequired\n\n\nmodel\nPreTrainedModel\nThe trained model to save.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization.\nrequired\n\n\n\n\n\n\n\ntrain.setup_model_and_tokenizer(cfg)\nLoad the tokenizer, processor (for multimodal models), and model based on\nconfiguration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple containing model, tokenizer, peft_config (if LoRA / QLoRA, else None), and processor (if multimodal, else None).\n\n\n\n\n\n\n\ntrain.setup_model_and_trainer(cfg, dataset_meta)\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\ntrainer setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters.\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple['HFRLTrainerBuilder' | 'HFCausalTrainerBuilder', PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple of: - Trainer (Causal or RLHF) - Model - Tokenizer - PEFT config - Processor\n\n\n\n\n\n\n\ntrain.setup_model_card(cfg)\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\ntrain.setup_reference_model(cfg, tokenizer)\nSet up the reference model for RL training if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to use for the reference model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nPreTrainedModel | None\nReference model if needed for RL training, None otherwise.\n\n\n\n\n\n\n\ntrain.setup_signal_handler(cfg, model, safe_serialization)\nSet up signal handler for graceful termination.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save on termination\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving\nrequired\n\n\n\n\n\n\n\ntrain.train(cfg, dataset_meta)\nTrain a model on the given dataset.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PeftModel | PreTrainedModel, PreTrainedTokenizer, Trainer]\nTuple of (model, tokenizer) after training"
},
{
"objectID": "docs/api/train.html#functions",
"href": "docs/api/train.html#functions",
"title": "train",
"section": "",
- "text": "Name\nDescription\n\n\n\n\ncreate_model_card\nCreate a model card for the trained model if needed.\n\n\ndetermine_resume_checkpoint\nDetermine the checkpoint to resume from based on configuration.\n\n\nexecute_training\nExecute the training process with appropriate SDP kernel configurations.\n\n\nhandle_untrained_tokens_fix\nApply fixes for untrained tokens if configured.\n\n\nsave_initial_configs\nSave initial configurations before training.\n\n\nsave_trained_model\nSave the trained model according to configuration and training setup.\n\n\nsetup_model_and_tokenizer\nLoad the tokenizer, processor (for multimodal models), and model based on\n\n\nsetup_model_and_trainer\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\n\n\nsetup_model_card\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\nsetup_reference_model\nSet up the reference model for RL training if needed.\n\n\nsetup_signal_handler\nSet up signal handler for graceful termination.\n\n\ntrain\nTrain a model on the given dataset.\n\n\n\n\n\ntrain.create_model_card(cfg, trainer)\nCreate a model card for the trained model if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object with model card creation capabilities.\nrequired\n\n\n\n\n\n\n\ntrain.determine_resume_checkpoint(cfg)\nDetermine the checkpoint to resume from based on configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nstr | None\nPath to the checkpoint to resume from, or None if not resuming.\n\n\n\n\n\n\n\ntrain.execute_training(cfg, trainer, resume_from_checkpoint)\nExecute the training process with appropriate SDP kernel configurations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe configured trainer object.\nrequired\n\n\nresume_from_checkpoint\nstr | None\nPath to checkpoint to resume from, if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.handle_untrained_tokens_fix(\n cfg,\n model,\n tokenizer,\n train_dataset,\n safe_serialization,\n)\nApply fixes for untrained tokens if configured.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to apply fixes to.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer for token identification.\nrequired\n\n\ntrain_dataset\nDataset\nThe training dataset to use.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving.\nrequired\n\n\n\n\n\n\n\ntrain.save_initial_configs(cfg, tokenizer, model, peft_config, processor)\nSave initial configurations before training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to save.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save configuration for.\nrequired\n\n\npeft_config\nPeftConfig | None\nThe PEFT configuration to save if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.save_trained_model(cfg, trainer, model, safe_serialization)\nSave the trained model according to configuration and training setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe trainer object.\nrequired\n\n\nmodel\nPreTrainedModel\nThe trained model to save.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization.\nrequired\n\n\n\n\n\n\n\ntrain.setup_model_and_tokenizer(cfg)\nLoad the tokenizer, processor (for multimodal models), and model based on\nconfiguration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple containing model, tokenizer, peft_config (if LoRA / QLoRA, else None), and processor (if multimodal, else None).\n\n\n\n\n\n\n\ntrain.setup_model_and_trainer(cfg, dataset_meta)\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\ntrainer setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters.\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[HFRLTrainerBuilder | HFCausalTrainerBuilder, PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple of: - Trainer (Causal or RLHF) - Model - Tokenizer - PEFT config - Processor\n\n\n\n\n\n\n\ntrain.setup_model_card(cfg)\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\ntrain.setup_reference_model(cfg, tokenizer)\nSet up the reference model for RL training if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to use for the reference model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nPreTrainedModel | None\nReference model if needed for RL training, None otherwise.\n\n\n\n\n\n\n\ntrain.setup_signal_handler(cfg, model, safe_serialization)\nSet up signal handler for graceful termination.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save on termination\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving\nrequired\n\n\n\n\n\n\n\ntrain.train(cfg, dataset_meta)\nTrain a model on the given dataset.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PeftModel | PreTrainedModel, PreTrainedTokenizer, Trainer]\nTuple of (model, tokenizer) after training"
+ "text": "Name\nDescription\n\n\n\n\ncreate_model_card\nCreate a model card for the trained model if needed.\n\n\ndetermine_resume_checkpoint\nDetermine the checkpoint to resume from based on configuration.\n\n\nexecute_training\nExecute the training process with appropriate SDP kernel configurations.\n\n\nhandle_untrained_tokens_fix\nApply fixes for untrained tokens if configured.\n\n\nsave_initial_configs\nSave initial configurations before training.\n\n\nsave_trained_model\nSave the trained model according to configuration and training setup.\n\n\nsetup_model_and_tokenizer\nLoad the tokenizer, processor (for multimodal models), and model based on\n\n\nsetup_model_and_trainer\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\n\n\nsetup_model_card\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\nsetup_reference_model\nSet up the reference model for RL training if needed.\n\n\nsetup_signal_handler\nSet up signal handler for graceful termination.\n\n\ntrain\nTrain a model on the given dataset.\n\n\n\n\n\ntrain.create_model_card(cfg, trainer)\nCreate a model card for the trained model if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object with model card creation capabilities.\nrequired\n\n\n\n\n\n\n\ntrain.determine_resume_checkpoint(cfg)\nDetermine the checkpoint to resume from based on configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nstr | None\nPath to the checkpoint to resume from, or None if not resuming.\n\n\n\n\n\n\n\ntrain.execute_training(cfg, trainer, resume_from_checkpoint)\nExecute the training process with appropriate SDP kernel configurations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe configured trainer object.\nrequired\n\n\nresume_from_checkpoint\nstr | None\nPath to checkpoint to resume from, if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.handle_untrained_tokens_fix(\n cfg,\n model,\n tokenizer,\n train_dataset,\n safe_serialization,\n)\nApply fixes for untrained tokens if configured.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to apply fixes to.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer for token identification.\nrequired\n\n\ntrain_dataset\nDataset\nThe training dataset to use.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving.\nrequired\n\n\n\n\n\n\n\ntrain.save_initial_configs(cfg, tokenizer, model, peft_config, processor)\nSave initial configurations before training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to save.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save configuration for.\nrequired\n\n\npeft_config\nPeftConfig | None\nThe PEFT configuration to save if applicable.\nrequired\n\n\n\n\n\n\n\ntrain.save_trained_model(cfg, trainer, model, safe_serialization)\nSave the trained model according to configuration and training setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntrainer\nAny\nThe trainer object.\nrequired\n\n\nmodel\nPreTrainedModel\nThe trained model to save.\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization.\nrequired\n\n\n\n\n\n\n\ntrain.setup_model_and_tokenizer(cfg)\nLoad the tokenizer, processor (for multimodal models), and model based on\nconfiguration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple containing model, tokenizer, peft_config (if LoRA / QLoRA, else None), and processor (if multimodal, else None).\n\n\n\n\n\n\n\ntrain.setup_model_and_trainer(cfg, dataset_meta)\nLoad model, tokenizer, trainer, etc. Helper function to encapsulate the full\ntrainer setup.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters.\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple['HFRLTrainerBuilder' | 'HFCausalTrainerBuilder', PeftModel | PreTrainedModel, PreTrainedTokenizer, PeftConfig | None, ProcessorMixin | None]\nTuple of: - Trainer (Causal or RLHF) - Model - Tokenizer - PEFT config - Processor\n\n\n\n\n\n\n\ntrain.setup_model_card(cfg)\nSet up the Axolotl badge and add the Axolotl config to the model card if available.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\n\n\n\n\n\ntrain.setup_reference_model(cfg, tokenizer)\nSet up the reference model for RL training if needed.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\ntokenizer\nPreTrainedTokenizer\nThe tokenizer to use for the reference model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nPreTrainedModel | None\nReference model if needed for RL training, None otherwise.\n\n\n\n\n\n\n\ntrain.setup_signal_handler(cfg, model, safe_serialization)\nSet up signal handler for graceful termination.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nDictionary mapping axolotl config keys to values.\nrequired\n\n\nmodel\nPreTrainedModel\nThe model to save on termination\nrequired\n\n\nsafe_serialization\nbool\nWhether to use safe serialization when saving\nrequired\n\n\n\n\n\n\n\ntrain.train(cfg, dataset_meta)\nTrain a model on the given dataset.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration dictionary with training parameters\nrequired\n\n\ndataset_meta\nTrainDatasetMeta\nObject with training, validation datasets and metadata\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntuple[PeftModel | PreTrainedModel, PreTrainedTokenizer, Trainer]\nTuple of (model, tokenizer) after training"
},
{
"objectID": "docs/api/monkeypatch.mixtral.html",
@@ -1127,14 +1127,14 @@
"href": "docs/api/utils.samplers.multipack.html",
"title": "utils.samplers.multipack",
"section": "",
- "text": "utils.samplers.multipack\nMultipack Batch Sampler - An efficient batch sampler for packing variable-length sequences\ninto fixed-capacity batches to optimize memory usage and training throughput.\n\n\n\n\n\nName\nDescription\n\n\n\n\nMultipackBatchSampler\nBatch sampler class for efficient packing of variable-length sequences\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler(\n sampler,\n batch_size,\n batch_max_len,\n lengths,\n packing_efficiency_estimate=1.0,\n drop_last=False,\n num_count_samples=8,\n sequential=False,\n group_size=100000,\n bin_size=200,\n num_processes=None,\n safe_mode=True,\n **kwargs,\n)\nBatch sampler class for efficient packing of variable-length sequences\nThis sampler packs sequences into fixed-capacity bins (batches) to maximize\nGPU memory utilization and training throughput by reducing padding.\nIt supports both parallel packing (using FFD algorithm) and\nsequential packing (preserving original sequence order).\n\n\n\n\n\nName\nDescription\n\n\n\n\nefficiency\nCalculate the packing efficiency (ratio of tokens used to total token slots).\n\n\ngather_efficiency\nGather and synchronize packing efficiency estimates across all distributed\n\n\ngather_len_batches\nGather and synchronize batch counts across all distributed ranks. Returns\n\n\ngenerate_batches\nGenerate packed batches for training.\n\n\nset_epoch\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.efficiency()\nCalculate the packing efficiency (ratio of tokens used to total token slots).\nHigher is better - 1.0 would mean perfect packing with no wasted space.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_efficiency()\nGather and synchronize packing efficiency estimates across all distributed\nranks.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nfloat\nA conservative efficiency estimate based on the measurements.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_len_batches(num)\nGather and synchronize batch counts across all distributed ranks. Returns\nthe minimum number of batches available on any rank.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.generate_batches(set_stats=False)\nGenerate packed batches for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nset_stats\nbool\nWhether to update efficiency statistics.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[list[int]]]\nList of batches, where each batch contains multiple bins, and each bin contains multiple sequence indices.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.set_epoch(epoch)\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nallocate_sequentially\nSequential allocator that preserves example order.\n\n\nffd_check\nFirst-fit-decreasing bin packing algorithm check.\n\n\npack_group\nPack a group of sequences into bins using First-Fit Decreasing algorithm.\n\n\npack_parallel\nPack sequences into bins using parallel processing.\n\n\n\n\n\nutils.samplers.multipack.allocate_sequentially(\n sequence_lengths,\n rank,\n bin_capacity,\n num_ranks,\n)\nSequential allocator that preserves example order.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nThe lengths of all examples.\nrequired\n\n\nrank\nint\nThe current rank (for distributed training).\nrequired\n\n\nbin_capacity\nint\nThe capacity of each bin (maximum sequence length).\nrequired\n\n\nnum_ranks\nint\nNumber of ranks (processes / GPUs).\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nrank_batches\nlist[list[int]]\nList of batches for the current rank.\n\n\ntotal_tokens_used\nint\nNumber of actual example tokens.\n\n\ntotal_token_slots\nint\nMaximum theoretical number of example tokens (number of bins * bin capacity).\n\n\n\n\n\n\n\nutils.samplers.multipack.ffd_check(sequence_lengths, bin_capacity, num_bins)\nFirst-fit-decreasing bin packing algorithm check.\nChecks if sequences with the given lengths could fit in the specified number of\nbins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin.\nrequired\n\n\nnum_bins\nint\nNumber of bins available.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nbool\nTrue if all sequences can be packed, False otherwise.\n\n\n\n\n\n\n\nutils.samplers.multipack.pack_group(\n sequence_lengths,\n group_offset,\n bin_capacity,\n max_bins,\n bin_size,\n safe_mode=True,\n)\nPack a group of sequences into bins using First-Fit Decreasing algorithm.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\ngroup_offset\nint\nOffset to apply to indices when returning results.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin.\nrequired\n\n\nmax_bins\nint\nMaximum number of bins to use.\nrequired\n\n\nbin_size\nint\nMaximum number of sequences per bin.\nrequired\n\n\nsafe_mode\nbool\nIf True, use a more conservative packing approach.\nTrue\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[int]]\nList of bins, where each bin contains indices of sequences assigned to it.\n\n\n\n\n\n\n\nutils.samplers.multipack.pack_parallel(\n sequence_lengths,\n bin_capacity,\n group_size,\n bin_size,\n num_processes=None,\n safe_mode=True,\n mp_start_method='spawn',\n)\nPack sequences into bins using parallel processing.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin as total number of tokens.\nrequired\n\n\ngroup_size\nint\nNumber of sequences to process in each group.\nrequired\n\n\nbin_size\nint\nMaximum number of bins to use.\nrequired\n\n\nnum_processes\nint | None\nNumber of parallel processes to use.\nNone\n\n\nsafe_mode\nbool\nIf True, use a more conservative packing approach.\nTrue\n\n\nmp_start_method\nstr | None\nMultiprocessing start method (‘fork’, ‘spawn’, ‘forkserver’). ‘spawn’ is often safer with Numba/PyTorch. Set to None to use system default.\n'spawn'\n\n\n\nReturns:\nList of bins, where each bin contains indices of sequences assigned to it."
+ "text": "utils.samplers.multipack\nMultipack Batch Sampler - An efficient batch sampler for packing variable-length sequences\ninto fixed-capacity batches to optimize memory usage and training throughput.\n\n\n\n\n\nName\nDescription\n\n\n\n\nMultipackBatchSampler\nBatch sampler class for efficient packing of variable-length sequences\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler(\n sampler,\n batch_size,\n batch_max_len,\n lengths,\n packing_efficiency_estimate=1.0,\n drop_last=True,\n num_count_samples=8,\n sequential=False,\n group_size=100000,\n bin_size=200,\n num_processes=None,\n safe_mode=True,\n **kwargs,\n)\nBatch sampler class for efficient packing of variable-length sequences\nThis sampler packs sequences into fixed-capacity bins (batches) to maximize\nGPU memory utilization and training throughput by reducing padding.\nIt supports both parallel packing (using FFD algorithm) and\nsequential packing (preserving original sequence order).\n\n\n\n\n\nName\nDescription\n\n\n\n\nefficiency\nCalculate the packing efficiency (ratio of tokens used to total token slots).\n\n\ngather_efficiency\nGather and synchronize packing efficiency estimates across all distributed\n\n\ngather_len_batches\nGather and synchronize batch counts across all distributed ranks. Returns\n\n\ngenerate_batches\nGenerate packed batches for training.\n\n\nset_epoch\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.efficiency()\nCalculate the packing efficiency (ratio of tokens used to total token slots).\nHigher is better - 1.0 would mean perfect packing with no wasted space.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_efficiency()\nGather and synchronize packing efficiency estimates across all distributed\nranks.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nfloat\nA conservative efficiency estimate based on the measurements.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_len_batches(num)\nGather and synchronize batch counts across all distributed ranks. Returns\nthe minimum number of batches available on any rank.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.generate_batches(set_stats=False)\nGenerate packed batches for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nset_stats\nbool\nWhether to update efficiency statistics.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[list[int]]]\nList of batches, where each batch contains multiple bins, and each bin contains multiple sequence indices.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.set_epoch(epoch)\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nallocate_sequentially\nSequential allocator that preserves example order.\n\n\nffd_check\nFirst-fit-decreasing bin packing algorithm check.\n\n\npack_group\nPack a group of sequences into bins using First-Fit Decreasing algorithm.\n\n\npack_parallel\nPack sequences into bins using parallel processing.\n\n\n\n\n\nutils.samplers.multipack.allocate_sequentially(\n sequence_lengths,\n rank,\n bin_capacity,\n num_ranks,\n)\nSequential allocator that preserves example order.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nThe lengths of all examples.\nrequired\n\n\nrank\nint\nThe current rank (for distributed training).\nrequired\n\n\nbin_capacity\nint\nThe capacity of each bin (maximum sequence length).\nrequired\n\n\nnum_ranks\nint\nNumber of ranks (processes / GPUs).\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nrank_batches\nlist[list[int]]\nList of batches for the current rank.\n\n\ntotal_tokens_used\nint\nNumber of actual example tokens.\n\n\ntotal_token_slots\nint\nMaximum theoretical number of example tokens (number of bins * bin capacity).\n\n\n\n\n\n\n\nutils.samplers.multipack.ffd_check(sequence_lengths, bin_capacity, num_bins)\nFirst-fit-decreasing bin packing algorithm check.\nChecks if sequences with the given lengths could fit in the specified number of\nbins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin.\nrequired\n\n\nnum_bins\nint\nNumber of bins available.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nbool\nTrue if all sequences can be packed, False otherwise.\n\n\n\n\n\n\n\nutils.samplers.multipack.pack_group(\n sequence_lengths,\n group_offset,\n bin_capacity,\n max_bins,\n bin_size,\n safe_mode=True,\n)\nPack a group of sequences into bins using First-Fit Decreasing algorithm.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\ngroup_offset\nint\nOffset to apply to indices when returning results.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin.\nrequired\n\n\nmax_bins\nint\nMaximum number of bins to use.\nrequired\n\n\nbin_size\nint\nMaximum number of sequences per bin.\nrequired\n\n\nsafe_mode\nbool\nIf True, use a more conservative packing approach.\nTrue\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[int]]\nList of bins, where each bin contains indices of sequences assigned to it.\n\n\n\n\n\n\n\nutils.samplers.multipack.pack_parallel(\n sequence_lengths,\n bin_capacity,\n group_size,\n bin_size,\n num_processes=None,\n safe_mode=True,\n mp_start_method='spawn',\n)\nPack sequences into bins using parallel processing.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsequence_lengths\nnp.ndarray\nArray of sequence lengths.\nrequired\n\n\nbin_capacity\nint\nMaximum capacity of each bin as total number of tokens.\nrequired\n\n\ngroup_size\nint\nNumber of sequences to process in each group.\nrequired\n\n\nbin_size\nint\nMaximum number of bins to use.\nrequired\n\n\nnum_processes\nint | None\nNumber of parallel processes to use.\nNone\n\n\nsafe_mode\nbool\nIf True, use a more conservative packing approach.\nTrue\n\n\nmp_start_method\nstr | None\nMultiprocessing start method (‘fork’, ‘spawn’, ‘forkserver’). ‘spawn’ is often safer with Numba/PyTorch. Set to None to use system default.\n'spawn'\n\n\n\nReturns:\nList of bins, where each bin contains indices of sequences assigned to it."
},
{
"objectID": "docs/api/utils.samplers.multipack.html#classes",
"href": "docs/api/utils.samplers.multipack.html#classes",
"title": "utils.samplers.multipack",
"section": "",
- "text": "Name\nDescription\n\n\n\n\nMultipackBatchSampler\nBatch sampler class for efficient packing of variable-length sequences\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler(\n sampler,\n batch_size,\n batch_max_len,\n lengths,\n packing_efficiency_estimate=1.0,\n drop_last=False,\n num_count_samples=8,\n sequential=False,\n group_size=100000,\n bin_size=200,\n num_processes=None,\n safe_mode=True,\n **kwargs,\n)\nBatch sampler class for efficient packing of variable-length sequences\nThis sampler packs sequences into fixed-capacity bins (batches) to maximize\nGPU memory utilization and training throughput by reducing padding.\nIt supports both parallel packing (using FFD algorithm) and\nsequential packing (preserving original sequence order).\n\n\n\n\n\nName\nDescription\n\n\n\n\nefficiency\nCalculate the packing efficiency (ratio of tokens used to total token slots).\n\n\ngather_efficiency\nGather and synchronize packing efficiency estimates across all distributed\n\n\ngather_len_batches\nGather and synchronize batch counts across all distributed ranks. Returns\n\n\ngenerate_batches\nGenerate packed batches for training.\n\n\nset_epoch\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.efficiency()\nCalculate the packing efficiency (ratio of tokens used to total token slots).\nHigher is better - 1.0 would mean perfect packing with no wasted space.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_efficiency()\nGather and synchronize packing efficiency estimates across all distributed\nranks.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nfloat\nA conservative efficiency estimate based on the measurements.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_len_batches(num)\nGather and synchronize batch counts across all distributed ranks. Returns\nthe minimum number of batches available on any rank.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.generate_batches(set_stats=False)\nGenerate packed batches for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nset_stats\nbool\nWhether to update efficiency statistics.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[list[int]]]\nList of batches, where each batch contains multiple bins, and each bin contains multiple sequence indices.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.set_epoch(epoch)\nSet the epoch number, used for reproducible shuffling across epochs"
+ "text": "Name\nDescription\n\n\n\n\nMultipackBatchSampler\nBatch sampler class for efficient packing of variable-length sequences\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler(\n sampler,\n batch_size,\n batch_max_len,\n lengths,\n packing_efficiency_estimate=1.0,\n drop_last=True,\n num_count_samples=8,\n sequential=False,\n group_size=100000,\n bin_size=200,\n num_processes=None,\n safe_mode=True,\n **kwargs,\n)\nBatch sampler class for efficient packing of variable-length sequences\nThis sampler packs sequences into fixed-capacity bins (batches) to maximize\nGPU memory utilization and training throughput by reducing padding.\nIt supports both parallel packing (using FFD algorithm) and\nsequential packing (preserving original sequence order).\n\n\n\n\n\nName\nDescription\n\n\n\n\nefficiency\nCalculate the packing efficiency (ratio of tokens used to total token slots).\n\n\ngather_efficiency\nGather and synchronize packing efficiency estimates across all distributed\n\n\ngather_len_batches\nGather and synchronize batch counts across all distributed ranks. Returns\n\n\ngenerate_batches\nGenerate packed batches for training.\n\n\nset_epoch\nSet the epoch number, used for reproducible shuffling across epochs\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.efficiency()\nCalculate the packing efficiency (ratio of tokens used to total token slots).\nHigher is better - 1.0 would mean perfect packing with no wasted space.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_efficiency()\nGather and synchronize packing efficiency estimates across all distributed\nranks.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nfloat\nA conservative efficiency estimate based on the measurements.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.gather_len_batches(num)\nGather and synchronize batch counts across all distributed ranks. Returns\nthe minimum number of batches available on any rank.\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.generate_batches(set_stats=False)\nGenerate packed batches for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nset_stats\nbool\nWhether to update efficiency statistics.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[list[list[int]]]\nList of batches, where each batch contains multiple bins, and each bin contains multiple sequence indices.\n\n\n\n\n\n\n\nutils.samplers.multipack.MultipackBatchSampler.set_epoch(epoch)\nSet the epoch number, used for reproducible shuffling across epochs"
},
{
"objectID": "docs/api/utils.samplers.multipack.html#functions",
@@ -1428,14 +1428,14 @@
"href": "docs/api/integrations.kd.trainer.html",
"title": "integrations.kd.trainer",
"section": "",
- "text": "integrations.kd.trainer\nKD trainer\n\n\n\n\n\nName\nDescription\n\n\n\n\nAxolotlKDTrainer\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer(\n *_args,\n bench_data_collator=None,\n eval_data_collator=None,\n dataset_tags=None,\n **kwargs,\n)\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nName\nDescription\n\n\n\n\ncompute_loss\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer.compute_loss(\n model,\n inputs,\n return_outputs=False,\n num_items_in_batch=None,\n)\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\nSubclass and override for custom behavior."
+ "text": "integrations.kd.trainer\nKD trainer\n\n\n\n\n\nName\nDescription\n\n\n\n\nAxolotlKDTrainer\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer(*args, **kwargs)\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nName\nDescription\n\n\n\n\ncompute_loss\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer.compute_loss(\n model,\n inputs,\n return_outputs=False,\n num_items_in_batch=None,\n)\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\nSubclass and override for custom behavior."
},
{
"objectID": "docs/api/integrations.kd.trainer.html#classes",
"href": "docs/api/integrations.kd.trainer.html#classes",
"title": "integrations.kd.trainer",
"section": "",
- "text": "Name\nDescription\n\n\n\n\nAxolotlKDTrainer\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer(\n *_args,\n bench_data_collator=None,\n eval_data_collator=None,\n dataset_tags=None,\n **kwargs,\n)\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nName\nDescription\n\n\n\n\ncompute_loss\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer.compute_loss(\n model,\n inputs,\n return_outputs=False,\n num_items_in_batch=None,\n)\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\nSubclass and override for custom behavior."
+ "text": "Name\nDescription\n\n\n\n\nAxolotlKDTrainer\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer(*args, **kwargs)\nCustom trainer subclass for Knowledge Distillation (KD)\n\n\n\n\n\nName\nDescription\n\n\n\n\ncompute_loss\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\n\n\n\n\n\nintegrations.kd.trainer.AxolotlKDTrainer.compute_loss(\n model,\n inputs,\n return_outputs=False,\n num_items_in_batch=None,\n)\nHow the loss is computed by Trainer. By default, all models return the loss in the first element.\nSubclass and override for custom behavior."
},
{
"objectID": "docs/api/utils.schemas.enums.html",
@@ -2720,14 +2720,14 @@
"href": "docs/api/integrations.base.html",
"title": "integrations.base",
"section": "",
- "text": "integrations.base\nBase class for all plugins.\nA plugin is a reusable, modular, and self-contained piece of code that extends the functionality of Axolotl.\nPlugins can be used to integrate third-party models, modify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and implement the required methods.\n\n\n\n\n\nName\nDescription\n\n\n\n\nBaseOptimizerFactory\nBase class for factories to create custom optimizers\n\n\nBasePlugin\nBase class for all plugins. Defines the interface for plugin methods.\n\n\nPluginManager\nThe PluginManager class is responsible for loading and managing plugins. It\n\n\n\n\n\nintegrations.base.BaseOptimizerFactory()\nBase class for factories to create custom optimizers\n\n\n\nintegrations.base.BasePlugin()\nBase class for all plugins. Defines the interface for plugin methods.\nA plugin is a reusable, modular, and self-contained piece of code that extends\nthe functionality of Axolotl. Plugins can be used to integrate third-party models,\nmodify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and\nimplement the required methods.\n\n\nPlugin methods include:\n- register(cfg): Registers the plugin with the given configuration.\n- load_datasets(cfg): Loads and preprocesses the dataset for training.\n- pre_model_load(cfg): Performs actions before the model is loaded.\n- post_model_build(cfg, model): Performs actions after the model is loaded, but\nbefore LoRA adapters are applied.\n- pre_lora_load(cfg, model): Performs actions before LoRA weights are loaded.\n- post_lora_load(cfg, model): Performs actions after LoRA weights are loaded.\n- post_model_load(cfg, model): Performs actions after the model is loaded,\ninclusive of any adapters.\n- post_trainer_create(cfg, trainer): Performs actions after the trainer is\ncreated.\n- create_optimizer(cfg, trainer): Creates and returns an optimizer for training.\n- create_lr_scheduler(cfg, trainer, optimizer, num_training_steps): Creates and\nreturns a learning rate scheduler.\n- add_callbacks_pre_trainer(cfg, model): Adds callbacks to the trainer before\ntraining.\n- add_callbacks_post_trainer(cfg, trainer): Adds callbacks to the trainer after\ntraining.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nAdds callbacks to the trainer after creating the trainer. This is useful for\n\n\nadd_callbacks_pre_trainer\nSet up callbacks before creating the trainer.\n\n\ncreate_lr_scheduler\nCreates and returns a learning rate scheduler.\n\n\ncreate_optimizer\nCreates and returns an optimizer for training.\n\n\nget_input_args\nReturns a pydantic model for the plugin’s input arguments.\n\n\nget_trainer_cls\nReturns a custom class for the trainer.\n\n\nload_datasets\nLoads and preprocesses the dataset for training.\n\n\npost_lora_load\nPerforms actions after LoRA weights are loaded.\n\n\npost_model_build\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\npost_model_load\nPerforms actions after the model is loaded.\n\n\npost_train\nPerforms actions after training is complete.\n\n\npost_train_unload\nPerforms actions after training is complete and the model is unloaded.\n\n\npost_trainer_create\nPerforms actions after the trainer is created.\n\n\npre_lora_load\nPerforms actions before LoRA weights are loaded.\n\n\npre_model_load\nPerforms actions before the model is loaded.\n\n\nregister\nRegisters the plugin with the given configuration.\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_post_trainer(cfg, trainer)\nAdds callbacks to the trainer after creating the trainer. This is useful for\ncallbacks that require access to the model or trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_pre_trainer(cfg, model)\nSet up callbacks before creating the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_lr_scheduler(\n cfg,\n trainer,\n optimizer,\n num_training_steps,\n)\nCreates and returns a learning rate scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\nnum_training_steps\nint\nTotal number of training steps\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_optimizer(cfg, trainer)\nCreates and returns an optimizer for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_input_args()\nReturns a pydantic model for the plugin’s input arguments.\n\n\n\nintegrations.base.BasePlugin.get_trainer_cls(cfg)\nReturns a custom class for the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.load_datasets(cfg, preprocess=False)\nLoads and preprocesses the dataset for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\npreprocess\nbool\nWhether this is the preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\ndataset_meta\nUnion['TrainDatasetMeta', None]\nThe metadata for the training dataset.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_lora_load(cfg, model)\nPerforms actions after LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_build(cfg, model)\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_load(cfg, model)\nPerforms actions after the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train(cfg, model)\nPerforms actions after training is complete.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe axolotl configuration.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train_unload(cfg)\nPerforms actions after training is complete and the model is unloaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_trainer_create(cfg, trainer)\nPerforms actions after the trainer is created.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_lora_load(cfg, model)\nPerforms actions before LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_model_load(cfg)\nPerforms actions before the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.register(cfg)\nRegisters the plugin with the given configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\n\n\nintegrations.base.PluginManager()\nThe PluginManager class is responsible for loading and managing plugins. It\nshould be a singleton so it can be accessed from anywhere in the codebase.\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nplugins\nOrderedDict[str, BasePlugin]\nA list of loaded plugins.\n\n\n\n\n\n\nKey methods include:\n- get_instance(): Static method to get the singleton instance of PluginManager.\n- register(plugin_name: str): Registers a new plugin by its name.\n- pre_model_load(cfg): Calls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\nadd_callbacks_pre_trainer\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\ncreate_lr_scheduler\nCalls the create_lr_scheduler method of all registered plugins and returns\n\n\ncreate_optimizer\nCalls the create_optimizer method of all registered plugins and returns\n\n\nget_input_args\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\nget_instance\nReturns the singleton instance of PluginManager. If the instance doesn’t\n\n\nget_trainer_cls\nCalls the get_trainer_cls method of all registered plugins and returns the\n\n\nload_datasets\nCalls the load_datasets method of each registered plugin.\n\n\npost_lora_load\nCalls the post_lora_load method of all registered plugins.\n\n\npost_model_build\nCalls the post_model_build method of all registered plugins after the\n\n\npost_model_load\nCalls the post_model_load method of all registered plugins after the model\n\n\npost_train\nCalls the post_train method of all registered plugins.\n\n\npost_train_unload\nCalls the post_train_unload method of all registered plugins.\n\n\npost_trainer_create\nCalls the post_trainer_create method of all registered plugins.\n\n\npre_lora_load\nCalls the pre_lora_load method of all registered plugins.\n\n\npre_model_load\nCalls the pre_model_load method of all registered plugins.\n\n\nregister\nRegisters a new plugin by its name.\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_lr_scheduler(\n trainer,\n optimizer,\n num_training_steps,\n)\nCalls the create_lr_scheduler method of all registered plugins and returns\nthe first non-None scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler, or None if not found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_optimizer(trainer)\nCalls the create_optimizer method of all registered plugins and returns\nthe first non-None optimizer.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer, or None if none was found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_input_args()\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[str]\nA list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_instance()\nReturns the singleton instance of PluginManager. If the instance doesn’t\nexist, it creates a new one.\n\n\n\nintegrations.base.PluginManager.get_trainer_cls(cfg)\nCalls the get_trainer_cls method of all registered plugins and returns the\nfirst non-None trainer class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.load_datasets(cfg, preprocess=False)\nCalls the load_datasets method of each registered plugin.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\npreprocess\nbool\nWhether this is preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nUnion['TrainDatasetMeta', None]\nThe dataset metadata loaded from all registered plugins.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_lora_load(cfg, model)\nCalls the post_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_build(cfg, model)\nCalls the post_model_build method of all registered plugins after the\nmodel has been built / loaded, but before any adapters have been applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_load(cfg, model)\nCalls the post_model_load method of all registered plugins after the model\nhas been loaded inclusive of any adapters.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train(cfg, model)\nCalls the post_train method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train_unload(cfg)\nCalls the post_train_unload method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_trainer_create(cfg, trainer)\nCalls the post_trainer_create method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_lora_load(cfg, model)\nCalls the pre_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_model_load(cfg)\nCalls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.register(plugin_name)\nRegisters a new plugin by its name.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be registered.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported.\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nload_plugin\nLoads a plugin based on the given plugin name.\n\n\n\n\n\nintegrations.base.load_plugin(plugin_name)\nLoads a plugin based on the given plugin name.\nThe plugin name should be in the format “module_name.class_name”. This function\nsplits the plugin name into module and class, imports the module, retrieves the\nclass from the module, and creates an instance of the class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be loaded. The name should be in the format “module_name.class_name”.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nBasePlugin\nAn instance of the loaded plugin.\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported."
+ "text": "integrations.base\nBase class for all plugins.\nA plugin is a reusable, modular, and self-contained piece of code that extends the functionality of Axolotl.\nPlugins can be used to integrate third-party models, modify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and implement the required methods.\n\n\n\n\n\nName\nDescription\n\n\n\n\nBaseOptimizerFactory\nBase class for factories to create custom optimizers\n\n\nBasePlugin\nBase class for all plugins. Defines the interface for plugin methods.\n\n\nPluginManager\nThe PluginManager class is responsible for loading and managing plugins. It\n\n\n\n\n\nintegrations.base.BaseOptimizerFactory()\nBase class for factories to create custom optimizers\n\n\n\nintegrations.base.BasePlugin()\nBase class for all plugins. Defines the interface for plugin methods.\nA plugin is a reusable, modular, and self-contained piece of code that extends\nthe functionality of Axolotl. Plugins can be used to integrate third-party models,\nmodify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and\nimplement the required methods.\n\n\nPlugin methods include:\n- register(cfg): Registers the plugin with the given configuration.\n- load_datasets(cfg): Loads and preprocesses the dataset for training.\n- pre_model_load(cfg): Performs actions before the model is loaded.\n- post_model_build(cfg, model): Performs actions after the model is loaded, but\nbefore LoRA adapters are applied.\n- pre_lora_load(cfg, model): Performs actions before LoRA weights are loaded.\n- post_lora_load(cfg, model): Performs actions after LoRA weights are loaded.\n- post_model_load(cfg, model): Performs actions after the model is loaded,\ninclusive of any adapters.\n- post_trainer_create(cfg, trainer): Performs actions after the trainer is\ncreated.\n- create_optimizer(cfg, trainer): Creates and returns an optimizer for training.\n- create_lr_scheduler(cfg, trainer, optimizer, num_training_steps): Creates and\nreturns a learning rate scheduler.\n- add_callbacks_pre_trainer(cfg, model): Adds callbacks to the trainer before\ntraining.\n- add_callbacks_post_trainer(cfg, trainer): Adds callbacks to the trainer after\ntraining.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nAdds callbacks to the trainer after creating the trainer. This is useful for\n\n\nadd_callbacks_pre_trainer\nSet up callbacks before creating the trainer.\n\n\ncreate_lr_scheduler\nCreates and returns a learning rate scheduler.\n\n\ncreate_optimizer\nCreates and returns an optimizer for training.\n\n\nget_collator_cls_and_kwargs\nReturns a custom class for the collator.\n\n\nget_input_args\nReturns a pydantic model for the plugin’s input arguments.\n\n\nget_trainer_cls\nReturns a custom class for the trainer.\n\n\nget_training_args\nReturns custom training arguments to set on TrainingArgs.\n\n\nget_training_args_mixin\nReturns a dataclass model for the plugin’s training arguments.\n\n\nload_datasets\nLoads and preprocesses the dataset for training.\n\n\npost_lora_load\nPerforms actions after LoRA weights are loaded.\n\n\npost_model_build\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\npost_model_load\nPerforms actions after the model is loaded.\n\n\npost_train\nPerforms actions after training is complete.\n\n\npost_train_unload\nPerforms actions after training is complete and the model is unloaded.\n\n\npost_trainer_create\nPerforms actions after the trainer is created.\n\n\npre_lora_load\nPerforms actions before LoRA weights are loaded.\n\n\npre_model_load\nPerforms actions before the model is loaded.\n\n\nregister\nRegisters the plugin with the given configuration.\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_post_trainer(cfg, trainer)\nAdds callbacks to the trainer after creating the trainer. This is useful for\ncallbacks that require access to the model or trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_pre_trainer(cfg, model)\nSet up callbacks before creating the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_lr_scheduler(\n cfg,\n trainer,\n optimizer,\n num_training_steps,\n)\nCreates and returns a learning rate scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\nnum_training_steps\nint\nTotal number of training steps\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_optimizer(cfg, trainer)\nCreates and returns an optimizer for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_collator_cls_and_kwargs(cfg, is_eval=False)\nReturns a custom class for the collator.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\nis_eval\nbool\nWhether this is an eval split.\nFalse\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nclass\n\nThe class for the collator.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_input_args()\nReturns a pydantic model for the plugin’s input arguments.\n\n\n\nintegrations.base.BasePlugin.get_trainer_cls(cfg)\nReturns a custom class for the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_training_args(cfg)\nReturns custom training arguments to set on TrainingArgs.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nobject\n\ndict containing the training arguments.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_training_args_mixin()\nReturns a dataclass model for the plugin’s training arguments.\n\n\n\nintegrations.base.BasePlugin.load_datasets(cfg, preprocess=False)\nLoads and preprocesses the dataset for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\npreprocess\nbool\nWhether this is the preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\ndataset_meta\nUnion['TrainDatasetMeta', None]\nThe metadata for the training dataset.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_lora_load(cfg, model)\nPerforms actions after LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_build(cfg, model)\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_load(cfg, model)\nPerforms actions after the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train(cfg, model)\nPerforms actions after training is complete.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe axolotl configuration.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train_unload(cfg)\nPerforms actions after training is complete and the model is unloaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_trainer_create(cfg, trainer)\nPerforms actions after the trainer is created.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_lora_load(cfg, model)\nPerforms actions before LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_model_load(cfg)\nPerforms actions before the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.register(cfg)\nRegisters the plugin with the given configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\n\n\nintegrations.base.PluginManager()\nThe PluginManager class is responsible for loading and managing plugins. It\nshould be a singleton so it can be accessed from anywhere in the codebase.\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nplugins\nOrderedDict[str, BasePlugin]\nA list of loaded plugins.\n\n\n\n\n\n\nKey methods include:\n- get_instance(): Static method to get the singleton instance of PluginManager.\n- register(plugin_name: str): Registers a new plugin by its name.\n- pre_model_load(cfg): Calls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\nadd_callbacks_pre_trainer\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\ncreate_lr_scheduler\nCalls the create_lr_scheduler method of all registered plugins and returns\n\n\ncreate_optimizer\nCalls the create_optimizer method of all registered plugins and returns\n\n\nget_collator_cls_and_kwargs\nCalls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.\n\n\nget_input_args\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\nget_instance\nReturns the singleton instance of PluginManager. If the instance doesn’t\n\n\nget_trainer_cls\nCalls the get_trainer_cls method of all registered plugins and returns the\n\n\nget_training_args\nCalls the get_training_args method of all registered plugins and returns the combined training arguments.\n\n\nget_training_args_mixin\nReturns a list of dataclasses for all registered plugins’ training args mixins’\n\n\nload_datasets\nCalls the load_datasets method of each registered plugin.\n\n\npost_lora_load\nCalls the post_lora_load method of all registered plugins.\n\n\npost_model_build\nCalls the post_model_build method of all registered plugins after the\n\n\npost_model_load\nCalls the post_model_load method of all registered plugins after the model\n\n\npost_train\nCalls the post_train method of all registered plugins.\n\n\npost_train_unload\nCalls the post_train_unload method of all registered plugins.\n\n\npost_trainer_create\nCalls the post_trainer_create method of all registered plugins.\n\n\npre_lora_load\nCalls the pre_lora_load method of all registered plugins.\n\n\npre_model_load\nCalls the pre_model_load method of all registered plugins.\n\n\nregister\nRegisters a new plugin by its name.\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_lr_scheduler(\n trainer,\n optimizer,\n num_training_steps,\n)\nCalls the create_lr_scheduler method of all registered plugins and returns\nthe first non-None scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler, or None if not found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_optimizer(trainer)\nCalls the create_optimizer method of all registered plugins and returns\nthe first non-None optimizer.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer, or None if none was found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_collator_cls_and_kwargs(cfg, is_eval=False)\nCalls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.\nParameters:\ncfg (dict): The configuration for the plugins.\nis_eval (bool): Whether this is an eval split.\nReturns:\nobject: The collator class, or None if none was found.\n\n\n\nintegrations.base.PluginManager.get_input_args()\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[str]\nA list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_instance()\nReturns the singleton instance of PluginManager. If the instance doesn’t\nexist, it creates a new one.\n\n\n\nintegrations.base.PluginManager.get_trainer_cls(cfg)\nCalls the get_trainer_cls method of all registered plugins and returns the\nfirst non-None trainer class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_training_args(cfg)\nCalls the get_training_args method of all registered plugins and returns the combined training arguments.\nParameters:\ncfg (dict): The configuration for the plugins.\nReturns:\nobject: The training arguments\n\n\n\nintegrations.base.PluginManager.get_training_args_mixin()\nReturns a list of dataclasses for all registered plugins’ training args mixins’\nReturns:\nlist[str]: A list of dataclsses\n\n\n\nintegrations.base.PluginManager.load_datasets(cfg, preprocess=False)\nCalls the load_datasets method of each registered plugin.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\npreprocess\nbool\nWhether this is preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nUnion['TrainDatasetMeta', None]\nThe dataset metadata loaded from all registered plugins.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_lora_load(cfg, model)\nCalls the post_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_build(cfg, model)\nCalls the post_model_build method of all registered plugins after the\nmodel has been built / loaded, but before any adapters have been applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_load(cfg, model)\nCalls the post_model_load method of all registered plugins after the model\nhas been loaded inclusive of any adapters.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train(cfg, model)\nCalls the post_train method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train_unload(cfg)\nCalls the post_train_unload method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_trainer_create(cfg, trainer)\nCalls the post_trainer_create method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_lora_load(cfg, model)\nCalls the pre_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_model_load(cfg)\nCalls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.register(plugin_name)\nRegisters a new plugin by its name.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be registered.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported.\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nload_plugin\nLoads a plugin based on the given plugin name.\n\n\n\n\n\nintegrations.base.load_plugin(plugin_name)\nLoads a plugin based on the given plugin name.\nThe plugin name should be in the format “module_name.class_name”. This function\nsplits the plugin name into module and class, imports the module, retrieves the\nclass from the module, and creates an instance of the class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be loaded. The name should be in the format “module_name.class_name”.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nBasePlugin\nAn instance of the loaded plugin.\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported."
},
{
"objectID": "docs/api/integrations.base.html#classes",
"href": "docs/api/integrations.base.html#classes",
"title": "integrations.base",
"section": "",
- "text": "Name\nDescription\n\n\n\n\nBaseOptimizerFactory\nBase class for factories to create custom optimizers\n\n\nBasePlugin\nBase class for all plugins. Defines the interface for plugin methods.\n\n\nPluginManager\nThe PluginManager class is responsible for loading and managing plugins. It\n\n\n\n\n\nintegrations.base.BaseOptimizerFactory()\nBase class for factories to create custom optimizers\n\n\n\nintegrations.base.BasePlugin()\nBase class for all plugins. Defines the interface for plugin methods.\nA plugin is a reusable, modular, and self-contained piece of code that extends\nthe functionality of Axolotl. Plugins can be used to integrate third-party models,\nmodify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and\nimplement the required methods.\n\n\nPlugin methods include:\n- register(cfg): Registers the plugin with the given configuration.\n- load_datasets(cfg): Loads and preprocesses the dataset for training.\n- pre_model_load(cfg): Performs actions before the model is loaded.\n- post_model_build(cfg, model): Performs actions after the model is loaded, but\nbefore LoRA adapters are applied.\n- pre_lora_load(cfg, model): Performs actions before LoRA weights are loaded.\n- post_lora_load(cfg, model): Performs actions after LoRA weights are loaded.\n- post_model_load(cfg, model): Performs actions after the model is loaded,\ninclusive of any adapters.\n- post_trainer_create(cfg, trainer): Performs actions after the trainer is\ncreated.\n- create_optimizer(cfg, trainer): Creates and returns an optimizer for training.\n- create_lr_scheduler(cfg, trainer, optimizer, num_training_steps): Creates and\nreturns a learning rate scheduler.\n- add_callbacks_pre_trainer(cfg, model): Adds callbacks to the trainer before\ntraining.\n- add_callbacks_post_trainer(cfg, trainer): Adds callbacks to the trainer after\ntraining.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nAdds callbacks to the trainer after creating the trainer. This is useful for\n\n\nadd_callbacks_pre_trainer\nSet up callbacks before creating the trainer.\n\n\ncreate_lr_scheduler\nCreates and returns a learning rate scheduler.\n\n\ncreate_optimizer\nCreates and returns an optimizer for training.\n\n\nget_input_args\nReturns a pydantic model for the plugin’s input arguments.\n\n\nget_trainer_cls\nReturns a custom class for the trainer.\n\n\nload_datasets\nLoads and preprocesses the dataset for training.\n\n\npost_lora_load\nPerforms actions after LoRA weights are loaded.\n\n\npost_model_build\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\npost_model_load\nPerforms actions after the model is loaded.\n\n\npost_train\nPerforms actions after training is complete.\n\n\npost_train_unload\nPerforms actions after training is complete and the model is unloaded.\n\n\npost_trainer_create\nPerforms actions after the trainer is created.\n\n\npre_lora_load\nPerforms actions before LoRA weights are loaded.\n\n\npre_model_load\nPerforms actions before the model is loaded.\n\n\nregister\nRegisters the plugin with the given configuration.\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_post_trainer(cfg, trainer)\nAdds callbacks to the trainer after creating the trainer. This is useful for\ncallbacks that require access to the model or trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_pre_trainer(cfg, model)\nSet up callbacks before creating the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_lr_scheduler(\n cfg,\n trainer,\n optimizer,\n num_training_steps,\n)\nCreates and returns a learning rate scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\nnum_training_steps\nint\nTotal number of training steps\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_optimizer(cfg, trainer)\nCreates and returns an optimizer for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_input_args()\nReturns a pydantic model for the plugin’s input arguments.\n\n\n\nintegrations.base.BasePlugin.get_trainer_cls(cfg)\nReturns a custom class for the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.load_datasets(cfg, preprocess=False)\nLoads and preprocesses the dataset for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\npreprocess\nbool\nWhether this is the preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\ndataset_meta\nUnion['TrainDatasetMeta', None]\nThe metadata for the training dataset.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_lora_load(cfg, model)\nPerforms actions after LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_build(cfg, model)\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_load(cfg, model)\nPerforms actions after the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train(cfg, model)\nPerforms actions after training is complete.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe axolotl configuration.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train_unload(cfg)\nPerforms actions after training is complete and the model is unloaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_trainer_create(cfg, trainer)\nPerforms actions after the trainer is created.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_lora_load(cfg, model)\nPerforms actions before LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_model_load(cfg)\nPerforms actions before the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.register(cfg)\nRegisters the plugin with the given configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\n\n\nintegrations.base.PluginManager()\nThe PluginManager class is responsible for loading and managing plugins. It\nshould be a singleton so it can be accessed from anywhere in the codebase.\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nplugins\nOrderedDict[str, BasePlugin]\nA list of loaded plugins.\n\n\n\n\n\n\nKey methods include:\n- get_instance(): Static method to get the singleton instance of PluginManager.\n- register(plugin_name: str): Registers a new plugin by its name.\n- pre_model_load(cfg): Calls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\nadd_callbacks_pre_trainer\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\ncreate_lr_scheduler\nCalls the create_lr_scheduler method of all registered plugins and returns\n\n\ncreate_optimizer\nCalls the create_optimizer method of all registered plugins and returns\n\n\nget_input_args\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\nget_instance\nReturns the singleton instance of PluginManager. If the instance doesn’t\n\n\nget_trainer_cls\nCalls the get_trainer_cls method of all registered plugins and returns the\n\n\nload_datasets\nCalls the load_datasets method of each registered plugin.\n\n\npost_lora_load\nCalls the post_lora_load method of all registered plugins.\n\n\npost_model_build\nCalls the post_model_build method of all registered plugins after the\n\n\npost_model_load\nCalls the post_model_load method of all registered plugins after the model\n\n\npost_train\nCalls the post_train method of all registered plugins.\n\n\npost_train_unload\nCalls the post_train_unload method of all registered plugins.\n\n\npost_trainer_create\nCalls the post_trainer_create method of all registered plugins.\n\n\npre_lora_load\nCalls the pre_lora_load method of all registered plugins.\n\n\npre_model_load\nCalls the pre_model_load method of all registered plugins.\n\n\nregister\nRegisters a new plugin by its name.\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_lr_scheduler(\n trainer,\n optimizer,\n num_training_steps,\n)\nCalls the create_lr_scheduler method of all registered plugins and returns\nthe first non-None scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler, or None if not found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_optimizer(trainer)\nCalls the create_optimizer method of all registered plugins and returns\nthe first non-None optimizer.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer, or None if none was found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_input_args()\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[str]\nA list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_instance()\nReturns the singleton instance of PluginManager. If the instance doesn’t\nexist, it creates a new one.\n\n\n\nintegrations.base.PluginManager.get_trainer_cls(cfg)\nCalls the get_trainer_cls method of all registered plugins and returns the\nfirst non-None trainer class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.load_datasets(cfg, preprocess=False)\nCalls the load_datasets method of each registered plugin.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\npreprocess\nbool\nWhether this is preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nUnion['TrainDatasetMeta', None]\nThe dataset metadata loaded from all registered plugins.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_lora_load(cfg, model)\nCalls the post_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_build(cfg, model)\nCalls the post_model_build method of all registered plugins after the\nmodel has been built / loaded, but before any adapters have been applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_load(cfg, model)\nCalls the post_model_load method of all registered plugins after the model\nhas been loaded inclusive of any adapters.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train(cfg, model)\nCalls the post_train method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train_unload(cfg)\nCalls the post_train_unload method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_trainer_create(cfg, trainer)\nCalls the post_trainer_create method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_lora_load(cfg, model)\nCalls the pre_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_model_load(cfg)\nCalls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.register(plugin_name)\nRegisters a new plugin by its name.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be registered.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported."
+ "text": "Name\nDescription\n\n\n\n\nBaseOptimizerFactory\nBase class for factories to create custom optimizers\n\n\nBasePlugin\nBase class for all plugins. Defines the interface for plugin methods.\n\n\nPluginManager\nThe PluginManager class is responsible for loading and managing plugins. It\n\n\n\n\n\nintegrations.base.BaseOptimizerFactory()\nBase class for factories to create custom optimizers\n\n\n\nintegrations.base.BasePlugin()\nBase class for all plugins. Defines the interface for plugin methods.\nA plugin is a reusable, modular, and self-contained piece of code that extends\nthe functionality of Axolotl. Plugins can be used to integrate third-party models,\nmodify the training process, or add new features.\nTo create a new plugin, you need to inherit from the BasePlugin class and\nimplement the required methods.\n\n\nPlugin methods include:\n- register(cfg): Registers the plugin with the given configuration.\n- load_datasets(cfg): Loads and preprocesses the dataset for training.\n- pre_model_load(cfg): Performs actions before the model is loaded.\n- post_model_build(cfg, model): Performs actions after the model is loaded, but\nbefore LoRA adapters are applied.\n- pre_lora_load(cfg, model): Performs actions before LoRA weights are loaded.\n- post_lora_load(cfg, model): Performs actions after LoRA weights are loaded.\n- post_model_load(cfg, model): Performs actions after the model is loaded,\ninclusive of any adapters.\n- post_trainer_create(cfg, trainer): Performs actions after the trainer is\ncreated.\n- create_optimizer(cfg, trainer): Creates and returns an optimizer for training.\n- create_lr_scheduler(cfg, trainer, optimizer, num_training_steps): Creates and\nreturns a learning rate scheduler.\n- add_callbacks_pre_trainer(cfg, model): Adds callbacks to the trainer before\ntraining.\n- add_callbacks_post_trainer(cfg, trainer): Adds callbacks to the trainer after\ntraining.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nAdds callbacks to the trainer after creating the trainer. This is useful for\n\n\nadd_callbacks_pre_trainer\nSet up callbacks before creating the trainer.\n\n\ncreate_lr_scheduler\nCreates and returns a learning rate scheduler.\n\n\ncreate_optimizer\nCreates and returns an optimizer for training.\n\n\nget_collator_cls_and_kwargs\nReturns a custom class for the collator.\n\n\nget_input_args\nReturns a pydantic model for the plugin’s input arguments.\n\n\nget_trainer_cls\nReturns a custom class for the trainer.\n\n\nget_training_args\nReturns custom training arguments to set on TrainingArgs.\n\n\nget_training_args_mixin\nReturns a dataclass model for the plugin’s training arguments.\n\n\nload_datasets\nLoads and preprocesses the dataset for training.\n\n\npost_lora_load\nPerforms actions after LoRA weights are loaded.\n\n\npost_model_build\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\npost_model_load\nPerforms actions after the model is loaded.\n\n\npost_train\nPerforms actions after training is complete.\n\n\npost_train_unload\nPerforms actions after training is complete and the model is unloaded.\n\n\npost_trainer_create\nPerforms actions after the trainer is created.\n\n\npre_lora_load\nPerforms actions before LoRA weights are loaded.\n\n\npre_model_load\nPerforms actions before the model is loaded.\n\n\nregister\nRegisters the plugin with the given configuration.\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_post_trainer(cfg, trainer)\nAdds callbacks to the trainer after creating the trainer. This is useful for\ncallbacks that require access to the model or trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.add_callbacks_pre_trainer(cfg, model)\nSet up callbacks before creating the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_lr_scheduler(\n cfg,\n trainer,\n optimizer,\n num_training_steps,\n)\nCreates and returns a learning rate scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\nnum_training_steps\nint\nTotal number of training steps\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.create_optimizer(cfg, trainer)\nCreates and returns an optimizer for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_collator_cls_and_kwargs(cfg, is_eval=False)\nReturns a custom class for the collator.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\nis_eval\nbool\nWhether this is an eval split.\nFalse\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nclass\n\nThe class for the collator.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_input_args()\nReturns a pydantic model for the plugin’s input arguments.\n\n\n\nintegrations.base.BasePlugin.get_trainer_cls(cfg)\nReturns a custom class for the trainer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_training_args(cfg)\nReturns custom training arguments to set on TrainingArgs.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe global axolotl configuration.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nobject\n\ndict containing the training arguments.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.get_training_args_mixin()\nReturns a dataclass model for the plugin’s training arguments.\n\n\n\nintegrations.base.BasePlugin.load_datasets(cfg, preprocess=False)\nLoads and preprocesses the dataset for training.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\npreprocess\nbool\nWhether this is the preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\ndataset_meta\nUnion['TrainDatasetMeta', None]\nThe metadata for the training dataset.\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_lora_load(cfg, model)\nPerforms actions after LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_build(cfg, model)\nPerforms actions after the model is built/loaded, but before any adapters are applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_model_load(cfg, model)\nPerforms actions after the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train(cfg, model)\nPerforms actions after training is complete.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe axolotl configuration.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_train_unload(cfg)\nPerforms actions after training is complete and the model is unloaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.post_trainer_create(cfg, trainer)\nPerforms actions after the trainer is created.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_lora_load(cfg, model)\nPerforms actions before LoRA weights are loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.pre_model_load(cfg)\nPerforms actions before the model is loaded.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\nintegrations.base.BasePlugin.register(cfg)\nRegisters the plugin with the given configuration.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugin.\nrequired\n\n\n\n\n\n\n\n\n\nintegrations.base.PluginManager()\nThe PluginManager class is responsible for loading and managing plugins. It\nshould be a singleton so it can be accessed from anywhere in the codebase.\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nplugins\nOrderedDict[str, BasePlugin]\nA list of loaded plugins.\n\n\n\n\n\n\nKey methods include:\n- get_instance(): Static method to get the singleton instance of PluginManager.\n- register(plugin_name: str): Registers a new plugin by its name.\n- pre_model_load(cfg): Calls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nadd_callbacks_post_trainer\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\nadd_callbacks_pre_trainer\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\ncreate_lr_scheduler\nCalls the create_lr_scheduler method of all registered plugins and returns\n\n\ncreate_optimizer\nCalls the create_optimizer method of all registered plugins and returns\n\n\nget_collator_cls_and_kwargs\nCalls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.\n\n\nget_input_args\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\nget_instance\nReturns the singleton instance of PluginManager. If the instance doesn’t\n\n\nget_trainer_cls\nCalls the get_trainer_cls method of all registered plugins and returns the\n\n\nget_training_args\nCalls the get_training_args method of all registered plugins and returns the combined training arguments.\n\n\nget_training_args_mixin\nReturns a list of dataclasses for all registered plugins’ training args mixins’\n\n\nload_datasets\nCalls the load_datasets method of each registered plugin.\n\n\npost_lora_load\nCalls the post_lora_load method of all registered plugins.\n\n\npost_model_build\nCalls the post_model_build method of all registered plugins after the\n\n\npost_model_load\nCalls the post_model_load method of all registered plugins after the model\n\n\npost_train\nCalls the post_train method of all registered plugins.\n\n\npost_train_unload\nCalls the post_train_unload method of all registered plugins.\n\n\npost_trainer_create\nCalls the post_trainer_create method of all registered plugins.\n\n\npre_lora_load\nCalls the pre_lora_load method of all registered plugins.\n\n\npre_model_load\nCalls the pre_model_load method of all registered plugins.\n\n\nregister\nRegisters a new plugin by its name.\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_post_trainer(cfg, trainer)\nCalls the add_callbacks_post_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.add_callbacks_pre_trainer(cfg, model)\nCalls the add_callbacks_pre_trainer method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[Callable]\nA list of callback functions to be added to the TrainingArgs.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_lr_scheduler(\n trainer,\n optimizer,\n num_training_steps,\n)\nCalls the create_lr_scheduler method of all registered plugins and returns\nthe first non-None scheduler.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\noptimizer\nOptimizer\nThe optimizer for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nLRScheduler | None\nThe created learning rate scheduler, or None if not found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.create_optimizer(trainer)\nCalls the create_optimizer method of all registered plugins and returns\nthe first non-None optimizer.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nOptimizer | None\nThe created optimizer, or None if none was found.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_collator_cls_and_kwargs(cfg, is_eval=False)\nCalls the get_collator_cls_and_kwargs method of all registered plugins and returns the first non-None collator class.\nParameters:\ncfg (dict): The configuration for the plugins.\nis_eval (bool): Whether this is an eval split.\nReturns:\nobject: The collator class, or None if none was found.\n\n\n\nintegrations.base.PluginManager.get_input_args()\nReturns a list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nlist[str]\nA list of Pydantic classes for all registered plugins’ input arguments.’\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_instance()\nReturns the singleton instance of PluginManager. If the instance doesn’t\nexist, it creates a new one.\n\n\n\nintegrations.base.PluginManager.get_trainer_cls(cfg)\nCalls the get_trainer_cls method of all registered plugins and returns the\nfirst non-None trainer class.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nTrainer | None\nThe first non-None trainer class returned by a plugin.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.get_training_args(cfg)\nCalls the get_training_args method of all registered plugins and returns the combined training arguments.\nParameters:\ncfg (dict): The configuration for the plugins.\nReturns:\nobject: The training arguments\n\n\n\nintegrations.base.PluginManager.get_training_args_mixin()\nReturns a list of dataclasses for all registered plugins’ training args mixins’\nReturns:\nlist[str]: A list of dataclsses\n\n\n\nintegrations.base.PluginManager.load_datasets(cfg, preprocess=False)\nCalls the load_datasets method of each registered plugin.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\npreprocess\nbool\nWhether this is preprocess step of the datasets.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nUnion['TrainDatasetMeta', None]\nThe dataset metadata loaded from all registered plugins.\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_lora_load(cfg, model)\nCalls the post_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_build(cfg, model)\nCalls the post_model_build method of all registered plugins after the\nmodel has been built / loaded, but before any adapters have been applied.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_model_load(cfg, model)\nCalls the post_model_load method of all registered plugins after the model\nhas been loaded inclusive of any adapters.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train(cfg, model)\nCalls the post_train method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel | PeftModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_train_unload(cfg)\nCalls the post_train_unload method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.post_trainer_create(cfg, trainer)\nCalls the post_trainer_create method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\ntrainer\nTrainer\nThe trainer object for training.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_lora_load(cfg, model)\nCalls the pre_lora_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\nmodel\nPreTrainedModel\nThe loaded model.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.pre_model_load(cfg)\nCalls the pre_model_load method of all registered plugins.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncfg\nDictDefault\nThe configuration for the plugins.\nrequired\n\n\n\n\n\n\n\nintegrations.base.PluginManager.register(plugin_name)\nRegisters a new plugin by its name.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nplugin_name\nstr\nThe name of the plugin to be registered.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\nImportError\nIf the plugin module cannot be imported."
},
{
"objectID": "docs/api/integrations.base.html#functions",
diff --git a/site_libs/quarto-html/quarto-syntax-highlighting-dark-8ef56b68f8fa1e9d2ba328e99e439f80.css b/site_libs/quarto-html/quarto-syntax-highlighting-dark-2fef5ea3f8957b3e4ecc936fc74692ca.css
similarity index 98%
rename from site_libs/quarto-html/quarto-syntax-highlighting-dark-8ef56b68f8fa1e9d2ba328e99e439f80.css
rename to site_libs/quarto-html/quarto-syntax-highlighting-dark-2fef5ea3f8957b3e4ecc936fc74692ca.css
index ccf2cfe39..c4f3fb5fd 100644
--- a/site_libs/quarto-html/quarto-syntax-highlighting-dark-8ef56b68f8fa1e9d2ba328e99e439f80.css
+++ b/site_libs/quarto-html/quarto-syntax-highlighting-dark-2fef5ea3f8957b3e4ecc936fc74692ca.css
@@ -216,4 +216,4 @@ code span.wa {
content: "";
}
-/*# sourceMappingURL=cf5278f7cea5512246cb3b41cb474b3b.css.map */
+/*# sourceMappingURL=9d1c423767af11a14e59cf8ecfea55bb.css.map */
diff --git a/sitemap.xml b/sitemap.xml
index 92885c679..b64e0bd2b 100644
--- a/sitemap.xml
+++ b/sitemap.xml
@@ -2,758 +2,758 @@
https://docs.axolotl.ai/docs/unsloth.html
- 2025-06-15T20:47:12.066Z
+ 2025-06-17T16:09:43.782Zhttps://docs.axolotl.ai/docs/dataset-formats/conversation.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.776Zhttps://docs.axolotl.ai/docs/dataset-formats/stepwise_supervised.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.777Zhttps://docs.axolotl.ai/docs/dataset-formats/tokenized.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.777Zhttps://docs.axolotl.ai/docs/mac.html
- 2025-06-15T20:47:12.065Z
+ 2025-06-17T16:09:43.781Zhttps://docs.axolotl.ai/docs/nccl.html
- 2025-06-15T20:47:12.066Z
+ 2025-06-17T16:09:43.781Zhttps://docs.axolotl.ai/docs/multi-node.html
- 2025-06-15T20:47:12.066Z
+ 2025-06-17T16:09:43.781Zhttps://docs.axolotl.ai/docs/docker.html
- 2025-06-15T20:47:12.063Z
+ 2025-06-17T16:09:43.777Zhttps://docs.axolotl.ai/docs/lr_groups.html
- 2025-06-15T20:47:12.065Z
+ 2025-06-17T16:09:43.781Zhttps://docs.axolotl.ai/docs/inference.html
- 2025-06-15T20:47:12.065Z
+ 2025-06-17T16:09:43.781Zhttps://docs.axolotl.ai/docs/cli.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.776Zhttps://docs.axolotl.ai/docs/faq.html
- 2025-06-15T20:47:12.063Z
+ 2025-06-17T16:09:43.777Zhttps://docs.axolotl.ai/docs/getting-started.html
- 2025-06-15T20:47:12.063Z
+ 2025-06-17T16:09:43.777Zhttps://docs.axolotl.ai/docs/custom_integrations.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.776Zhttps://docs.axolotl.ai/docs/fsdp_qlora.html
- 2025-06-15T20:47:12.063Z
+ 2025-06-17T16:09:43.777Zhttps://docs.axolotl.ai/docs/api/common.const.html
- 2025-06-15T20:47:40.284Z
+ 2025-06-17T16:10:14.219Zhttps://docs.axolotl.ai/docs/api/prompt_tokenizers.html
- 2025-06-15T20:47:38.988Z
+ 2025-06-17T16:10:12.985Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.dpo.user_defined.html
- 2025-06-15T20:47:39.622Z
+ 2025-06-17T16:10:13.541Zhttps://docs.axolotl.ai/docs/api/core.training_args.html
- 2025-06-15T20:47:39.105Z
+ 2025-06-17T16:10:13.025Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.user_defined.html
- 2025-06-15T20:47:39.547Z
+ 2025-06-17T16:10:13.466Zhttps://docs.axolotl.ai/docs/api/utils.dict.html
- 2025-06-15T20:47:40.019Z
+ 2025-06-17T16:10:13.937Zhttps://docs.axolotl.ai/docs/api/monkeypatch.unsloth_.html
- 2025-06-15T20:47:39.884Z
+ 2025-06-17T16:10:13.804Zhttps://docs.axolotl.ai/docs/api/utils.collators.mamba.html
- 2025-06-15T20:47:40.324Z
+ 2025-06-17T16:10:14.259Zhttps://docs.axolotl.ai/docs/api/core.trainers.mixins.optimizer.html
- 2025-06-15T20:47:39.446Z
+ 2025-06-17T16:10:13.365Zhttps://docs.axolotl.ai/docs/api/cli.train.html
- 2025-06-15T20:47:39.185Z
+ 2025-06-17T16:10:13.104Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.messages.chat.html
- 2025-06-15T20:47:39.597Z
+ 2025-06-17T16:10:13.516Zhttps://docs.axolotl.ai/docs/api/core.chat.format.llama3x.html
- 2025-06-15T20:47:39.131Z
+ 2025-06-17T16:10:13.051Zhttps://docs.axolotl.ai/docs/api/loaders.processor.html
- 2025-06-15T20:47:39.426Z
+ 2025-06-17T16:10:13.344Zhttps://docs.axolotl.ai/docs/api/core.datasets.transforms.chat_builder.html
- 2025-06-15T20:47:39.145Z
+ 2025-06-17T16:10:13.065Zhttps://docs.axolotl.ai/docs/api/core.trainers.mamba.html
- 2025-06-15T20:47:39.371Z
+ 2025-06-17T16:10:13.288Zhttps://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html
- 2025-06-15T20:47:39.891Z
+ 2025-06-17T16:10:13.810Zhttps://docs.axolotl.ai/docs/api/models.mamba.modeling_mamba.html
- 2025-06-15T20:47:40.300Z
+ 2025-06-17T16:10:14.235Zhttps://docs.axolotl.ai/docs/api/core.trainers.relora.html
- 2025-06-15T20:47:39.376Z
+ 2025-06-17T16:10:13.292Zhttps://docs.axolotl.ai/docs/api/core.builders.causal.html
- 2025-06-15T20:47:39.008Z
+ 2025-06-17T16:10:13.005Zhttps://docs.axolotl.ai/docs/api/core.chat.messages.html
- 2025-06-15T20:47:39.128Z
+ 2025-06-17T16:10:13.048Zhttps://docs.axolotl.ai/docs/api/integrations.lm_eval.args.html
- 2025-06-15T20:47:40.278Z
+ 2025-06-17T16:10:14.213Zhttps://docs.axolotl.ai/docs/api/cli.quantize.html
- 2025-06-15T20:47:39.340Z
+ 2025-06-17T16:10:13.256Zhttps://docs.axolotl.ai/docs/api/cli.checks.html
- 2025-06-15T20:47:39.219Z
+ 2025-06-17T16:10:13.138Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.kto.llama3.html
- 2025-06-15T20:47:39.632Z
+ 2025-06-17T16:10:13.551Zhttps://docs.axolotl.ai/docs/api/kernels.lora.html
- 2025-06-15T20:47:39.748Z
+ 2025-06-17T16:10:13.667Zhttps://docs.axolotl.ai/docs/api/utils.schemas.multimodal.html
- 2025-06-15T20:47:40.113Z
+ 2025-06-17T16:10:14.032Zhttps://docs.axolotl.ai/docs/api/loaders.adapter.html
- 2025-06-15T20:47:39.431Z
+ 2025-06-17T16:10:13.350Zhttps://docs.axolotl.ai/docs/api/index.html
- 2025-06-15T20:47:38.850Z
+ 2025-06-17T16:10:12.847Zhttps://docs.axolotl.ai/docs/api/monkeypatch.llama_patch_multipack.html
- 2025-06-15T20:47:39.868Z
+ 2025-06-17T16:10:13.787Zhttps://docs.axolotl.ai/docs/api/train.html
- 2025-06-15T20:47:38.911Z
+ 2025-06-17T16:10:12.909Zhttps://docs.axolotl.ai/docs/api/monkeypatch.mixtral.html
- 2025-06-15T20:47:39.887Z
+ 2025-06-17T16:10:13.807Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chatml.html
- 2025-06-15T20:47:39.619Z
+ 2025-06-17T16:10:13.538Zhttps://docs.axolotl.ai/docs/api/integrations.grokfast.optimizer.html
- 2025-06-15T20:47:40.264Z
+ 2025-06-17T16:10:14.199Zhttps://docs.axolotl.ai/docs/api/utils.samplers.multipack.html
- 2025-06-15T20:47:40.369Z
+ 2025-06-17T16:10:14.303Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_chat.html
- 2025-06-15T20:47:39.526Z
+ 2025-06-17T16:10:13.445Zhttps://docs.axolotl.ai/docs/api/monkeypatch.llama_expand_mask.html
- 2025-06-15T20:47:39.828Z
+ 2025-06-17T16:10:13.748Zhttps://docs.axolotl.ai/docs/api/common.architectures.html
- 2025-06-15T20:47:40.283Z
+ 2025-06-17T16:10:14.218Zhttps://docs.axolotl.ai/docs/api/utils.schemas.utils.html
- 2025-06-15T20:47:40.141Z
+ 2025-06-17T16:10:14.060Zhttps://docs.axolotl.ai/docs/api/utils.chat_templates.html
- 2025-06-15T20:47:39.933Z
+ 2025-06-17T16:10:13.852Zhttps://docs.axolotl.ai/docs/api/utils.callbacks.comet_.html
- 2025-06-15T20:47:40.387Z
+ 2025-06-17T16:10:14.322Zhttps://docs.axolotl.ai/docs/api/cli.main.html
- 2025-06-15T20:47:39.177Z
+ 2025-06-17T16:10:13.096Zhttps://docs.axolotl.ai/docs/api/core.trainers.grpo.trainer.html
- 2025-06-15T20:47:39.393Z
+ 2025-06-17T16:10:13.309Zhttps://docs.axolotl.ai/docs/api/utils.callbacks.mlflow_.html
- 2025-06-15T20:47:40.384Z
+ 2025-06-17T16:10:14.318Zhttps://docs.axolotl.ai/docs/api/loaders.model.html
- 2025-06-15T20:47:39.416Z
+ 2025-06-17T16:10:13.333Zhttps://docs.axolotl.ai/docs/api/utils.tokenization.html
- 2025-06-15T20:47:39.923Z
+ 2025-06-17T16:10:13.842Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.kto.chatml.html
- 2025-06-15T20:47:39.640Z
+ 2025-06-17T16:10:13.559Zhttps://docs.axolotl.ai/docs/api/utils.model_shard_quant.html
- 2025-06-15T20:47:39.943Z
+ 2025-06-17T16:10:13.863Zhttps://docs.axolotl.ai/docs/api/core.trainers.mixins.scheduler.html
- 2025-06-15T20:47:39.456Z
+ 2025-06-17T16:10:13.375Zhttps://docs.axolotl.ai/docs/api/core.chat.format.chatml.html
- 2025-06-15T20:47:39.129Z
+ 2025-06-17T16:10:13.050Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.dpo.passthrough.html
- 2025-06-15T20:47:39.624Z
+ 2025-06-17T16:10:13.543Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.orpo.chat_template.html
- 2025-06-15T20:47:39.662Z
+ 2025-06-17T16:10:13.581Zhttps://docs.axolotl.ai/docs/api/monkeypatch.multipack.html
- 2025-06-15T20:47:39.820Z
+ 2025-06-17T16:10:13.739Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.base.html
- 2025-06-15T20:47:39.480Z
+ 2025-06-17T16:10:13.399Zhttps://docs.axolotl.ai/docs/api/core.trainers.grpo.sampler.html
- 2025-06-15T20:47:39.405Z
+ 2025-06-17T16:10:13.321Zhttps://docs.axolotl.ai/docs/api/utils.collators.batching.html
- 2025-06-15T20:47:40.321Z
+ 2025-06-17T16:10:14.255Zhttps://docs.axolotl.ai/docs/api/monkeypatch.lora_kernels.html
- 2025-06-15T20:47:39.857Z
+ 2025-06-17T16:10:13.776Zhttps://docs.axolotl.ai/docs/api/integrations.kd.trainer.html
- 2025-06-15T20:47:40.272Z
+ 2025-06-17T16:10:14.206Zhttps://docs.axolotl.ai/docs/api/utils.schemas.enums.html
- 2025-06-15T20:47:40.135Z
+ 2025-06-17T16:10:14.055Zhttps://docs.axolotl.ai/docs/api/datasets.html
- 2025-06-15T20:47:38.932Z
+ 2025-06-17T16:10:12.930Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.dpo.zephyr.html
- 2025-06-15T20:47:39.621Z
+ 2025-06-17T16:10:13.540Zhttps://docs.axolotl.ai/docs/api/monkeypatch.data.batch_dataset_fetcher.html
- 2025-06-15T20:47:39.886Z
+ 2025-06-17T16:10:13.805Zhttps://docs.axolotl.ai/docs/api/utils.schemas.model.html
- 2025-06-15T20:47:40.073Z
+ 2025-06-17T16:10:13.991Zhttps://docs.axolotl.ai/docs/api/integrations.cut_cross_entropy.args.html
- 2025-06-15T20:47:40.263Z
+ 2025-06-17T16:10:14.198Zhttps://docs.axolotl.ai/docs/api/utils.trainer.html
- 2025-06-15T20:47:39.971Z
+ 2025-06-17T16:10:13.890Zhttps://docs.axolotl.ai/docs/api/utils.callbacks.lisa.html
- 2025-06-15T20:47:40.380Z
+ 2025-06-17T16:10:14.315Zhttps://docs.axolotl.ai/docs/api/utils.data.pretraining.html
- 2025-06-15T20:47:40.028Z
+ 2025-06-17T16:10:13.946Zhttps://docs.axolotl.ai/docs/api/utils.callbacks.profiler.html
- 2025-06-15T20:47:40.379Z
+ 2025-06-17T16:10:14.313Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.metharme.html
- 2025-06-15T20:47:39.583Z
+ 2025-06-17T16:10:13.502Zhttps://docs.axolotl.ai/docs/api/utils.collators.core.html
- 2025-06-15T20:47:40.302Z
+ 2025-06-17T16:10:14.236Zhttps://docs.axolotl.ai/docs/api/monkeypatch.stablelm_attn_hijack_flash.html
- 2025-06-15T20:47:39.873Z
+ 2025-06-17T16:10:13.793Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_w_system.html
- 2025-06-15T20:47:39.539Z
+ 2025-06-17T16:10:13.458Zhttps://docs.axolotl.ai/docs/api/utils.lora.html
- 2025-06-15T20:47:39.938Z
+ 2025-06-17T16:10:13.857Zhttps://docs.axolotl.ai/docs/qat.html
- 2025-06-15T20:47:12.066Z
+ 2025-06-17T16:09:43.782Zhttps://docs.axolotl.ai/docs/quantize.html
- 2025-06-15T20:47:12.066Z
+ 2025-06-17T16:09:43.782Zhttps://docs.axolotl.ai/docs/ray-integration.html
- 2025-06-15T20:47:12.066Z
+ 2025-06-17T16:09:43.782Zhttps://docs.axolotl.ai/docs/sequence_parallelism.html
- 2025-06-15T20:47:12.066Z
+ 2025-06-17T16:09:43.782Zhttps://docs.axolotl.ai/docs/reward_modelling.html
- 2025-06-15T20:47:12.066Z
+ 2025-06-17T16:09:43.782Zhttps://docs.axolotl.ai/index.html
- 2025-06-15T20:47:12.079Z
+ 2025-06-17T16:09:43.797Zhttps://docs.axolotl.ai/src/axolotl/integrations/LICENSE.html
- 2025-06-15T20:47:12.083Z
+ 2025-06-17T16:09:43.801Zhttps://docs.axolotl.ai/FAQS.html
- 2025-06-15T20:47:12.060Z
+ 2025-06-17T16:09:43.774Zhttps://docs.axolotl.ai/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html
- 2025-06-15T20:47:12.083Z
+ 2025-06-17T16:09:43.801Zhttps://docs.axolotl.ai/TODO.html
- 2025-06-15T20:47:12.060Z
+ 2025-06-17T16:09:43.774Zhttps://docs.axolotl.ai/examples/colab-notebooks/colab-axolotl-example.html
- 2025-06-15T20:47:12.067Z
+ 2025-06-17T16:09:43.783Zhttps://docs.axolotl.ai/docs/torchao.html
- 2025-06-15T20:47:12.066Z
+ 2025-06-17T16:09:43.782Zhttps://docs.axolotl.ai/docs/config.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.776Zhttps://docs.axolotl.ai/docs/input_output.html
- 2025-06-15T20:47:12.065Z
+ 2025-06-17T16:09:43.781Zhttps://docs.axolotl.ai/docs/batch_vs_grad.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.776Zhttps://docs.axolotl.ai/docs/api/utils.quantization.html
- 2025-06-15T20:47:40.055Z
+ 2025-06-17T16:10:13.973Zhttps://docs.axolotl.ai/docs/api/utils.bench.html
- 2025-06-15T20:47:39.947Z
+ 2025-06-17T16:10:13.866Zhttps://docs.axolotl.ai/docs/api/loaders.tokenizer.html
- 2025-06-15T20:47:39.424Z
+ 2025-06-17T16:10:13.343Zhttps://docs.axolotl.ai/docs/api/utils.freeze.html
- 2025-06-15T20:47:39.954Z
+ 2025-06-17T16:10:13.874Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.orcamini.html
- 2025-06-15T20:47:39.587Z
+ 2025-06-17T16:10:13.505Zhttps://docs.axolotl.ai/docs/api/utils.schemas.training.html
- 2025-06-15T20:47:40.078Z
+ 2025-06-17T16:10:13.996Zhttps://docs.axolotl.ai/docs/api/integrations.spectrum.args.html
- 2025-06-15T20:47:40.281Z
+ 2025-06-17T16:10:14.216Zhttps://docs.axolotl.ai/docs/api/utils.ctx_managers.sequence_parallel.html
- 2025-06-15T20:47:39.479Z
+ 2025-06-17T16:10:13.398Zhttps://docs.axolotl.ai/docs/api/cli.inference.html
- 2025-06-15T20:47:39.252Z
+ 2025-06-17T16:10:13.170Zhttps://docs.axolotl.ai/docs/api/logging_config.html
- 2025-06-15T20:47:38.997Z
+ 2025-06-17T16:10:12.994Zhttps://docs.axolotl.ai/docs/api/loaders.constants.html
- 2025-06-15T20:47:39.440Z
+ 2025-06-17T16:10:13.360Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chat_template.html
- 2025-06-15T20:47:39.599Z
+ 2025-06-17T16:10:13.518Zhttps://docs.axolotl.ai/docs/api/cli.args.html
- 2025-06-15T20:47:39.213Z
+ 2025-06-17T16:10:13.132Zhttps://docs.axolotl.ai/docs/api/utils.schemas.trl.html
- 2025-06-15T20:47:40.107Z
+ 2025-06-17T16:10:14.027Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.pygmalion.html
- 2025-06-15T20:47:39.593Z
+ 2025-06-17T16:10:13.512Zhttps://docs.axolotl.ai/docs/api/convert.html
- 2025-06-15T20:47:38.946Z
+ 2025-06-17T16:10:12.943Zhttps://docs.axolotl.ai/docs/api/core.trainers.base.html
- 2025-06-15T20:47:39.350Z
+ 2025-06-17T16:10:13.266Zhttps://docs.axolotl.ai/docs/api/cli.preprocess.html
- 2025-06-15T20:47:39.281Z
+ 2025-06-17T16:10:13.198Zhttps://docs.axolotl.ai/docs/api/cli.config.html
- 2025-06-15T20:47:39.237Z
+ 2025-06-17T16:10:13.156Zhttps://docs.axolotl.ai/docs/api/monkeypatch.relora.html
- 2025-06-15T20:47:39.827Z
+ 2025-06-17T16:10:13.746Zhttps://docs.axolotl.ai/docs/api/core.chat.format.shared.html
- 2025-06-15T20:47:39.132Z
+ 2025-06-17T16:10:13.053Zhttps://docs.axolotl.ai/docs/api/core.trainers.dpo.trainer.html
- 2025-06-15T20:47:39.382Z
+ 2025-06-17T16:10:13.299Zhttps://docs.axolotl.ai/docs/api/utils.callbacks.qat.html
- 2025-06-15T20:47:40.394Z
+ 2025-06-17T16:10:14.328Zhttps://docs.axolotl.ai/docs/api/utils.optimizers.adopt.html
- 2025-06-15T20:47:40.026Z
+ 2025-06-17T16:10:13.945Zhttps://docs.axolotl.ai/docs/api/cli.evaluate.html
- 2025-06-15T20:47:39.193Z
+ 2025-06-17T16:10:13.112Zhttps://docs.axolotl.ai/docs/api/core.trainers.trl.html
- 2025-06-15T20:47:39.366Z
+ 2025-06-17T16:10:13.283Zhttps://docs.axolotl.ai/docs/api/core.builders.base.html
- 2025-06-15T20:47:39.003Z
+ 2025-06-17T16:10:13.000Zhttps://docs.axolotl.ai/docs/api/monkeypatch.trainer_fsdp_optim.html
- 2025-06-15T20:47:39.877Z
+ 2025-06-17T16:10:13.796Zhttps://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html
- 2025-06-15T20:47:39.916Z
+ 2025-06-17T16:10:13.836Zhttps://docs.axolotl.ai/docs/api/utils.distributed.html
- 2025-06-15T20:47:40.015Z
+ 2025-06-17T16:10:13.934Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.input_output.html
- 2025-06-15T20:47:39.572Z
+ 2025-06-17T16:10:13.491Zhttps://docs.axolotl.ai/docs/api/utils.schemas.config.html
- 2025-06-15T20:47:40.066Z
+ 2025-06-17T16:10:13.984Zhttps://docs.axolotl.ai/docs/api/cli.utils.html
- 2025-06-15T20:47:39.318Z
+ 2025-06-17T16:10:13.235Zhttps://docs.axolotl.ai/docs/api/utils.callbacks.perplexity.html
- 2025-06-15T20:47:40.375Z
+ 2025-06-17T16:10:14.310Zhttps://docs.axolotl.ai/docs/api/utils.schemas.integrations.html
- 2025-06-15T20:47:40.125Z
+ 2025-06-17T16:10:14.044Zhttps://docs.axolotl.ai/docs/api/loaders.patch_manager.html
- 2025-06-15T20:47:39.439Z
+ 2025-06-17T16:10:13.358Zhttps://docs.axolotl.ai/docs/api/monkeypatch.utils.html
- 2025-06-15T20:47:39.865Z
+ 2025-06-17T16:10:13.784Zhttps://docs.axolotl.ai/docs/api/cli.vllm_serve.html
- 2025-06-15T20:47:39.325Z
+ 2025-06-17T16:10:13.242Zhttps://docs.axolotl.ai/docs/api/monkeypatch.mistral_attn_hijack_flash.html
- 2025-06-15T20:47:39.819Z
+ 2025-06-17T16:10:13.738Zhttps://docs.axolotl.ai/docs/api/integrations.liger.args.html
- 2025-06-15T20:47:40.275Z
+ 2025-06-17T16:10:14.209Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_instruct.html
- 2025-06-15T20:47:39.528Z
+ 2025-06-17T16:10:13.447Zhttps://docs.axolotl.ai/docs/api/utils.data.sft.html
- 2025-06-15T20:47:40.034Z
+ 2025-06-17T16:10:13.953Zhttps://docs.axolotl.ai/docs/api/utils.collators.mm_chat.html
- 2025-06-15T20:47:40.329Z
+ 2025-06-17T16:10:14.263Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.llama2_chat.html
- 2025-06-15T20:47:39.560Z
+ 2025-06-17T16:10:13.479Zhttps://docs.axolotl.ai/docs/api/monkeypatch.transformers_fa_utils.html
- 2025-06-15T20:47:39.883Z
+ 2025-06-17T16:10:13.802Zhttps://docs.axolotl.ai/docs/api/cli.merge_sharded_fsdp_weights.html
- 2025-06-15T20:47:39.272Z
+ 2025-06-17T16:10:13.190Zhttps://docs.axolotl.ai/docs/api/cli.merge_lora.html
- 2025-06-15T20:47:39.260Z
+ 2025-06-17T16:10:13.178Zhttps://docs.axolotl.ai/docs/api/integrations.base.html
- 2025-06-15T20:47:40.259Z
+ 2025-06-17T16:10:14.195Zhttps://docs.axolotl.ai/docs/api/core.trainers.mixins.rng_state_loader.html
- 2025-06-15T20:47:39.449Z
+ 2025-06-17T16:10:13.368Zhttps://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_flash.html
- 2025-06-15T20:47:39.803Z
+ 2025-06-17T16:10:13.722Zhttps://docs.axolotl.ai/docs/api/kernels.quantize.html
- 2025-06-15T20:47:39.776Z
+ 2025-06-17T16:10:13.695Zhttps://docs.axolotl.ai/docs/api/evaluate.html
- 2025-06-15T20:47:38.922Z
+ 2025-06-17T16:10:12.919Zhttps://docs.axolotl.ai/docs/api/core.builders.rl.html
- 2025-06-15T20:47:39.016Z
+ 2025-06-17T16:10:13.013Zhttps://docs.axolotl.ai/docs/api/utils.schemas.datasets.html
- 2025-06-15T20:47:40.096Z
+ 2025-06-17T16:10:14.014Zhttps://docs.axolotl.ai/docs/api/common.datasets.html
- 2025-06-15T20:47:40.299Z
+ 2025-06-17T16:10:14.234Zhttps://docs.axolotl.ai/docs/api/kernels.utils.html
- 2025-06-15T20:47:39.778Z
+ 2025-06-17T16:10:13.697Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.completion.html
- 2025-06-15T20:47:39.566Z
+ 2025-06-17T16:10:13.485Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.bradley_terry.llama3.html
- 2025-06-15T20:47:39.665Z
+ 2025-06-17T16:10:13.584Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.stepwise_supervised.html
- 2025-06-15T20:47:39.576Z
+ 2025-06-17T16:10:13.495Zhttps://docs.axolotl.ai/docs/api/kernels.swiglu.html
- 2025-06-15T20:47:39.769Z
+ 2025-06-17T16:10:13.688Zhttps://docs.axolotl.ai/docs/api/cli.cloud.base.html
- 2025-06-15T20:47:39.329Z
+ 2025-06-17T16:10:13.245Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.kto.user_defined.html
- 2025-06-15T20:47:39.641Z
+ 2025-06-17T16:10:13.561Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.chat_template.html
- 2025-06-15T20:47:39.513Z
+ 2025-06-17T16:10:13.432Zhttps://docs.axolotl.ai/docs/api/monkeypatch.btlm_attn_hijack_flash.html
- 2025-06-15T20:47:39.866Z
+ 2025-06-17T16:10:13.785Zhttps://docs.axolotl.ai/docs/api/utils.schemas.peft.html
- 2025-06-15T20:47:40.104Z
+ 2025-06-17T16:10:14.023Zhttps://docs.axolotl.ai/docs/api/core.datasets.chat.html
- 2025-06-15T20:47:39.137Z
+ 2025-06-17T16:10:13.058Zhttps://docs.axolotl.ai/docs/api/core.trainers.utils.html
- 2025-06-15T20:47:39.406Z
+ 2025-06-17T16:10:13.323Zhttps://docs.axolotl.ai/docs/api/kernels.geglu.html
- 2025-06-15T20:47:39.758Z
+ 2025-06-17T16:10:13.678Zhttps://docs.axolotl.ai/docs/api/cli.cloud.modal_.html
- 2025-06-15T20:47:39.335Z
+ 2025-06-17T16:10:13.251Zhttps://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_xformers.html
- 2025-06-15T20:47:39.805Z
+ 2025-06-17T16:10:13.724Zhttps://docs.axolotl.ai/docs/api/utils.schedulers.html
- 2025-06-15T20:47:39.996Z
+ 2025-06-17T16:10:13.914Zhttps://docs.axolotl.ai/docs/api/prompt_strategies.dpo.llama3.html
- 2025-06-15T20:47:39.609Z
+ 2025-06-17T16:10:13.528Zhttps://docs.axolotl.ai/docs/api/cli.sweeps.html
- 2025-06-15T20:47:39.287Z
+ 2025-06-17T16:10:13.204Zhttps://docs.axolotl.ai/docs/multimodal.html
- 2025-06-15T20:47:12.066Z
+ 2025-06-17T16:09:43.781Zhttps://docs.axolotl.ai/docs/debugging.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.777Zhttps://docs.axolotl.ai/docs/multi-gpu.html
- 2025-06-15T20:47:12.066Z
+ 2025-06-17T16:09:43.781Zhttps://docs.axolotl.ai/docs/lora_optims.html
- 2025-06-15T20:47:12.065Z
+ 2025-06-17T16:09:43.781Zhttps://docs.axolotl.ai/docs/rlhf.html
- 2025-06-15T20:47:12.066Z
+ 2025-06-17T16:09:43.782Zhttps://docs.axolotl.ai/docs/amd_hpc.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.776Zhttps://docs.axolotl.ai/docs/installation.html
- 2025-06-15T20:47:12.065Z
+ 2025-06-17T16:09:43.781Zhttps://docs.axolotl.ai/docs/multipack.html
- 2025-06-15T20:47:12.066Z
+ 2025-06-17T16:09:43.781Zhttps://docs.axolotl.ai/docs/dataset_preprocessing.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.777Zhttps://docs.axolotl.ai/docs/dataset_loading.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.777Zhttps://docs.axolotl.ai/docs/dataset-formats/inst_tune.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.777Zhttps://docs.axolotl.ai/docs/dataset-formats/template_free.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.777Zhttps://docs.axolotl.ai/docs/dataset-formats/index.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.776Zhttps://docs.axolotl.ai/docs/dataset-formats/pretraining.html
- 2025-06-15T20:47:12.062Z
+ 2025-06-17T16:09:43.777Z
diff --git a/src/axolotl/integrations/LICENSE.html b/src/axolotl/integrations/LICENSE.html
index e4f1fb61a..9c5a30f99 100644
--- a/src/axolotl/integrations/LICENSE.html
+++ b/src/axolotl/integrations/LICENSE.html
@@ -2,7 +2,7 @@
-
+
@@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] {
-
+
diff --git a/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html b/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html
index b2dccf400..139616c0b 100644
--- a/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html
+++ b/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html
@@ -2,7 +2,7 @@
-
+
@@ -36,7 +36,7 @@ ul.task-list li input[type="checkbox"] {
-
+