From 75e142195a92eb9177eb966af1a8f4646cde434a Mon Sep 17 00:00:00 2001 From: Quarto GHA Workflow Runner Date: Wed, 6 Aug 2025 12:07:50 +0000 Subject: [PATCH] Built site for gh-pages --- .nojekyll | 2 +- FAQS.html | 12 +- TODO.html | 12 +- docs/amd_hpc.html | 12 +- docs/api/cli.args.html | 12 +- docs/api/cli.art.html | 12 +- docs/api/cli.checks.html | 12 +- docs/api/cli.cloud.base.html | 12 +- docs/api/cli.cloud.modal_.html | 12 +- docs/api/cli.config.html | 12 +- docs/api/cli.delinearize_llama4.html | 12 +- docs/api/cli.evaluate.html | 12 +- docs/api/cli.inference.html | 12 +- docs/api/cli.main.html | 12 +- docs/api/cli.merge_lora.html | 12 +- docs/api/cli.merge_sharded_fsdp_weights.html | 12 +- docs/api/cli.preprocess.html | 12 +- docs/api/cli.quantize.html | 12 +- docs/api/cli.train.html | 12 +- docs/api/cli.utils.args.html | 12 +- docs/api/cli.utils.fetch.html | 12 +- docs/api/cli.utils.html | 12 +- docs/api/cli.utils.load.html | 12 +- docs/api/cli.utils.sweeps.html | 12 +- docs/api/cli.utils.train.html | 12 +- docs/api/cli.vllm_serve.html | 12 +- docs/api/common.architectures.html | 12 +- docs/api/common.const.html | 12 +- docs/api/common.datasets.html | 12 +- docs/api/convert.html | 12 +- docs/api/core.builders.base.html | 12 +- docs/api/core.builders.causal.html | 12 +- docs/api/core.builders.rl.html | 12 +- docs/api/core.chat.format.chatml.html | 12 +- docs/api/core.chat.format.llama3x.html | 12 +- docs/api/core.chat.format.shared.html | 12 +- docs/api/core.chat.messages.html | 12 +- docs/api/core.datasets.chat.html | 12 +- ...core.datasets.transforms.chat_builder.html | 12 +- docs/api/core.trainers.base.html | 12 +- docs/api/core.trainers.dpo.trainer.html | 12 +- docs/api/core.trainers.grpo.sampler.html | 12 +- docs/api/core.trainers.grpo.trainer.html | 12 +- docs/api/core.trainers.mamba.html | 12 +- docs/api/core.trainers.mixins.optimizer.html | 12 +- ...core.trainers.mixins.rng_state_loader.html | 12 +- docs/api/core.trainers.mixins.scheduler.html | 12 +- docs/api/core.trainers.trl.html | 12 +- docs/api/core.trainers.utils.html | 12 +- docs/api/core.training_args.html | 12 +- docs/api/datasets.html | 12 +- docs/api/evaluate.html | 12 +- docs/api/index.html | 12 +- docs/api/integrations.base.html | 12 +- .../integrations.cut_cross_entropy.args.html | 12 +- docs/api/integrations.grokfast.optimizer.html | 12 +- docs/api/integrations.kd.trainer.html | 12 +- docs/api/integrations.liger.args.html | 12 +- docs/api/integrations.lm_eval.args.html | 12 +- docs/api/integrations.spectrum.args.html | 12 +- docs/api/kernels.geglu.html | 12 +- docs/api/kernels.lora.html | 12 +- docs/api/kernels.quantize.html | 12 +- docs/api/kernels.swiglu.html | 12 +- docs/api/kernels.utils.html | 12 +- docs/api/loaders.adapter.html | 12 +- docs/api/loaders.constants.html | 12 +- docs/api/loaders.model.html | 12 +- docs/api/loaders.patch_manager.html | 12 +- docs/api/loaders.processor.html | 12 +- docs/api/loaders.tokenizer.html | 12 +- docs/api/logging_config.html | 12 +- docs/api/models.mamba.modeling_mamba.html | 12 +- .../monkeypatch.btlm_attn_hijack_flash.html | 12 +- ...onkeypatch.data.batch_dataset_fetcher.html | 12 +- ...ch.gradient_checkpointing.offload_cpu.html | 12 +- ...h.gradient_checkpointing.offload_disk.html | 12 +- .../monkeypatch.llama_attn_hijack_flash.html | 12 +- ...onkeypatch.llama_attn_hijack_xformers.html | 12 +- docs/api/monkeypatch.llama_expand_mask.html | 12 +- .../monkeypatch.llama_patch_multipack.html | 12 +- docs/api/monkeypatch.lora_kernels.html | 12 +- ...monkeypatch.mistral_attn_hijack_flash.html | 12 +- docs/api/monkeypatch.mixtral.html | 12 +- docs/api/monkeypatch.multipack.html | 12 +- docs/api/monkeypatch.relora.html | 12 +- ...onkeypatch.stablelm_attn_hijack_flash.html | 12 +- docs/api/monkeypatch.trainer_fsdp_optim.html | 12 +- .../monkeypatch.transformers_fa_utils.html | 12 +- docs/api/monkeypatch.unsloth_.html | 12 +- docs/api/monkeypatch.utils.html | 12 +- docs/api/prompt_strategies.alpaca_chat.html | 12 +- .../prompt_strategies.alpaca_instruct.html | 12 +- .../prompt_strategies.alpaca_w_system.html | 12 +- docs/api/prompt_strategies.base.html | 12 +- ...rompt_strategies.bradley_terry.llama3.html | 12 +- docs/api/prompt_strategies.chat_template.html | 12 +- docs/api/prompt_strategies.completion.html | 12 +- .../prompt_strategies.dpo.chat_template.html | 12 +- docs/api/prompt_strategies.dpo.chatml.html | 12 +- docs/api/prompt_strategies.dpo.llama3.html | 12 +- .../prompt_strategies.dpo.passthrough.html | 12 +- .../prompt_strategies.dpo.user_defined.html | 12 +- docs/api/prompt_strategies.dpo.zephyr.html | 12 +- docs/api/prompt_strategies.input_output.html | 12 +- docs/api/prompt_strategies.kto.chatml.html | 12 +- docs/api/prompt_strategies.kto.llama3.html | 12 +- .../prompt_strategies.kto.user_defined.html | 12 +- docs/api/prompt_strategies.llama2_chat.html | 12 +- docs/api/prompt_strategies.messages.chat.html | 12 +- docs/api/prompt_strategies.metharme.html | 12 +- docs/api/prompt_strategies.orcamini.html | 12 +- .../prompt_strategies.orpo.chat_template.html | 12 +- docs/api/prompt_strategies.pygmalion.html | 12 +- ...prompt_strategies.stepwise_supervised.html | 12 +- docs/api/prompt_strategies.user_defined.html | 12 +- docs/api/prompt_tokenizers.html | 12 +- docs/api/train.html | 12 +- docs/api/utils.bench.html | 12 +- docs/api/utils.callbacks.comet_.html | 12 +- docs/api/utils.callbacks.lisa.html | 12 +- docs/api/utils.callbacks.mlflow_.html | 12 +- docs/api/utils.callbacks.perplexity.html | 12 +- docs/api/utils.callbacks.profiler.html | 12 +- docs/api/utils.callbacks.qat.html | 12 +- docs/api/utils.chat_templates.html | 12 +- docs/api/utils.collators.batching.html | 12 +- docs/api/utils.collators.core.html | 12 +- docs/api/utils.collators.mamba.html | 12 +- docs/api/utils.collators.mm_chat.html | 12 +- .../utils.ctx_managers.sequence_parallel.html | 12 +- docs/api/utils.data.pretraining.html | 12 +- docs/api/utils.data.sft.html | 12 +- docs/api/utils.dict.html | 12 +- docs/api/utils.distributed.html | 12 +- docs/api/utils.freeze.html | 12 +- docs/api/utils.lora.html | 12 +- docs/api/utils.model_shard_quant.html | 12 +- docs/api/utils.optimizers.adopt.html | 12 +- docs/api/utils.quantization.html | 12 +- docs/api/utils.samplers.multipack.html | 12 +- docs/api/utils.schedulers.html | 12 +- docs/api/utils.schemas.config.html | 12 +- docs/api/utils.schemas.datasets.html | 12 +- docs/api/utils.schemas.enums.html | 12 +- docs/api/utils.schemas.integrations.html | 12 +- docs/api/utils.schemas.model.html | 12 +- docs/api/utils.schemas.multimodal.html | 12 +- docs/api/utils.schemas.peft.html | 12 +- docs/api/utils.schemas.training.html | 12 +- docs/api/utils.schemas.trl.html | 12 +- docs/api/utils.schemas.utils.html | 12 +- docs/api/utils.tokenization.html | 12 +- docs/api/utils.trainer.html | 12 +- docs/batch_vs_grad.html | 12 +- docs/cli.html | 12 +- docs/config-reference.html | 12 +- docs/custom_integrations.html | 12 +- docs/dataset-formats/conversation.html | 12 +- docs/dataset-formats/index.html | 12 +- docs/dataset-formats/inst_tune.html | 12 +- docs/dataset-formats/pretraining.html | 12 +- docs/dataset-formats/stepwise_supervised.html | 12 +- docs/dataset-formats/template_free.html | 12 +- docs/dataset-formats/tokenized.html | 12 +- docs/dataset_loading.html | 12 +- docs/dataset_preprocessing.html | 12 +- docs/debugging.html | 12 +- docs/docker.html | 12 +- docs/faq.html | 12 +- docs/fsdp_qlora.html | 12 +- docs/getting-started.html | 12 +- docs/gradient_checkpointing.html | 12 +- docs/inference.html | 12 +- docs/input_output.html | 12 +- docs/installation.html | 12 +- docs/lora_optims.html | 12 +- docs/lr_groups.html | 12 +- docs/mac.html | 12 +- docs/mixed_precision.html | 12 +- docs/multi-gpu.html | 12 +- docs/multi-node.html | 12 +- docs/multimodal.html | 12 +- docs/multipack.html | 12 +- docs/nccl.html | 12 +- docs/nd_parallelism.html | 12 +- docs/optimizers.html | 139 +++++- docs/qat.html | 12 +- docs/quantize.html | 12 +- docs/ray-integration.html | 12 +- docs/reward_modelling.html | 12 +- docs/rlhf.html | 12 +- docs/sequence_parallelism.html | 12 +- docs/torchao.html | 12 +- docs/unsloth.html | 12 +- .../colab-axolotl-example.html | 12 +- index.html | 12 +- search.json | 26 +- sitemap.xml | 396 +++++++++--------- src/axolotl/integrations/LICENSE.html | 12 +- .../cut_cross_entropy/ACKNOWLEDGEMENTS.html | 12 +- 201 files changed, 1528 insertions(+), 1399 deletions(-) diff --git a/.nojekyll b/.nojekyll index cbc36b084..1cac6b7cb 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -221c970e \ No newline at end of file +441b49d6 \ No newline at end of file diff --git a/FAQS.html b/FAQS.html index c5e053dbd..9765948f5 100644 --- a/FAQS.html +++ b/FAQS.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/TODO.html b/TODO.html index c6537f758..b734f4ba8 100644 --- a/TODO.html +++ b/TODO.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/amd_hpc.html b/docs/amd_hpc.html index d5134510e..5bb582072 100644 --- a/docs/amd_hpc.html +++ b/docs/amd_hpc.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.args.html b/docs/api/cli.args.html index f7db753d5..c6e8d49a9 100644 --- a/docs/api/cli.args.html +++ b/docs/api/cli.args.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.art.html b/docs/api/cli.art.html index 8529882da..677a96af3 100644 --- a/docs/api/cli.art.html +++ b/docs/api/cli.art.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.checks.html b/docs/api/cli.checks.html index df26f6c4c..d952af3f3 100644 --- a/docs/api/cli.checks.html +++ b/docs/api/cli.checks.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.cloud.base.html b/docs/api/cli.cloud.base.html index 8dee5f84b..320c3605e 100644 --- a/docs/api/cli.cloud.base.html +++ b/docs/api/cli.cloud.base.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.cloud.modal_.html b/docs/api/cli.cloud.modal_.html index f26b05c92..6be132d3a 100644 --- a/docs/api/cli.cloud.modal_.html +++ b/docs/api/cli.cloud.modal_.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.config.html b/docs/api/cli.config.html index ef7db5960..4e51cf2bc 100644 --- a/docs/api/cli.config.html +++ b/docs/api/cli.config.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.delinearize_llama4.html b/docs/api/cli.delinearize_llama4.html index 17fc12aef..4cd18e205 100644 --- a/docs/api/cli.delinearize_llama4.html +++ b/docs/api/cli.delinearize_llama4.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.evaluate.html b/docs/api/cli.evaluate.html index 71a3ae7b7..9a9d3c07e 100644 --- a/docs/api/cli.evaluate.html +++ b/docs/api/cli.evaluate.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.inference.html b/docs/api/cli.inference.html index 054717143..e7dfc79bd 100644 --- a/docs/api/cli.inference.html +++ b/docs/api/cli.inference.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.main.html b/docs/api/cli.main.html index e0b7a8acf..5c55de359 100644 --- a/docs/api/cli.main.html +++ b/docs/api/cli.main.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.merge_lora.html b/docs/api/cli.merge_lora.html index 3ffc9ee74..abce97303 100644 --- a/docs/api/cli.merge_lora.html +++ b/docs/api/cli.merge_lora.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.merge_sharded_fsdp_weights.html b/docs/api/cli.merge_sharded_fsdp_weights.html index c4561ffad..8b1f7c83e 100644 --- a/docs/api/cli.merge_sharded_fsdp_weights.html +++ b/docs/api/cli.merge_sharded_fsdp_weights.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.preprocess.html b/docs/api/cli.preprocess.html index 88375acc5..6afe5c859 100644 --- a/docs/api/cli.preprocess.html +++ b/docs/api/cli.preprocess.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.quantize.html b/docs/api/cli.quantize.html index c3fa50b96..16f8d25d6 100644 --- a/docs/api/cli.quantize.html +++ b/docs/api/cli.quantize.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.train.html b/docs/api/cli.train.html index 524738207..5889c365a 100644 --- a/docs/api/cli.train.html +++ b/docs/api/cli.train.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.utils.args.html b/docs/api/cli.utils.args.html index e0becceff..fc90a6529 100644 --- a/docs/api/cli.utils.args.html +++ b/docs/api/cli.utils.args.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.utils.fetch.html b/docs/api/cli.utils.fetch.html index c514e02da..c8b12c19c 100644 --- a/docs/api/cli.utils.fetch.html +++ b/docs/api/cli.utils.fetch.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.utils.html b/docs/api/cli.utils.html index 67f4cffe7..8ce9638a3 100644 --- a/docs/api/cli.utils.html +++ b/docs/api/cli.utils.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.utils.load.html b/docs/api/cli.utils.load.html index 9a04b0d22..934088377 100644 --- a/docs/api/cli.utils.load.html +++ b/docs/api/cli.utils.load.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.utils.sweeps.html b/docs/api/cli.utils.sweeps.html index 0a2224331..c49a5c1fc 100644 --- a/docs/api/cli.utils.sweeps.html +++ b/docs/api/cli.utils.sweeps.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.utils.train.html b/docs/api/cli.utils.train.html index c8cac03bd..3603297f7 100644 --- a/docs/api/cli.utils.train.html +++ b/docs/api/cli.utils.train.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/cli.vllm_serve.html b/docs/api/cli.vllm_serve.html index 2450ef39b..1b4bc8122 100644 --- a/docs/api/cli.vllm_serve.html +++ b/docs/api/cli.vllm_serve.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/common.architectures.html b/docs/api/common.architectures.html index 2c570aceb..2474d6f0d 100644 --- a/docs/api/common.architectures.html +++ b/docs/api/common.architectures.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/common.const.html b/docs/api/common.const.html index 2b6532346..f053ea96f 100644 --- a/docs/api/common.const.html +++ b/docs/api/common.const.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/common.datasets.html b/docs/api/common.datasets.html index 1e5d71f8d..b1b60eb5e 100644 --- a/docs/api/common.datasets.html +++ b/docs/api/common.datasets.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/convert.html b/docs/api/convert.html index 8440d4c37..106c4b837 100644 --- a/docs/api/convert.html +++ b/docs/api/convert.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.builders.base.html b/docs/api/core.builders.base.html index bbdb3b29c..be8229771 100644 --- a/docs/api/core.builders.base.html +++ b/docs/api/core.builders.base.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.builders.causal.html b/docs/api/core.builders.causal.html index 6287c4ec1..dfb36d7cc 100644 --- a/docs/api/core.builders.causal.html +++ b/docs/api/core.builders.causal.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.builders.rl.html b/docs/api/core.builders.rl.html index b5f457059..bec8514de 100644 --- a/docs/api/core.builders.rl.html +++ b/docs/api/core.builders.rl.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.chat.format.chatml.html b/docs/api/core.chat.format.chatml.html index ff3034f78..c4eaf9291 100644 --- a/docs/api/core.chat.format.chatml.html +++ b/docs/api/core.chat.format.chatml.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.chat.format.llama3x.html b/docs/api/core.chat.format.llama3x.html index 64d1f88f2..ee3776331 100644 --- a/docs/api/core.chat.format.llama3x.html +++ b/docs/api/core.chat.format.llama3x.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.chat.format.shared.html b/docs/api/core.chat.format.shared.html index 1b629a721..8366afef8 100644 --- a/docs/api/core.chat.format.shared.html +++ b/docs/api/core.chat.format.shared.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.chat.messages.html b/docs/api/core.chat.messages.html index 8a64be74d..5fcb12e36 100644 --- a/docs/api/core.chat.messages.html +++ b/docs/api/core.chat.messages.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.datasets.chat.html b/docs/api/core.datasets.chat.html index 89127544d..4ed5dc8d6 100644 --- a/docs/api/core.datasets.chat.html +++ b/docs/api/core.datasets.chat.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.datasets.transforms.chat_builder.html b/docs/api/core.datasets.transforms.chat_builder.html index b28cd461d..07a2d620a 100644 --- a/docs/api/core.datasets.transforms.chat_builder.html +++ b/docs/api/core.datasets.transforms.chat_builder.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.trainers.base.html b/docs/api/core.trainers.base.html index e4cd44d03..17baee7e7 100644 --- a/docs/api/core.trainers.base.html +++ b/docs/api/core.trainers.base.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.trainers.dpo.trainer.html b/docs/api/core.trainers.dpo.trainer.html index d6afc908d..ffce396ab 100644 --- a/docs/api/core.trainers.dpo.trainer.html +++ b/docs/api/core.trainers.dpo.trainer.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.trainers.grpo.sampler.html b/docs/api/core.trainers.grpo.sampler.html index e501a4e70..1ed05ab6f 100644 --- a/docs/api/core.trainers.grpo.sampler.html +++ b/docs/api/core.trainers.grpo.sampler.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.trainers.grpo.trainer.html b/docs/api/core.trainers.grpo.trainer.html index 75fccc047..7c09c0d05 100644 --- a/docs/api/core.trainers.grpo.trainer.html +++ b/docs/api/core.trainers.grpo.trainer.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.trainers.mamba.html b/docs/api/core.trainers.mamba.html index 850cc5e39..7a3ab28d1 100644 --- a/docs/api/core.trainers.mamba.html +++ b/docs/api/core.trainers.mamba.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.trainers.mixins.optimizer.html b/docs/api/core.trainers.mixins.optimizer.html index 748c27c9b..2f12e2c14 100644 --- a/docs/api/core.trainers.mixins.optimizer.html +++ b/docs/api/core.trainers.mixins.optimizer.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.trainers.mixins.rng_state_loader.html b/docs/api/core.trainers.mixins.rng_state_loader.html index 60527709d..4fb76c18e 100644 --- a/docs/api/core.trainers.mixins.rng_state_loader.html +++ b/docs/api/core.trainers.mixins.rng_state_loader.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.trainers.mixins.scheduler.html b/docs/api/core.trainers.mixins.scheduler.html index bba9b564c..5a869e12f 100644 --- a/docs/api/core.trainers.mixins.scheduler.html +++ b/docs/api/core.trainers.mixins.scheduler.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.trainers.trl.html b/docs/api/core.trainers.trl.html index 12fd44817..12059b4a9 100644 --- a/docs/api/core.trainers.trl.html +++ b/docs/api/core.trainers.trl.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.trainers.utils.html b/docs/api/core.trainers.utils.html index 774545ae6..d4bdd368e 100644 --- a/docs/api/core.trainers.utils.html +++ b/docs/api/core.trainers.utils.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/core.training_args.html b/docs/api/core.training_args.html index c230acc02..5a6d481fa 100644 --- a/docs/api/core.training_args.html +++ b/docs/api/core.training_args.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/datasets.html b/docs/api/datasets.html index ae238e2cd..f08b08bc5 100644 --- a/docs/api/datasets.html +++ b/docs/api/datasets.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/evaluate.html b/docs/api/evaluate.html index 6b913c43a..7574beeb1 100644 --- a/docs/api/evaluate.html +++ b/docs/api/evaluate.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/index.html b/docs/api/index.html index 6f023b585..c64d1beb3 100644 --- a/docs/api/index.html +++ b/docs/api/index.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/integrations.base.html b/docs/api/integrations.base.html index b28a53fca..f8fa246a1 100644 --- a/docs/api/integrations.base.html +++ b/docs/api/integrations.base.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/integrations.cut_cross_entropy.args.html b/docs/api/integrations.cut_cross_entropy.args.html index 6f5fde91d..e99e4ae1a 100644 --- a/docs/api/integrations.cut_cross_entropy.args.html +++ b/docs/api/integrations.cut_cross_entropy.args.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/integrations.grokfast.optimizer.html b/docs/api/integrations.grokfast.optimizer.html index e9602e31e..0c4990dbb 100644 --- a/docs/api/integrations.grokfast.optimizer.html +++ b/docs/api/integrations.grokfast.optimizer.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/integrations.kd.trainer.html b/docs/api/integrations.kd.trainer.html index d001df293..9680f1a69 100644 --- a/docs/api/integrations.kd.trainer.html +++ b/docs/api/integrations.kd.trainer.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/integrations.liger.args.html b/docs/api/integrations.liger.args.html index 7588d173c..80e8ea06c 100644 --- a/docs/api/integrations.liger.args.html +++ b/docs/api/integrations.liger.args.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/integrations.lm_eval.args.html b/docs/api/integrations.lm_eval.args.html index 3c786c608..a7097b117 100644 --- a/docs/api/integrations.lm_eval.args.html +++ b/docs/api/integrations.lm_eval.args.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/integrations.spectrum.args.html b/docs/api/integrations.spectrum.args.html index 4b25bb9f4..4455c8ef2 100644 --- a/docs/api/integrations.spectrum.args.html +++ b/docs/api/integrations.spectrum.args.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/kernels.geglu.html b/docs/api/kernels.geglu.html index 1ec7f39f9..5c6137b2b 100644 --- a/docs/api/kernels.geglu.html +++ b/docs/api/kernels.geglu.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/kernels.lora.html b/docs/api/kernels.lora.html index e0449c6e6..12b85e9a0 100644 --- a/docs/api/kernels.lora.html +++ b/docs/api/kernels.lora.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/kernels.quantize.html b/docs/api/kernels.quantize.html index 4e9f09ff8..a5120f000 100644 --- a/docs/api/kernels.quantize.html +++ b/docs/api/kernels.quantize.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/kernels.swiglu.html b/docs/api/kernels.swiglu.html index ac08c6fe4..3fb9dc76d 100644 --- a/docs/api/kernels.swiglu.html +++ b/docs/api/kernels.swiglu.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/kernels.utils.html b/docs/api/kernels.utils.html index 529564083..85676d4f1 100644 --- a/docs/api/kernels.utils.html +++ b/docs/api/kernels.utils.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/loaders.adapter.html b/docs/api/loaders.adapter.html index e4f94ceff..e5c694b3c 100644 --- a/docs/api/loaders.adapter.html +++ b/docs/api/loaders.adapter.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/loaders.constants.html b/docs/api/loaders.constants.html index 41103eab9..efdca665d 100644 --- a/docs/api/loaders.constants.html +++ b/docs/api/loaders.constants.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/loaders.model.html b/docs/api/loaders.model.html index 299fa5f5b..137b43b77 100644 --- a/docs/api/loaders.model.html +++ b/docs/api/loaders.model.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/loaders.patch_manager.html b/docs/api/loaders.patch_manager.html index b4cc45141..7bf34f74f 100644 --- a/docs/api/loaders.patch_manager.html +++ b/docs/api/loaders.patch_manager.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/loaders.processor.html b/docs/api/loaders.processor.html index fa367ebc5..a95375d40 100644 --- a/docs/api/loaders.processor.html +++ b/docs/api/loaders.processor.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/loaders.tokenizer.html b/docs/api/loaders.tokenizer.html index ac32c2d9b..babe005c7 100644 --- a/docs/api/loaders.tokenizer.html +++ b/docs/api/loaders.tokenizer.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/logging_config.html b/docs/api/logging_config.html index 75175dc6e..c6241bb65 100644 --- a/docs/api/logging_config.html +++ b/docs/api/logging_config.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/models.mamba.modeling_mamba.html b/docs/api/models.mamba.modeling_mamba.html index 8acd22558..522258607 100644 --- a/docs/api/models.mamba.modeling_mamba.html +++ b/docs/api/models.mamba.modeling_mamba.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.btlm_attn_hijack_flash.html b/docs/api/monkeypatch.btlm_attn_hijack_flash.html index b30b57610..24fd5003c 100644 --- a/docs/api/monkeypatch.btlm_attn_hijack_flash.html +++ b/docs/api/monkeypatch.btlm_attn_hijack_flash.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.data.batch_dataset_fetcher.html b/docs/api/monkeypatch.data.batch_dataset_fetcher.html index 75b912fcb..a80fbfae4 100644 --- a/docs/api/monkeypatch.data.batch_dataset_fetcher.html +++ b/docs/api/monkeypatch.data.batch_dataset_fetcher.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html b/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html index 64a33875d..7779a116d 100644 --- a/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html +++ b/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html b/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html index bca97eccf..4f6c0f1cb 100644 --- a/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html +++ b/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.llama_attn_hijack_flash.html b/docs/api/monkeypatch.llama_attn_hijack_flash.html index 80c876f7b..01db9d0c7 100644 --- a/docs/api/monkeypatch.llama_attn_hijack_flash.html +++ b/docs/api/monkeypatch.llama_attn_hijack_flash.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.llama_attn_hijack_xformers.html b/docs/api/monkeypatch.llama_attn_hijack_xformers.html index 4ea8b8e6e..0df62fb47 100644 --- a/docs/api/monkeypatch.llama_attn_hijack_xformers.html +++ b/docs/api/monkeypatch.llama_attn_hijack_xformers.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.llama_expand_mask.html b/docs/api/monkeypatch.llama_expand_mask.html index 2f8c5cffd..428a644ef 100644 --- a/docs/api/monkeypatch.llama_expand_mask.html +++ b/docs/api/monkeypatch.llama_expand_mask.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.llama_patch_multipack.html b/docs/api/monkeypatch.llama_patch_multipack.html index 8d07720e0..17c7b4580 100644 --- a/docs/api/monkeypatch.llama_patch_multipack.html +++ b/docs/api/monkeypatch.llama_patch_multipack.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.lora_kernels.html b/docs/api/monkeypatch.lora_kernels.html index e9f0caf0e..1be3ce87d 100644 --- a/docs/api/monkeypatch.lora_kernels.html +++ b/docs/api/monkeypatch.lora_kernels.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.mistral_attn_hijack_flash.html b/docs/api/monkeypatch.mistral_attn_hijack_flash.html index 448688553..2480c48b9 100644 --- a/docs/api/monkeypatch.mistral_attn_hijack_flash.html +++ b/docs/api/monkeypatch.mistral_attn_hijack_flash.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.mixtral.html b/docs/api/monkeypatch.mixtral.html index bcd26d530..7cd659378 100644 --- a/docs/api/monkeypatch.mixtral.html +++ b/docs/api/monkeypatch.mixtral.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.multipack.html b/docs/api/monkeypatch.multipack.html index ea37899a1..3b510150a 100644 --- a/docs/api/monkeypatch.multipack.html +++ b/docs/api/monkeypatch.multipack.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.relora.html b/docs/api/monkeypatch.relora.html index 009c0612a..fc4d09018 100644 --- a/docs/api/monkeypatch.relora.html +++ b/docs/api/monkeypatch.relora.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.stablelm_attn_hijack_flash.html b/docs/api/monkeypatch.stablelm_attn_hijack_flash.html index 613f15ff5..d144d672f 100644 --- a/docs/api/monkeypatch.stablelm_attn_hijack_flash.html +++ b/docs/api/monkeypatch.stablelm_attn_hijack_flash.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.trainer_fsdp_optim.html b/docs/api/monkeypatch.trainer_fsdp_optim.html index fe0556584..9b06d512a 100644 --- a/docs/api/monkeypatch.trainer_fsdp_optim.html +++ b/docs/api/monkeypatch.trainer_fsdp_optim.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.transformers_fa_utils.html b/docs/api/monkeypatch.transformers_fa_utils.html index 493fd844c..f1d5e5660 100644 --- a/docs/api/monkeypatch.transformers_fa_utils.html +++ b/docs/api/monkeypatch.transformers_fa_utils.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.unsloth_.html b/docs/api/monkeypatch.unsloth_.html index 2ca6d7a46..f7478a231 100644 --- a/docs/api/monkeypatch.unsloth_.html +++ b/docs/api/monkeypatch.unsloth_.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/monkeypatch.utils.html b/docs/api/monkeypatch.utils.html index 20b8a633c..9de303fef 100644 --- a/docs/api/monkeypatch.utils.html +++ b/docs/api/monkeypatch.utils.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.alpaca_chat.html b/docs/api/prompt_strategies.alpaca_chat.html index 6d60fa213..02f77a413 100644 --- a/docs/api/prompt_strategies.alpaca_chat.html +++ b/docs/api/prompt_strategies.alpaca_chat.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.alpaca_instruct.html b/docs/api/prompt_strategies.alpaca_instruct.html index 0e66a2304..05158520a 100644 --- a/docs/api/prompt_strategies.alpaca_instruct.html +++ b/docs/api/prompt_strategies.alpaca_instruct.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.alpaca_w_system.html b/docs/api/prompt_strategies.alpaca_w_system.html index 9907d1986..ceb8da7ab 100644 --- a/docs/api/prompt_strategies.alpaca_w_system.html +++ b/docs/api/prompt_strategies.alpaca_w_system.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.base.html b/docs/api/prompt_strategies.base.html index 70b51c650..dca911fe6 100644 --- a/docs/api/prompt_strategies.base.html +++ b/docs/api/prompt_strategies.base.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.bradley_terry.llama3.html b/docs/api/prompt_strategies.bradley_terry.llama3.html index e1054b580..289434017 100644 --- a/docs/api/prompt_strategies.bradley_terry.llama3.html +++ b/docs/api/prompt_strategies.bradley_terry.llama3.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.chat_template.html b/docs/api/prompt_strategies.chat_template.html index 38208d014..f3b2b1187 100644 --- a/docs/api/prompt_strategies.chat_template.html +++ b/docs/api/prompt_strategies.chat_template.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.completion.html b/docs/api/prompt_strategies.completion.html index f07b71b5d..8f4c321a3 100644 --- a/docs/api/prompt_strategies.completion.html +++ b/docs/api/prompt_strategies.completion.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.dpo.chat_template.html b/docs/api/prompt_strategies.dpo.chat_template.html index 571cbd07f..d25d98aa8 100644 --- a/docs/api/prompt_strategies.dpo.chat_template.html +++ b/docs/api/prompt_strategies.dpo.chat_template.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.dpo.chatml.html b/docs/api/prompt_strategies.dpo.chatml.html index 94b4c98a1..c10ca4af0 100644 --- a/docs/api/prompt_strategies.dpo.chatml.html +++ b/docs/api/prompt_strategies.dpo.chatml.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.dpo.llama3.html b/docs/api/prompt_strategies.dpo.llama3.html index 9598f22a6..ff000794a 100644 --- a/docs/api/prompt_strategies.dpo.llama3.html +++ b/docs/api/prompt_strategies.dpo.llama3.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.dpo.passthrough.html b/docs/api/prompt_strategies.dpo.passthrough.html index eac474ae7..f7e652968 100644 --- a/docs/api/prompt_strategies.dpo.passthrough.html +++ b/docs/api/prompt_strategies.dpo.passthrough.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.dpo.user_defined.html b/docs/api/prompt_strategies.dpo.user_defined.html index 9b9e55711..0a0e4c79f 100644 --- a/docs/api/prompt_strategies.dpo.user_defined.html +++ b/docs/api/prompt_strategies.dpo.user_defined.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.dpo.zephyr.html b/docs/api/prompt_strategies.dpo.zephyr.html index 016de7436..75805070c 100644 --- a/docs/api/prompt_strategies.dpo.zephyr.html +++ b/docs/api/prompt_strategies.dpo.zephyr.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.input_output.html b/docs/api/prompt_strategies.input_output.html index fbe89aaa8..9557fcdca 100644 --- a/docs/api/prompt_strategies.input_output.html +++ b/docs/api/prompt_strategies.input_output.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.kto.chatml.html b/docs/api/prompt_strategies.kto.chatml.html index e8ed491e0..fada8ebf4 100644 --- a/docs/api/prompt_strategies.kto.chatml.html +++ b/docs/api/prompt_strategies.kto.chatml.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.kto.llama3.html b/docs/api/prompt_strategies.kto.llama3.html index 2ae1d953e..96476fe83 100644 --- a/docs/api/prompt_strategies.kto.llama3.html +++ b/docs/api/prompt_strategies.kto.llama3.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.kto.user_defined.html b/docs/api/prompt_strategies.kto.user_defined.html index 06bede72d..aa5f21782 100644 --- a/docs/api/prompt_strategies.kto.user_defined.html +++ b/docs/api/prompt_strategies.kto.user_defined.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.llama2_chat.html b/docs/api/prompt_strategies.llama2_chat.html index 55bfb2fcb..7910effdd 100644 --- a/docs/api/prompt_strategies.llama2_chat.html +++ b/docs/api/prompt_strategies.llama2_chat.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.messages.chat.html b/docs/api/prompt_strategies.messages.chat.html index a370f30e8..1136a93b4 100644 --- a/docs/api/prompt_strategies.messages.chat.html +++ b/docs/api/prompt_strategies.messages.chat.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.metharme.html b/docs/api/prompt_strategies.metharme.html index 5e532e5b1..38ccb6a4b 100644 --- a/docs/api/prompt_strategies.metharme.html +++ b/docs/api/prompt_strategies.metharme.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.orcamini.html b/docs/api/prompt_strategies.orcamini.html index ed0f8a987..3bc29a7c4 100644 --- a/docs/api/prompt_strategies.orcamini.html +++ b/docs/api/prompt_strategies.orcamini.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.orpo.chat_template.html b/docs/api/prompt_strategies.orpo.chat_template.html index 998e90121..4b8a03384 100644 --- a/docs/api/prompt_strategies.orpo.chat_template.html +++ b/docs/api/prompt_strategies.orpo.chat_template.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.pygmalion.html b/docs/api/prompt_strategies.pygmalion.html index f01da8061..91f57cf6f 100644 --- a/docs/api/prompt_strategies.pygmalion.html +++ b/docs/api/prompt_strategies.pygmalion.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.stepwise_supervised.html b/docs/api/prompt_strategies.stepwise_supervised.html index 291d9eeec..018b6ebae 100644 --- a/docs/api/prompt_strategies.stepwise_supervised.html +++ b/docs/api/prompt_strategies.stepwise_supervised.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_strategies.user_defined.html b/docs/api/prompt_strategies.user_defined.html index a86e4c0bf..b7dd282bc 100644 --- a/docs/api/prompt_strategies.user_defined.html +++ b/docs/api/prompt_strategies.user_defined.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/prompt_tokenizers.html b/docs/api/prompt_tokenizers.html index 6f4f9b19d..5fee52f07 100644 --- a/docs/api/prompt_tokenizers.html +++ b/docs/api/prompt_tokenizers.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/train.html b/docs/api/train.html index cc4baa60b..e90df6286 100644 --- a/docs/api/train.html +++ b/docs/api/train.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.bench.html b/docs/api/utils.bench.html index ea4301af5..696afc701 100644 --- a/docs/api/utils.bench.html +++ b/docs/api/utils.bench.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.callbacks.comet_.html b/docs/api/utils.callbacks.comet_.html index 51e3b4ef5..d286b24f6 100644 --- a/docs/api/utils.callbacks.comet_.html +++ b/docs/api/utils.callbacks.comet_.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.callbacks.lisa.html b/docs/api/utils.callbacks.lisa.html index 0de4fb42c..4e668e973 100644 --- a/docs/api/utils.callbacks.lisa.html +++ b/docs/api/utils.callbacks.lisa.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.callbacks.mlflow_.html b/docs/api/utils.callbacks.mlflow_.html index 9197018ce..dd63469e2 100644 --- a/docs/api/utils.callbacks.mlflow_.html +++ b/docs/api/utils.callbacks.mlflow_.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.callbacks.perplexity.html b/docs/api/utils.callbacks.perplexity.html index 4eced4e23..bba0ade27 100644 --- a/docs/api/utils.callbacks.perplexity.html +++ b/docs/api/utils.callbacks.perplexity.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.callbacks.profiler.html b/docs/api/utils.callbacks.profiler.html index 2f06b5211..fa162c302 100644 --- a/docs/api/utils.callbacks.profiler.html +++ b/docs/api/utils.callbacks.profiler.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.callbacks.qat.html b/docs/api/utils.callbacks.qat.html index debb7795f..b3ca6cd16 100644 --- a/docs/api/utils.callbacks.qat.html +++ b/docs/api/utils.callbacks.qat.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.chat_templates.html b/docs/api/utils.chat_templates.html index f46439491..eea740009 100644 --- a/docs/api/utils.chat_templates.html +++ b/docs/api/utils.chat_templates.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.collators.batching.html b/docs/api/utils.collators.batching.html index 045ad6316..f2d2bdd0d 100644 --- a/docs/api/utils.collators.batching.html +++ b/docs/api/utils.collators.batching.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.collators.core.html b/docs/api/utils.collators.core.html index 4a6837701..9883fc6a9 100644 --- a/docs/api/utils.collators.core.html +++ b/docs/api/utils.collators.core.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.collators.mamba.html b/docs/api/utils.collators.mamba.html index a5f2ea353..14a43febc 100644 --- a/docs/api/utils.collators.mamba.html +++ b/docs/api/utils.collators.mamba.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.collators.mm_chat.html b/docs/api/utils.collators.mm_chat.html index 3496ac89e..a7b9ef2b9 100644 --- a/docs/api/utils.collators.mm_chat.html +++ b/docs/api/utils.collators.mm_chat.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.ctx_managers.sequence_parallel.html b/docs/api/utils.ctx_managers.sequence_parallel.html index 511ff2adb..3efe2a067 100644 --- a/docs/api/utils.ctx_managers.sequence_parallel.html +++ b/docs/api/utils.ctx_managers.sequence_parallel.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.data.pretraining.html b/docs/api/utils.data.pretraining.html index 642c446b0..baedc092c 100644 --- a/docs/api/utils.data.pretraining.html +++ b/docs/api/utils.data.pretraining.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.data.sft.html b/docs/api/utils.data.sft.html index 891cf18df..4df32b692 100644 --- a/docs/api/utils.data.sft.html +++ b/docs/api/utils.data.sft.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.dict.html b/docs/api/utils.dict.html index c241b0a24..4c04fa710 100644 --- a/docs/api/utils.dict.html +++ b/docs/api/utils.dict.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.distributed.html b/docs/api/utils.distributed.html index 97598f40e..8e578e2a2 100644 --- a/docs/api/utils.distributed.html +++ b/docs/api/utils.distributed.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.freeze.html b/docs/api/utils.freeze.html index d34cfff77..8a1cad3d9 100644 --- a/docs/api/utils.freeze.html +++ b/docs/api/utils.freeze.html @@ -419,6 +419,12 @@ window.Quarto = { Mixed Precision Training + + @@ -472,12 +478,6 @@ window.Quarto = { N-D Parallelism (Beta) - - diff --git a/docs/api/utils.lora.html b/docs/api/utils.lora.html index b1ca0e15c..eb16479e0 100644 --- a/docs/api/utils.lora.html +++ b/docs/api/utils.lora.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.model_shard_quant.html b/docs/api/utils.model_shard_quant.html index 0b33fc86e..5d66259fc 100644 --- a/docs/api/utils.model_shard_quant.html +++ b/docs/api/utils.model_shard_quant.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.optimizers.adopt.html b/docs/api/utils.optimizers.adopt.html index e15443539..fe107d3c8 100644 --- a/docs/api/utils.optimizers.adopt.html +++ b/docs/api/utils.optimizers.adopt.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.quantization.html b/docs/api/utils.quantization.html index 6b287f533..fd0697b7a 100644 --- a/docs/api/utils.quantization.html +++ b/docs/api/utils.quantization.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.samplers.multipack.html b/docs/api/utils.samplers.multipack.html index 3d0b5f3b0..82a245eca 100644 --- a/docs/api/utils.samplers.multipack.html +++ b/docs/api/utils.samplers.multipack.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.schedulers.html b/docs/api/utils.schedulers.html index 20b337d27..d1c980059 100644 --- a/docs/api/utils.schedulers.html +++ b/docs/api/utils.schedulers.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.schemas.config.html b/docs/api/utils.schemas.config.html index fb1871875..ba93d007f 100644 --- a/docs/api/utils.schemas.config.html +++ b/docs/api/utils.schemas.config.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.schemas.datasets.html b/docs/api/utils.schemas.datasets.html index aff8b2fc2..989a6b320 100644 --- a/docs/api/utils.schemas.datasets.html +++ b/docs/api/utils.schemas.datasets.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.schemas.enums.html b/docs/api/utils.schemas.enums.html index f63adcaef..ad4853a81 100644 --- a/docs/api/utils.schemas.enums.html +++ b/docs/api/utils.schemas.enums.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.schemas.integrations.html b/docs/api/utils.schemas.integrations.html index 47202ddff..c2cde1bd7 100644 --- a/docs/api/utils.schemas.integrations.html +++ b/docs/api/utils.schemas.integrations.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.schemas.model.html b/docs/api/utils.schemas.model.html index 50934d460..d92d20120 100644 --- a/docs/api/utils.schemas.model.html +++ b/docs/api/utils.schemas.model.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.schemas.multimodal.html b/docs/api/utils.schemas.multimodal.html index bfcfd7f62..424ecbf9f 100644 --- a/docs/api/utils.schemas.multimodal.html +++ b/docs/api/utils.schemas.multimodal.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.schemas.peft.html b/docs/api/utils.schemas.peft.html index 25d2e778c..afd69eeda 100644 --- a/docs/api/utils.schemas.peft.html +++ b/docs/api/utils.schemas.peft.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.schemas.training.html b/docs/api/utils.schemas.training.html index 211a54d1a..c50c4acce 100644 --- a/docs/api/utils.schemas.training.html +++ b/docs/api/utils.schemas.training.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.schemas.trl.html b/docs/api/utils.schemas.trl.html index 7b48ece6a..b0a825cc7 100644 --- a/docs/api/utils.schemas.trl.html +++ b/docs/api/utils.schemas.trl.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.schemas.utils.html b/docs/api/utils.schemas.utils.html index d25fffcf8..a8d4f917a 100644 --- a/docs/api/utils.schemas.utils.html +++ b/docs/api/utils.schemas.utils.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.tokenization.html b/docs/api/utils.tokenization.html index 6417d7c8f..9bae98b52 100644 --- a/docs/api/utils.tokenization.html +++ b/docs/api/utils.tokenization.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/api/utils.trainer.html b/docs/api/utils.trainer.html index 657b38df2..e54fe8848 100644 --- a/docs/api/utils.trainer.html +++ b/docs/api/utils.trainer.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/batch_vs_grad.html b/docs/batch_vs_grad.html index f4c9c1dd5..bf5c01c6e 100644 --- a/docs/batch_vs_grad.html +++ b/docs/batch_vs_grad.html @@ -356,6 +356,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -409,12 +415,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/cli.html b/docs/cli.html index ee2697a61..0f0f4b049 100644 --- a/docs/cli.html +++ b/docs/cli.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/config-reference.html b/docs/config-reference.html index 40876ec96..509df8239 100644 --- a/docs/config-reference.html +++ b/docs/config-reference.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/custom_integrations.html b/docs/custom_integrations.html index a5f6eab06..15028dff4 100644 --- a/docs/custom_integrations.html +++ b/docs/custom_integrations.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/dataset-formats/conversation.html b/docs/dataset-formats/conversation.html index c6b36386a..a571dd274 100644 --- a/docs/dataset-formats/conversation.html +++ b/docs/dataset-formats/conversation.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/dataset-formats/index.html b/docs/dataset-formats/index.html index ec7223659..66169c9f8 100644 --- a/docs/dataset-formats/index.html +++ b/docs/dataset-formats/index.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/dataset-formats/inst_tune.html b/docs/dataset-formats/inst_tune.html index 9b84c6a59..005484b64 100644 --- a/docs/dataset-formats/inst_tune.html +++ b/docs/dataset-formats/inst_tune.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/dataset-formats/pretraining.html b/docs/dataset-formats/pretraining.html index 3a3511220..0474a166a 100644 --- a/docs/dataset-formats/pretraining.html +++ b/docs/dataset-formats/pretraining.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/dataset-formats/stepwise_supervised.html b/docs/dataset-formats/stepwise_supervised.html index 54b5bda78..60d5c3ccd 100644 --- a/docs/dataset-formats/stepwise_supervised.html +++ b/docs/dataset-formats/stepwise_supervised.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/dataset-formats/template_free.html b/docs/dataset-formats/template_free.html index b0003c86b..2d9ca2a3a 100644 --- a/docs/dataset-formats/template_free.html +++ b/docs/dataset-formats/template_free.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/dataset-formats/tokenized.html b/docs/dataset-formats/tokenized.html index 32a4febdf..fc3442490 100644 --- a/docs/dataset-formats/tokenized.html +++ b/docs/dataset-formats/tokenized.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/dataset_loading.html b/docs/dataset_loading.html index e0ee6bf81..4d896bc59 100644 --- a/docs/dataset_loading.html +++ b/docs/dataset_loading.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/dataset_preprocessing.html b/docs/dataset_preprocessing.html index b93756e34..f1a4e4eb9 100644 --- a/docs/dataset_preprocessing.html +++ b/docs/dataset_preprocessing.html @@ -356,6 +356,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -409,12 +415,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/debugging.html b/docs/debugging.html index d81e2751f..2ff5354f2 100644 --- a/docs/debugging.html +++ b/docs/debugging.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/docker.html b/docs/docker.html index 8d01ce6f8..1f0735b46 100644 --- a/docs/docker.html +++ b/docs/docker.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/faq.html b/docs/faq.html index e78ef6bc0..e88013bbd 100644 --- a/docs/faq.html +++ b/docs/faq.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/fsdp_qlora.html b/docs/fsdp_qlora.html index 9b495fdf6..4ea6aa567 100644 --- a/docs/fsdp_qlora.html +++ b/docs/fsdp_qlora.html @@ -356,6 +356,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -409,12 +415,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/getting-started.html b/docs/getting-started.html index 459c31c1d..8cf6318af 100644 --- a/docs/getting-started.html +++ b/docs/getting-started.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/gradient_checkpointing.html b/docs/gradient_checkpointing.html index 483000e19..4e6f6491c 100644 --- a/docs/gradient_checkpointing.html +++ b/docs/gradient_checkpointing.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/inference.html b/docs/inference.html index 57bdf5161..028c0eeba 100644 --- a/docs/inference.html +++ b/docs/inference.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/input_output.html b/docs/input_output.html index 6cc7373e0..5363bdf44 100644 --- a/docs/input_output.html +++ b/docs/input_output.html @@ -356,6 +356,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -409,12 +415,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/installation.html b/docs/installation.html index e79f026d5..77d3c8ed6 100644 --- a/docs/installation.html +++ b/docs/installation.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/lora_optims.html b/docs/lora_optims.html index 6b5ccef6a..3887ad40b 100644 --- a/docs/lora_optims.html +++ b/docs/lora_optims.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/lr_groups.html b/docs/lr_groups.html index bfc43a824..f5c756b21 100644 --- a/docs/lr_groups.html +++ b/docs/lr_groups.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/mac.html b/docs/mac.html index 16182238b..2cbe47999 100644 --- a/docs/mac.html +++ b/docs/mac.html @@ -356,6 +356,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -409,12 +415,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/mixed_precision.html b/docs/mixed_precision.html index 3e6f97093..6b9a62afe 100644 --- a/docs/mixed_precision.html +++ b/docs/mixed_precision.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/multi-gpu.html b/docs/multi-gpu.html index d0e3728af..26b893b75 100644 --- a/docs/multi-gpu.html +++ b/docs/multi-gpu.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/multi-node.html b/docs/multi-node.html index 961034b95..b3abc6f39 100644 --- a/docs/multi-node.html +++ b/docs/multi-node.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/multimodal.html b/docs/multimodal.html index d42470b43..be23266f1 100644 --- a/docs/multimodal.html +++ b/docs/multimodal.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/multipack.html b/docs/multipack.html index 39dc927bf..59dc084d3 100644 --- a/docs/multipack.html +++ b/docs/multipack.html @@ -356,6 +356,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -409,12 +415,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/nccl.html b/docs/nccl.html index 768dd24fc..bfdd67556 100644 --- a/docs/nccl.html +++ b/docs/nccl.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/nd_parallelism.html b/docs/nd_parallelism.html index 29ee3f759..4e16909a9 100644 --- a/docs/nd_parallelism.html +++ b/docs/nd_parallelism.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/optimizers.html b/docs/optimizers.html index ba587061b..d6f6ea6c6 100644 --- a/docs/optimizers.html +++ b/docs/optimizers.html @@ -141,7 +141,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); - + @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - @@ -492,14 +492,25 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});

On this page

-
+

Optimizers

@@ -523,17 +534,113 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
-
-

Dion Optimizer

+
+

Overview

+

Axolotl supports all optimizers supported by transformers OptimizerNames

+

Here is a list of optimizers supported by transformers as of v4.54.0:

+
    +
  • adamw_torch
  • +
  • adamw_torch_fused
  • +
  • adamw_torch_xla
  • +
  • adamw_torch_npu_fused
  • +
  • adamw_apex_fused
  • +
  • adafactor
  • +
  • adamw_anyprecision
  • +
  • adamw_torch_4bit
  • +
  • adamw_torch_8bit
  • +
  • ademamix
  • +
  • sgd
  • +
  • adagrad
  • +
  • adamw_bnb_8bit
  • +
  • adamw_8bit # alias for adamw_bnb_8bit
  • +
  • ademamix_8bit
  • +
  • lion_8bit
  • +
  • lion_32bit
  • +
  • paged_adamw_32bit
  • +
  • paged_adamw_8bit
  • +
  • paged_ademamix_32bit
  • +
  • paged_ademamix_8bit
  • +
  • paged_lion_32bit
  • +
  • paged_lion_8bit
  • +
  • rmsprop
  • +
  • rmsprop_bnb
  • +
  • rmsprop_bnb_8bit
  • +
  • rmsprop_bnb_32bit
  • +
  • galore_adamw
  • +
  • galore_adamw_8bit
  • +
  • galore_adafactor
  • +
  • galore_adamw_layerwise
  • +
  • galore_adamw_8bit_layerwise
  • +
  • galore_adafactor_layerwise
  • +
  • lomo
  • +
  • adalomo
  • +
  • grokadamw
  • +
  • schedule_free_radam
  • +
  • schedule_free_adamw
  • +
  • schedule_free_sgd
  • +
  • apollo_adamw
  • +
  • apollo_adamw_layerwise
  • +
  • stable_adamw
  • +
+
+
+

Custom Optimizers

+

Enable custom optimizers by passing a string to the optimizer argument. Each optimizer will receive beta and epsilon args, however, some may accept additional args which are detailed below.

+
+

optimi_adamw

+
optimizer: optimi_adamw
+
+
+

ao_adamw_4bit

+

Deprecated: Please use adamw_torch_4bit.

+
+
+

ao_adamw_8bit

+

Deprecated: Please use adamw_torch_8bit.

+
+
+

ao_adamw_fp8

+
optimizer: ao_adamw_fp8
+
+
+

adopt_adamw

+

GitHub: https://github.com/iShohei220/adopt +Paper: https://arxiv.org/abs/2411.02853

+
optimizer: adopt_adamw
+
+
+

came_pytorch

+

GitHub: https://github.com/yangluo7/CAME/tree/master +Paper: https://arxiv.org/abs/2307.02047

+
optimizer: came_pytorch
+
+# optional args (defaults below)
+adam_beta1: 0.9
+adam_beta2: 0.999
+adam_beta3: 0.9999
+adam_epsilon: 1e-30
+adam_epsilon2: 1e-16
+
+
+

muon

+

Blog: https://kellerjordan.github.io/posts/muon/ +Paper: https://arxiv.org/abs/2502.16982v1

+
optimizer: muon
+
+
+

dion

Microsoft’s Dion (DIstributed OrthoNormalization) optimizer is a scalable and communication-efficient orthonormalizing optimizer that uses low-rank approximations to reduce gradient communication.

-

Usage:

-
optimizer: dion
-dion_lr: 0.01
-dion_momentum: 0.95
-lr: 0.00001  # learning rate for embeddings and parameters that fallback to AdamW
+

GitHub: https://github.com/microsoft/dion +Paper: https://arxiv.org/pdf/2504.05295 +Note: Implementation written for PyTorch 2.7+ for DTensor

+
optimizer: dion
+dion_lr: 0.01
+dion_momentum: 0.95
+lr: 0.00001  # learning rate for embeddings and parameters that fallback to AdamW
+
diff --git a/docs/qat.html b/docs/qat.html index 8e9a68dd0..10615bf85 100644 --- a/docs/qat.html +++ b/docs/qat.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/quantize.html b/docs/quantize.html index 38126c729..ece374408 100644 --- a/docs/quantize.html +++ b/docs/quantize.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/ray-integration.html b/docs/ray-integration.html index efee646a8..dd5f59986 100644 --- a/docs/ray-integration.html +++ b/docs/ray-integration.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/reward_modelling.html b/docs/reward_modelling.html index ff7f3b45c..a78b53b01 100644 --- a/docs/reward_modelling.html +++ b/docs/reward_modelling.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/rlhf.html b/docs/rlhf.html index 395008c01..9a6e3a1da 100644 --- a/docs/rlhf.html +++ b/docs/rlhf.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/sequence_parallelism.html b/docs/sequence_parallelism.html index 9f629b94c..d7b1a3778 100644 --- a/docs/sequence_parallelism.html +++ b/docs/sequence_parallelism.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/torchao.html b/docs/torchao.html index a180e2a2f..30827c648 100644 --- a/docs/torchao.html +++ b/docs/torchao.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/docs/unsloth.html b/docs/unsloth.html index bce720abc..63498afa5 100644 --- a/docs/unsloth.html +++ b/docs/unsloth.html @@ -391,6 +391,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -444,12 +450,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/examples/colab-notebooks/colab-axolotl-example.html b/examples/colab-notebooks/colab-axolotl-example.html index 5436e1899..78defa47c 100644 --- a/examples/colab-notebooks/colab-axolotl-example.html +++ b/examples/colab-notebooks/colab-axolotl-example.html @@ -394,6 +394,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -447,12 +453,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/index.html b/index.html index c8786b3d7..d3a839b0a 100644 --- a/index.html +++ b/index.html @@ -390,6 +390,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -443,12 +449,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/search.json b/search.json index 7f1ece496..9b01b6326 100644 --- a/search.json +++ b/search.json @@ -2159,9 +2159,31 @@ "href": "docs/optimizers.html", "title": "Optimizers", "section": "", - "text": "Dion Optimizer\nMicrosoft’s Dion (DIstributed OrthoNormalization) optimizer is a scalable and communication-efficient\northonormalizing optimizer that uses low-rank approximations to reduce gradient communication.\nUsage:\noptimizer: dion\ndion_lr: 0.01\ndion_momentum: 0.95\nlr: 0.00001 # learning rate for embeddings and parameters that fallback to AdamW", + "text": "Axolotl supports all optimizers supported by transformers OptimizerNames\nHere is a list of optimizers supported by transformers as of v4.54.0:\n\nadamw_torch\nadamw_torch_fused\nadamw_torch_xla\nadamw_torch_npu_fused\nadamw_apex_fused\nadafactor\nadamw_anyprecision\nadamw_torch_4bit\nadamw_torch_8bit\nademamix\nsgd\nadagrad\nadamw_bnb_8bit\nadamw_8bit # alias for adamw_bnb_8bit\nademamix_8bit\nlion_8bit\nlion_32bit\npaged_adamw_32bit\npaged_adamw_8bit\npaged_ademamix_32bit\npaged_ademamix_8bit\npaged_lion_32bit\npaged_lion_8bit\nrmsprop\nrmsprop_bnb\nrmsprop_bnb_8bit\nrmsprop_bnb_32bit\ngalore_adamw\ngalore_adamw_8bit\ngalore_adafactor\ngalore_adamw_layerwise\ngalore_adamw_8bit_layerwise\ngalore_adafactor_layerwise\nlomo\nadalomo\ngrokadamw\nschedule_free_radam\nschedule_free_adamw\nschedule_free_sgd\napollo_adamw\napollo_adamw_layerwise\nstable_adamw", "crumbs": [ - "Advanced Features", + "Core Concepts", + "Optimizers" + ] + }, + { + "objectID": "docs/optimizers.html#overview", + "href": "docs/optimizers.html#overview", + "title": "Optimizers", + "section": "", + "text": "Axolotl supports all optimizers supported by transformers OptimizerNames\nHere is a list of optimizers supported by transformers as of v4.54.0:\n\nadamw_torch\nadamw_torch_fused\nadamw_torch_xla\nadamw_torch_npu_fused\nadamw_apex_fused\nadafactor\nadamw_anyprecision\nadamw_torch_4bit\nadamw_torch_8bit\nademamix\nsgd\nadagrad\nadamw_bnb_8bit\nadamw_8bit # alias for adamw_bnb_8bit\nademamix_8bit\nlion_8bit\nlion_32bit\npaged_adamw_32bit\npaged_adamw_8bit\npaged_ademamix_32bit\npaged_ademamix_8bit\npaged_lion_32bit\npaged_lion_8bit\nrmsprop\nrmsprop_bnb\nrmsprop_bnb_8bit\nrmsprop_bnb_32bit\ngalore_adamw\ngalore_adamw_8bit\ngalore_adafactor\ngalore_adamw_layerwise\ngalore_adamw_8bit_layerwise\ngalore_adafactor_layerwise\nlomo\nadalomo\ngrokadamw\nschedule_free_radam\nschedule_free_adamw\nschedule_free_sgd\napollo_adamw\napollo_adamw_layerwise\nstable_adamw", + "crumbs": [ + "Core Concepts", + "Optimizers" + ] + }, + { + "objectID": "docs/optimizers.html#custom-optimizers", + "href": "docs/optimizers.html#custom-optimizers", + "title": "Optimizers", + "section": "Custom Optimizers", + "text": "Custom Optimizers\nEnable custom optimizers by passing a string to the optimizer argument. Each optimizer will receive beta and epsilon args, however, some may accept additional args which are detailed below.\n\noptimi_adamw\noptimizer: optimi_adamw\n\n\nao_adamw_4bit\nDeprecated: Please use adamw_torch_4bit.\n\n\nao_adamw_8bit\nDeprecated: Please use adamw_torch_8bit.\n\n\nao_adamw_fp8\noptimizer: ao_adamw_fp8\n\n\nadopt_adamw\nGitHub: https://github.com/iShohei220/adopt\nPaper: https://arxiv.org/abs/2411.02853\noptimizer: adopt_adamw\n\n\ncame_pytorch\nGitHub: https://github.com/yangluo7/CAME/tree/master\nPaper: https://arxiv.org/abs/2307.02047\noptimizer: came_pytorch\n\n# optional args (defaults below)\nadam_beta1: 0.9\nadam_beta2: 0.999\nadam_beta3: 0.9999\nadam_epsilon: 1e-30\nadam_epsilon2: 1e-16\n\n\nmuon\nBlog: https://kellerjordan.github.io/posts/muon/\nPaper: https://arxiv.org/abs/2502.16982v1\noptimizer: muon\n\n\ndion\nMicrosoft’s Dion (DIstributed OrthoNormalization) optimizer is a scalable and communication-efficient\northonormalizing optimizer that uses low-rank approximations to reduce gradient communication.\nGitHub: https://github.com/microsoft/dion\nPaper: https://arxiv.org/pdf/2504.05295\nNote: Implementation written for PyTorch 2.7+ for DTensor\noptimizer: dion\ndion_lr: 0.01\ndion_momentum: 0.95\nlr: 0.00001 # learning rate for embeddings and parameters that fallback to AdamW", + "crumbs": [ + "Core Concepts", "Optimizers" ] }, diff --git a/sitemap.xml b/sitemap.xml index b8698e233..15c2df95b 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,794 +2,794 @@ https://docs.axolotl.ai/TODO.html - 2025-08-06T05:07:01.209Z + 2025-08-06T12:02:19.975Z https://docs.axolotl.ai/index.html - 2025-08-06T05:07:01.230Z + 2025-08-06T12:02:19.996Z https://docs.axolotl.ai/docs/debugging.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.977Z https://docs.axolotl.ai/docs/amd_hpc.html - 2025-08-06T05:07:01.210Z + 2025-08-06T12:02:19.976Z https://docs.axolotl.ai/docs/api/utils.callbacks.mlflow_.html - 2025-08-06T05:10:36.042Z + 2025-08-06T12:05:43.877Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_expand_mask.html - 2025-08-06T05:10:35.458Z + 2025-08-06T12:05:43.293Z https://docs.axolotl.ai/docs/api/loaders.patch_manager.html - 2025-08-06T05:10:35.063Z + 2025-08-06T12:05:42.897Z https://docs.axolotl.ai/docs/api/core.chat.format.llama3x.html - 2025-08-06T05:10:34.730Z + 2025-08-06T12:05:42.575Z https://docs.axolotl.ai/docs/api/cli.train.html - 2025-08-06T05:10:34.790Z + 2025-08-06T12:05:42.633Z https://docs.axolotl.ai/docs/api/utils.callbacks.perplexity.html - 2025-08-06T05:10:36.033Z + 2025-08-06T12:05:43.868Z https://docs.axolotl.ai/docs/api/core.chat.messages.html - 2025-08-06T05:10:34.727Z + 2025-08-06T12:05:42.572Z https://docs.axolotl.ai/docs/api/utils.callbacks.lisa.html - 2025-08-06T05:10:36.039Z + 2025-08-06T12:05:43.873Z https://docs.axolotl.ai/docs/api/cli.merge_sharded_fsdp_weights.html - 2025-08-06T05:10:34.888Z + 2025-08-06T12:05:42.729Z https://docs.axolotl.ai/docs/api/monkeypatch.mixtral.html - 2025-08-06T05:10:35.518Z + 2025-08-06T12:05:43.352Z https://docs.axolotl.ai/docs/api/utils.chat_templates.html - 2025-08-06T05:10:35.556Z + 2025-08-06T12:05:43.389Z https://docs.axolotl.ai/docs/api/core.chat.format.shared.html - 2025-08-06T05:10:34.731Z + 2025-08-06T12:05:42.577Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.optimizer.html - 2025-08-06T05:10:35.070Z + 2025-08-06T12:05:42.904Z https://docs.axolotl.ai/docs/api/utils.collators.mamba.html - 2025-08-06T05:10:35.981Z + 2025-08-06T12:05:43.816Z https://docs.axolotl.ai/docs/api/logging_config.html - 2025-08-06T05:10:34.674Z + 2025-08-06T12:05:42.520Z https://docs.axolotl.ai/docs/api/utils.collators.mm_chat.html - 2025-08-06T05:10:35.986Z + 2025-08-06T12:05:43.821Z https://docs.axolotl.ai/docs/api/prompt_strategies.completion.html - 2025-08-06T05:10:35.192Z + 2025-08-06T12:05:43.027Z https://docs.axolotl.ai/docs/api/kernels.utils.html - 2025-08-06T05:10:35.408Z + 2025-08-06T12:05:43.243Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chat_template.html - 2025-08-06T05:10:35.226Z + 2025-08-06T12:05:43.061Z https://docs.axolotl.ai/docs/api/kernels.swiglu.html - 2025-08-06T05:10:35.399Z + 2025-08-06T12:05:43.234Z https://docs.axolotl.ai/docs/api/common.const.html - 2025-08-06T05:10:35.941Z + 2025-08-06T12:05:43.776Z https://docs.axolotl.ai/docs/api/cli.cloud.base.html - 2025-08-06T05:10:34.913Z + 2025-08-06T12:05:42.752Z https://docs.axolotl.ai/docs/api/prompt_strategies.orpo.chat_template.html - 2025-08-06T05:10:35.289Z + 2025-08-06T12:05:43.124Z https://docs.axolotl.ai/docs/api/core.builders.rl.html - 2025-08-06T05:10:34.690Z + 2025-08-06T12:05:42.536Z https://docs.axolotl.ai/docs/api/utils.dict.html - 2025-08-06T05:10:35.649Z + 2025-08-06T12:05:43.482Z https://docs.axolotl.ai/docs/api/utils.schemas.integrations.html - 2025-08-06T05:10:35.762Z + 2025-08-06T12:05:43.594Z https://docs.axolotl.ai/docs/api/core.trainers.utils.html - 2025-08-06T05:10:35.028Z + 2025-08-06T12:05:42.862Z https://docs.axolotl.ai/docs/api/monkeypatch.trainer_fsdp_optim.html - 2025-08-06T05:10:35.507Z + 2025-08-06T12:05:43.341Z https://docs.axolotl.ai/docs/api/cli.evaluate.html - 2025-08-06T05:10:34.798Z + 2025-08-06T12:05:42.641Z https://docs.axolotl.ai/docs/api/core.builders.causal.html - 2025-08-06T05:10:34.686Z + 2025-08-06T12:05:42.531Z https://docs.axolotl.ai/docs/api/monkeypatch.multipack.html - 2025-08-06T05:10:35.452Z + 2025-08-06T12:05:43.288Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_patch_multipack.html - 2025-08-06T05:10:35.498Z + 2025-08-06T12:05:43.332Z https://docs.axolotl.ai/docs/api/cli.delinearize_llama4.html - 2025-08-06T05:10:34.852Z + 2025-08-06T12:05:42.693Z https://docs.axolotl.ai/docs/api/utils.schemas.trl.html - 2025-08-06T05:10:35.745Z + 2025-08-06T12:05:43.577Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.zephyr.html - 2025-08-06T05:10:35.248Z + 2025-08-06T12:05:43.083Z https://docs.axolotl.ai/docs/api/integrations.kd.trainer.html - 2025-08-06T05:10:35.928Z + 2025-08-06T12:05:43.760Z https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html - 2025-08-06T05:10:35.547Z + 2025-08-06T12:05:43.381Z https://docs.axolotl.ai/docs/api/utils.optimizers.adopt.html - 2025-08-06T05:10:35.656Z + 2025-08-06T12:05:43.490Z https://docs.axolotl.ai/docs/api/monkeypatch.data.batch_dataset_fetcher.html - 2025-08-06T05:10:35.516Z + 2025-08-06T12:05:43.351Z https://docs.axolotl.ai/docs/api/cli.cloud.modal_.html - 2025-08-06T05:10:34.920Z + 2025-08-06T12:05:42.759Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_chat.html - 2025-08-06T05:10:35.152Z + 2025-08-06T12:05:42.986Z https://docs.axolotl.ai/docs/api/utils.freeze.html - 2025-08-06T05:10:35.578Z + 2025-08-06T12:05:43.411Z https://docs.axolotl.ai/docs/api/prompt_strategies.bradley_terry.llama3.html - 2025-08-06T05:10:35.293Z + 2025-08-06T12:05:43.128Z https://docs.axolotl.ai/docs/api/integrations.base.html - 2025-08-06T05:10:35.916Z + 2025-08-06T12:05:43.748Z https://docs.axolotl.ai/docs/api/monkeypatch.unsloth_.html - 2025-08-06T05:10:35.515Z + 2025-08-06T12:05:43.349Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.chatml.html - 2025-08-06T05:10:35.267Z + 2025-08-06T12:05:43.102Z https://docs.axolotl.ai/docs/api/cli.main.html - 2025-08-06T05:10:34.781Z + 2025-08-06T12:05:42.624Z https://docs.axolotl.ai/docs/api/common.datasets.html - 2025-08-06T05:10:35.957Z + 2025-08-06T12:05:43.791Z https://docs.axolotl.ai/docs/api/train.html - 2025-08-06T05:10:34.586Z + 2025-08-06T12:05:42.433Z https://docs.axolotl.ai/docs/api/utils.trainer.html - 2025-08-06T05:10:35.595Z + 2025-08-06T12:05:43.428Z https://docs.axolotl.ai/docs/api/prompt_strategies.llama2_chat.html - 2025-08-06T05:10:35.186Z + 2025-08-06T12:05:43.021Z https://docs.axolotl.ai/docs/api/index.html - 2025-08-06T05:10:34.523Z + 2025-08-06T12:05:42.371Z https://docs.axolotl.ai/docs/api/prompt_strategies.chat_template.html - 2025-08-06T05:10:35.139Z + 2025-08-06T12:05:42.972Z https://docs.axolotl.ai/docs/api/core.training_args.html - 2025-08-06T05:10:34.703Z + 2025-08-06T12:05:42.549Z https://docs.axolotl.ai/docs/api/kernels.quantize.html - 2025-08-06T05:10:35.407Z + 2025-08-06T12:05:43.242Z https://docs.axolotl.ai/docs/api/convert.html - 2025-08-06T05:10:34.622Z + 2025-08-06T12:05:42.468Z https://docs.axolotl.ai/docs/api/integrations.grokfast.optimizer.html - 2025-08-06T05:10:35.921Z + 2025-08-06T12:05:43.753Z https://docs.axolotl.ai/docs/api/prompt_strategies.stepwise_supervised.html - 2025-08-06T05:10:35.203Z + 2025-08-06T12:05:43.037Z https://docs.axolotl.ai/docs/api/utils.schemas.model.html - 2025-08-06T05:10:35.708Z + 2025-08-06T12:05:43.540Z https://docs.axolotl.ai/docs/api/utils.callbacks.qat.html - 2025-08-06T05:10:36.053Z + 2025-08-06T12:05:43.887Z https://docs.axolotl.ai/docs/api/loaders.constants.html - 2025-08-06T05:10:35.065Z + 2025-08-06T12:05:42.898Z https://docs.axolotl.ai/docs/api/cli.utils.sweeps.html - 2025-08-06T05:10:34.951Z + 2025-08-06T12:05:42.789Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.llama3.html - 2025-08-06T05:10:35.236Z + 2025-08-06T12:05:43.071Z https://docs.axolotl.ai/docs/api/core.datasets.transforms.chat_builder.html - 2025-08-06T05:10:34.745Z + 2025-08-06T12:05:42.590Z https://docs.axolotl.ai/docs/api/cli.utils.fetch.html - 2025-08-06T05:10:34.939Z + 2025-08-06T12:05:42.777Z https://docs.axolotl.ai/docs/api/core.trainers.mamba.html - 2025-08-06T05:10:34.995Z + 2025-08-06T12:05:42.831Z https://docs.axolotl.ai/docs/api/utils.schemas.enums.html - 2025-08-06T05:10:35.773Z + 2025-08-06T12:05:43.605Z https://docs.axolotl.ai/docs/api/utils.callbacks.profiler.html - 2025-08-06T05:10:36.037Z + 2025-08-06T12:05:43.872Z https://docs.axolotl.ai/docs/api/prompt_strategies.metharme.html - 2025-08-06T05:10:35.210Z + 2025-08-06T12:05:43.044Z https://docs.axolotl.ai/docs/api/core.trainers.trl.html - 2025-08-06T05:10:34.989Z + 2025-08-06T12:05:42.825Z https://docs.axolotl.ai/docs/api/prompt_strategies.orcamini.html - 2025-08-06T05:10:35.214Z + 2025-08-06T12:05:43.048Z https://docs.axolotl.ai/docs/api/utils.samplers.multipack.html - 2025-08-06T05:10:36.027Z + 2025-08-06T12:05:43.862Z https://docs.axolotl.ai/docs/api/utils.schedulers.html - 2025-08-06T05:10:35.623Z + 2025-08-06T12:05:43.456Z https://docs.axolotl.ai/docs/api/core.trainers.grpo.trainer.html - 2025-08-06T05:10:35.014Z + 2025-08-06T12:05:42.848Z https://docs.axolotl.ai/docs/api/prompt_tokenizers.html - 2025-08-06T05:10:34.665Z + 2025-08-06T12:05:42.510Z https://docs.axolotl.ai/docs/config-reference.html - 2025-08-06T05:10:52.245Z + 2025-08-06T12:05:57.479Z https://docs.axolotl.ai/docs/multimodal.html - 2025-08-06T05:07:01.214Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/mixed_precision.html - 2025-08-06T05:07:01.214Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/unsloth.html - 2025-08-06T05:07:01.215Z + 2025-08-06T12:02:19.981Z https://docs.axolotl.ai/docs/ray-integration.html - 2025-08-06T05:07:01.215Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/dataset-formats/stepwise_supervised.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.977Z https://docs.axolotl.ai/docs/dataset-formats/template_free.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.977Z https://docs.axolotl.ai/docs/dataset-formats/index.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.977Z https://docs.axolotl.ai/docs/dataset-formats/pretraining.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.977Z https://docs.axolotl.ai/docs/nd_parallelism.html - 2025-08-06T05:07:01.214Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/sequence_parallelism.html - 2025-08-06T05:07:01.215Z + 2025-08-06T12:02:19.981Z https://docs.axolotl.ai/docs/inference.html - 2025-08-06T05:07:01.214Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/fsdp_qlora.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.977Z https://docs.axolotl.ai/docs/multi-node.html - 2025-08-06T05:07:01.214Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/lora_optims.html - 2025-08-06T05:07:01.214Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/getting-started.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.977Z https://docs.axolotl.ai/docs/dataset_loading.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.977Z https://docs.axolotl.ai/docs/lr_groups.html - 2025-08-06T05:07:01.214Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/input_output.html - 2025-08-06T05:07:01.214Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/src/axolotl/integrations/LICENSE.html - 2025-08-06T05:07:01.234Z + 2025-08-06T12:02:20.000Z https://docs.axolotl.ai/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html - 2025-08-06T05:07:01.235Z + 2025-08-06T12:02:20.000Z https://docs.axolotl.ai/docs/mac.html - 2025-08-06T05:07:01.214Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/optimizers.html - 2025-08-06T05:07:01.215Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/gradient_checkpointing.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.977Z https://docs.axolotl.ai/docs/qat.html - 2025-08-06T05:07:01.215Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/faq.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.977Z https://docs.axolotl.ai/docs/dataset_preprocessing.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.977Z https://docs.axolotl.ai/docs/nccl.html - 2025-08-06T05:07:01.214Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/cli.html - 2025-08-06T05:07:01.210Z + 2025-08-06T12:02:19.976Z https://docs.axolotl.ai/docs/torchao.html - 2025-08-06T05:07:01.215Z + 2025-08-06T12:02:19.981Z https://docs.axolotl.ai/docs/multi-gpu.html - 2025-08-06T05:07:01.214Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/rlhf.html - 2025-08-06T05:07:01.215Z + 2025-08-06T12:02:19.981Z https://docs.axolotl.ai/docs/dataset-formats/tokenized.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.977Z https://docs.axolotl.ai/docs/dataset-formats/conversation.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.976Z https://docs.axolotl.ai/docs/dataset-formats/inst_tune.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.977Z https://docs.axolotl.ai/docs/reward_modelling.html - 2025-08-06T05:07:01.215Z + 2025-08-06T12:02:19.981Z https://docs.axolotl.ai/docs/docker.html - 2025-08-06T05:07:01.211Z + 2025-08-06T12:02:19.977Z https://docs.axolotl.ai/docs/installation.html - 2025-08-06T05:07:01.214Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/quantize.html - 2025-08-06T05:07:01.215Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/docs/custom_integrations.html - 2025-08-06T05:07:01.210Z + 2025-08-06T12:02:19.976Z https://docs.axolotl.ai/docs/batch_vs_grad.html - 2025-08-06T05:07:01.210Z + 2025-08-06T12:02:19.976Z https://docs.axolotl.ai/docs/api/cli.utils.train.html - 2025-08-06T05:10:34.962Z + 2025-08-06T12:05:42.799Z https://docs.axolotl.ai/docs/api/cli.art.html - 2025-08-06T05:10:34.821Z + 2025-08-06T12:05:42.664Z https://docs.axolotl.ai/docs/api/core.trainers.grpo.sampler.html - 2025-08-06T05:10:35.026Z + 2025-08-06T12:05:42.861Z https://docs.axolotl.ai/docs/api/loaders.model.html - 2025-08-06T05:10:35.038Z + 2025-08-06T12:05:42.872Z https://docs.axolotl.ai/docs/api/cli.preprocess.html - 2025-08-06T05:10:34.897Z + 2025-08-06T12:05:42.737Z https://docs.axolotl.ai/docs/api/cli.utils.html - 2025-08-06T05:10:34.921Z + 2025-08-06T12:05:42.760Z https://docs.axolotl.ai/docs/api/cli.inference.html - 2025-08-06T05:10:34.867Z + 2025-08-06T12:05:42.708Z https://docs.axolotl.ai/docs/api/monkeypatch.btlm_attn_hijack_flash.html - 2025-08-06T05:10:35.496Z + 2025-08-06T12:05:43.331Z https://docs.axolotl.ai/docs/api/datasets.html - 2025-08-06T05:10:34.608Z + 2025-08-06T12:05:42.455Z https://docs.axolotl.ai/docs/api/monkeypatch.transformers_fa_utils.html - 2025-08-06T05:10:35.513Z + 2025-08-06T12:05:43.348Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_flash.html - 2025-08-06T05:10:35.434Z + 2025-08-06T12:05:43.269Z https://docs.axolotl.ai/docs/api/monkeypatch.relora.html - 2025-08-06T05:10:35.456Z + 2025-08-06T12:05:43.291Z https://docs.axolotl.ai/docs/api/monkeypatch.stablelm_attn_hijack_flash.html - 2025-08-06T05:10:35.504Z + 2025-08-06T12:05:43.338Z https://docs.axolotl.ai/docs/api/loaders.adapter.html - 2025-08-06T05:10:35.053Z + 2025-08-06T12:05:42.887Z https://docs.axolotl.ai/docs/api/core.trainers.dpo.trainer.html - 2025-08-06T05:10:35.002Z + 2025-08-06T12:05:42.837Z https://docs.axolotl.ai/docs/api/integrations.cut_cross_entropy.args.html - 2025-08-06T05:10:35.920Z + 2025-08-06T12:05:43.752Z https://docs.axolotl.ai/docs/api/monkeypatch.utils.html - 2025-08-06T05:10:35.495Z + 2025-08-06T12:05:43.329Z https://docs.axolotl.ai/docs/api/loaders.processor.html - 2025-08-06T05:10:35.048Z + 2025-08-06T12:05:42.882Z https://docs.axolotl.ai/docs/api/cli.config.html - 2025-08-06T05:10:34.847Z + 2025-08-06T12:05:42.688Z https://docs.axolotl.ai/docs/api/integrations.liger.args.html - 2025-08-06T05:10:35.932Z + 2025-08-06T12:05:43.764Z https://docs.axolotl.ai/docs/api/loaders.tokenizer.html - 2025-08-06T05:10:35.046Z + 2025-08-06T12:05:42.880Z https://docs.axolotl.ai/docs/api/utils.schemas.config.html - 2025-08-06T05:10:35.701Z + 2025-08-06T12:05:43.533Z https://docs.axolotl.ai/docs/api/utils.ctx_managers.sequence_parallel.html - 2025-08-06T05:10:35.104Z + 2025-08-06T12:05:42.938Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.scheduler.html - 2025-08-06T05:10:35.081Z + 2025-08-06T12:05:42.914Z https://docs.axolotl.ai/docs/api/core.trainers.base.html - 2025-08-06T05:10:34.974Z + 2025-08-06T12:05:42.810Z https://docs.axolotl.ai/docs/api/cli.utils.args.html - 2025-08-06T05:10:34.934Z + 2025-08-06T12:05:42.772Z https://docs.axolotl.ai/docs/api/prompt_strategies.messages.chat.html - 2025-08-06T05:10:35.224Z + 2025-08-06T12:05:43.059Z https://docs.axolotl.ai/docs/api/monkeypatch.lora_kernels.html - 2025-08-06T05:10:35.487Z + 2025-08-06T12:05:43.321Z https://docs.axolotl.ai/docs/api/kernels.lora.html - 2025-08-06T05:10:35.378Z + 2025-08-06T12:05:43.213Z https://docs.axolotl.ai/docs/api/cli.vllm_serve.html - 2025-08-06T05:10:34.910Z + 2025-08-06T12:05:42.749Z https://docs.axolotl.ai/docs/api/utils.schemas.multimodal.html - 2025-08-06T05:10:35.750Z + 2025-08-06T12:05:43.582Z https://docs.axolotl.ai/docs/api/utils.schemas.utils.html - 2025-08-06T05:10:35.778Z + 2025-08-06T12:05:43.610Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_xformers.html - 2025-08-06T05:10:35.436Z + 2025-08-06T12:05:43.271Z https://docs.axolotl.ai/docs/api/integrations.lm_eval.args.html - 2025-08-06T05:10:35.935Z + 2025-08-06T12:05:43.769Z https://docs.axolotl.ai/docs/api/monkeypatch.mistral_attn_hijack_flash.html - 2025-08-06T05:10:35.451Z + 2025-08-06T12:05:43.286Z https://docs.axolotl.ai/docs/api/utils.collators.core.html - 2025-08-06T05:10:35.959Z + 2025-08-06T12:05:43.793Z https://docs.axolotl.ai/docs/api/core.chat.format.chatml.html - 2025-08-06T05:10:34.728Z + 2025-08-06T12:05:42.574Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.passthrough.html - 2025-08-06T05:10:35.251Z + 2025-08-06T12:05:43.086Z https://docs.axolotl.ai/docs/api/core.datasets.chat.html - 2025-08-06T05:10:34.736Z + 2025-08-06T12:05:42.582Z https://docs.axolotl.ai/docs/api/utils.bench.html - 2025-08-06T05:10:35.570Z + 2025-08-06T12:05:43.403Z https://docs.axolotl.ai/docs/api/utils.schemas.training.html - 2025-08-06T05:10:35.715Z + 2025-08-06T12:05:43.547Z https://docs.axolotl.ai/docs/api/utils.collators.batching.html - 2025-08-06T05:10:35.978Z + 2025-08-06T12:05:43.812Z https://docs.axolotl.ai/docs/api/prompt_strategies.input_output.html - 2025-08-06T05:10:35.198Z + 2025-08-06T12:05:43.033Z https://docs.axolotl.ai/docs/api/utils.lora.html - 2025-08-06T05:10:35.561Z + 2025-08-06T12:05:43.394Z https://docs.axolotl.ai/docs/api/prompt_strategies.base.html - 2025-08-06T05:10:35.106Z + 2025-08-06T12:05:42.939Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_w_system.html - 2025-08-06T05:10:35.166Z + 2025-08-06T12:05:43.000Z https://docs.axolotl.ai/docs/api/utils.schemas.datasets.html - 2025-08-06T05:10:35.733Z + 2025-08-06T12:05:43.565Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.user_defined.html - 2025-08-06T05:10:35.250Z + 2025-08-06T12:05:43.085Z https://docs.axolotl.ai/docs/api/utils.schemas.peft.html - 2025-08-06T05:10:35.741Z + 2025-08-06T12:05:43.573Z https://docs.axolotl.ai/docs/api/prompt_strategies.pygmalion.html - 2025-08-06T05:10:35.220Z + 2025-08-06T12:05:43.055Z https://docs.axolotl.ai/docs/api/common.architectures.html - 2025-08-06T05:10:35.940Z + 2025-08-06T12:05:43.774Z https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html - 2025-08-06T05:10:35.521Z + 2025-08-06T12:05:43.355Z https://docs.axolotl.ai/docs/api/utils.callbacks.comet_.html - 2025-08-06T05:10:36.046Z + 2025-08-06T12:05:43.880Z https://docs.axolotl.ai/docs/api/integrations.spectrum.args.html - 2025-08-06T05:10:35.938Z + 2025-08-06T12:05:43.773Z https://docs.axolotl.ai/docs/api/cli.quantize.html - 2025-08-06T05:10:34.902Z + 2025-08-06T12:05:42.742Z https://docs.axolotl.ai/docs/api/cli.checks.html - 2025-08-06T05:10:34.828Z + 2025-08-06T12:05:42.670Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.llama3.html - 2025-08-06T05:10:35.259Z + 2025-08-06T12:05:43.094Z https://docs.axolotl.ai/docs/api/utils.model_shard_quant.html - 2025-08-06T05:10:35.567Z + 2025-08-06T12:05:43.400Z https://docs.axolotl.ai/docs/api/utils.quantization.html - 2025-08-06T05:10:35.686Z + 2025-08-06T12:05:43.519Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.rng_state_loader.html - 2025-08-06T05:10:35.074Z + 2025-08-06T12:05:42.907Z https://docs.axolotl.ai/docs/api/kernels.geglu.html - 2025-08-06T05:10:35.389Z + 2025-08-06T12:05:43.224Z https://docs.axolotl.ai/docs/api/utils.data.pretraining.html - 2025-08-06T05:10:35.658Z + 2025-08-06T12:05:43.492Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.user_defined.html - 2025-08-06T05:10:35.269Z + 2025-08-06T12:05:43.104Z https://docs.axolotl.ai/docs/api/core.builders.base.html - 2025-08-06T05:10:34.681Z + 2025-08-06T12:05:42.526Z https://docs.axolotl.ai/docs/api/cli.merge_lora.html - 2025-08-06T05:10:34.876Z + 2025-08-06T12:05:42.716Z https://docs.axolotl.ai/docs/api/cli.utils.load.html - 2025-08-06T05:10:34.945Z + 2025-08-06T12:05:42.783Z https://docs.axolotl.ai/docs/api/utils.data.sft.html - 2025-08-06T05:10:35.665Z + 2025-08-06T12:05:43.498Z https://docs.axolotl.ai/docs/api/prompt_strategies.user_defined.html - 2025-08-06T05:10:35.174Z + 2025-08-06T12:05:43.008Z https://docs.axolotl.ai/docs/api/utils.tokenization.html - 2025-08-06T05:10:35.554Z + 2025-08-06T12:05:43.388Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chatml.html - 2025-08-06T05:10:35.247Z + 2025-08-06T12:05:43.082Z https://docs.axolotl.ai/docs/api/models.mamba.modeling_mamba.html - 2025-08-06T05:10:35.958Z + 2025-08-06T12:05:43.792Z https://docs.axolotl.ai/docs/api/cli.args.html - 2025-08-06T05:10:34.818Z + 2025-08-06T12:05:42.660Z https://docs.axolotl.ai/docs/api/evaluate.html - 2025-08-06T05:10:34.597Z + 2025-08-06T12:05:42.444Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_instruct.html - 2025-08-06T05:10:35.154Z + 2025-08-06T12:05:42.988Z https://docs.axolotl.ai/docs/api/utils.distributed.html - 2025-08-06T05:10:35.643Z + 2025-08-06T12:05:43.477Z https://docs.axolotl.ai/docs/multipack.html - 2025-08-06T05:07:01.214Z + 2025-08-06T12:02:19.980Z https://docs.axolotl.ai/examples/colab-notebooks/colab-axolotl-example.html - 2025-08-06T05:07:01.219Z + 2025-08-06T12:02:19.985Z https://docs.axolotl.ai/FAQS.html - 2025-08-06T05:07:01.209Z + 2025-08-06T12:02:19.975Z diff --git a/src/axolotl/integrations/LICENSE.html b/src/axolotl/integrations/LICENSE.html index df0970d8b..081fc1dae 100644 --- a/src/axolotl/integrations/LICENSE.html +++ b/src/axolotl/integrations/LICENSE.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - - diff --git a/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html b/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html index b2a65c2cb..85036d2a6 100644 --- a/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html +++ b/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html @@ -355,6 +355,12 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); Mixed Precision Training + + @@ -408,12 +414,6 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); N-D Parallelism (Beta) - -