diff --git a/.nojekyll b/.nojekyll
index f2c30de18..15f25bd6f 100644
--- a/.nojekyll
+++ b/.nojekyll
@@ -1 +1 @@
-9001b065
\ No newline at end of file
+52e45560
\ No newline at end of file
diff --git a/docs/docker.html b/docs/docker.html
index 3b3b6e125..32a1f4e02 100644
--- a/docs/docker.html
+++ b/docs/docker.html
@@ -543,6 +543,7 @@ Important
 main-base-py3.11-cu128-2.7.1
 main-base-py3.11-cu126-2.7.1
+main-base-py3.11-cu126-2.7.0
 main-base-py3.11-cu126-2.6.0
 main-base-py3.11-cu124-2.6.0

 main-py3.11-cu128-2.7.1
 main-py3.11-cu126-2.7.1
+main-py3.11-cu126-2.7.0
 main-py3.11-cu126-2.6.0
 main-py3.11-cu124-2.6.0
 main-latest
diff --git a/docs/rlhf.html b/docs/rlhf.html
--- a/docs/rlhf.html
+++ b/docs/rlhf.html
The input format is a simple JSON input with customizable fields based on the above config.
{
"system": "...", // optional
@@ -954,14 +953,13 @@ Tip
 datasets:
   - path: ...
     split: train
-    type: user_defined.default
-
+    type:
     field_prompt: "prompt"
     field_system: "system"
     field_completion: "completion"
     field_label: "label"
     prompt_format: "{prompt}"
     completion_format: "{completion}"
 The input format is a simple JSON input with customizable fields based on the above config.
 {
   "system": "...", // optional
diff --git a/search.json b/search.json
index 57801035a..5ebd7077c 100644
--- a/search.json
+++ b/search.json
@@ -2193,7 +2193,7 @@
"href": "docs/rlhf.html#rlhf-using-axolotl",
"title": "RLHF (Beta)",
"section": "RLHF using Axolotl",
- "text": "RLHF using Axolotl\n\n\n\n\n\n\nImportant\n\n\n\nThis is a BETA feature and many features are not fully implemented. You are encouraged to open new PRs to improve the integration and functionality.\n\n\nWe rely on the TRL library for implementations of various RL training methods, which we wrap around to expose in axolotl. Each method has their own supported ways of loading datasets and prompt formats.\n\n\n\n\n\n\nTip\n\n\n\nYou can find what each method supports by going into src/axolotl/prompt_strategies/{method} where {method} is one of our supported methods. The type: can be retrieved from {method}.{function_name}.\n\n\n\nDPO\nExample config:\nrl: dpo\ndatasets:\n - path: Intel/orca_dpo_pairs\n split: train\n type: chatml.intel\n - path: argilla/ultrafeedback-binarized-preferences\n split: train\n type: chatml\nDPO supports the following types with the following dataset format:\n\nchatml.argilla\n{\n \"system\": \"...\", // optional\n \"instruction\": \"...\",\n \"chosen_response\": \"...\",\n \"rejected_response\": \"...\"\n}\n\n\nchatml.argilla_chat\n{\n \"chosen\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ],\n \"rejected\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ]\n}\n\n\nchatml.icr\n{\n \"system\": \"...\", // optional\n \"input\": \"...\",\n \"chosen\": \"...\",\n \"rejected\": \"...\"\n}\n\n\nchatml.intel\n{\n \"system\": \"...\", // optional\n \"question\": \"...\",\n \"chosen\": \"...\",\n \"rejected\": \"...\"\n}\n\n\nchatml.prompt_pairs\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"chosen\": \"...\",\n \"rejected\": \"...\"\n}\n\n\nchatml.ultra\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"chosen\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ],\n \"rejected\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ]\n}\n\n\nllama3.argilla\n{\n \"system\": \"...\", // optional\n \"instruction\": \"...\",\n \"chosen_response\": \"...\",\n \"rejected_response\": \"...\"\n}\n\n\nllama3.argilla_chat\n{\n \"chosen\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ],\n \"rejected\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ]\n}\n\n\nllama3.icr\n{\n \"system\": \"...\", // optional\n \"input\": \"...\",\n \"chosen\": \"...\",\n \"rejected\": \"...\"\n}\n\n\nllama3.intel\n{\n \"system\": \"...\", // optional\n \"question\": \"...\",\n \"chosen\": \"...\",\n \"rejected\": \"...\"\n}\n\n\nllama3.prompt_pairs\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"chosen\": \"...\",\n \"rejected\": \"...\"\n}\n\n\nllama3.ultra\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"chosen\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ],\n \"rejected\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ]\n}\n\n\nzephyr.nectar\n{\n \"prompt\": \"...\",\n \"answers\": [\n {\n \"answer\": \"...\",\n \"rank\": 1\n },\n {\n \"answer\": \"...\",\n \"rank\": 2\n }\n // ... 
more answers with ranks\n ]\n}\n\n\nchat_template.default\nrl: dpo\ndatasets:\n - path: ...\n split: train\n type: chat_template.default\n field_messages: \"messages\"\n field_chosen: \"chosen\"\n field_rejected: \"rejected\"\n message_property_mappings:\n role: role\n content: content\n roles:\n user: [\"user\"]\n assistant: [\"assistant\"]\n system: [\"system\"]\nSample input format:\n{\n \"messages\": [\n {\n \"role\": \"system\",\n \"content\": \"...\"\n },\n {\n \"role\": \"user\",\n \"content\": \"...\"\n },\n // ... more messages\n ],\n \"chosen\": {\n \"role\": \"assistant\",\n \"content\": \"...\"\n },\n \"rejected\": {\n \"role\": \"assistant\",\n \"content\": \"...\"\n }\n}\n\n\nuser_defined.default\nFor custom behaviors,\nrl: dpo\ndatasets:\n - path: ...\n split: train\n type: user_defined.default\n\n field_prompt: \"prompt\"\n field_system: \"system\"\n field_chosen: \"chosen\"\n field_rejected: \"rejected\"\n prompt_format: \"{prompt}\"\n chosen_format: \"{chosen}\"\n rejected_format: \"{rejected}\"\nThe input format is a simple JSON input with customizable fields based on the above config.\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"chosen\": \"...\",\n \"rejected\": \"...\"\n}\n\n\n\nIPO\nAs IPO is just DPO with a different loss function, all supported dataset formats for DPO are also supported for IPO.\nrl: ipo\n\n\nORPO\nPaper: https://arxiv.org/abs/2403.07691\nrl: orpo\norpo_alpha: 0.1\nremove_unused_columns: false\n\nchat_template: chatml\ndatasets:\n - path: argilla/ultrafeedback-binarized-preferences-cleaned\n type: chat_template.argilla\nORPO supports the following types with the following dataset format:\n\nchat_template.argilla\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\", // if available, will be taken as user message for single-turn instead of from list below\n\n // chosen/rejected should be same till last content and only even-number of alternating user/assistant turns\n \"chosen\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ],\n \"rejected\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ]\n}\n\n\n\nKTO\nrl: kto\nrl_beta: 0.1 # default\nkto_desirable_weight: 1.0 # default\nkto_undesirable_weight: 1.0 # default\n\nremove_unused_columns: false\n\ndatasets:\n - path: argilla/ultrafeedback-binarized-preferences-cleaned-kto\n type: llama3.ultra\n split: train\n\ngradient_checkpointing: true\ngradient_checkpointing_kwargs:\n use_reentrant: true\nKTO supports the following types with the following dataset format:\n\nchatml.argilla\n{\n \"system\": \"...\", // optional\n \"instruction\": \"...\",\n \"completion\": \"...\"\n}\n\n\nchatml.argilla_chat\n{\n \"chosen\": [\n {\"role\": \"user\", \"content\": \"...\"}\n ],\n \"completion\": [\n {\"role\": \"assistant\", \"content\": \"...\"}\n ]\n}\n\n\nchatml.intel\n{\n \"system\": \"...\", // optional\n \"question\": \"...\",\n \"completion\": \"...\"\n}\n\n\nchatml.prompt_pairs\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"completion\": \"...\"\n}\n\n\nchatml.ultra\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"completion\": \"...\"\n}\n\n\nllama3.argilla\n{\n \"system\": \"...\", // optional\n \"instruction\": \"...\",\n \"completion\": \"...\"\n}\n\n\nllama3.argilla_chat\n{\n \"completion\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ]\n}\n\n\nllama3.intel\n{\n \"system\": \"...\", 
// optional\n \"question\": \"...\",\n \"completion\": \"...\"\n}\n\n\nllama3.prompt_pairs\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"completion\": \"...\"\n}\n\n\nllama3.ultra\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"completion\": \"...\"\n}\n\n\nuser_defined.default\nFor custom behaviors,\nrl: kto\ndatasets:\n - path: ...\n split: train\n type: user_defined.default\n\n field_prompt: \"prompt\"\n field_system: \"system\"\n field_completion: \"completion\"\n field_label: \"label\"\n prompt_format: \"{prompt}\"\n completion_format: \"{completion}\"\nThe input format is a simple JSON input with customizable fields based on the above config.\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"completion\": \"...\",\n \"label\": \"...\"\n}\n\n\n\nGRPO\n\n\n\n\n\n\nTip\n\n\n\nCheck out our GRPO cookbook.\n\n\nIn the latest GRPO implementation, vLLM is used to significantly speedup trajectory generation during training. In this example, we’re using 4 GPUs - 2 for training, and 2 for vLLM:\n\n\n\n\n\n\nImportant\n\n\n\nMake sure you’ve installed the correct version of vLLM by including it as an extra when installing axolotl, e.g. pip install axolotl[vllm].\n\n\nbase_model: Qwen/Qwen2.5-1.5B-Instruct\n\nvllm:\n host: 0.0.0.0\n port: 8000\n tensor_parallel_size: 2\n gpu_memory_utilization: 0.85\n dtype: auto\n # max_model_len: # you may find it useful to set the vLLM model context length if you know this beforehand\n\nrl: grpo\ntrl:\n use_vllm: true\n vllm_server_host: 0.0.0.0\n vllm_server_port: 8000\n vllm_server_timeout: 300\nCUDA_VISIBLE_DEVICES=2,3 axolotl vllm-serve grpo.yaml\nYour vLLM instance will now attempt to spin up, and it’s time to kick off training utilizing our remaining two GPUs. In another terminal, execute:\nCUDA_VISIBLE_DEVICES=0,1 axolotl train grpo.yaml --num-processes 2\n\n\n\n\n\n\nNote\n\n\n\nDue to TRL’s implementation with vLLM, the vLLM instance must use the last N GPUs instead of the first N GPUs. This is why in the example above, we use CUDA_VISIBLE_DEVICES=2,3 for the vLLM instance.\n\n\n\nReward functions\nGRPO uses custom reward functions and transformations. Please have them ready locally.\nFor example, to load OpenAI’s GSM8K and use a random reward for completions:\n# rewards.py\nimport random\n\ndef rand_reward_func(completions, **kwargs) -> list[float]:\n return [random.uniform(0, 1) for _ in completions]\n\ndef oai_gsm8k_transform(cfg, *args, **kwargs):\n def transform_fn(example, tokenizer=None):\n label = example[\"answer\"].split(\"####\")[-1].strip().replace(\",\", \"\")\n return {\n \"prompt\": [{\"role\": \"user\", \"content\": example[\"question\"]},],\n \"answer\": label,\n }\n return transform_fn, {\"remove_columns\": [\"question\"]}\nrl: grpo\n\ntrl:\n beta: 0.001\n max_completion_length: 256\n use_vllm: True\n num_generations: 4\n reward_funcs: [\"rewards.rand_reward_func\"] # format: '{file_name}.{fn_name}'\n reward_weights: [1.0]\ndatasets:\n - path: openai/gsm8k\n name: main\n type: rewards.oai_gsm8k_transform # format: '{file_name}.{fn_name}'\nTo see other examples of custom reward functions, please see TRL GRPO Docs.\nTo see all configs, please see TRLConfig.\n\n\nGRPO with DAPO/Dr. GRPO loss\nThe DAPO paper and subsequently Dr. 
GRPO paper proposed an alternative loss function for GRPO to mitigate the penalty on longer responses.\ntrl:\n loss_type: dr_grpo\n # Normalizes loss based on max completion length (default: 256)\n max_completion_length:\nFor more information, see GRPO docs.\n\n\n\nSimPO\nSimPO uses CPOTrainer but with an alternative loss function.\nrl: simpo\nrl_beta: 0.1 # default in CPOTrainer\ncpo_alpha: 1.0 # default in CPOTrainer\nsimpo_gamma: 0.5 # default in CPOTrainer\nThis method uses the same dataset format as DPO.\n\n\nUsing local dataset files\ndatasets:\n - ds_type: json\n data_files:\n - orca_rlhf.jsonl\n split: train\n type: chatml.intel\n\n\nTRL auto-unwrapping for PEFT\nTRL supports auto-unwrapping PEFT models for RL training paradigms which rely on a reference model. This significantly reduces memory pressure as an additional reference model does not need to be loaded, and reference model log-probabilities can be obtained by disabling PEFT adapters. This is enabled by default. To turn it off, pass the following config:\n# load ref model when adapter training.\nrl_adapter_ref_model: true",
+ "text": "RLHF using Axolotl\n\n\n\n\n\n\nImportant\n\n\n\nThis is a BETA feature and many features are not fully implemented. You are encouraged to open new PRs to improve the integration and functionality.\n\n\nWe rely on the TRL library for implementations of various RL training methods, which we wrap around to expose in axolotl. Each method has their own supported ways of loading datasets and prompt formats.\n\n\n\n\n\n\nTip\n\n\n\nYou can find what each method supports by going into src/axolotl/prompt_strategies/{method} where {method} is one of our supported methods. The type: can be retrieved from {method}.{function_name}.\n\n\n\nDPO\nExample config:\nrl: dpo\ndatasets:\n - path: Intel/orca_dpo_pairs\n split: train\n type: chatml.intel\n - path: argilla/ultrafeedback-binarized-preferences\n split: train\n type: chatml\nDPO supports the following types with the following dataset format:\n\nchatml.argilla\n{\n \"system\": \"...\", // optional\n \"instruction\": \"...\",\n \"chosen_response\": \"...\",\n \"rejected_response\": \"...\"\n}\n\n\nchatml.argilla_chat\n{\n \"chosen\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ],\n \"rejected\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ]\n}\n\n\nchatml.icr\n{\n \"system\": \"...\", // optional\n \"input\": \"...\",\n \"chosen\": \"...\",\n \"rejected\": \"...\"\n}\n\n\nchatml.intel\n{\n \"system\": \"...\", // optional\n \"question\": \"...\",\n \"chosen\": \"...\",\n \"rejected\": \"...\"\n}\n\n\nchatml.prompt_pairs\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"chosen\": \"...\",\n \"rejected\": \"...\"\n}\n\n\nchatml.ultra\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"chosen\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ],\n \"rejected\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ]\n}\n\n\nllama3.argilla\n{\n \"system\": \"...\", // optional\n \"instruction\": \"...\",\n \"chosen_response\": \"...\",\n \"rejected_response\": \"...\"\n}\n\n\nllama3.argilla_chat\n{\n \"chosen\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ],\n \"rejected\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ]\n}\n\n\nllama3.icr\n{\n \"system\": \"...\", // optional\n \"input\": \"...\",\n \"chosen\": \"...\",\n \"rejected\": \"...\"\n}\n\n\nllama3.intel\n{\n \"system\": \"...\", // optional\n \"question\": \"...\",\n \"chosen\": \"...\",\n \"rejected\": \"...\"\n}\n\n\nllama3.prompt_pairs\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"chosen\": \"...\",\n \"rejected\": \"...\"\n}\n\n\nllama3.ultra\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"chosen\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ],\n \"rejected\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ]\n}\n\n\nzephyr.nectar\n{\n \"prompt\": \"...\",\n \"answers\": [\n {\n \"answer\": \"...\",\n \"rank\": 1\n },\n {\n \"answer\": \"...\",\n \"rank\": 2\n }\n // ... 
more answers with ranks\n ]\n}\n\n\nchat_template.default\nrl: dpo\ndatasets:\n - path: ...\n split: train\n type: chat_template.default\n field_messages: \"messages\"\n field_chosen: \"chosen\"\n field_rejected: \"rejected\"\n message_property_mappings:\n role: role\n content: content\n roles:\n user: [\"user\"]\n assistant: [\"assistant\"]\n system: [\"system\"]\nSample input format:\n{\n \"messages\": [\n {\n \"role\": \"system\",\n \"content\": \"...\"\n },\n {\n \"role\": \"user\",\n \"content\": \"...\"\n },\n // ... more messages\n ],\n \"chosen\": {\n \"role\": \"assistant\",\n \"content\": \"...\"\n },\n \"rejected\": {\n \"role\": \"assistant\",\n \"content\": \"...\"\n }\n}\n\n\nuser_defined.default\nFor custom behaviors,\nrl: dpo\ndatasets:\n - path: ...\n split: train\n type:\n field_prompt: \"prompt\"\n field_system: \"system\"\n field_chosen: \"chosen\"\n field_rejected: \"rejected\"\n prompt_format: \"{prompt}\"\n chosen_format: \"{chosen}\"\n rejected_format: \"{rejected}\"\nThe input format is a simple JSON input with customizable fields based on the above config.\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"chosen\": \"...\",\n \"rejected\": \"...\"\n}\n\n\n\nIPO\nAs IPO is just DPO with a different loss function, all supported dataset formats for DPO are also supported for IPO.\nrl: ipo\n\n\nORPO\nPaper: https://arxiv.org/abs/2403.07691\nrl: orpo\norpo_alpha: 0.1\nremove_unused_columns: false\n\nchat_template: chatml\ndatasets:\n - path: argilla/ultrafeedback-binarized-preferences-cleaned\n type: chat_template.argilla\nORPO supports the following types with the following dataset format:\n\nchat_template.argilla\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\", // if available, will be taken as user message for single-turn instead of from list below\n\n // chosen/rejected should be same till last content and only even-number of alternating user/assistant turns\n \"chosen\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ],\n \"rejected\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ]\n}\n\n\n\nKTO\nrl: kto\nrl_beta: 0.1 # default\nkto_desirable_weight: 1.0 # default\nkto_undesirable_weight: 1.0 # default\n\nremove_unused_columns: false\n\ndatasets:\n - path: argilla/ultrafeedback-binarized-preferences-cleaned-kto\n type: llama3.ultra\n split: train\n\ngradient_checkpointing: true\ngradient_checkpointing_kwargs:\n use_reentrant: true\nKTO supports the following types with the following dataset format:\n\nchatml.argilla\n{\n \"system\": \"...\", // optional\n \"instruction\": \"...\",\n \"completion\": \"...\"\n}\n\n\nchatml.argilla_chat\n{\n \"chosen\": [\n {\"role\": \"user\", \"content\": \"...\"}\n ],\n \"completion\": [\n {\"role\": \"assistant\", \"content\": \"...\"}\n ]\n}\n\n\nchatml.intel\n{\n \"system\": \"...\", // optional\n \"question\": \"...\",\n \"completion\": \"...\"\n}\n\n\nchatml.prompt_pairs\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"completion\": \"...\"\n}\n\n\nchatml.ultra\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"completion\": \"...\"\n}\n\n\nllama3.argilla\n{\n \"system\": \"...\", // optional\n \"instruction\": \"...\",\n \"completion\": \"...\"\n}\n\n\nllama3.argilla_chat\n{\n \"completion\": [\n {\"role\": \"user\", \"content\": \"...\"},\n {\"role\": \"assistant\", \"content\": \"...\"}\n ]\n}\n\n\nllama3.intel\n{\n \"system\": \"...\", // optional\n 
\"question\": \"...\",\n \"completion\": \"...\"\n}\n\n\nllama3.prompt_pairs\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"completion\": \"...\"\n}\n\n\nllama3.ultra\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"completion\": \"...\"\n}\n\n\nuser_defined.default\nFor custom behaviors,\nrl: kto\ndatasets:\n - path: ...\n split: train\n type:\n field_prompt: \"prompt\"\n field_system: \"system\"\n field_completion: \"completion\"\n field_label: \"label\"\n prompt_format: \"{prompt}\"\n completion_format: \"{completion}\"\nThe input format is a simple JSON input with customizable fields based on the above config.\n{\n \"system\": \"...\", // optional\n \"prompt\": \"...\",\n \"completion\": \"...\",\n \"label\": \"...\"\n}\n\n\n\nGRPO\n\n\n\n\n\n\nTip\n\n\n\nCheck out our GRPO cookbook.\n\n\nIn the latest GRPO implementation, vLLM is used to significantly speedup trajectory generation during training. In this example, we’re using 4 GPUs - 2 for training, and 2 for vLLM:\n\n\n\n\n\n\nImportant\n\n\n\nMake sure you’ve installed the correct version of vLLM by including it as an extra when installing axolotl, e.g. pip install axolotl[vllm].\n\n\nbase_model: Qwen/Qwen2.5-1.5B-Instruct\n\nvllm:\n host: 0.0.0.0\n port: 8000\n tensor_parallel_size: 2\n gpu_memory_utilization: 0.85\n dtype: auto\n # max_model_len: # you may find it useful to set the vLLM model context length if you know this beforehand\n\nrl: grpo\ntrl:\n use_vllm: true\n vllm_server_host: 0.0.0.0\n vllm_server_port: 8000\n vllm_server_timeout: 300\nCUDA_VISIBLE_DEVICES=2,3 axolotl vllm-serve grpo.yaml\nYour vLLM instance will now attempt to spin up, and it’s time to kick off training utilizing our remaining two GPUs. In another terminal, execute:\nCUDA_VISIBLE_DEVICES=0,1 axolotl train grpo.yaml --num-processes 2\n\n\n\n\n\n\nNote\n\n\n\nDue to TRL’s implementation with vLLM, the vLLM instance must use the last N GPUs instead of the first N GPUs. This is why in the example above, we use CUDA_VISIBLE_DEVICES=2,3 for the vLLM instance.\n\n\n\nReward functions\nGRPO uses custom reward functions and transformations. Please have them ready locally.\nFor example, to load OpenAI’s GSM8K and use a random reward for completions:\n# rewards.py\nimport random\n\ndef rand_reward_func(completions, **kwargs) -> list[float]:\n return [random.uniform(0, 1) for _ in completions]\n\ndef oai_gsm8k_transform(cfg, *args, **kwargs):\n def transform_fn(example, tokenizer=None):\n label = example[\"answer\"].split(\"####\")[-1].strip().replace(\",\", \"\")\n return {\n \"prompt\": [{\"role\": \"user\", \"content\": example[\"question\"]},],\n \"answer\": label,\n }\n return transform_fn, {\"remove_columns\": [\"question\"]}\nrl: grpo\n\ntrl:\n beta: 0.001\n max_completion_length: 256\n use_vllm: True\n num_generations: 4\n reward_funcs: [\"rewards.rand_reward_func\"] # format: '{file_name}.{fn_name}'\n reward_weights: [1.0]\ndatasets:\n - path: openai/gsm8k\n name: main\n type: rewards.oai_gsm8k_transform # format: '{file_name}.{fn_name}'\nTo see other examples of custom reward functions, please see TRL GRPO Docs.\nTo see all configs, please see TRLConfig.\n\n\nGRPO with DAPO/Dr. GRPO loss\nThe DAPO paper and subsequently Dr. 
GRPO paper proposed an alternative loss function for GRPO to mitigate the penalty on longer responses.\ntrl:\n loss_type: dr_grpo\n # Normalizes loss based on max completion length (default: 256)\n max_completion_length:\nFor more information, see GRPO docs.\n\n\n\nSimPO\nSimPO uses CPOTrainer but with an alternative loss function.\nrl: simpo\nrl_beta: 0.1 # default in CPOTrainer\ncpo_alpha: 1.0 # default in CPOTrainer\nsimpo_gamma: 0.5 # default in CPOTrainer\nThis method uses the same dataset format as DPO.\n\n\nUsing local dataset files\ndatasets:\n - ds_type: json\n data_files:\n - orca_rlhf.jsonl\n split: train\n type: chatml.intel\n\n\nTRL auto-unwrapping for PEFT\nTRL supports auto-unwrapping PEFT models for RL training paradigms which rely on a reference model. This significantly reduces memory pressure as an additional reference model does not need to be loaded, and reference model log-probabilities can be obtained by disabling PEFT adapters. This is enabled by default. To turn it off, pass the following config:\n# load ref model when adapter training.\nrl_adapter_ref_model: true",
"crumbs": [
"How To Guides",
"RLHF (Beta)"
@@ -2523,7 +2523,7 @@
"href": "docs/docker.html#base",
"title": "Docker",
"section": "Base",
- "text": "Base\nThe base image is the most minimal image that can install Axolotl. It is based on the nvidia/cuda image. It includes python, torch, git, git-lfs, awscli, pydantic, and more.\n\nImage\naxolotlai/axolotl-base\nLink: Docker Hub\n\n\nTags format\nmain-base-py{python_version}-cu{cuda_version}-{pytorch_version}\nTags examples:\n\nmain-base-py3.11-cu128-2.7.1\nmain-base-py3.11-cu126-2.7.1\nmain-base-py3.11-cu126-2.6.0\nmain-base-py3.11-cu124-2.6.0",
+ "text": "Base\nThe base image is the most minimal image that can install Axolotl. It is based on the nvidia/cuda image. It includes python, torch, git, git-lfs, awscli, pydantic, and more.\n\nImage\naxolotlai/axolotl-base\nLink: Docker Hub\n\n\nTags format\nmain-base-py{python_version}-cu{cuda_version}-{pytorch_version}\nTags examples:\n\nmain-base-py3.11-cu128-2.7.1\nmain-base-py3.11-cu126-2.7.1\nmain-base-py3.11-cu126-2.7.0\nmain-base-py3.11-cu126-2.6.0\nmain-base-py3.11-cu124-2.6.0",
"crumbs": [
"Deployments",
"Docker"
@@ -2534,7 +2534,7 @@
"href": "docs/docker.html#main",
"title": "Docker",
"section": "Main",
- "text": "Main\nThe main image is the image that is used to run Axolotl. It is based on the axolotlai/axolotl-base image and includes the Axolotl codebase, dependencies, and more.\n\nImage\naxolotlai/axolotl\nLink: Docker Hub\n\n\nTags format\n# on push to main\nmain-py{python_version}-cu{cuda_version}-{pytorch_version}\n\n# latest main (currently torch 2.6.0, python 3.11, cuda 12.4)\nmain-latest\n\n# nightly build\n{branch}-{date_in_YYYYMMDD}-py{python_version}-cu{cuda_version}-{pytorch_version}\n\n# tagged release\n{version}\n\n\n\n\n\n\nTip\n\n\n\nThere may be some extra tags appended to the image, like -vllm which installs those packages.\n\n\nTags examples:\n\nmain-py3.11-cu128-2.7.1\nmain-py3.11-cu126-2.7.1\nmain-py3.11-cu126-2.6.0\nmain-py3.11-cu124-2.6.0\nmain-latest\nmain-20250303-py3.11-cu124-2.6.0\nmain-20250303-py3.11-cu126-2.6.0\n0.10.1",
+ "text": "Main\nThe main image is the image that is used to run Axolotl. It is based on the axolotlai/axolotl-base image and includes the Axolotl codebase, dependencies, and more.\n\nImage\naxolotlai/axolotl\nLink: Docker Hub\n\n\nTags format\n# on push to main\nmain-py{python_version}-cu{cuda_version}-{pytorch_version}\n\n# latest main (currently torch 2.6.0, python 3.11, cuda 12.4)\nmain-latest\n\n# nightly build\n{branch}-{date_in_YYYYMMDD}-py{python_version}-cu{cuda_version}-{pytorch_version}\n\n# tagged release\n{version}\n\n\n\n\n\n\nTip\n\n\n\nThere may be some extra tags appended to the image, like -vllm which installs those packages.\n\n\nTags examples:\n\nmain-py3.11-cu128-2.7.1\nmain-py3.11-cu126-2.7.1\nmain-py3.11-cu126-2.7.0\nmain-py3.11-cu126-2.6.0\nmain-py3.11-cu124-2.6.0\nmain-latest\nmain-20250303-py3.11-cu124-2.6.0\nmain-20250303-py3.11-cu126-2.6.0\n0.10.1",
"crumbs": [
"Deployments",
"Docker"
diff --git a/sitemap.xml b/sitemap.xml
index b08bfeb26..0350cdf6b 100644
--- a/sitemap.xml
+++ b/sitemap.xml
@@ -2,758 +2,758 @@
https://docs.axolotl.ai/TODO.html
- 2025-07-12T14:18:08.998Z
+ 2025-07-12T15:41:44.501Z
https://docs.axolotl.ai/index.html
- 2025-07-12T14:18:09.018Z
+ 2025-07-12T15:41:44.521Z
https://docs.axolotl.ai/docs/debugging.html
- 2025-07-12T14:18:09.000Z
+ 2025-07-12T15:41:44.503Z
https://docs.axolotl.ai/docs/amd_hpc.html
- 2025-07-12T14:18:08.999Z
+ 2025-07-12T15:41:44.502Z
https://docs.axolotl.ai/docs/api/utils.callbacks.mlflow_.html
- 2025-07-12T14:21:13.570Z
+ 2025-07-12T15:44:53.140Z
https://docs.axolotl.ai/docs/api/monkeypatch.llama_expand_mask.html
- 2025-07-12T14:21:13.005Z
+ 2025-07-12T15:44:52.574Z
https://docs.axolotl.ai/docs/api/loaders.patch_manager.html
- 2025-07-12T14:21:12.616Z
+ 2025-07-12T15:44:52.183Z
https://docs.axolotl.ai/docs/api/core.chat.format.llama3x.html
- 2025-07-12T14:21:12.312Z
+ 2025-07-12T15:44:51.879Z
https://docs.axolotl.ai/docs/api/cli.train.html
- 2025-07-12T14:21:12.366Z
+ 2025-07-12T15:44:51.932Z
https://docs.axolotl.ai/docs/api/utils.callbacks.perplexity.html
- 2025-07-12T14:21:13.561Z
+ 2025-07-12T15:44:53.131Z
https://docs.axolotl.ai/docs/api/core.chat.messages.html
- 2025-07-12T14:21:12.310Z
+ 2025-07-12T15:44:51.876Z
https://docs.axolotl.ai/docs/api/utils.callbacks.lisa.html
- 2025-07-12T14:21:13.566Z
+ 2025-07-12T15:44:53.136Z
https://docs.axolotl.ai/docs/api/cli.merge_sharded_fsdp_weights.html
- 2025-07-12T14:21:12.451Z
+ 2025-07-12T15:44:52.019Z
https://docs.axolotl.ai/docs/api/cli.sweeps.html
- 2025-07-12T14:21:12.465Z
+ 2025-07-12T15:44:52.033Z
https://docs.axolotl.ai/docs/api/utils.chat_templates.html
- 2025-07-12T14:21:13.103Z
+ 2025-07-12T15:44:52.672Z
https://docs.axolotl.ai/docs/api/core.chat.format.shared.html
- 2025-07-12T14:21:12.314Z
+ 2025-07-12T15:44:51.880Z
https://docs.axolotl.ai/docs/api/core.trainers.mixins.optimizer.html
- 2025-07-12T14:21:12.623Z
+ 2025-07-12T15:44:52.190Z
https://docs.axolotl.ai/docs/api/utils.collators.mamba.html
- 2025-07-12T14:21:13.510Z
+ 2025-07-12T15:44:53.080Z
https://docs.axolotl.ai/docs/api/logging_config.html
- 2025-07-12T14:21:12.259Z
+ 2025-07-12T15:44:51.825Z
https://docs.axolotl.ai/docs/api/utils.collators.mm_chat.html
- 2025-07-12T14:21:13.515Z
+ 2025-07-12T15:44:53.084Z
https://docs.axolotl.ai/docs/api/prompt_strategies.completion.html
- 2025-07-12T14:21:12.743Z
+ 2025-07-12T15:44:52.311Z
https://docs.axolotl.ai/docs/api/kernels.utils.html
- 2025-07-12T14:21:12.955Z
+ 2025-07-12T15:44:52.523Z
https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chat_template.html
- 2025-07-12T14:21:12.776Z
+ 2025-07-12T15:44:52.344Z
https://docs.axolotl.ai/docs/api/kernels.swiglu.html
- 2025-07-12T14:21:12.946Z
+ 2025-07-12T15:44:52.514Z
https://docs.axolotl.ai/docs/api/common.const.html
- 2025-07-12T14:21:13.470Z
+ 2025-07-12T15:44:53.040Z
https://docs.axolotl.ai/docs/api/cli.cloud.base.html
- 2025-07-12T14:21:12.507Z
+ 2025-07-12T15:44:52.074Z
https://docs.axolotl.ai/docs/api/utils.callbacks.comet_.html
- 2025-07-12T14:21:13.573Z
+ 2025-07-12T15:44:53.143Z
https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html
- 2025-07-12T14:21:13.069Z
+ 2025-07-12T15:44:52.638Z
https://docs.axolotl.ai/docs/api/common.architectures.html
- 2025-07-12T14:21:13.469Z
+ 2025-07-12T15:44:53.038Z
https://docs.axolotl.ai/docs/api/prompt_strategies.pygmalion.html
- 2025-07-12T14:21:12.770Z
+ 2025-07-12T15:44:52.338Z
https://docs.axolotl.ai/docs/api/utils.schemas.peft.html
- 2025-07-12T14:21:13.276Z
+ 2025-07-12T15:44:52.845Z
https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.user_defined.html
- 2025-07-12T14:21:12.800Z
+ 2025-07-12T15:44:52.367Z
https://docs.axolotl.ai/docs/api/utils.schemas.datasets.html
- 2025-07-12T14:21:13.267Z
+ 2025-07-12T15:44:52.836Z
https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_w_system.html
- 2025-07-12T14:21:12.717Z
+ 2025-07-12T15:44:52.284Z
https://docs.axolotl.ai/docs/api/prompt_strategies.base.html
- 2025-07-12T14:21:12.658Z
+ 2025-07-12T15:44:52.225Z
https://docs.axolotl.ai/docs/api/utils.lora.html
- 2025-07-12T14:21:13.108Z
+ 2025-07-12T15:44:52.677Z
https://docs.axolotl.ai/docs/api/prompt_strategies.input_output.html
- 2025-07-12T14:21:12.749Z
+ 2025-07-12T15:44:52.317Z
https://docs.axolotl.ai/docs/api/utils.schemas.trl.html
- 2025-07-12T14:21:13.279Z
+ 2025-07-12T15:44:52.848Z
https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.zephyr.html
- 2025-07-12T14:21:12.798Z
+ 2025-07-12T15:44:52.366Z
https://docs.axolotl.ai/docs/api/integrations.kd.trainer.html
- 2025-07-12T14:21:13.457Z
+ 2025-07-12T15:44:53.027Z
https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html
- 2025-07-12T14:21:13.095Z
+ 2025-07-12T15:44:52.664Z
https://docs.axolotl.ai/docs/api/utils.optimizers.adopt.html
- 2025-07-12T14:21:13.196Z
+ 2025-07-12T15:44:52.765Z
https://docs.axolotl.ai/docs/api/monkeypatch.data.batch_dataset_fetcher.html
- 2025-07-12T14:21:13.063Z
+ 2025-07-12T15:44:52.631Z
https://docs.axolotl.ai/docs/api/cli.cloud.modal_.html
- 2025-07-12T14:21:12.513Z
+ 2025-07-12T15:44:52.081Z
https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_chat.html
- 2025-07-12T14:21:12.703Z
+ 2025-07-12T15:44:52.271Z
https://docs.axolotl.ai/docs/api/utils.freeze.html
- 2025-07-12T14:21:13.125Z
+ 2025-07-12T15:44:52.693Z
https://docs.axolotl.ai/docs/api/prompt_strategies.bradley_terry.llama3.html
- 2025-07-12T14:21:12.843Z
+ 2025-07-12T15:44:52.410Z
https://docs.axolotl.ai/docs/api/integrations.base.html
- 2025-07-12T14:21:13.445Z
+ 2025-07-12T15:44:53.015Z
https://docs.axolotl.ai/docs/api/monkeypatch.unsloth_.html
- 2025-07-12T14:21:13.061Z
+ 2025-07-12T15:44:52.630Z
https://docs.axolotl.ai/docs/api/prompt_strategies.kto.chatml.html
- 2025-07-12T14:21:12.817Z
+ 2025-07-12T15:44:52.385Z
https://docs.axolotl.ai/docs/api/cli.main.html
- 2025-07-12T14:21:12.358Z
+ 2025-07-12T15:44:51.924Z
https://docs.axolotl.ai/docs/api/common.datasets.html
- 2025-07-12T14:21:13.485Z
+ 2025-07-12T15:44:53.055Z
https://docs.axolotl.ai/docs/api/train.html
- 2025-07-12T14:21:12.173Z
+ 2025-07-12T15:44:51.739Z
https://docs.axolotl.ai/docs/api/core.trainers.base.html
- 2025-07-12T14:21:12.528Z
+ 2025-07-12T15:44:52.096Z
https://docs.axolotl.ai/docs/api/core.trainers.mixins.scheduler.html
- 2025-07-12T14:21:12.633Z
+ 2025-07-12T15:44:52.200Z
https://docs.axolotl.ai/docs/api/utils.ctx_managers.sequence_parallel.html
- 2025-07-12T14:21:12.656Z
+ 2025-07-12T15:44:52.224Z
https://docs.axolotl.ai/docs/api/utils.schemas.config.html
- 2025-07-12T14:21:13.238Z
+ 2025-07-12T15:44:52.807Z
https://docs.axolotl.ai/docs/api/loaders.tokenizer.html
- 2025-07-12T14:21:12.601Z
+ 2025-07-12T15:44:52.168Z
https://docs.axolotl.ai/docs/api/integrations.liger.args.html
- 2025-07-12T14:21:13.460Z
+ 2025-07-12T15:44:53.030Z
https://docs.axolotl.ai/docs/api/cli.config.html
- 2025-07-12T14:21:12.417Z
+ 2025-07-12T15:44:51.984Z
https://docs.axolotl.ai/docs/api/loaders.processor.html
- 2025-07-12T14:21:12.602Z
+ 2025-07-12T15:44:52.170Z
https://docs.axolotl.ai/docs/api/monkeypatch.utils.html
- 2025-07-12T14:21:13.041Z
+ 2025-07-12T15:44:52.610Z
https://docs.axolotl.ai/docs/api/integrations.cut_cross_entropy.args.html
- 2025-07-12T14:21:13.449Z
+ 2025-07-12T15:44:53.019Z
https://docs.axolotl.ai/docs/api/core.trainers.dpo.trainer.html
- 2025-07-12T14:21:12.559Z
+ 2025-07-12T15:44:52.126Z
https://docs.axolotl.ai/docs/api/loaders.adapter.html
- 2025-07-12T14:21:12.608Z
+ 2025-07-12T15:44:52.175Z
https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.llama3.html
- 2025-07-12T14:21:12.786Z
+ 2025-07-12T15:44:52.354Z
https://docs.axolotl.ai/docs/api/core.datasets.transforms.chat_builder.html
- 2025-07-12T14:21:12.327Z
+ 2025-07-12T15:44:51.893Z
https://docs.axolotl.ai/docs/api/monkeypatch.transformers_fa_utils.html
- 2025-07-12T14:21:13.060Z
+ 2025-07-12T15:44:52.628Z
https://docs.axolotl.ai/docs/api/datasets.html
- 2025-07-12T14:21:12.195Z
+ 2025-07-12T15:44:51.760Z
https://docs.axolotl.ai/docs/api/monkeypatch.btlm_attn_hijack_flash.html
- 2025-07-12T14:21:13.043Z
+ 2025-07-12T15:44:52.611Z
https://docs.axolotl.ai/docs/api/cli.inference.html
- 2025-07-12T14:21:12.431Z
+ 2025-07-12T15:44:51.998Z
https://docs.axolotl.ai/docs/api/cli.utils.html
- 2025-07-12T14:21:12.497Z
+ 2025-07-12T15:44:52.064Z
https://docs.axolotl.ai/docs/api/cli.preprocess.html
- 2025-07-12T14:21:12.459Z
+ 2025-07-12T15:44:52.027Z
https://docs.axolotl.ai/docs/api/loaders.model.html
- 2025-07-12T14:21:12.592Z
+ 2025-07-12T15:44:52.160Z
https://docs.axolotl.ai/docs/api/core.trainers.grpo.sampler.html
- 2025-07-12T14:21:12.581Z
+ 2025-07-12T15:44:52.149Z
https://docs.axolotl.ai/docs/api/core.trainers.grpo.trainer.html
- 2025-07-12T14:21:12.569Z
+ 2025-07-12T15:44:52.137Z
https://docs.axolotl.ai/docs/batch_vs_grad.html
- 2025-07-12T14:18:08.999Z
+ 2025-07-12T15:41:44.502Z
https://docs.axolotl.ai/docs/custom_integrations.html
- 2025-07-12T14:18:08.999Z
+ 2025-07-12T15:41:44.502Z
https://docs.axolotl.ai/docs/quantize.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.506Z
https://docs.axolotl.ai/docs/unsloth.html
- 2025-07-12T14:18:09.004Z
+ 2025-07-12T15:41:44.506Z
https://docs.axolotl.ai/docs/ray-integration.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.506Z
https://docs.axolotl.ai/docs/dataset-formats/stepwise_supervised.html
- 2025-07-12T14:18:08.999Z
+ 2025-07-12T15:41:44.502Z
https://docs.axolotl.ai/docs/dataset-formats/template_free.html
- 2025-07-12T14:18:09.000Z
+ 2025-07-12T15:41:44.502Z
https://docs.axolotl.ai/docs/dataset-formats/index.html
- 2025-07-12T14:18:08.999Z
+ 2025-07-12T15:41:44.502Z
https://docs.axolotl.ai/docs/dataset-formats/pretraining.html
- 2025-07-12T14:18:08.999Z
+ 2025-07-12T15:41:44.502Z
https://docs.axolotl.ai/docs/multi-gpu.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.506Z
https://docs.axolotl.ai/docs/torchao.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.506Z
https://docs.axolotl.ai/docs/cli.html
- 2025-07-12T14:18:08.999Z
+ 2025-07-12T15:41:44.502Z
https://docs.axolotl.ai/docs/nccl.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.506Z
https://docs.axolotl.ai/docs/dataset_preprocessing.html
- 2025-07-12T14:18:09.000Z
+ 2025-07-12T15:41:44.502Z
https://docs.axolotl.ai/docs/faq.html
- 2025-07-12T14:18:09.000Z
+ 2025-07-12T15:41:44.503Z
https://docs.axolotl.ai/docs/qat.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.506Z
https://docs.axolotl.ai/docs/lr_groups.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.505Z
https://docs.axolotl.ai/docs/mac.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.506Z
https://docs.axolotl.ai/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html
- 2025-07-12T14:18:09.022Z
+ 2025-07-12T15:41:44.525Z
https://docs.axolotl.ai/src/axolotl/integrations/LICENSE.html
- 2025-07-12T14:18:09.022Z
+ 2025-07-12T15:41:44.525Z
https://docs.axolotl.ai/docs/input_output.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.505Z
https://docs.axolotl.ai/docs/dataset_loading.html
- 2025-07-12T14:18:09.000Z
+ 2025-07-12T15:41:44.502Z
https://docs.axolotl.ai/docs/getting-started.html
- 2025-07-12T14:18:09.000Z
+ 2025-07-12T15:41:44.503Z
https://docs.axolotl.ai/docs/lora_optims.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.505Z
https://docs.axolotl.ai/docs/multi-node.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.506Z
https://docs.axolotl.ai/docs/fsdp_qlora.html
- 2025-07-12T14:18:09.000Z
+ 2025-07-12T15:41:44.503Z
https://docs.axolotl.ai/docs/inference.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.505Z
https://docs.axolotl.ai/docs/sequence_parallelism.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.506Z
https://docs.axolotl.ai/docs/rlhf.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.506Z
https://docs.axolotl.ai/docs/dataset-formats/tokenized.html
- 2025-07-12T14:18:09.000Z
+ 2025-07-12T15:41:44.502Z
https://docs.axolotl.ai/docs/dataset-formats/conversation.html
- 2025-07-12T14:18:08.999Z
+ 2025-07-12T15:41:44.502Z
https://docs.axolotl.ai/docs/dataset-formats/inst_tune.html
- 2025-07-12T14:18:08.999Z
+ 2025-07-12T15:41:44.502Z
https://docs.axolotl.ai/docs/reward_modelling.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.506Z
https://docs.axolotl.ai/docs/docker.html
- 2025-07-12T14:18:09.000Z
+ 2025-07-12T15:41:44.503Z
https://docs.axolotl.ai/docs/installation.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.505Z
https://docs.axolotl.ai/docs/multimodal.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.506Z
https://docs.axolotl.ai/docs/config-reference.html
- 2025-07-12T14:21:26.033Z
+ 2025-07-12T15:45:05.743Z
https://docs.axolotl.ai/docs/api/prompt_tokenizers.html
- 2025-07-12T14:21:12.249Z
+ 2025-07-12T15:44:51.816Z
https://docs.axolotl.ai/docs/api/utils.schedulers.html
- 2025-07-12T14:21:13.165Z
+ 2025-07-12T15:44:52.734Z
https://docs.axolotl.ai/docs/api/utils.samplers.multipack.html
- 2025-07-12T14:21:13.555Z
+ 2025-07-12T15:44:53.125Z
https://docs.axolotl.ai/docs/api/prompt_strategies.orcamini.html
- 2025-07-12T14:21:12.764Z
+ 2025-07-12T15:44:52.332Z
https://docs.axolotl.ai/docs/api/core.trainers.trl.html
- 2025-07-12T14:21:12.542Z
+ 2025-07-12T15:44:52.110Z
https://docs.axolotl.ai/docs/api/prompt_strategies.metharme.html
- 2025-07-12T14:21:12.760Z
+ 2025-07-12T15:44:52.328Z
https://docs.axolotl.ai/docs/api/utils.callbacks.profiler.html
- 2025-07-12T14:21:13.565Z
+ 2025-07-12T15:44:53.135Z
https://docs.axolotl.ai/docs/api/utils.schemas.enums.html
- 2025-07-12T14:21:13.306Z
+ 2025-07-12T15:44:52.876Z
https://docs.axolotl.ai/docs/api/core.trainers.mamba.html
- 2025-07-12T14:21:12.548Z
+ 2025-07-12T15:44:52.115Z
https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_flash.html
- 2025-07-12T14:21:12.980Z
+ 2025-07-12T15:44:52.548Z
https://docs.axolotl.ai/docs/api/monkeypatch.relora.html
- 2025-07-12T14:21:13.004Z
+ 2025-07-12T15:44:52.572Z
https://docs.axolotl.ai/docs/api/monkeypatch.stablelm_attn_hijack_flash.html
- 2025-07-12T14:21:13.050Z
+ 2025-07-12T15:44:52.619Z
https://docs.axolotl.ai/docs/api/loaders.constants.html
- 2025-07-12T14:21:12.617Z
+ 2025-07-12T15:44:52.185Z
https://docs.axolotl.ai/docs/api/utils.callbacks.qat.html
- 2025-07-12T14:21:13.580Z
+ 2025-07-12T15:44:53.150Z
https://docs.axolotl.ai/docs/api/utils.schemas.model.html
- 2025-07-12T14:21:13.244Z
+ 2025-07-12T15:44:52.814Z
https://docs.axolotl.ai/docs/api/prompt_strategies.stepwise_supervised.html
- 2025-07-12T14:21:12.753Z
+ 2025-07-12T15:44:52.321Z
https://docs.axolotl.ai/docs/api/integrations.grokfast.optimizer.html
- 2025-07-12T14:21:13.450Z
+ 2025-07-12T15:44:53.020Z
https://docs.axolotl.ai/docs/api/convert.html
- 2025-07-12T14:21:12.208Z
+ 2025-07-12T15:44:51.774Z
https://docs.axolotl.ai/docs/api/kernels.quantize.html
- 2025-07-12T14:21:12.953Z
+ 2025-07-12T15:44:52.521Z
https://docs.axolotl.ai/docs/api/core.training_args.html
- 2025-07-12T14:21:12.287Z
+ 2025-07-12T15:44:51.853Z
https://docs.axolotl.ai/docs/api/prompt_strategies.chat_template.html
- 2025-07-12T14:21:12.690Z
+ 2025-07-12T15:44:52.257Z
https://docs.axolotl.ai/docs/api/index.html
- 2025-07-12T14:21:12.112Z
+ 2025-07-12T15:44:51.677Z
https://docs.axolotl.ai/docs/api/prompt_strategies.llama2_chat.html
- 2025-07-12T14:21:12.737Z
+ 2025-07-12T15:44:52.305Z
https://docs.axolotl.ai/docs/api/utils.trainer.html
- 2025-07-12T14:21:13.141Z
+ 2025-07-12T15:44:52.710Z
https://docs.axolotl.ai/docs/api/prompt_strategies.messages.chat.html
- 2025-07-12T14:21:12.775Z
+ 2025-07-12T15:44:52.342Z
https://docs.axolotl.ai/docs/api/monkeypatch.lora_kernels.html
- 2025-07-12T14:21:13.033Z
+ 2025-07-12T15:44:52.602Z
https://docs.axolotl.ai/docs/api/kernels.lora.html
- 2025-07-12T14:21:12.925Z
+ 2025-07-12T15:44:52.493Z
https://docs.axolotl.ai/docs/api/cli.vllm_serve.html
- 2025-07-12T14:21:12.504Z
+ 2025-07-12T15:44:52.071Z
https://docs.axolotl.ai/docs/api/utils.schemas.multimodal.html
- 2025-07-12T14:21:13.284Z
+ 2025-07-12T15:44:52.853Z
https://docs.axolotl.ai/docs/api/utils.schemas.utils.html
- 2025-07-12T14:21:13.312Z
+ 2025-07-12T15:44:52.882Z
https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_xformers.html
- 2025-07-12T14:21:12.981Z
+ 2025-07-12T15:44:52.550Z
https://docs.axolotl.ai/docs/api/integrations.lm_eval.args.html
- 2025-07-12T14:21:13.464Z
+ 2025-07-12T15:44:53.033Z
https://docs.axolotl.ai/docs/api/monkeypatch.mistral_attn_hijack_flash.html
- 2025-07-12T14:21:12.996Z
+ 2025-07-12T15:44:52.564Z
https://docs.axolotl.ai/docs/api/utils.collators.core.html
- 2025-07-12T14:21:13.488Z
+ 2025-07-12T15:44:53.057Z
https://docs.axolotl.ai/docs/api/core.chat.format.chatml.html
- 2025-07-12T14:21:12.311Z
+ 2025-07-12T15:44:51.877Z
https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.passthrough.html
- 2025-07-12T14:21:12.801Z
+ 2025-07-12T15:44:52.369Z
https://docs.axolotl.ai/docs/api/core.datasets.chat.html
- 2025-07-12T14:21:12.319Z
+ 2025-07-12T15:44:51.885Z
https://docs.axolotl.ai/docs/api/utils.bench.html
- 2025-07-12T14:21:13.117Z
+ 2025-07-12T15:44:52.686Z
https://docs.axolotl.ai/docs/api/utils.schemas.training.html
- 2025-07-12T14:21:13.249Z
+ 2025-07-12T15:44:52.819Z
https://docs.axolotl.ai/docs/api/utils.collators.batching.html
- 2025-07-12T14:21:13.507Z
+ 2025-07-12T15:44:53.076Z
https://docs.axolotl.ai/docs/api/monkeypatch.llama_patch_multipack.html
- 2025-07-12T14:21:13.044Z
+ 2025-07-12T15:44:52.613Z
https://docs.axolotl.ai/docs/api/monkeypatch.multipack.html
- 2025-07-12T14:21:12.997Z
+ 2025-07-12T15:44:52.565Z
https://docs.axolotl.ai/docs/api/core.builders.causal.html
- 2025-07-12T14:21:12.269Z
+ 2025-07-12T15:44:51.836Z
https://docs.axolotl.ai/docs/api/cli.evaluate.html
- 2025-07-12T14:21:12.374Z
+ 2025-07-12T15:44:51.940Z
https://docs.axolotl.ai/docs/api/monkeypatch.trainer_fsdp_optim.html
- 2025-07-12T14:21:13.053Z
+ 2025-07-12T15:44:52.622Z
https://docs.axolotl.ai/docs/api/core.trainers.utils.html
- 2025-07-12T14:21:12.583Z
+ 2025-07-12T15:44:52.150Z
https://docs.axolotl.ai/docs/api/utils.schemas.integrations.html
- 2025-07-12T14:21:13.296Z
+ 2025-07-12T15:44:52.866Z
https://docs.axolotl.ai/docs/api/utils.dict.html
- 2025-07-12T14:21:13.188Z
+ 2025-07-12T15:44:52.757Z
https://docs.axolotl.ai/docs/api/core.builders.rl.html
- 2025-07-12T14:21:12.274Z
+ 2025-07-12T15:44:51.841Z
https://docs.axolotl.ai/docs/api/prompt_strategies.orpo.chat_template.html
- 2025-07-12T14:21:12.839Z
+ 2025-07-12T15:44:52.407Z
https://docs.axolotl.ai/docs/api/core.trainers.relora.html
- 2025-07-12T14:21:12.552Z
+ 2025-07-12T15:44:52.120Z
https://docs.axolotl.ai/docs/api/integrations.spectrum.args.html
- 2025-07-12T14:21:13.467Z
+ 2025-07-12T15:44:53.037Z
https://docs.axolotl.ai/docs/api/cli.quantize.html
- 2025-07-12T14:21:12.518Z
+ 2025-07-12T15:44:52.085Z
https://docs.axolotl.ai/docs/api/cli.checks.html
- 2025-07-12T14:21:12.400Z
+ 2025-07-12T15:44:51.966Z
https://docs.axolotl.ai/docs/api/prompt_strategies.kto.llama3.html
- 2025-07-12T14:21:12.809Z
+ 2025-07-12T15:44:52.377Z
https://docs.axolotl.ai/docs/api/utils.model_shard_quant.html
- 2025-07-12T14:21:13.113Z
+ 2025-07-12T15:44:52.682Z
https://docs.axolotl.ai/docs/api/utils.quantization.html
- 2025-07-12T14:21:13.225Z
+ 2025-07-12T15:44:52.794Z
https://docs.axolotl.ai/docs/api/core.trainers.mixins.rng_state_loader.html
- 2025-07-12T14:21:12.626Z
+ 2025-07-12T15:44:52.194Z
https://docs.axolotl.ai/docs/api/kernels.geglu.html
- 2025-07-12T14:21:12.936Z
+ 2025-07-12T15:44:52.504Z
https://docs.axolotl.ai/docs/api/utils.data.pretraining.html
- 2025-07-12T14:21:13.197Z
+ 2025-07-12T15:44:52.767Z
https://docs.axolotl.ai/docs/api/prompt_strategies.kto.user_defined.html
- 2025-07-12T14:21:12.819Z
+ 2025-07-12T15:44:52.386Z
https://docs.axolotl.ai/docs/api/core.builders.base.html
- 2025-07-12T14:21:12.265Z
+ 2025-07-12T15:44:51.831Z
https://docs.axolotl.ai/docs/api/cli.merge_lora.html
- 2025-07-12T14:21:12.440Z
+ 2025-07-12T15:44:52.007Z
https://docs.axolotl.ai/docs/api/monkeypatch.mixtral.html
- 2025-07-12T14:21:13.064Z
+ 2025-07-12T15:44:52.633Z
https://docs.axolotl.ai/docs/api/utils.data.sft.html
- 2025-07-12T14:21:13.204Z
+ 2025-07-12T15:44:52.773Z
https://docs.axolotl.ai/docs/api/prompt_strategies.user_defined.html
- 2025-07-12T14:21:12.725Z
+ 2025-07-12T15:44:52.292Z
https://docs.axolotl.ai/docs/api/utils.tokenization.html
- 2025-07-12T14:21:13.102Z
+ 2025-07-12T15:44:52.670Z
https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chatml.html
- 2025-07-12T14:21:12.797Z
+ 2025-07-12T15:44:52.364Z
https://docs.axolotl.ai/docs/api/models.mamba.modeling_mamba.html
- 2025-07-12T14:21:13.487Z
+ 2025-07-12T15:44:53.056Z
https://docs.axolotl.ai/docs/api/cli.args.html
- 2025-07-12T14:21:12.393Z
+ 2025-07-12T15:44:51.960Z
https://docs.axolotl.ai/docs/api/evaluate.html
- 2025-07-12T14:21:12.184Z
+ 2025-07-12T15:44:51.749Z
https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_instruct.html
- 2025-07-12T14:21:12.705Z
+ 2025-07-12T15:44:52.272Z
https://docs.axolotl.ai/docs/api/utils.distributed.html
- 2025-07-12T14:21:13.185Z
+ 2025-07-12T15:44:52.754Z
https://docs.axolotl.ai/docs/multipack.html
- 2025-07-12T14:18:09.003Z
+ 2025-07-12T15:41:44.506Z
https://docs.axolotl.ai/examples/colab-notebooks/colab-axolotl-example.html
- 2025-07-12T14:18:09.005Z
+ 2025-07-12T15:41:44.510Z
https://docs.axolotl.ai/FAQS.html
- 2025-07-12T14:18:08.998Z
+ 2025-07-12T15:41:44.500Z