diff --git a/.nojekyll b/.nojekyll index b8998bea4..9e9b791a0 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -b27210dc \ No newline at end of file +522f6e63 \ No newline at end of file diff --git a/docs/multimodal.html b/docs/multimodal.html index ad89d1889..f8b5112fb 100644 --- a/docs/multimodal.html +++ b/docs/multimodal.html @@ -499,15 +499,19 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
  • Pixtral
  • Llava-1.5
  • Mistral-Small-3.1
  • +
  • Voxtral
  • Gemma-3
  • Gemma-3n
  • Qwen2-VL
  • Qwen2.5-VL
  • +
  • SmolVLM2
  • +
  • LFM2-VL
  • Dataset Format
  • FAQ
  • @@ -544,10 +548,13 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
  • Pixtral
  • Llava-1.5
  • Mistral-Small-3.1
  • +
  • Voxtral
  • Gemma-3
  • Gemma-3n
  • Qwen2-VL
  • Qwen2.5-VL
  • +
  • SmolVLM2
  • +
  • LFM2-VL
  • @@ -560,7 +567,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); remove_unused_columns: false # leave columns in place as they are needed to handle image embeddings during training sample_packing: false # not yet supported with multimodal -chat_template: # see in next section +chat_template: # see in next section if specified # example dataset datasets: @@ -622,6 +629,23 @@ Warning chat_template: mistral_v7_tekken
    +
    +

    Voxtral

    +
    +
    +
    + +
    +
    +Tip +
    +
    +
    +

    Please make sure to install audio lib via pip3 install librosa==0.11.0 'mistral_common[audio]==1.8.3'

    +
    +
    +
    base_model: mistralai/Voxtral-Mini-3B-2507
    +

    Gemma-3

    @@ -638,9 +662,9 @@ Tip

    For multi-modal 4B/12B/27B models, use the following config:

    -
    base_model: google/gemma-3-4b-it
    -
    -chat_template: gemma3
    +
    base_model: google/gemma-3-4b-it
    +
    +chat_template: gemma3

    Gemma-3n

    @@ -670,21 +694,55 @@ Tip

    Please make sure to install timm via pip3 install timm==1.0.17

    -
    base_model: google/gemma-3n-E2B-it
    -
    -chat_template: gemma3n
    +
    base_model: google/gemma-3n-E2B-it
    +
    +chat_template: gemma3n

    Qwen2-VL

    -
    base_model: Qwen/Qwen2-VL-7B-Instruct
    -
    -chat_template: qwen2_vl
    +
    base_model: Qwen/Qwen2-VL-7B-Instruct
    +
    +chat_template: qwen2_vl

    Qwen2.5-VL

    -
    base_model: Qwen/Qwen2.5-VL-7B-Instruct
    -
    -chat_template: qwen2_vl  # same as qwen2-vl
    +
    base_model: Qwen/Qwen2.5-VL-7B-Instruct
    +
    +chat_template: qwen2_vl  # same as qwen2-vl
    +
    +
    +

    SmolVLM2

    +
    +
    +
    + +
    +
    +Tip +
    +
    +
    +

    Please make sure to install num2words via pip3 install num2words==0.5.14

    +
    +
    +
    base_model: HuggingFaceTB/SmolVLM2-500M-Video-Instruct
    +
    +
    +

    LFM2-VL

    +
    +
    +
    + +
    +
    +Warning +
    +
    +
    +

    Please uninstall causal-conv1d via pip3 uninstall -y causal-conv1d

    +
    +
    +
    base_model: LiquidAI/LFM2-VL-450M
    @@ -744,34 +802,56 @@ Tip
    +
    +

    Video

    +
    +
    +
    + +
    +
    +Warning +
    +
    +
    +

    This is not well tested at the moment. We welcome contributors!

    +
    +
    +

    For video loading, you can use the following keys within content alongside "type": "video":

    + +

    Example

    Here is an example of a multi-modal dataset:

    -
    [
    -  {
    -    "messages": [
    -        {
    -            "role": "system",
    -            "content": [
    -              {"type": "text", "text": "You are a helpful assistant."}
    -              ]
    -        },
    -        {
    -            "role": "user",
    -            "content": [
    -                {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
    -                {"type": "text", "text": "Describe this image in detail."}
    -            ]
    -        },
    -        {
    -            "role": "assistant",
    -            "content": [
    -              {"type": "text", "text": "The image is a bee."}
    -            ]
    -        }
    -    ]
    -  }
    -]
    +
    [
    +  {
    +    "messages": [
    +        {
    +            "role": "system",
    +            "content": [
    +              {"type": "text", "text": "You are a helpful assistant."}
    +              ]
    +        },
    +        {
    +            "role": "user",
    +            "content": [
    +                {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
    +                {"type": "text", "text": "Describe this image in detail."}
    +            ]
    +        },
    +        {
    +            "role": "assistant",
    +            "content": [
    +              {"type": "text", "text": "The image is a bee."}
    +            ]
    +        }
    +    ]
    +  }
    +]
    diff --git a/search.json b/search.json index 9e9fe9ded..10001c63b 100644 --- a/search.json +++ b/search.json @@ -563,7 +563,7 @@ "href": "docs/multimodal.html", "title": "MultiModal / Vision Language Models (BETA)", "section": "", - "text": "Mllama\nLlama4\nPixtral\nLlava-1.5\nMistral-Small-3.1\nGemma-3\nGemma-3n\nQwen2-VL\nQwen2.5-VL", + "text": "Mllama\nLlama4\nPixtral\nLlava-1.5\nMistral-Small-3.1\nVoxtral\nGemma-3\nGemma-3n\nQwen2-VL\nQwen2.5-VL\nSmolVLM2\nLFM2-VL", "crumbs": [ "How To Guides", "MultiModal / Vision Language Models (BETA)" @@ -574,7 +574,7 @@ "href": "docs/multimodal.html#supported-models", "title": "MultiModal / Vision Language Models (BETA)", "section": "", - "text": "Mllama\nLlama4\nPixtral\nLlava-1.5\nMistral-Small-3.1\nGemma-3\nGemma-3n\nQwen2-VL\nQwen2.5-VL", + "text": "Mllama\nLlama4\nPixtral\nLlava-1.5\nMistral-Small-3.1\nVoxtral\nGemma-3\nGemma-3n\nQwen2-VL\nQwen2.5-VL\nSmolVLM2\nLFM2-VL", "crumbs": [ "How To Guides", "MultiModal / Vision Language Models (BETA)" @@ -585,7 +585,7 @@ "href": "docs/multimodal.html#usage", "title": "MultiModal / Vision Language Models (BETA)", "section": "Usage", - "text": "Usage\nMultimodal support is limited and doesn’t have full feature parity.\nHere are the hyperparams you’ll need to use to finetune a multimodal model.\nprocessor_type: AutoProcessor\n\nskip_prepare_dataset: true\nremove_unused_columns: false # leave columns in place as they are needed to handle image embeddings during training\nsample_packing: false # not yet supported with multimodal\n\nchat_template: # see in next section\n\n# example dataset\ndatasets:\n - path: HuggingFaceH4/llava-instruct-mix-vsft\n type: chat_template\n split: train[:1%]\n field_messages: messages\n\n# (optional) if doing lora, only finetune the Language model,\n# leave the vision model and vision tower frozen\n# load_in_8bit: true\nadapter: lora\nlora_target_modules: 'model.language_model.layers.[\\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'\n\n# (optional) if you want to resize images to a set size\nimage_size: 512\nimage_resize_algorithm: bilinear\nPlease see examples folder for full configs.\n\n\n\n\n\n\nWarning\n\n\n\nSome of our chat_templates have been extended to support broader dataset types. This should not break any existing configs.\n\n\n\nMllama\nbase_model: meta-llama/Llama-3.2-11B-Vision-Instruct\n\nchat_template: llama3_2_vision\n\n\nLlama4\nbase_model: meta-llama/Llama-4-Scout-17B-16E-Instruct\n\nchat_template: llama4\n\n\nPixtral\nbase_model: mistralai/Pixtral-12B-2409\n\nchat_template: pixtral\n\n\nLlava-1.5\nbase_model: llava-hf/llava-1.5-7b-hf\n\nchat_template: llava\n\n\nMistral-Small-3.1\nbase_model: mistralai/Mistral-Small-3.1-24B-Instruct-2503\n\nchat_template: mistral_v7_tekken\n\n\nGemma-3\n\n\n\n\n\n\nTip\n\n\n\nThe Gemma3-1B model is a text-only model, so please train as regular text model.\n\n\nFor multi-modal 4B/12B/27B models, use the following config:\nbase_model: google/gemma-3-4b-it\n\nchat_template: gemma3\n\n\nGemma-3n\n\n\n\n\n\n\nWarning\n\n\n\nThe model’s initial loss and grad norm will be very high. We suspect this to be due to the Conv in the vision layers.\n\n\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install timm via pip3 install timm==1.0.17\n\n\nbase_model: google/gemma-3n-E2B-it\n\nchat_template: gemma3n\n\n\nQwen2-VL\nbase_model: Qwen/Qwen2-VL-7B-Instruct\n\nchat_template: qwen2_vl\n\n\nQwen2.5-VL\nbase_model: Qwen/Qwen2.5-VL-7B-Instruct\n\nchat_template: qwen2_vl # same as qwen2-vl", + "text": "Usage\nMultimodal support is limited and doesn’t have full feature parity.\nHere are the hyperparams you’ll need to use to finetune a multimodal model.\nprocessor_type: AutoProcessor\n\nskip_prepare_dataset: true\nremove_unused_columns: false # leave columns in place as they are needed to handle image embeddings during training\nsample_packing: false # not yet supported with multimodal\n\nchat_template: # see in next section if specified\n\n# example dataset\ndatasets:\n - path: HuggingFaceH4/llava-instruct-mix-vsft\n type: chat_template\n split: train[:1%]\n field_messages: messages\n\n# (optional) if doing lora, only finetune the Language model,\n# leave the vision model and vision tower frozen\n# load_in_8bit: true\nadapter: lora\nlora_target_modules: 'model.language_model.layers.[\\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'\n\n# (optional) if you want to resize images to a set size\nimage_size: 512\nimage_resize_algorithm: bilinear\nPlease see examples folder for full configs.\n\n\n\n\n\n\nWarning\n\n\n\nSome of our chat_templates have been extended to support broader dataset types. This should not break any existing configs.\n\n\n\nMllama\nbase_model: meta-llama/Llama-3.2-11B-Vision-Instruct\n\nchat_template: llama3_2_vision\n\n\nLlama4\nbase_model: meta-llama/Llama-4-Scout-17B-16E-Instruct\n\nchat_template: llama4\n\n\nPixtral\nbase_model: mistralai/Pixtral-12B-2409\n\nchat_template: pixtral\n\n\nLlava-1.5\nbase_model: llava-hf/llava-1.5-7b-hf\n\nchat_template: llava\n\n\nMistral-Small-3.1\nbase_model: mistralai/Mistral-Small-3.1-24B-Instruct-2503\n\nchat_template: mistral_v7_tekken\n\n\nVoxtral\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install audio lib via pip3 install librosa==0.11.0 'mistral_common[audio]==1.8.3'\n\n\nbase_model: mistralai/Voxtral-Mini-3B-2507\n\n\nGemma-3\n\n\n\n\n\n\nTip\n\n\n\nThe Gemma3-1B model is a text-only model, so please train as regular text model.\n\n\nFor multi-modal 4B/12B/27B models, use the following config:\nbase_model: google/gemma-3-4b-it\n\nchat_template: gemma3\n\n\nGemma-3n\n\n\n\n\n\n\nWarning\n\n\n\nThe model’s initial loss and grad norm will be very high. We suspect this to be due to the Conv in the vision layers.\n\n\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install timm via pip3 install timm==1.0.17\n\n\nbase_model: google/gemma-3n-E2B-it\n\nchat_template: gemma3n\n\n\nQwen2-VL\nbase_model: Qwen/Qwen2-VL-7B-Instruct\n\nchat_template: qwen2_vl\n\n\nQwen2.5-VL\nbase_model: Qwen/Qwen2.5-VL-7B-Instruct\n\nchat_template: qwen2_vl # same as qwen2-vl\n\n\nSmolVLM2\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install num2words via pip3 install num2words==0.5.14\n\n\nbase_model: HuggingFaceTB/SmolVLM2-500M-Video-Instruct\n\n\nLFM2-VL\n\n\n\n\n\n\nWarning\n\n\n\nPlease uninstall causal-conv1d via pip3 uninstall -y causal-conv1d\n\n\nbase_model: LiquidAI/LFM2-VL-450M", "crumbs": [ "How To Guides", "MultiModal / Vision Language Models (BETA)" @@ -596,7 +596,7 @@ "href": "docs/multimodal.html#dataset-format", "title": "MultiModal / Vision Language Models (BETA)", "section": "Dataset Format", - "text": "Dataset Format\nFor multi-modal datasets, we adopt an extended chat_template format similar to OpenAI’s Message format.\n\nA message is a list of role and content.\nrole can be system, user, assistant, etc.\ncontent is a list of type and (text, image, path, url, base64, or audio).\n\n\nImage\n\n\n\n\n\n\nNote\n\n\n\nFor backwards compatibility:\n\nIf the dataset has a images or image column of list[Image], it will be appended to the first content list as {\"type\": \"image\", \"image\": ...}. However, if the content already has a {\"type\": \"image\"} but no image key, it will be set the image key.\nIf content is a string, it will be converted to a list with type as text.\n\n\n\nFor image loading, you can use the following keys within content alongside \"type\": \"image\":\n\n\"path\": \"/path/to/image.jpg\"\n\"url\": \"https://example.com/image.jpg\"\n\"base64\": \"...\"\n\"image\": PIL.Image\n\n\n\nAudio\nFor audio loading, you can use the following keys within content alongside \"type\": \"audio\":\n\n\"path\": \"/path/to/audio.mp3\"\n\"url\": \"https://example.com/audio.mp3\"\n\"audio\": np.ndarray\n\n\n\n\n\n\n\nTip\n\n\n\nYou may need to install librosa via pip3 install librosa==0.11.0.\n\n\n\n\nExample\nHere is an example of a multi-modal dataset:\n[\n {\n \"messages\": [\n {\n \"role\": \"system\",\n \"content\": [\n {\"type\": \"text\", \"text\": \"You are a helpful assistant.\"}\n ]\n },\n {\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"image\", \"url\": \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg\"},\n {\"type\": \"text\", \"text\": \"Describe this image in detail.\"}\n ]\n },\n {\n \"role\": \"assistant\",\n \"content\": [\n {\"type\": \"text\", \"text\": \"The image is a bee.\"}\n ]\n }\n ]\n }\n]", + "text": "Dataset Format\nFor multi-modal datasets, we adopt an extended chat_template format similar to OpenAI’s Message format.\n\nA message is a list of role and content.\nrole can be system, user, assistant, etc.\ncontent is a list of type and (text, image, path, url, base64, or audio).\n\n\nImage\n\n\n\n\n\n\nNote\n\n\n\nFor backwards compatibility:\n\nIf the dataset has a images or image column of list[Image], it will be appended to the first content list as {\"type\": \"image\", \"image\": ...}. However, if the content already has a {\"type\": \"image\"} but no image key, it will be set the image key.\nIf content is a string, it will be converted to a list with type as text.\n\n\n\nFor image loading, you can use the following keys within content alongside \"type\": \"image\":\n\n\"path\": \"/path/to/image.jpg\"\n\"url\": \"https://example.com/image.jpg\"\n\"base64\": \"...\"\n\"image\": PIL.Image\n\n\n\nAudio\nFor audio loading, you can use the following keys within content alongside \"type\": \"audio\":\n\n\"path\": \"/path/to/audio.mp3\"\n\"url\": \"https://example.com/audio.mp3\"\n\"audio\": np.ndarray\n\n\n\n\n\n\n\nTip\n\n\n\nYou may need to install librosa via pip3 install librosa==0.11.0.\n\n\n\n\nVideo\n\n\n\n\n\n\nWarning\n\n\n\nThis is not well tested at the moment. We welcome contributors!\n\n\nFor video loading, you can use the following keys within content alongside \"type\": \"video\":\n\n\"path\": \"/path/to/video.mp4\"\n\"url\": \"https://example.com/video.mp4\"\n\"video\": np.ndarray | list[PIL.Image.Image] | torch.Tensor (or list of the aforementioned)\n\n\n\nExample\nHere is an example of a multi-modal dataset:\n[\n {\n \"messages\": [\n {\n \"role\": \"system\",\n \"content\": [\n {\"type\": \"text\", \"text\": \"You are a helpful assistant.\"}\n ]\n },\n {\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"image\", \"url\": \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg\"},\n {\"type\": \"text\", \"text\": \"Describe this image in detail.\"}\n ]\n },\n {\n \"role\": \"assistant\",\n \"content\": [\n {\"type\": \"text\", \"text\": \"The image is a bee.\"}\n ]\n }\n ]\n }\n]", "crumbs": [ "How To Guides", "MultiModal / Vision Language Models (BETA)" diff --git a/sitemap.xml b/sitemap.xml index ae4a5de3e..f47fbc153 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,790 +2,790 @@ https://docs.axolotl.ai/index.html - 2025-08-14T01:23:15.314Z + 2025-08-15T14:53:04.773Z https://docs.axolotl.ai/src/axolotl/integrations/LICENSE.html - 2025-08-14T01:23:15.318Z + 2025-08-15T14:53:04.777Z https://docs.axolotl.ai/docs/gradient_checkpointing.html - 2025-08-14T01:23:15.292Z + 2025-08-15T14:53:04.754Z https://docs.axolotl.ai/docs/mixed_precision.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/docs/sequence_parallelism.html - 2025-08-14T01:23:15.296Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/docs/docker.html - 2025-08-14T01:23:15.292Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/torchao.html - 2025-08-14T01:23:15.296Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/docs/multi-gpu.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/docs/dataset_preprocessing.html - 2025-08-14T01:23:15.292Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/debugging.html - 2025-08-14T01:23:15.292Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/rlhf.html - 2025-08-14T01:23:15.296Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/docs/lr_groups.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.756Z https://docs.axolotl.ai/docs/multimodal.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/docs/ray-integration.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/docs/input_output.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.756Z https://docs.axolotl.ai/docs/inference.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.756Z https://docs.axolotl.ai/docs/fsdp_qlora.html - 2025-08-14T01:23:15.292Z + 2025-08-15T14:53:04.754Z https://docs.axolotl.ai/docs/multipack.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/docs/api/prompt_strategies.input_output.html - 2025-08-14T01:26:50.779Z + 2025-08-15T14:56:16.581Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_patch_multipack.html - 2025-08-14T01:26:51.045Z + 2025-08-15T14:56:16.848Z https://docs.axolotl.ai/docs/api/cli.art.html - 2025-08-14T01:26:50.411Z + 2025-08-15T14:56:16.212Z https://docs.axolotl.ai/docs/api/cli.quantize.html - 2025-08-14T01:26:50.487Z + 2025-08-15T14:56:16.288Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_flash.html - 2025-08-14T01:26:50.996Z + 2025-08-15T14:56:16.800Z https://docs.axolotl.ai/docs/api/utils.callbacks.profiler.html - 2025-08-14T01:26:51.578Z + 2025-08-15T14:56:17.379Z https://docs.axolotl.ai/docs/api/prompt_strategies.stepwise_supervised.html - 2025-08-14T01:26:50.783Z + 2025-08-15T14:56:16.585Z https://docs.axolotl.ai/docs/api/integrations.cut_cross_entropy.args.html - 2025-08-14T01:26:51.461Z + 2025-08-15T14:56:17.263Z https://docs.axolotl.ai/docs/api/utils.data.sft.html - 2025-08-14T01:26:51.210Z + 2025-08-15T14:56:17.012Z https://docs.axolotl.ai/docs/api/monkeypatch.unsloth_.html - 2025-08-14T01:26:51.062Z + 2025-08-15T14:56:16.865Z https://docs.axolotl.ai/docs/api/kernels.geglu.html - 2025-08-14T01:26:50.972Z + 2025-08-15T14:56:16.775Z https://docs.axolotl.ai/docs/api/prompt_strategies.orpo.chat_template.html - 2025-08-14T01:26:50.870Z + 2025-08-15T14:56:16.670Z https://docs.axolotl.ai/docs/api/cli.utils.sweeps.html - 2025-08-14T01:26:50.534Z + 2025-08-15T14:56:16.335Z https://docs.axolotl.ai/docs/api/cli.delinearize_llama4.html - 2025-08-14T01:26:50.440Z + 2025-08-15T14:56:16.241Z https://docs.axolotl.ai/docs/api/prompt_strategies.pygmalion.html - 2025-08-14T01:26:50.801Z + 2025-08-15T14:56:16.602Z https://docs.axolotl.ai/docs/api/evaluate.html - 2025-08-14T01:26:50.196Z + 2025-08-15T14:56:15.997Z https://docs.axolotl.ai/docs/api/utils.data.pretraining.html - 2025-08-14T01:26:51.203Z + 2025-08-15T14:56:17.005Z https://docs.axolotl.ai/docs/api/index.html - 2025-08-14T01:26:50.124Z + 2025-08-15T14:56:15.924Z https://docs.axolotl.ai/docs/api/monkeypatch.stablelm_attn_hijack_flash.html - 2025-08-14T01:26:51.051Z + 2025-08-15T14:56:16.854Z https://docs.axolotl.ai/docs/api/monkeypatch.utils.html - 2025-08-14T01:26:51.042Z + 2025-08-15T14:56:16.845Z https://docs.axolotl.ai/docs/api/cli.checks.html - 2025-08-14T01:26:50.418Z + 2025-08-15T14:56:16.218Z https://docs.axolotl.ai/docs/api/utils.chat_templates.html - 2025-08-14T01:26:51.102Z + 2025-08-15T14:56:16.905Z https://docs.axolotl.ai/docs/api/core.builders.rl.html - 2025-08-14T01:26:50.287Z + 2025-08-15T14:56:16.087Z https://docs.axolotl.ai/docs/api/prompt_strategies.messages.chat.html - 2025-08-14T01:26:50.805Z + 2025-08-15T14:56:16.606Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.optimizer.html - 2025-08-14T01:26:50.650Z + 2025-08-15T14:56:16.450Z https://docs.axolotl.ai/docs/api/prompt_strategies.orcamini.html - 2025-08-14T01:26:50.794Z + 2025-08-15T14:56:16.596Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.scheduler.html - 2025-08-14T01:26:50.660Z + 2025-08-15T14:56:16.460Z https://docs.axolotl.ai/docs/api/cli.utils.fetch.html - 2025-08-14T01:26:50.522Z + 2025-08-15T14:56:16.323Z https://docs.axolotl.ai/docs/api/utils.schemas.datasets.html - 2025-08-14T01:26:51.277Z + 2025-08-15T14:56:17.078Z https://docs.axolotl.ai/docs/api/cli.cloud.base.html - 2025-08-14T01:26:50.498Z + 2025-08-15T14:56:16.299Z https://docs.axolotl.ai/docs/api/cli.utils.args.html - 2025-08-14T01:26:50.517Z + 2025-08-15T14:56:16.318Z https://docs.axolotl.ai/docs/api/utils.callbacks.comet_.html - 2025-08-14T01:26:51.587Z + 2025-08-15T14:56:17.387Z https://docs.axolotl.ai/docs/api/utils.callbacks.mlflow_.html - 2025-08-14T01:26:51.583Z + 2025-08-15T14:56:17.384Z https://docs.axolotl.ai/docs/api/core.builders.causal.html - 2025-08-14T01:26:50.282Z + 2025-08-15T14:56:16.082Z https://docs.axolotl.ai/docs/api/cli.train.html - 2025-08-14T01:26:50.381Z + 2025-08-15T14:56:16.182Z https://docs.axolotl.ai/docs/api/utils.schemas.integrations.html - 2025-08-14T01:26:51.306Z + 2025-08-15T14:56:17.107Z https://docs.axolotl.ai/docs/api/integrations.lm_eval.args.html - 2025-08-14T01:26:51.477Z + 2025-08-15T14:56:17.278Z https://docs.axolotl.ai/docs/api/cli.evaluate.html - 2025-08-14T01:26:50.389Z + 2025-08-15T14:56:16.190Z https://docs.axolotl.ai/docs/api/utils.trainer.html - 2025-08-14T01:26:51.141Z + 2025-08-15T14:56:16.943Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.llama3.html - 2025-08-14T01:26:50.840Z + 2025-08-15T14:56:16.641Z https://docs.axolotl.ai/docs/api/convert.html - 2025-08-14T01:26:50.220Z + 2025-08-15T14:56:16.021Z https://docs.axolotl.ai/docs/api/utils.schemas.multimodal.html - 2025-08-14T01:26:51.294Z + 2025-08-15T14:56:17.095Z https://docs.axolotl.ai/docs/api/loaders.patch_manager.html - 2025-08-14T01:26:50.643Z + 2025-08-15T14:56:16.443Z https://docs.axolotl.ai/docs/api/utils.schemas.training.html - 2025-08-14T01:26:51.259Z + 2025-08-15T14:56:17.060Z https://docs.axolotl.ai/docs/api/utils.schemas.config.html - 2025-08-14T01:26:51.245Z + 2025-08-15T14:56:17.047Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.user_defined.html - 2025-08-14T01:26:50.850Z + 2025-08-15T14:56:16.650Z https://docs.axolotl.ai/docs/api/prompt_strategies.bradley_terry.llama3.html - 2025-08-14T01:26:50.874Z + 2025-08-15T14:56:16.674Z https://docs.axolotl.ai/docs/api/cli.vllm_serve.html - 2025-08-14T01:26:50.494Z + 2025-08-15T14:56:16.295Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_w_system.html - 2025-08-14T01:26:50.746Z + 2025-08-15T14:56:16.549Z https://docs.axolotl.ai/docs/api/cli.merge_lora.html - 2025-08-14T01:26:50.462Z + 2025-08-15T14:56:16.263Z https://docs.axolotl.ai/docs/api/utils.ctx_managers.sequence_parallel.html - 2025-08-14T01:26:50.684Z + 2025-08-15T14:56:16.487Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_instruct.html - 2025-08-14T01:26:50.734Z + 2025-08-15T14:56:16.537Z https://docs.axolotl.ai/docs/api/utils.bench.html - 2025-08-14T01:26:51.116Z + 2025-08-15T14:56:16.919Z https://docs.axolotl.ai/docs/api/common.datasets.html - 2025-08-14T01:26:51.498Z + 2025-08-15T14:56:17.299Z https://docs.axolotl.ai/docs/api/cli.utils.train.html - 2025-08-14T01:26:50.546Z + 2025-08-15T14:56:16.347Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_xformers.html - 2025-08-14T01:26:50.998Z + 2025-08-15T14:56:16.801Z https://docs.axolotl.ai/docs/api/core.chat.messages.html - 2025-08-14T01:26:50.322Z + 2025-08-15T14:56:16.123Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chat_template.html - 2025-08-14T01:26:50.806Z + 2025-08-15T14:56:16.608Z https://docs.axolotl.ai/docs/api/core.trainers.trl.html - 2025-08-14T01:26:50.571Z + 2025-08-15T14:56:16.372Z https://docs.axolotl.ai/docs/api/cli.preprocess.html - 2025-08-14T01:26:50.482Z + 2025-08-15T14:56:16.283Z https://docs.axolotl.ai/docs/api/kernels.swiglu.html - 2025-08-14T01:26:50.982Z + 2025-08-15T14:56:16.785Z https://docs.axolotl.ai/docs/api/kernels.quantize.html - 2025-08-14T01:26:50.989Z + 2025-08-15T14:56:16.793Z https://docs.axolotl.ai/docs/api/prompt_strategies.chat_template.html - 2025-08-14T01:26:50.719Z + 2025-08-15T14:56:16.521Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.chatml.html - 2025-08-14T01:26:50.848Z + 2025-08-15T14:56:16.649Z https://docs.axolotl.ai/docs/api/core.trainers.grpo.trainer.html - 2025-08-14T01:26:50.594Z + 2025-08-15T14:56:16.395Z https://docs.axolotl.ai/docs/api/monkeypatch.mistral_attn_hijack_flash.html - 2025-08-14T01:26:50.999Z + 2025-08-15T14:56:16.803Z https://docs.axolotl.ai/docs/api/core.datasets.chat.html - 2025-08-14T01:26:50.332Z + 2025-08-15T14:56:16.132Z https://docs.axolotl.ai/docs/api/cli.args.html - 2025-08-14T01:26:50.408Z + 2025-08-15T14:56:16.209Z https://docs.axolotl.ai/docs/api/cli.main.html - 2025-08-14T01:26:50.373Z + 2025-08-15T14:56:16.173Z https://docs.axolotl.ai/docs/api/core.trainers.dpo.trainer.html - 2025-08-14T01:26:50.584Z + 2025-08-15T14:56:16.385Z https://docs.axolotl.ai/docs/api/utils.schemas.trl.html - 2025-08-14T01:26:51.289Z + 2025-08-15T14:56:17.090Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.passthrough.html - 2025-08-14T01:26:50.832Z + 2025-08-15T14:56:16.633Z https://docs.axolotl.ai/docs/api/prompt_tokenizers.html - 2025-08-14T01:26:50.262Z + 2025-08-15T14:56:16.062Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_chat.html - 2025-08-14T01:26:50.733Z + 2025-08-15T14:56:16.535Z https://docs.axolotl.ai/docs/api/logging_config.html - 2025-08-14T01:26:50.271Z + 2025-08-15T14:56:16.072Z https://docs.axolotl.ai/docs/dataset-formats/tokenized.html - 2025-08-14T01:23:15.292Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/dataset-formats/index.html - 2025-08-14T01:23:15.292Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/dataset-formats/pretraining.html - 2025-08-14T01:23:15.292Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/dataset-formats/inst_tune.html - 2025-08-14T01:23:15.292Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/qat.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/examples/colab-notebooks/colab-axolotl-example.html - 2025-08-14T01:23:15.300Z + 2025-08-15T14:53:04.762Z https://docs.axolotl.ai/FAQS.html - 2025-08-14T01:23:15.290Z + 2025-08-15T14:53:04.751Z https://docs.axolotl.ai/docs/installation.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.756Z https://docs.axolotl.ai/docs/dataset-formats/stepwise_supervised.html - 2025-08-14T01:23:15.292Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/dataset-formats/template_free.html - 2025-08-14T01:23:15.292Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/dataset-formats/conversation.html - 2025-08-14T01:23:15.291Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/api/utils.dict.html - 2025-08-14T01:26:51.194Z + 2025-08-15T14:56:16.996Z https://docs.axolotl.ai/docs/api/prompt_strategies.completion.html - 2025-08-14T01:26:50.773Z + 2025-08-15T14:56:16.575Z https://docs.axolotl.ai/docs/api/utils.collators.core.html - 2025-08-14T01:26:51.500Z + 2025-08-15T14:56:17.302Z https://docs.axolotl.ai/docs/api/cli.inference.html - 2025-08-14T01:26:50.454Z + 2025-08-15T14:56:16.255Z https://docs.axolotl.ai/docs/api/utils.freeze.html - 2025-08-14T01:26:51.124Z + 2025-08-15T14:56:16.926Z https://docs.axolotl.ai/docs/api/core.trainers.grpo.sampler.html - 2025-08-14T01:26:50.607Z + 2025-08-15T14:56:16.407Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.rng_state_loader.html - 2025-08-14T01:26:50.653Z + 2025-08-15T14:56:16.454Z https://docs.axolotl.ai/docs/api/cli.utils.html - 2025-08-14T01:26:50.505Z + 2025-08-15T14:56:16.307Z https://docs.axolotl.ai/docs/api/core.chat.format.shared.html - 2025-08-14T01:26:50.327Z + 2025-08-15T14:56:16.127Z https://docs.axolotl.ai/docs/api/utils.callbacks.lisa.html - 2025-08-14T01:26:51.579Z + 2025-08-15T14:56:17.381Z https://docs.axolotl.ai/docs/api/utils.collators.mm_chat.html - 2025-08-14T01:26:51.527Z + 2025-08-15T14:56:17.329Z https://docs.axolotl.ai/docs/api/core.trainers.utils.html - 2025-08-14T01:26:50.608Z + 2025-08-15T14:56:16.409Z https://docs.axolotl.ai/docs/api/utils.optimizers.adopt.html - 2025-08-14T01:26:51.202Z + 2025-08-15T14:56:17.004Z https://docs.axolotl.ai/docs/api/integrations.base.html - 2025-08-14T01:26:51.458Z + 2025-08-15T14:56:17.260Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.user_defined.html - 2025-08-14T01:26:50.830Z + 2025-08-15T14:56:16.631Z https://docs.axolotl.ai/docs/api/monkeypatch.btlm_attn_hijack_flash.html - 2025-08-14T01:26:51.044Z + 2025-08-15T14:56:16.847Z https://docs.axolotl.ai/docs/api/utils.quantization.html - 2025-08-14T01:26:51.231Z + 2025-08-15T14:56:17.032Z https://docs.axolotl.ai/docs/api/utils.callbacks.qat.html - 2025-08-14T01:26:51.593Z + 2025-08-15T14:56:17.394Z https://docs.axolotl.ai/docs/api/core.builders.base.html - 2025-08-14T01:26:50.277Z + 2025-08-15T14:56:16.078Z https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html - 2025-08-14T01:26:51.068Z + 2025-08-15T14:56:16.871Z https://docs.axolotl.ai/docs/api/integrations.kd.trainer.html - 2025-08-14T01:26:51.470Z + 2025-08-15T14:56:17.272Z https://docs.axolotl.ai/docs/api/integrations.liger.args.html - 2025-08-14T01:26:51.473Z + 2025-08-15T14:56:17.275Z https://docs.axolotl.ai/docs/api/utils.collators.mamba.html - 2025-08-14T01:26:51.523Z + 2025-08-15T14:56:17.324Z https://docs.axolotl.ai/docs/api/loaders.model.html - 2025-08-14T01:26:50.618Z + 2025-08-15T14:56:16.419Z https://docs.axolotl.ai/docs/api/utils.schedulers.html - 2025-08-14T01:26:51.168Z + 2025-08-15T14:56:16.971Z https://docs.axolotl.ai/docs/api/kernels.lora.html - 2025-08-14T01:26:50.961Z + 2025-08-15T14:56:16.765Z https://docs.axolotl.ai/docs/api/utils.model_shard_quant.html - 2025-08-14T01:26:51.113Z + 2025-08-15T14:56:16.915Z https://docs.axolotl.ai/docs/api/core.chat.format.llama3x.html - 2025-08-14T01:26:50.325Z + 2025-08-15T14:56:16.125Z https://docs.axolotl.ai/docs/api/core.trainers.mamba.html - 2025-08-14T01:26:50.577Z + 2025-08-15T14:56:16.378Z https://docs.axolotl.ai/docs/api/utils.schemas.enums.html - 2025-08-14T01:26:51.317Z + 2025-08-15T14:56:17.117Z https://docs.axolotl.ai/docs/api/monkeypatch.mixtral.html - 2025-08-14T01:26:51.065Z + 2025-08-15T14:56:16.868Z https://docs.axolotl.ai/docs/api/kernels.utils.html - 2025-08-14T01:26:50.991Z + 2025-08-15T14:56:16.794Z https://docs.axolotl.ai/docs/api/core.training_args.html - 2025-08-14T01:26:50.299Z + 2025-08-15T14:56:16.100Z https://docs.axolotl.ai/docs/api/utils.callbacks.perplexity.html - 2025-08-14T01:26:51.574Z + 2025-08-15T14:56:17.375Z https://docs.axolotl.ai/docs/api/cli.cloud.modal_.html - 2025-08-14T01:26:50.504Z + 2025-08-15T14:56:16.305Z https://docs.axolotl.ai/docs/api/cli.utils.load.html - 2025-08-14T01:26:50.528Z + 2025-08-15T14:56:16.329Z https://docs.axolotl.ai/docs/api/train.html - 2025-08-14T01:26:50.185Z + 2025-08-15T14:56:15.986Z https://docs.axolotl.ai/docs/api/integrations.grokfast.optimizer.html - 2025-08-14T01:26:51.462Z + 2025-08-15T14:56:17.264Z https://docs.axolotl.ai/docs/api/utils.samplers.multipack.html - 2025-08-14T01:26:51.568Z + 2025-08-15T14:56:17.369Z https://docs.axolotl.ai/docs/api/prompt_strategies.metharme.html - 2025-08-14T01:26:50.790Z + 2025-08-15T14:56:16.592Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_expand_mask.html - 2025-08-14T01:26:51.006Z + 2025-08-15T14:56:16.809Z https://docs.axolotl.ai/docs/api/monkeypatch.data.batch_dataset_fetcher.html - 2025-08-14T01:26:51.064Z + 2025-08-15T14:56:16.866Z https://docs.axolotl.ai/docs/api/utils.lora.html - 2025-08-14T01:26:51.107Z + 2025-08-15T14:56:16.910Z https://docs.axolotl.ai/docs/api/loaders.tokenizer.html - 2025-08-14T01:26:50.626Z + 2025-08-15T14:56:16.427Z https://docs.axolotl.ai/docs/api/core.chat.format.chatml.html - 2025-08-14T01:26:50.324Z + 2025-08-15T14:56:16.124Z https://docs.axolotl.ai/docs/api/utils.collators.batching.html - 2025-08-14T01:26:51.519Z + 2025-08-15T14:56:17.321Z https://docs.axolotl.ai/docs/api/cli.merge_sharded_fsdp_weights.html - 2025-08-14T01:26:50.474Z + 2025-08-15T14:56:16.275Z https://docs.axolotl.ai/docs/api/prompt_strategies.llama2_chat.html - 2025-08-14T01:26:50.767Z + 2025-08-15T14:56:16.569Z https://docs.axolotl.ai/docs/api/utils.tokenization.html - 2025-08-14T01:26:51.101Z + 2025-08-15T14:56:16.903Z https://docs.axolotl.ai/docs/api/common.architectures.html - 2025-08-14T01:26:51.481Z + 2025-08-15T14:56:17.283Z https://docs.axolotl.ai/docs/api/core.datasets.transforms.chat_builder.html - 2025-08-14T01:26:50.339Z + 2025-08-15T14:56:16.140Z https://docs.axolotl.ai/docs/api/core.trainers.base.html - 2025-08-14T01:26:50.556Z + 2025-08-15T14:56:16.357Z https://docs.axolotl.ai/docs/api/monkeypatch.lora_kernels.html - 2025-08-14T01:26:51.034Z + 2025-08-15T14:56:16.837Z https://docs.axolotl.ai/docs/api/utils.schemas.utils.html - 2025-08-14T01:26:51.322Z + 2025-08-15T14:56:17.123Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.llama3.html - 2025-08-14T01:26:50.817Z + 2025-08-15T14:56:16.618Z https://docs.axolotl.ai/docs/api/cli.config.html - 2025-08-14T01:26:50.435Z + 2025-08-15T14:56:16.236Z https://docs.axolotl.ai/docs/api/utils.schemas.peft.html - 2025-08-14T01:26:51.285Z + 2025-08-15T14:56:17.086Z https://docs.axolotl.ai/docs/api/prompt_strategies.user_defined.html - 2025-08-14T01:26:50.754Z + 2025-08-15T14:56:16.556Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.zephyr.html - 2025-08-14T01:26:50.829Z + 2025-08-15T14:56:16.630Z https://docs.axolotl.ai/docs/api/monkeypatch.multipack.html - 2025-08-14T01:26:51.001Z + 2025-08-15T14:56:16.804Z https://docs.axolotl.ai/docs/api/prompt_strategies.base.html - 2025-08-14T01:26:50.686Z + 2025-08-15T14:56:16.488Z https://docs.axolotl.ai/docs/api/models.mamba.modeling_mamba.html - 2025-08-14T01:26:51.499Z + 2025-08-15T14:56:17.300Z https://docs.axolotl.ai/docs/api/monkeypatch.relora.html - 2025-08-14T01:26:51.004Z + 2025-08-15T14:56:16.808Z https://docs.axolotl.ai/docs/api/common.const.html - 2025-08-14T01:26:51.483Z + 2025-08-15T14:56:17.284Z https://docs.axolotl.ai/docs/api/monkeypatch.trainer_fsdp_optim.html - 2025-08-14T01:26:51.054Z + 2025-08-15T14:56:16.857Z https://docs.axolotl.ai/docs/api/utils.distributed.html - 2025-08-14T01:26:51.188Z + 2025-08-15T14:56:16.991Z https://docs.axolotl.ai/docs/api/loaders.constants.html - 2025-08-14T01:26:50.644Z + 2025-08-15T14:56:16.445Z https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html - 2025-08-14T01:26:51.094Z + 2025-08-15T14:56:16.897Z https://docs.axolotl.ai/docs/api/datasets.html - 2025-08-14T01:26:50.207Z + 2025-08-15T14:56:16.008Z https://docs.axolotl.ai/docs/api/monkeypatch.transformers_fa_utils.html - 2025-08-14T01:26:51.061Z + 2025-08-15T14:56:16.863Z https://docs.axolotl.ai/docs/api/loaders.processor.html - 2025-08-14T01:26:50.628Z + 2025-08-15T14:56:16.428Z https://docs.axolotl.ai/docs/api/integrations.spectrum.args.html - 2025-08-14T01:26:51.480Z + 2025-08-15T14:56:17.281Z https://docs.axolotl.ai/docs/api/loaders.adapter.html - 2025-08-14T01:26:50.633Z + 2025-08-15T14:56:16.434Z https://docs.axolotl.ai/docs/api/utils.schemas.model.html - 2025-08-14T01:26:51.252Z + 2025-08-15T14:56:17.054Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chatml.html - 2025-08-14T01:26:50.827Z + 2025-08-15T14:56:16.628Z https://docs.axolotl.ai/docs/batch_vs_grad.html - 2025-08-14T01:23:15.291Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/mac.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.756Z https://docs.axolotl.ai/docs/nd_parallelism.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/docs/dataset_loading.html - 2025-08-14T01:23:15.292Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/lora_optims.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.756Z https://docs.axolotl.ai/docs/unsloth.html - 2025-08-14T01:23:15.296Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/docs/config-reference.html - 2025-08-14T01:27:04.890Z + 2025-08-15T14:56:30.358Z https://docs.axolotl.ai/docs/custom_integrations.html - 2025-08-14T01:23:15.291Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/faq.html - 2025-08-14T01:23:15.292Z + 2025-08-15T14:53:04.754Z https://docs.axolotl.ai/docs/amd_hpc.html - 2025-08-14T01:23:15.291Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/multi-node.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/docs/cli.html - 2025-08-14T01:23:15.291Z + 2025-08-15T14:53:04.753Z https://docs.axolotl.ai/docs/nccl.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/docs/optimizers.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/docs/getting-started.html - 2025-08-14T01:23:15.292Z + 2025-08-15T14:53:04.754Z https://docs.axolotl.ai/docs/quantize.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/docs/reward_modelling.html - 2025-08-14T01:23:15.295Z + 2025-08-15T14:53:04.757Z https://docs.axolotl.ai/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html - 2025-08-14T01:23:15.318Z + 2025-08-15T14:53:04.777Z