From 5992e607a2e59dced8b0ccb520527b1ad57c94f7 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Thu, 4 Dec 2025 21:44:44 +0700
Subject: [PATCH] fix: improve ministral3 docs to be clearer (#3300)

* fix: improve ministral3 docs to be clearer

* fix: title

* chore: wording
---
 examples/ministral/README.md                  | 10 +--
 examples/ministral3/README.md                 | 79 +++++++++++++++++++
 examples/ministral3/ministral3-3b-qlora.yaml  | 67 ++++++++++++++++
 .../{ministral => ministral3}/think/README.md | 34 +-------
 .../think/ministral3-3b-think-qlora.yaml}     |  0
 examples/ministral3/vision/README.md          | 57 +++++++++++++
 .../vision/ministral3-3b-vision-qlora.yml     | 64 +++++++++++++++
 7 files changed, 272 insertions(+), 39 deletions(-)
 create mode 100644 examples/ministral3/README.md
 create mode 100644 examples/ministral3/ministral3-3b-qlora.yaml
 rename examples/{ministral => ministral3}/think/README.md (72%)
 rename examples/{ministral/think/ministral3-small-think-qlora.yaml => ministral3/think/ministral3-3b-think-qlora.yaml} (100%)
 create mode 100644 examples/ministral3/vision/README.md
 create mode 100644 examples/ministral3/vision/ministral3-3b-vision-qlora.yml

diff --git a/examples/ministral/README.md b/examples/ministral/README.md
index b088c06ec..f8af7bf27 100644
--- a/examples/ministral/README.md
+++ b/examples/ministral/README.md
@@ -1,6 +1,6 @@
 # Finetune Ministral with Axolotl
 
-Ministral is a family of openweight models from MistralAI found on HuggingFace at [2410](mistralai/Ministral-8B-Instruct-2410) and [2512](https://huggingface.co/collections/mistralai/ministral-3) (see [Thinking](#thinking)). This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking.
+Ministral is a family of open-weight models from MistralAI found on [HuggingFace](https://huggingface.co/mistralai/Ministral-8B-Instruct-2410). This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking.
 
 ## Getting started
 
@@ -18,14 +18,6 @@ This config uses about 8.76 GiB VRAM. 
Let us know how it goes. Happy finetuning! 🚀 -### Thinking - -MistralAI has released their [Ministral3 2512](https://huggingface.co/collections/mistralai/ministral-3) model with thinking capabilities, enabling Chain-of-Thought reasoning with explicit thinking steps. - -📚 **[See the Thinking fine-tuning guide →](./think/README.md)** - -For Ministral3 Base/Instruct, you can reuse the above config to train supervised finetuning. - ### Tips - We recommend adding the same/similar SystemPrompt that the model is tuned for. You can find this within the repo's files titled `SYSTEM_PROMPT.txt`. diff --git a/examples/ministral3/README.md b/examples/ministral3/README.md new file mode 100644 index 000000000..6ed7efda5 --- /dev/null +++ b/examples/ministral3/README.md @@ -0,0 +1,79 @@ +# Finetune Ministral3 with Axolotl + +Ministral3 is a family of open-weight models from MistralAI found on [HuggingFace](https://huggingface.co/collections/mistralai/ministral-3). This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking. + +Please see [Thinking](#thinking) and [Vision](#vision) for their respective fine-tuning. + +Thanks to the team at MistralAI for giving us early access to prepare for these releases. + +Note: This is still experimental given it is based on transformers v5 RC. + +## Getting started + +1. Install Axolotl from source following the [installation guide](https://docs.axolotl.ai/docs/installation.html#sec-edge-build). + +2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage. + +3. Swap to the Axolotl transformers v5 branch + + ```bash + cp examples/ministral3/ministral3-3b-qlora.yaml ministral3-3b-qlora.yaml + + git fetch + git checkout transformers-v5 + + # Install packages for transformers v5 + pip install -e . + ``` + +4. Run the fine-tuning: + + ```bash + axolotl train ministral3-3b-qlora.yaml + ``` + +Let us know how it goes. Happy finetuning! 
🚀 + + +### Tips + +- We recommend adding the same/similar SystemPrompt that the model is tuned for. You can find this within the repo's files titled `SYSTEM_PROMPT.txt`. +- You can run a full finetuning by removing the `adapter: qlora` and `load_in_4bit: true` from the config. +- Read more on how to load your own dataset at [docs](https://docs.axolotl.ai/docs/dataset_loading.html). +- The text dataset format follows the OpenAI Messages format as seen [here](https://docs.axolotl.ai/docs/dataset-formats/conversation.html#chat_template). + +### Thinking + +Ministral3 2512 model supports thinking capabilities, enabling Chain-of-Thought reasoning with explicit thinking steps. + +📚 **[See the Thinking fine-tuning guide →](./think/README.md)** + +### Vision + +Ministral3 2512 model also supports vision capabilities. + +📚 **[See the Vision fine-tuning guide →](./vision/README.md)** + +## Optimization Guides + +Please check the [Optimizations doc](https://docs.axolotl.ai/docs/optimizations.html). + +## Limitations + +We only support the `mistral-common` tokenizer for Supervised Fine-tuning at the moment and for `type: chat_template` only. + +In addition, we do not support overriding tokens yet. + +## Related Resources + +- [MistralAI Mistral3 Blog](https://mistral.ai/news/mistral-3) +- [Axolotl Docs](https://docs.axolotl.ai) +- [Axolotl Website](https://axolotl.ai) +- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl) +- [Axolotl Discord](https://discord.gg/7m9sfhzaf3) + + +## Future Work + +- Add parity to Preference Tuning, RL, etc. +- Add parity to other tokenizer configs like overriding tokens. 
diff --git a/examples/ministral3/ministral3-3b-qlora.yaml b/examples/ministral3/ministral3-3b-qlora.yaml new file mode 100644 index 000000000..a31545ab2 --- /dev/null +++ b/examples/ministral3/ministral3-3b-qlora.yaml @@ -0,0 +1,67 @@ +base_model: mistralai/Ministral-3-3B-Reasoning-2512 + +# Enable to use mistral-common tokenizer +tokenizer_use_mistral_common: true + +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + +plugins: + - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin + +load_in_8bit: false +load_in_4bit: true + +datasets: + - path: fozziethebeat/alpaca_messages_2k_test + type: chat_template + +dataset_prepared_path: last_run_prepared +val_set_size: 0.1 +output_dir: ./outputs/lora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 2048 +sample_packing: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_target_modules: + - gate_proj + - down_proj + - up_proj + - q_proj + - v_proj + - k_proj + - o_proj + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 1 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +bf16: auto +tf32: false + +gradient_checkpointing: true +resume_from_checkpoint: +logging_steps: 1 +flash_attention: true + +warmup_ratio: 0.1 +evals_per_epoch: 1 +saves_per_epoch: 1 + +# save_first_step: true # uncomment this to validate checkpoint saving works with your config diff --git a/examples/ministral/think/README.md b/examples/ministral3/think/README.md similarity index 72% rename from examples/ministral/think/README.md rename to examples/ministral3/think/README.md index 0ee5ea876..8c40adbb9 100644 --- a/examples/ministral/think/README.md +++ b/examples/ministral3/think/README.md @@ -2,8 +2,6 @@ This guide covers fine-tuning [Ministral3 2512](https://huggingface.co/collections/mistralai/ministral-3) with thinking capabilities 
using Axolotl. The thinking model enables explicit Chain-of-Thought reasoning with separate thinking and response sections. -Thanks to the team at MistralAI for giving us early access to prepare for these releases. - ## Prerequisites Before starting, ensure you have: @@ -11,35 +9,11 @@ Before starting, ensure you have: ## Getting Started -1. Install transformers v5 +Run the thinking model fine-tuning: - ```bash - pip install transformers==5.0.0rc0 - ``` - - Note: This is still experimental in Axolotl. Other stuff may break. - -2. Upgrade `mistral-common` - - ```bash - pip install mistral-common==1.8.6 - ``` - -3. Swap to the Axolotl transformers v5 branch - - ```bash - # copy examples/ministral/think/ministral3-small-think-qlora.yaml somewhere - cp examples/ministral/think/ministral3-small-think-qlora.yaml ministral3-small-think-qlora.yaml - - git fetch - git checkout transformers-v5 - ``` - -4. Run the thinking model fine-tuning: - - ```bash - axolotl train ministral3-small-think-qlora.yaml - ``` +```bash +axolotl train examples/ministral3/think/ministral3-3b-think-qlora.yaml +``` This config uses about 4.76 GiB VRAM. diff --git a/examples/ministral/think/ministral3-small-think-qlora.yaml b/examples/ministral3/think/ministral3-3b-think-qlora.yaml similarity index 100% rename from examples/ministral/think/ministral3-small-think-qlora.yaml rename to examples/ministral3/think/ministral3-3b-think-qlora.yaml diff --git a/examples/ministral3/vision/README.md b/examples/ministral3/vision/README.md new file mode 100644 index 000000000..369b0116a --- /dev/null +++ b/examples/ministral3/vision/README.md @@ -0,0 +1,57 @@ +# Ministral3 2512 Vision Fine-tuning + +This guide covers fine-tuning [Ministral3 2512](https://huggingface.co/collections/mistralai/ministral-3) with vision capabilities using Axolotl. + +## Prerequisites + +Before starting, ensure you have: +- Installed Axolotl from source (see [main README](../README.md#getting-started)) + +## Getting started + +1. 
Install the required vision lib:
+   ```bash
+   pip install 'mistral-common[opencv]==1.8.6'
+   ```
+
+2. Download the example dataset image:
+   ```bash
+   wget https://huggingface.co/datasets/Nanobit/text-vision-2k-test/resolve/main/African_elephant.jpg
+   ```
+
+3. Run the fine-tuning:
+   ```bash
+   axolotl train examples/ministral3/vision/ministral3-3b-vision-qlora.yml
+   ```
+
+WARNING: The loss and grad norm will be much higher than normal at first. We suspect this to be inherent to the model at the moment. If anyone would like to submit a fix for this, we are happy to take a look.
+
+### Tips
+
+Key differences from text-only model:
+- Multi-modal dataset format required
+- Sample packing not supported
+
+## Dataset Format
+
+The vision model requires the multi-modal dataset format as documented [here](https://docs.axolotl.ai/docs/multimodal.html#dataset-format).
+
+One exception is that passing `"image": PIL.Image` is not supported. MistralTokenizer only supports `path`, `url`, and `base64` for now.
+
+Example:
+```json
+{
+    "messages": [
+        {"role": "system", "content": [{ "type": "text", "text": "{SYSTEM_PROMPT}"}]},
+        {"role": "user", "content": [
+            { "type": "text", "text": "What's in this image?"},
+            {"type": "image", "path": "path/to/image.jpg" }
+        ]},
+        {"role": "assistant", "content": [{ "type": "text", "text": "..." }]}
+    ]
+}
+```
+
+## Limitations
+
+- Sample Packing is not supported for multi-modality training currently. 
diff --git a/examples/ministral3/vision/ministral3-3b-vision-qlora.yml b/examples/ministral3/vision/ministral3-3b-vision-qlora.yml new file mode 100644 index 000000000..0a0fdce4a --- /dev/null +++ b/examples/ministral3/vision/ministral3-3b-vision-qlora.yml @@ -0,0 +1,64 @@ +base_model: mistralai/Ministral-3-3B-Reasoning-2512 +processor_type: AutoProcessor + +# Enable to use mistral-common tokenizer +tokenizer_use_mistral_common: true + +plugins: + - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin + +load_in_4bit: true + +# these 3 lines are needed for now to handle vision chat templates w images +skip_prepare_dataset: true +remove_unused_columns: false +sample_packing: false + +# sample dataset below requires downloading image in advance +# wget https://huggingface.co/datasets/Nanobit/text-vision-2k-test/resolve/main/African_elephant.jpg +datasets: + - path: Nanobit/text-vision-2k-test + type: chat_template + +dataset_prepared_path: last_run_prepared +val_set_size: 0.01 +output_dir: ./outputs/out + +adapter: qlora +lora_model_dir: + +sequence_len: 2048 + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: 'model.language_model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj' + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 1 +micro_batch_size: 1 +num_epochs: 1 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +bf16: true +fp16: +tf32: true + +gradient_checkpointing: true +logging_steps: 1 +flash_attention: true + +warmup_ratio: 0.1 +evals_per_epoch: 1 +saves_per_epoch: 1 +weight_decay: 0.0 +special_tokens: + +# save_first_step: true # uncomment this to validate checkpoint saving works with your config