separate out flash-attn install (sadly)
This commit is contained in:
@@ -12,9 +12,14 @@ This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl.
|
||||
|
||||
Here is an example of how to install from PyPI with uv:
|
||||
```bash
|
||||
# Ensure you have a compatible version of Pytorch installed
|
||||
pip3 install packaging setuptools wheel ninja
|
||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||
# Ensure you have a compatible version of PyTorch installed
|
||||
# Option A: manage dependencies in your project
|
||||
uv add 'axolotl>=0.12.0'
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
|
||||
# Option B: quick install
|
||||
uv pip install 'axolotl>=0.12.0'
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
```
|
||||
|
||||
2. Run one of the finetuning examples below.
|
||||
@@ -35,7 +40,7 @@ This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl.
|
||||
|
||||
- **Installation Error**: If you encounter `ImportError: ... undefined symbol ...` or `ModuleNotFoundError: No module named 'causal_conv1d_cuda'`, the `causal-conv1d` package may have been installed incorrectly. Try uninstalling it:
|
||||
```bash
|
||||
pip uninstall -y causal-conv1d
|
||||
uv pip uninstall causal-conv1d
|
||||
```
|
||||
|
||||
- **Dataset Loading**: Read more on how to load your own dataset in our [documentation](https://docs.axolotl.ai/docs/dataset_loading.html).
|
||||
|
||||
@@ -15,8 +15,8 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
|
||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||
cd axolotl
|
||||
|
||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||
pip3 install --no-build-isolation -e '.[flash-attn]'
|
||||
uv sync
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
|
||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||
python scripts/cutcrossentropy_install.py | sh
|
||||
@@ -31,7 +31,7 @@ python scripts/cutcrossentropy_install.py | sh
|
||||
# For those using our Docker image, use the below path.
|
||||
export CUDA_HOME=/usr/local/cuda
|
||||
|
||||
pip3 install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
|
||||
uv pip install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
|
||||
```
|
||||
|
||||
For any installation errors, see [XIELU Installation Issues](#xielu-installation-issues)
|
||||
@@ -67,7 +67,7 @@ If those didn't help, please try the below solutions:
|
||||
1. Pass env for CMAKE and try install again:
|
||||
|
||||
```bash
|
||||
Python_EXECUTABLE=$(which python) pip3 install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
|
||||
Python_EXECUTABLE=$(which python) uv pip install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
|
||||
```
|
||||
|
||||
2. Git clone the repo and manually hardcode python path:
|
||||
@@ -92,7 +92,7 @@ If those didn't help, please try the below solutions:
|
||||
```
|
||||
|
||||
```bash
|
||||
pip3 install . --no-build-isolation --no-deps
|
||||
uv pip install . --no-build-isolation --no-deps
|
||||
```
|
||||
|
||||
## Optimization Guides
|
||||
|
||||
@@ -17,8 +17,8 @@ Thanks to the team at Arcee.ai for using Axolotl in supervised fine-tuning the A
|
||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||
cd axolotl
|
||||
|
||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||
pip3 install --no-build-isolation -e '.[flash-attn]'
|
||||
uv sync
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
|
||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||
python scripts/cutcrossentropy_install.py | sh
|
||||
|
||||
@@ -12,10 +12,10 @@
|
||||
"\n",
|
||||
"Axolotl is the most performant LLM post-training framework available, delivering faster training with efficient, consistent and stable performance. Train your workload and ship your product 30% faster; saving you both time and money.\n",
|
||||
"\n",
|
||||
"- ⭐ us on [GitHub](https://github.com/axolotl-ai-cloud/axolotl)\n",
|
||||
"- 📜 Read the [Docs](http://docs.axolotl.ai/)\n",
|
||||
"- 💬 Chat with us on [Discord](https://discord.gg/mnpEYgRUmD)\n",
|
||||
"- 📰 Get updates on [X/Twitter](https://x.com/axolotl_ai)\n"
|
||||
"- \u2b50 us on [GitHub](https://github.com/axolotl-ai-cloud/axolotl)\n",
|
||||
"- \ud83d\udcdc Read the [Docs](http://docs.axolotl.ai/)\n",
|
||||
"- \ud83d\udcac Chat with us on [Discord](https://discord.gg/mnpEYgRUmD)\n",
|
||||
"- \ud83d\udcf0 Get updates on [X/Twitter](https://x.com/axolotl_ai)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -39,8 +39,8 @@
|
||||
"source": [
|
||||
"%%capture\n",
|
||||
"# This step can take ~5-10 minutes to install dependencies\n",
|
||||
"!pip install --no-build-isolation axolotl[flash-attn]>=0.9.1\n",
|
||||
"!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28\""
|
||||
"!uv pip install --no-build-isolation \"axolotl>=0.9.1\"\n!uv pip install flash-attn --no-build-isolation\n",
|
||||
"!uv pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1371,7 +1371,7 @@
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
|
||||
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv\u2026"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
@@ -1729,9 +1729,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_12815f401eba44658caa7b2e490137a8",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_30e02aa2d0d241979369e598287f2639",
|
||||
"value": "Drop Samples with Zero Trainable Tokens (num_proc=2): 100%"
|
||||
"value": "Drop\u2007Samples\u2007with\u2007Zero\u2007Trainable\u2007Tokens\u2007(num_proc=2):\u2007100%"
|
||||
}
|
||||
},
|
||||
"083f9cda8d754c168beee10d2f8955a2": {
|
||||
@@ -1774,9 +1774,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_b195f160ca20442fadd8b5aed0ee41af",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_ca65e32eb52f48c09a84b33cb18f22cd",
|
||||
"value": " 11.4M/11.4M [00:00<00:00, 21.8MB/s]"
|
||||
"value": "\u200711.4M/11.4M\u2007[00:00<00:00,\u200721.8MB/s]"
|
||||
}
|
||||
},
|
||||
"0a46ad75c198463d843fb35e813642cb": {
|
||||
@@ -1917,7 +1917,7 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_b1bea589efa14258a9982071b87938bf",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_590eef89881545aa8bbef9a8bbe7fb00",
|
||||
"value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
|
||||
}
|
||||
@@ -1938,9 +1938,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_bfcdbba993b74972a9e3e575f86908ff",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_6ebb2ec171414e47a14765505f64bb3c",
|
||||
"value": " 3.84G/3.84G [00:09<00:00, 664MB/s]"
|
||||
"value": "\u20073.84G/3.84G\u2007[00:09<00:00,\u2007664MB/s]"
|
||||
}
|
||||
},
|
||||
"0e936d9dbf9c4fdd86bbfe9730dedc47": {
|
||||
@@ -2296,9 +2296,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_349eee9f56d64f0cba6fc24ff2c50c9b",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_7e5d3774060e4589aa65982da5ea4ef4",
|
||||
"value": " 9985/9985 [00:04<00:00, 2604.11 examples/s]"
|
||||
"value": "\u20079985/9985\u2007[00:04<00:00,\u20072604.11\u2007examples/s]"
|
||||
}
|
||||
},
|
||||
"16d1283741404b7bb319094c992fce01": {
|
||||
@@ -2317,9 +2317,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_a4e5789584564049b83df7c6c54a3e08",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_ff3a94b146a948b6907f5d80c7157f99",
|
||||
"value": " 9985/0 [00:00<00:00, 50763.46 examples/s]"
|
||||
"value": "\u20079985/0\u2007[00:00<00:00,\u200750763.46\u2007examples/s]"
|
||||
}
|
||||
},
|
||||
"1811cda0644e4190a9469d1774435d82": {
|
||||
@@ -2390,9 +2390,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_e366ae3fceec4566b9ed303d6c5f90af",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_5dd7d150dbe04f08b165ce7f2c27cd11",
|
||||
"value": "model-00008-of-00008.safetensors: 100%"
|
||||
"value": "model-00008-of-00008.safetensors:\u2007100%"
|
||||
}
|
||||
},
|
||||
"19127c7bb1554ccbac877059f9a82db0": {
|
||||
@@ -2561,9 +2561,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_0dea5caa27384f5689e3cab51f558727",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_a6f48410b9964fefba0c3009a77dc838",
|
||||
"value": " 9.68k/9.68k [00:00<00:00, 812kB/s]"
|
||||
"value": "\u20079.68k/9.68k\u2007[00:00<00:00,\u2007812kB/s]"
|
||||
}
|
||||
},
|
||||
"1f7d30f71bbd4547a9150d21da071055": {
|
||||
@@ -2634,9 +2634,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_f4a1795dc7514a718f478245f521f0ba",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_5e746eb25bbe416fb585fa24e79f5177",
|
||||
"value": "model-00002-of-00008.safetensors: 100%"
|
||||
"value": "model-00002-of-00008.safetensors:\u2007100%"
|
||||
}
|
||||
},
|
||||
"20352e5f58d24bb8b1f3940efd14fe4a": {
|
||||
@@ -2707,9 +2707,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_1c6f1f10667545aaab958016ba7e2c94",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_e6e969610738449887259063967f82b0",
|
||||
"value": " 2.78M/2.78M [00:00<00:00, 17.8MB/s]"
|
||||
"value": "\u20072.78M/2.78M\u2007[00:00<00:00,\u200717.8MB/s]"
|
||||
}
|
||||
},
|
||||
"258b7c635c1045329d4669e48c46ccd5": {
|
||||
@@ -3056,9 +3056,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_be724f04b03942b2a033a7e8898bb4fd",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_fcbab4d8dced41a18dfccce81e3a45a0",
|
||||
"value": "model-00005-of-00008.safetensors: 100%"
|
||||
"value": "model-00005-of-00008.safetensors:\u2007100%"
|
||||
}
|
||||
},
|
||||
"3036608c71904ce9ae4bb2a9fa8802d9": {
|
||||
@@ -3077,9 +3077,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_5ca6be24acb548cea130bd58e9954c7c",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_5cfb02ee044b4011a378efa8b54a370f",
|
||||
"value": " 3.96G/3.96G [00:10<00:00, 531MB/s]"
|
||||
"value": "\u20073.96G/3.96G\u2007[00:10<00:00,\u2007531MB/s]"
|
||||
}
|
||||
},
|
||||
"30a81da86f8043eca301e86a8651201a": {
|
||||
@@ -3629,9 +3629,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_8f5bd719974e41c3a8dd9a5b0d3d71e6",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_b87c84de30e84b3abf4871461fb9cbd3",
|
||||
"value": "Loading checkpoint shards: 100%"
|
||||
"value": "Loading\u2007checkpoint\u2007shards:\u2007100%"
|
||||
}
|
||||
},
|
||||
"41f3b32c2f6b4034ae7a3b9124e28bc7": {
|
||||
@@ -3791,7 +3791,7 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_39789237703c4a418134243055c9cbf5",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_a3a945817f684328b34651fe052393ec",
|
||||
"value": "Connecting..."
|
||||
}
|
||||
@@ -4077,9 +4077,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_4d468f96ec924681ad65eb671674b93e",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_ad7599de524549c48bf2d3124ad4b299",
|
||||
"value": "Dropping Long Sequences (num_proc=2): 100%"
|
||||
"value": "Dropping\u2007Long\u2007Sequences\u2007(num_proc=2):\u2007100%"
|
||||
}
|
||||
},
|
||||
"5ca240f31e6b44e3882c5eb37cd5a309": {
|
||||
@@ -4471,9 +4471,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_5e18768f7ad6434ba8b8b8a2e853e204",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_bb33aec33a6447078c31bfd728942994",
|
||||
"value": " 728/728 [00:00<00:00, 20.3kB/s]"
|
||||
"value": "\u2007728/728\u2007[00:00<00:00,\u200720.3kB/s]"
|
||||
}
|
||||
},
|
||||
"62e302ebdad64aada0ffe64ae1c873f3": {
|
||||
@@ -4636,9 +4636,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_81c3db71ac704280ad030072655f1537",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_042e091f75694c47aee761e760e76773",
|
||||
"value": " 9985/9985 [00:02<00:00, 3977.47 examples/s]"
|
||||
"value": "\u20079985/9985\u2007[00:02<00:00,\u20073977.47\u2007examples/s]"
|
||||
}
|
||||
},
|
||||
"67da6c4260574869aa24c3cbc1bc1654": {
|
||||
@@ -4778,7 +4778,7 @@
|
||||
"description_tooltip": null,
|
||||
"disabled": false,
|
||||
"layout": "IPY_MODEL_2e257c8be2da40b4bb67a9e4ab6811f3",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_56e3768bef5a4b9db4168c5c17f509c2",
|
||||
"value": ""
|
||||
}
|
||||
@@ -4823,9 +4823,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_41f3b32c2f6b4034ae7a3b9124e28bc7",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_a10d0a76010f4e508c65a9b69ebc5156",
|
||||
"value": "Tokenizing Prompts (num_proc=2): 100%"
|
||||
"value": "Tokenizing\u2007Prompts\u2007(num_proc=2):\u2007100%"
|
||||
}
|
||||
},
|
||||
"704f2f5a9b1c49d5a75a0025a5dda11b": {
|
||||
@@ -5071,9 +5071,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_93a44a11aa4846fa8efc6c1413ef1627",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_a55060adc3564407ac81ad7297d34aaa",
|
||||
"value": "train.jsonl: 100%"
|
||||
"value": "train.jsonl:\u2007100%"
|
||||
}
|
||||
},
|
||||
"7be6f04c284e4326bb4ff3d301e7b3c6": {
|
||||
@@ -5138,9 +5138,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_7fd44cf9ca6e4726bfd7ac21846d6a14",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_366a343b62fa47d8985a3bd464d99f9e",
|
||||
"value": "config.json: 100%"
|
||||
"value": "config.json:\u2007100%"
|
||||
}
|
||||
},
|
||||
"7cd0b85ebd204b7aba908417811ce4e0": {
|
||||
@@ -5339,9 +5339,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_67da6c4260574869aa24c3cbc1bc1654",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_94b9088614464f60a203de39dbcae853",
|
||||
"value": " 8/8 [01:47<00:00, 11.64s/it]"
|
||||
"value": "\u20078/8\u2007[01:47<00:00,\u200711.64s/it]"
|
||||
}
|
||||
},
|
||||
"823f1c78f15043e38bbd4dca3932a86a": {
|
||||
@@ -5488,7 +5488,7 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_8640ac440fbc4644b9a3af7ba3ae7183",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_5cea7996f02040b187ece0bb2d6a8d1f",
|
||||
"value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
|
||||
}
|
||||
@@ -5509,9 +5509,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_ef223e8504b64e3592589880326aaf41",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_598da69727bd4fb8b1caf465ac736d7a",
|
||||
"value": " 1.67M/1.67M [00:00<00:00, 19.0MB/s]"
|
||||
"value": "\u20071.67M/1.67M\u2007[00:00<00:00,\u200719.0MB/s]"
|
||||
}
|
||||
},
|
||||
"897b77a56c09479bb11d7f2a30997e55": {
|
||||
@@ -5717,9 +5717,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_37de928300e34184881039378bd75e7f",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_0e936d9dbf9c4fdd86bbfe9730dedc47",
|
||||
"value": " 3.96G/3.96G [00:13<00:00, 273MB/s]"
|
||||
"value": "\u20073.96G/3.96G\u2007[00:13<00:00,\u2007273MB/s]"
|
||||
}
|
||||
},
|
||||
"936d04b5fe1b4c63bf0b080e423d051b": {
|
||||
@@ -6050,9 +6050,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_d955dcaa0e944e719f3a06139dd54a03",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_d3de2662c7964f1ba96e58da382af720",
|
||||
"value": "merges.txt: 100%"
|
||||
"value": "merges.txt:\u2007100%"
|
||||
}
|
||||
},
|
||||
"9cd5211b5d8b457aa0002f1d17b80028": {
|
||||
@@ -6071,9 +6071,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_6932489232ec4ab18a160b1e7fbcdfe1",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_4540927d98f54466b434ba4c0edf045d",
|
||||
"value": "model-00007-of-00008.safetensors: 100%"
|
||||
"value": "model-00007-of-00008.safetensors:\u2007100%"
|
||||
}
|
||||
},
|
||||
"9d4897eefb5f48259ffb2d23e332f752": {
|
||||
@@ -6303,9 +6303,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_3aaecbf540f54a2db9ab0931e3b1fe57",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_9e333ed3b5014069ac1dd969255dd591",
|
||||
"value": " 239/239 [00:00<00:00, 30.9kB/s]"
|
||||
"value": "\u2007239/239\u2007[00:00<00:00,\u200730.9kB/s]"
|
||||
}
|
||||
},
|
||||
"a20927bf5f2c41f58c1e31ac858ab36c": {
|
||||
@@ -6324,9 +6324,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_1811cda0644e4190a9469d1774435d82",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_35c811d2ae8e43f3b5cecbdd3cfa857f",
|
||||
"value": "tokenizer.json: 100%"
|
||||
"value": "tokenizer.json:\u2007100%"
|
||||
}
|
||||
},
|
||||
"a3a945817f684328b34651fe052393ec": {
|
||||
@@ -6360,9 +6360,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_ed5ca967ad5342929e578ac6aa4dc4c0",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_af401d117d5047629d3a6e2361757b62",
|
||||
"value": "model-00001-of-00008.safetensors: 100%"
|
||||
"value": "model-00001-of-00008.safetensors:\u2007100%"
|
||||
}
|
||||
},
|
||||
"a4e5789584564049b83df7c6c54a3e08": {
|
||||
@@ -6494,9 +6494,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_fa1282ccc7544e4f818e2f03ccffe4a5",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_bbbf575d2a4b4c6ea8389be79b2a6039",
|
||||
"value": "model.safetensors.index.json: 100%"
|
||||
"value": "model.safetensors.index.json:\u2007100%"
|
||||
}
|
||||
},
|
||||
"ab93eabd7cea4b94b4b7a387f101e8a1": {
|
||||
@@ -6582,9 +6582,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_62e302ebdad64aada0ffe64ae1c873f3",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_bd1b0dfed6d34d16af33a4a58330f5ec",
|
||||
"value": "Saving the dataset (1/1 shards): 100%"
|
||||
"value": "Saving\u2007the\u2007dataset\u2007(1/1\u2007shards):\u2007100%"
|
||||
}
|
||||
},
|
||||
"ad7599de524549c48bf2d3124ad4b299": {
|
||||
@@ -6967,9 +6967,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_2b3a2659b12244bd8548320320016dbf",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_0cd7efffbb3c4c4b972e63749f61ab97",
|
||||
"value": "Generating train split: "
|
||||
"value": "Generating\u2007train\u2007split:\u2007"
|
||||
}
|
||||
},
|
||||
"b87c84de30e84b3abf4871461fb9cbd3": {
|
||||
@@ -7085,9 +7085,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_0f480e3a0b0a45d2a2d2dec3cad923f3",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_fcb30372e7404c5d8a1ad4df91e6c7b2",
|
||||
"value": " 1.91G/1.91G [00:05<00:00, 444MB/s]"
|
||||
"value": "\u20071.91G/1.91G\u2007[00:05<00:00,\u2007444MB/s]"
|
||||
}
|
||||
},
|
||||
"bd1b0dfed6d34d16af33a4a58330f5ec": {
|
||||
@@ -7325,9 +7325,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_158c8b85dbf34de6a94b4e35e2fc7d5a",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_0b4c9753a7cb4354b8e5f187e6e1ad7c",
|
||||
"value": " 3.96G/3.96G [00:15<00:00, 564MB/s]"
|
||||
"value": "\u20073.96G/3.96G\u2007[00:15<00:00,\u2007564MB/s]"
|
||||
}
|
||||
},
|
||||
"c0991cf63ee6458b96e9a75e7a88b61a": {
|
||||
@@ -7346,9 +7346,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_ed28e2e0410d4e0b855467e798e53d66",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_d93f134f802b4b69b575bdaf07dbd27c",
|
||||
"value": "tokenizer_config.json: 100%"
|
||||
"value": "tokenizer_config.json:\u2007100%"
|
||||
}
|
||||
},
|
||||
"c12ea43372ac4d57bb9605f1a429b397": {
|
||||
@@ -7581,9 +7581,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_8bc9d8ba866c442b9118d9630009939c",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_9f56a2d9979c4bd8928c644c22c3ecdf",
|
||||
"value": "model-00003-of-00008.safetensors: 100%"
|
||||
"value": "model-00003-of-00008.safetensors:\u2007100%"
|
||||
}
|
||||
},
|
||||
"c6164e05a1914ae48083db9ad7f4ef7c": {
|
||||
@@ -7694,9 +7694,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_e40d1c1ac9494b3bade9858324e7ffdf",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_d65b6b060d9845779299491ac5599c31",
|
||||
"value": " 9985/9985 [01:04<00:00, 189.08 examples/s]"
|
||||
"value": "\u20079985/9985\u2007[01:04<00:00,\u2007189.08\u2007examples/s]"
|
||||
}
|
||||
},
|
||||
"c7433acd3c4841e6958ae8f7e87b1808": {
|
||||
@@ -7737,9 +7737,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_0077aedc3d174560bce924ee89e9c006",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_00321cce58884f6f9b3855a21fcd9187",
|
||||
"value": "Add position_id column (Sample Packing) (num_proc=2): 100%"
|
||||
"value": "Add\u2007position_id\u2007column\u2007(Sample\u2007Packing)\u2007(num_proc=2):\u2007100%"
|
||||
}
|
||||
},
|
||||
"ca65e32eb52f48c09a84b33cb18f22cd": {
|
||||
@@ -8162,9 +8162,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_63580b6fb30642479fe3000915bf551a",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_8f726dbfb45d4528afa33e36a6313267",
|
||||
"value": " 27.3M/27.3M [00:00<00:00, 31.0MB/s]"
|
||||
"value": "\u200727.3M/27.3M\u2007[00:00<00:00,\u200731.0MB/s]"
|
||||
}
|
||||
},
|
||||
"d43c6df07ddb466587807d6dbe1ff614": {
|
||||
@@ -8183,9 +8183,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_8c4d4fc5a30f4e7cb3be53fe2adda33d",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_e90658f4bcb642baa78426012f863152",
|
||||
"value": "model-00004-of-00008.safetensors: 100%"
|
||||
"value": "model-00004-of-00008.safetensors:\u2007100%"
|
||||
}
|
||||
},
|
||||
"d65b6b060d9845779299491ac5599c31": {
|
||||
@@ -8474,9 +8474,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_34cf3df51fbc41cabfdbba153c007f0e",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_ac764024cf1c4e08ba7749afd2cd20ac",
|
||||
"value": "vocab.json: 100%"
|
||||
"value": "vocab.json:\u2007100%"
|
||||
}
|
||||
},
|
||||
"dfd2a2649b8341ef913207526708aff1": {
|
||||
@@ -8669,9 +8669,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_c6164e05a1914ae48083db9ad7f4ef7c",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_813621384dc748b0ad06775e22761c0b",
|
||||
"value": " 9985/9985 [00:03<00:00, 3622.89 examples/s]"
|
||||
"value": "\u20079985/9985\u2007[00:03<00:00,\u20073622.89\u2007examples/s]"
|
||||
}
|
||||
},
|
||||
"e400cbf14bcc446a9d33b210cd93550b": {
|
||||
@@ -9065,9 +9065,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_fba7aa824b38467ab3061b226114cdec",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_f3075dccbd2747b4a7913b66f44f2596",
|
||||
"value": " 3.96G/3.96G [00:13<00:00, 398MB/s]"
|
||||
"value": "\u20073.96G/3.96G\u2007[00:13<00:00,\u2007398MB/s]"
|
||||
}
|
||||
},
|
||||
"ec030fc3c346426f9abc3a89892258d3": {
|
||||
@@ -9110,9 +9110,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_936d04b5fe1b4c63bf0b080e423d051b",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_f1cef8e8dc2646fb9fd09f3b09081074",
|
||||
"value": " 36.5k/36.5k [00:00<00:00, 4.32MB/s]"
|
||||
"value": "\u200736.5k/36.5k\u2007[00:00<00:00,\u20074.32MB/s]"
|
||||
}
|
||||
},
|
||||
"ed28e2e0410d4e0b855467e798e53d66": {
|
||||
@@ -9422,9 +9422,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_735d4f225b24414294fc1b213c61223c",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_5e5e15b0569b474c9620083b3ec6af55",
|
||||
"value": "generation_config.json: 100%"
|
||||
"value": "generation_config.json:\u2007100%"
|
||||
}
|
||||
},
|
||||
"f4667818b9d34a09891cd727a429a610": {
|
||||
@@ -9443,9 +9443,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_4b27c267393640f28f6eae0875bd2ed9",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_9858cb74a09748a39e8149baac96702c",
|
||||
"value": " 3.96G/3.96G [00:11<00:00, 457MB/s]"
|
||||
"value": "\u20073.96G/3.96G\u2007[00:11<00:00,\u2007457MB/s]"
|
||||
}
|
||||
},
|
||||
"f4a1795dc7514a718f478245f521f0ba": {
|
||||
@@ -9830,9 +9830,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_d1f9b10c130542f094c8fd3d1e23b5e9",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_e575d87a7efe4ec7b1efde489839d4a6",
|
||||
"value": "model-00006-of-00008.safetensors: 100%"
|
||||
"value": "model-00006-of-00008.safetensors:\u2007100%"
|
||||
}
|
||||
},
|
||||
"fe18bba7f3fb4c31bf840541f36b3425": {
|
||||
@@ -9873,9 +9873,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_e5a82df528bb4e408797a3b6c2758f4a",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_f113ebd8c1c34806bea4dd7ed3035173",
|
||||
"value": " 9985/9985 [00:00<00:00, 44264.88 examples/s]"
|
||||
"value": "\u20079985/9985\u2007[00:00<00:00,\u200744264.88\u2007examples/s]"
|
||||
}
|
||||
},
|
||||
"fea1b70fb46745feb5111b3929175b5d": {
|
||||
@@ -9931,9 +9931,9 @@
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_ab93eabd7cea4b94b4b7a387f101e8a1",
|
||||
"placeholder": "",
|
||||
"placeholder": "\u200b",
|
||||
"style": "IPY_MODEL_704f2f5a9b1c49d5a75a0025a5dda11b",
|
||||
"value": " 3.96G/3.96G [00:12<00:00, 656MB/s]"
|
||||
"value": "\u20073.96G/3.96G\u2007[00:12<00:00,\u2007656MB/s]"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,8 +16,13 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r
|
||||
|
||||
```bash
|
||||
# Ensure you have PyTorch installed (PyTorch 2.6.0 min)
|
||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||
# Option A: manage dependencies in your project
|
||||
uv add 'axolotl>=0.12.0'
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
|
||||
# Option B: quick install
|
||||
uv pip install 'axolotl>=0.12.0'
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
```
|
||||
|
||||
2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage
|
||||
|
||||
@@ -10,17 +10,22 @@ Gemma-3n is a family of multimodal models from Google found on [HuggingFace](htt
|
||||
|
||||
```bash
|
||||
# Ensure you have PyTorch installed (PyTorch 2.6.0 min)
|
||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||
# Option A: manage dependencies in your project
|
||||
uv add 'axolotl>=0.12.0'
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
|
||||
# Option B: quick install
|
||||
uv pip install 'axolotl>=0.12.0'
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
```
|
||||
|
||||
2. In addition to Axolotl's requirements, Gemma-3n requires:
|
||||
|
||||
```bash
|
||||
pip3 install timm==1.0.17
|
||||
uv pip install timm==1.0.17
|
||||
|
||||
# for loading audio data
|
||||
pip3 install librosa==0.11.0
|
||||
uv pip install librosa==0.11.0
|
||||
```
|
||||
|
||||
3. Download sample dataset files
|
||||
|
||||
@@ -12,8 +12,13 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
|
||||
|
||||
```bash
|
||||
# Ensure you have PyTorch installed (PyTorch 2.6.0 min)
|
||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||
# Option A: manage dependencies in your project
|
||||
uv add 'axolotl>=0.12.0'
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
|
||||
# Option B: quick install
|
||||
uv pip install 'axolotl>=0.12.0'
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
```
|
||||
|
||||
2. Choose one of the following configs below for training the 20B model. (for 120B, see [below](#training-120b))
|
||||
@@ -75,7 +80,7 @@ for more information about using a special vllm-openai docker image for inferenc
|
||||
Optionally, vLLM can be installed from nightly:
|
||||
|
||||
```bash
|
||||
pip install --no-build-isolation --pre -U vllm --extra-index-url https://wheels.vllm.ai/nightly
|
||||
uv pip install --no-build-isolation --pre -U vllm --extra-index-url https://wheels.vllm.ai/nightly
|
||||
```
|
||||
and the vLLM server can be started with the following command (modify `--tensor-parallel-size 8` to match your environment):
|
||||
```bash
|
||||
|
||||
@@ -13,8 +13,8 @@ Tencent released a family of opensource models called HunYuan with varying param
|
||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||
cd axolotl
|
||||
|
||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||
pip3 install --no-build-isolation -e '.[flash-attn]'
|
||||
uv sync
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
|
||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||
python scripts/cutcrossentropy_install.py | sh
|
||||
|
||||
@@ -13,9 +13,14 @@ Thanks to the team at MistralAI for giving us early access to prepare for these
|
||||
Here is an example of how to install from pip:
|
||||
|
||||
```bash
|
||||
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||
# Ensure you have PyTorch installed (PyTorch 2.6.0 min)
|
||||
# Option A: manage dependencies in your project
|
||||
uv add 'axolotl>=0.12.0'
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
|
||||
# Option B: quick install
|
||||
uv pip install 'axolotl>=0.12.0'
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
```
|
||||
|
||||
2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage
|
||||
|
||||
@@ -15,8 +15,8 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
|
||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||
cd axolotl
|
||||
|
||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||
pip3 install --no-build-isolation -e '.[flash-attn]'
|
||||
uv sync
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
|
||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||
python scripts/cutcrossentropy_install.py | sh
|
||||
@@ -24,12 +24,12 @@ python scripts/cutcrossentropy_install.py | sh
|
||||
|
||||
2. Install Qwen3-Next transformers commit
|
||||
```bash
|
||||
pip3 uninstall -y transformers && pip3 install "git+https://github.com/huggingface/transformers.git@b9282355bea846b54ed850a066901496b19da654"
|
||||
uv pip uninstall transformers && uv pip install "git+https://github.com/huggingface/transformers.git@b9282355bea846b54ed850a066901496b19da654"
|
||||
```
|
||||
|
||||
3. Install FLA for improved performance
|
||||
```bash
|
||||
pip3 uninstall -y causal-conv1d && pip3 install flash-linear-attention==0.3.2
|
||||
uv pip uninstall causal-conv1d && uv pip install flash-linear-attention==0.3.2
|
||||
```
|
||||
|
||||
4. Run the finetuning example:
|
||||
|
||||
@@ -15,8 +15,8 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
|
||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||
cd axolotl
|
||||
|
||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||
pip3 install --no-build-isolation -e '.[flash-attn]'
|
||||
uv sync --extra deepspeed
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
|
||||
# Install Cut Cross Entropy
|
||||
python scripts/cutcrossentropy_install.py | sh
|
||||
|
||||
@@ -13,14 +13,19 @@ This guide shows how to fine-tune SmolVLM2 models with Axolotl.
|
||||
Here is an example of how to install from pip:
|
||||
```bash
|
||||
# Ensure you have a compatible version of PyTorch installed
|
||||
pip3 install packaging setuptools wheel ninja
|
||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||
# Option A: manage dependencies in your project
|
||||
uv add 'axolotl>=0.12.0'
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
|
||||
# Option B: quick install
|
||||
uv pip install 'axolotl>=0.12.0'
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
```
|
||||
|
||||
2. Install an extra dependency:
|
||||
|
||||
```bash
|
||||
pip3 install num2words==0.5.14
|
||||
uv pip install num2words==0.5.14
|
||||
```
|
||||
|
||||
3. Run the finetuning example:
|
||||
|
||||
@@ -12,16 +12,21 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r
|
||||
|
||||
```bash
|
||||
# Ensure you have PyTorch installed (PyTorch 2.6.0 min)
|
||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||
# Option A: manage dependencies in your project
|
||||
uv add 'axolotl>=0.12.0'
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
|
||||
# Option B: quick install
|
||||
uv pip install 'axolotl>=0.12.0'
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
```
|
||||
|
||||
2. Please install the below.
|
||||
|
||||
```bash
|
||||
# audio
|
||||
pip3 install librosa==0.11.0
|
||||
pip3 install 'mistral_common[audio]==1.8.3'
|
||||
uv pip install librosa==0.11.0
|
||||
uv pip install 'mistral_common[audio]==1.8.3'
|
||||
|
||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||
python scripts/cutcrossentropy_install.py | sh
|
||||
|
||||
Reference in New Issue
Block a user