Compare commits
2 Commits
vendor-moe
...
fsdp2_fp32
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1d0562dedd | ||
|
|
7fa8ac40cd |
@@ -40,7 +40,7 @@
|
|||||||
"%%capture\n",
|
"%%capture\n",
|
||||||
"# This step can take ~5-10 minutes to install dependencies\n",
|
"# This step can take ~5-10 minutes to install dependencies\n",
|
||||||
"!pip install --no-build-isolation \"axolotl[flash-attn]>=0.9.1\"\n",
|
"!pip install --no-build-isolation \"axolotl[flash-attn]>=0.9.1\"\n",
|
||||||
"!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@c5aa3ef\""
|
"!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28\""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -29,5 +29,5 @@ UV_PREFIX = "uv " if USE_UV else ""
|
|||||||
|
|
||||||
print(
|
print(
|
||||||
UNINSTALL_PREFIX
|
UNINSTALL_PREFIX
|
||||||
+ f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@c5aa3ef"'
|
+ f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"'
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ python scripts/cutcrossentropy_install.py | sh
|
|||||||
|
|
||||||
- If you are installing from pip
|
- If you are installing from pip
|
||||||
```bash
|
```bash
|
||||||
pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@c5aa3ef"
|
pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
@@ -31,6 +31,7 @@ plugins:
|
|||||||
|
|
||||||
## Supported Models
|
## Supported Models
|
||||||
|
|
||||||
|
- apertus
|
||||||
- arcee
|
- arcee
|
||||||
- cohere
|
- cohere
|
||||||
- cohere2
|
- cohere2
|
||||||
@@ -44,9 +45,13 @@ plugins:
|
|||||||
- glm
|
- glm
|
||||||
- glm4
|
- glm4
|
||||||
- glm4_moe
|
- glm4_moe
|
||||||
|
- glm4v
|
||||||
|
- glm4v_moe
|
||||||
- gpt_oss
|
- gpt_oss
|
||||||
- granite
|
- granite
|
||||||
- granitemoe
|
- granitemoe
|
||||||
|
- granitemoeshared
|
||||||
|
- granitemoehybrid
|
||||||
- hunyuan_v1_dense
|
- hunyuan_v1_dense
|
||||||
- hunyuan_v1_moe
|
- hunyuan_v1_moe
|
||||||
- llama
|
- llama
|
||||||
@@ -65,6 +70,8 @@ plugins:
|
|||||||
- qwen2_5_vl
|
- qwen2_5_vl
|
||||||
- qwen3
|
- qwen3
|
||||||
- qwen3_moe
|
- qwen3_moe
|
||||||
|
- qwen3_vl
|
||||||
|
- qwen3_vl_moe
|
||||||
- qwen3_next
|
- qwen3_next
|
||||||
- smollm3
|
- smollm3
|
||||||
- seed_oss
|
- seed_oss
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ LOG = get_logger(__name__)
|
|||||||
|
|
||||||
_CCE_INSTALL_MESSAGE = (
|
_CCE_INSTALL_MESSAGE = (
|
||||||
"Please install Axolotl's fork of cut_cross_entropy with transformers support using "
|
"Please install Axolotl's fork of cut_cross_entropy with transformers support using "
|
||||||
'`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@c5aa3ef"`'
|
'`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"`'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -368,6 +368,7 @@ def fsdp2_prepare_model(accelerator, model: torch.nn.Module) -> torch.nn.Module:
|
|||||||
# removing the call above leads to extra memory usage as explained in the comment above
|
# removing the call above leads to extra memory usage as explained in the comment above
|
||||||
if hasattr(model, "tie_weights"):
|
if hasattr(model, "tie_weights"):
|
||||||
model.tie_weights()
|
model.tie_weights()
|
||||||
|
model = model.to(torch.float32)
|
||||||
return model
|
return model
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user