basic llama multipack

This commit is contained in:
Wing Lian
2024-06-20 14:39:55 -04:00
parent 559562d790
commit 469e15607d

View File

@@ -10,6 +10,7 @@ from axolotl.monkeypatch.mixtral import patch_mixtral_moe_forward_zero3
from axolotl.monkeypatch.utils import get_unpad_data
SUPPORTED_MULTIPACK_MODEL_TYPES = [
"llama",
"mixtral",
"qwen2",
"qwen2_moe",
@@ -29,6 +30,10 @@ def patch_for_multipack(model_type, model_name=None):
)
if is_deepspeed_zero3_enabled():
patch_mixtral_moe_forward_zero3()
elif model_type == "llama":
transformers.models.llama.modeling_llama._get_unpad_data = ( # pylint: disable=protected-access
get_unpad_data
)
elif model_type == "qwen2":
transformers.models.qwen2.modeling_qwen2._get_unpad_data = ( # pylint: disable=protected-access
get_unpad_data