feat: add llama4 multimodal (#2499)

* feat: add llama4 multimodal

* feat: add torchvision to base docker

* just use latest torchvision

---------

Co-authored-by: Wing Lian <wing@axolotl.ai>
This commit is contained in:
NanoCode012
2025-04-07 21:49:29 +07:00
committed by GitHub
parent 8bbad21bfd
commit e0e5d9b1d6
6 changed files with 15 additions and 1 deletions

View File

@@ -268,6 +268,7 @@ def get_processing_strategy(
)
if chat_template_type in [
"llama3_2_vision",
"llama4",
"llava",
"mistral_v7_tekken",
"pixtral",

File diff suppressed because one or more lines are too long

View File

@@ -36,6 +36,7 @@ from transformers import (
BitsAndBytesConfig,
Gemma3ForConditionalGeneration,
GPTQConfig,
Llama4ForConditionalGeneration,
LlavaForConditionalGeneration,
Mistral3ForConditionalGeneration,
MllamaForConditionalGeneration,
@@ -76,6 +77,7 @@ LOG = logging.getLogger(__name__)
MULTIMODAL_AUTO_MODEL_MAPPING = {
"mllama": MllamaForConditionalGeneration,
"llama4": Llama4ForConditionalGeneration,
"llava": LlavaForConditionalGeneration,
"qwen2_vl": Qwen2VLForConditionalGeneration,
"qwen2_5_vl": Qwen2_5_VLForConditionalGeneration,

View File

@@ -28,6 +28,7 @@ class ChatTemplate(str, Enum):
llama3 = "llama3" # pylint: disable=invalid-name
llama3_2_vision = "llama3_2_vision" # pylint: disable=invalid-name
llama4 = "llama4" # pylint: disable=invalid-name
phi_3 = "phi_3" # pylint: disable=invalid-name
phi_35 = "phi_35" # pylint: disable=invalid-name
deepseek_v2 = "deepseek_v2" # pylint: disable=invalid-name