feat: add support for qwen25 vl for multimodal

revert seq len to 8192
settings
2025-02-18 12:42:29 +07:00 · 2024-12-08 22:30:20 -05:00 · 2024-12-08 22:22:18 -05:00 · 2024-12-06 16:06:57 -05:00 · 2024-12-06 15:41:09 -05:00 · 2024-12-06 15:27:18 -05:00
3 changed files with 8 additions and 5 deletions
--- a/examples/qwen2-vl/lora-7b.yaml
+++ b/examples/qwen2-vl/lora-7b.yaml
@@ -26,7 +26,7 @@ pad_to_sequence_len: false
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
-lora_target_modules: 'language_model.model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'
+lora_target_modules: 'model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'

 wandb_project:
 wandb_entity:
--- a/src/axolotl/utils/collators/mm_chat.py
+++ b/src/axolotl/utils/collators/mm_chat.py
@@ -225,9 +225,12 @@ class MultiModalChatDataCollator(DataCollatorMixin):
        labels = batch["input_ids"].clone()
        labels[labels == processor.tokenizer.pad_token_id] = -100  #
        # Ignore the image token index in the loss computation (model specific)
-        image_token_id = processor.tokenizer.convert_tokens_to_ids(
-            processor.image_token
-        )
+        if chat_template_type == "qwen2_vl":
+            image_token_id = processor.tokenizer.convert_tokens_to_ids("<|image_pad|>")
+        else:
+            image_token_id = processor.tokenizer.convert_tokens_to_ids(
+                processor.image_token
+            )
        labels[labels == image_token_id] = -100
        batch["labels"] = labels

--- a/src/axolotl/utils/config/__init__.py
+++ b/src/axolotl/utils/config/__init__.py
@@ -132,7 +132,7 @@ def normalize_config(cfg):

    cfg.is_multimodal = (
        hasattr(model_config, "model_type")
-        and model_config.model_type in ["llava", "mllama", "qwen2_vl"]
+        and model_config.model_type in ["llava", "mllama", "qwen2_vl", "qwen2_5_vl"]
        or any(
            multimodal_name in cfg.base_model.lower()
            for multimodal_name in [
Author	SHA1	Message	Date
NanoCode012	fbf3ca86c9	feat: add support for qwen25 vl for multimodal	2025-02-18 12:42:29 +07:00
Sunny	2de866e92f	revert seq len to 8192	2024-12-08 22:30:20 -05:00
Sunny	295e07dcca	settings	2024-12-08 22:22:18 -05:00
bursteratom	3c07b6d6b1	lint	2024-12-06 16:06:57 -05:00
bursteratom	89dae7dc6d	lora_target_module	2024-12-06 15:41:09 -05:00
bursteratom	1b54af8e54	lora config	2024-12-06 15:27:18 -05:00
bursteratom	ca7b56cba3	lora config	2024-12-06 15:26:06 -05:00
bursteratom	ea8269d2eb	lora config	2024-12-06 15:23:24 -05:00
bursteratom	13ca7ed087	comment out lora target	2024-12-06 15:21:08 -05:00
bursteratom	0dfd8541ee	lora config qwen2vl	2024-12-06 14:56:51 -05:00