Compare commits
10 Commits
75e1d3537f
...
pixtral_in
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fbf3ca86c9 | ||
|
|
2de866e92f | ||
|
|
295e07dcca | ||
|
|
3c07b6d6b1 | ||
|
|
89dae7dc6d | ||
|
|
1b54af8e54 | ||
|
|
ca7b56cba3 | ||
|
|
ea8269d2eb | ||
|
|
13ca7ed087 | ||
|
|
0dfd8541ee |
@@ -26,7 +26,7 @@ pad_to_sequence_len: false
|
||||
lora_r: 32
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.05
|
||||
lora_target_modules: 'language_model.model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'
|
||||
lora_target_modules: 'model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'
|
||||
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
|
||||
@@ -225,9 +225,12 @@ class MultiModalChatDataCollator(DataCollatorMixin):
|
||||
labels = batch["input_ids"].clone()
|
||||
labels[labels == processor.tokenizer.pad_token_id] = -100 #
|
||||
# Ignore the image token index in the loss computation (model specific)
|
||||
image_token_id = processor.tokenizer.convert_tokens_to_ids(
|
||||
processor.image_token
|
||||
)
|
||||
if chat_template_type == "qwen2_vl":
|
||||
image_token_id = processor.tokenizer.convert_tokens_to_ids("<|image_pad|>")
|
||||
else:
|
||||
image_token_id = processor.tokenizer.convert_tokens_to_ids(
|
||||
processor.image_token
|
||||
)
|
||||
labels[labels == image_token_id] = -100
|
||||
batch["labels"] = labels
|
||||
|
||||
|
||||
@@ -132,7 +132,7 @@ def normalize_config(cfg):
|
||||
|
||||
cfg.is_multimodal = (
|
||||
hasattr(model_config, "model_type")
|
||||
and model_config.model_type in ["llava", "mllama", "qwen2_vl"]
|
||||
and model_config.model_type in ["llava", "mllama", "qwen2_vl", "qwen2_5_vl"]
|
||||
or any(
|
||||
multimodal_name in cfg.base_model.lower()
|
||||
for multimodal_name in [
|
||||
|
||||
Reference in New Issue
Block a user