add glm support + patch (#3329) [skip ci]
* add glm support + patch * lint * lint * Update examples/glm4/glm-4-6v-flash-qlora.yaml Co-authored-by: NanoCode012 <kevinvong@rocketmail.com> * Update examples/glm4/glm-4-6v-flash-qlora.yaml Co-authored-by: NanoCode012 <kevinvong@rocketmail.com> * Update src/axolotl/processing_strategies.py Co-authored-by: NanoCode012 <kevinvong@rocketmail.com> * patch removed * lint * lint2 * docs + rename * rmv moe * docs * removed processor * sdpa T_T" * ddp_find_unused_parameters: true * multi gpu yaml tested both * multi gpu yaml tested both * Update examples/glm46v/README.md Co-authored-by: NanoCode012 <kevinvong@rocketmail.com> * Update examples/glm46v/README.md Co-authored-by: NanoCode012 <kevinvong@rocketmail.com> * Update examples/glm46v/README.md Co-authored-by: NanoCode012 <kevinvong@rocketmail.com> * rmv text only section + v5 comments * rename --------- Co-authored-by: Ved <ved.work2024@gmail.com> Co-authored-by: NanoCode012 <kevinvong@rocketmail.com>
This commit is contained in:
@@ -485,6 +485,58 @@ class InternVLProcessingStrategy(ProcessingStrategy):
|
||||
return labels
|
||||
|
||||
|
||||
class Glm4vProcessingStrategy(ProcessingStrategy):
    """Processing strategy for GLM4V-family vision models.

    On construction it records the image/video marker token strings and
    resolves each one to its id through the tokenizer, so that
    ``process_labels`` can mask those positions in the label tensor.
    """

    def __init__(
        self,
        processor: ProcessorMixin,
        chat_template: Optional[str] = None,
        image_size: int | tuple[int, int] | None = None,
        image_resize_algorithm: Resampling | None = None,
    ):
        super().__init__(
            processor,
            chat_template,
            image_size,
            image_resize_algorithm,
        )

        # Use the processor's own tokenizer when it has one; otherwise the
        # processor object itself is used as the tokenizer.
        self.tokenizer = getattr(processor, "tokenizer", processor)

        # Marker token strings for image content.
        self.image_token = "<|image|>"  # nosec
        self.begin_image_token = "<|begin_of_image|>"  # nosec
        self.end_image_token = "<|end_of_image|>"  # nosec

        # Marker token strings for video content.
        self.video_token = "<|video|>"  # nosec
        self.begin_video_token = "<|begin_of_video|>"  # nosec
        self.end_video_token = "<|end_of_video|>"  # nosec

        # Resolve every marker string to its vocabulary id once, up front.
        to_id = self.tokenizer.convert_tokens_to_ids
        self.image_token_id = to_id(self.image_token)
        self.begin_image_token_id = to_id(self.begin_image_token)
        self.end_image_token_id = to_id(self.end_image_token)
        self.video_token_id = to_id(self.video_token)
        self.begin_video_token_id = to_id(self.begin_video_token)
        self.end_video_token_id = to_id(self.end_video_token)

    def process_labels(self, input_ids):
        """Return a copy of ``input_ids`` with non-target positions set to -100.

        Positions holding the tokenizer's pad token or any of the image/video
        marker tokens are overwritten with -100 (presumably the loss
        ignore-index; confirm against the training loop). ``input_ids`` itself
        is not modified because the masking is applied to a clone.
        """
        masked = input_ids.clone()

        # Same ids, in the same order, as the one-by-one masking they replace.
        ignored_ids = (
            self.tokenizer.pad_token_id,
            self.image_token_id,
            self.begin_image_token_id,
            self.end_image_token_id,
            self.video_token_id,
            self.begin_video_token_id,
            self.end_video_token_id,
        )
        for token_id in ignored_ids:
            masked[masked == token_id] = -100

        return masked
|
||||
|
||||
|
||||
def get_processing_strategy(
|
||||
processor: ProcessorMixin,
|
||||
chat_template,
|
||||
@@ -501,10 +553,10 @@ def get_processing_strategy(
|
||||
"image_resize_algorithm": image_resize_algorithm,
|
||||
}
|
||||
|
||||
if chat_template_type in [None, "tokenizer_default"] and hasattr(
|
||||
processor.tokenizer, "chat_template"
|
||||
):
|
||||
processing_kwargs["chat_template"] = processor.tokenizer.chat_template
|
||||
if chat_template_type in [None, "tokenizer_default"]:
|
||||
tokenizer = getattr(processor, "tokenizer", processor)
|
||||
if hasattr(tokenizer, "chat_template"):
|
||||
processing_kwargs["chat_template"] = tokenizer.chat_template
|
||||
|
||||
if chat_template_type == "qwen2_vl":
|
||||
return Qwen2VLProcessingStrategy(
|
||||
@@ -533,6 +585,15 @@ def get_processing_strategy(
|
||||
return Mistral3ProcessingStrategy(
|
||||
**processing_kwargs,
|
||||
)
|
||||
try:
|
||||
from transformers.models.glm46v.processing_glm46v import Glm46VProcessor
|
||||
|
||||
if isinstance(processor, Glm46VProcessor):
|
||||
return Glm4vProcessingStrategy(
|
||||
**processing_kwargs,
|
||||
)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
if isinstance(processor, InternVLProcessor):
|
||||
return InternVLProcessingStrategy(
|
||||
|
||||
Reference in New Issue
Block a user