diff --git a/src/axolotl/processing_strategies.py b/src/axolotl/processing_strategies.py
index 175b9dd36..66cf8bcff 100644
--- a/src/axolotl/processing_strategies.py
+++ b/src/axolotl/processing_strategies.py
@@ -264,6 +264,23 @@ class Gemma3ProcessingStrategy(ProcessingStrategy):
         return labels
 
 
+class Phi35VLProcessingStrategy(ProcessingStrategy):
+    """Processing Strategy for Phi-3.5-vision-instruct using the `<|image|>` token"""
+
+    def __init__(
+        self,
+        processor: ProcessorMixin,
+        chat_template: Optional[str] = None,
+        image_size: int | tuple[int, int] | None = None,
+        image_resize_algorithm: Resampling | None = None,
+    ):
+        super().__init__(processor, chat_template, image_size, image_resize_algorithm)
+        self.image_token = "<|image|>"  # nosec
+        self.image_token_id = processor.tokenizer.convert_tokens_to_ids(
+            self.image_token
+        )
+
+
 def get_processing_strategy(
     processor: ProcessorMixin,
     chat_template,
@@ -279,13 +296,16 @@ def get_processing_strategy(
         return Gemma3ProcessingStrategy(
             processor, chat_template, image_size, image_resize_algorithm
         )
+    if chat_template_type == "phi_35_vl":
+        return Phi35VLProcessingStrategy(
+            processor, chat_template, image_size, image_resize_algorithm
+        )
     if chat_template_type in [
         "llama3_2_vision",
         "llama4",
         "llava",
         "mistral_v7_tekken",
         "pixtral",
-        "phi_35_vl",
     ]:
         return ProcessingStrategy(
             processor, chat_template, image_size, image_resize_algorithm