feat: add custom processing strategy for phi35 vl

This commit is contained in:
NanoCode012
2025-06-17 16:23:42 +07:00
parent e1528fb381
commit 98e912e416

View File

@@ -264,6 +264,23 @@ class Gemma3ProcessingStrategy(ProcessingStrategy):
return labels return labels
class Phi35VLProcessingStrategy(ProcessingStrategy):
    """Processing strategy specialised for Phi-3.5-vision-instruct.

    On top of the base-class initialisation, the constructor stores the
    image placeholder token and the id the processor's tokenizer assigns
    to it.
    """

    def __init__(
        self,
        processor: ProcessorMixin,
        chat_template: Optional[str] = None,
        image_size: int | tuple[int, int] | None = None,
        image_resize_algorithm: Resampling | None = None,
    ):
        """Initialise the base strategy, then record the image token.

        Args:
            processor: Processor whose ``tokenizer`` resolves the image
                placeholder to a token id.
            chat_template: Forwarded unchanged to the base class.
            image_size: Forwarded unchanged to the base class.
            image_resize_algorithm: Forwarded unchanged to the base class.
        """
        super().__init__(processor, chat_template, image_size, image_resize_algorithm)

        # The literal is an image placeholder, not a credential; the marker
        # silences the security linter on this line.
        placeholder = "<|image|>"  # nosec
        self.image_token = placeholder
        self.image_token_id = processor.tokenizer.convert_tokens_to_ids(placeholder)
def get_processing_strategy( def get_processing_strategy(
processor: ProcessorMixin, processor: ProcessorMixin,
chat_template, chat_template,
@@ -279,13 +296,16 @@ def get_processing_strategy(
return Gemma3ProcessingStrategy( return Gemma3ProcessingStrategy(
processor, chat_template, image_size, image_resize_algorithm processor, chat_template, image_size, image_resize_algorithm
) )
if chat_template_type == "phi_35_vl":
return Phi35VLProcessingStrategy(
processor, chat_template, image_size, image_resize_algorithm
)
if chat_template_type in [ if chat_template_type in [
"llama3_2_vision", "llama3_2_vision",
"llama4", "llama4",
"llava", "llava",
"mistral_v7_tekken", "mistral_v7_tekken",
"pixtral", "pixtral",
"phi_35_vl",
]: ]:
return ProcessingStrategy( return ProcessingStrategy(
processor, chat_template, image_size, image_resize_algorithm processor, chat_template, image_size, image_resize_algorithm