feat: add custom processing strategy for phi35 vl

This commit is contained in:
NanoCode012
2025-06-17 16:23:42 +07:00
parent e1528fb381
commit 98e912e416

View File

@@ -264,6 +264,23 @@ class Gemma3ProcessingStrategy(ProcessingStrategy):
return labels
class Phi35VLProcessingStrategy(ProcessingStrategy):
"""Processing Strategy class for Phi-3.5-vision-instruct"""
def __init__(
self,
processor: ProcessorMixin,
chat_template: Optional[str] = None,
image_size: int | tuple[int, int] | None = None,
image_resize_algorithm: Resampling | None = None,
):
super().__init__(processor, chat_template, image_size, image_resize_algorithm)
self.image_token = "<|image|>" # nosec
self.image_token_id = processor.tokenizer.convert_tokens_to_ids(
self.image_token
)
def get_processing_strategy(
processor: ProcessorMixin,
chat_template,
@@ -279,13 +296,16 @@ def get_processing_strategy(
return Gemma3ProcessingStrategy(
processor, chat_template, image_size, image_resize_algorithm
)
if chat_template_type == "phi_35_vl":
return Phi35VLProcessingStrategy(
processor, chat_template, image_size, image_resize_algorithm
)
if chat_template_type in [
"llama3_2_vision",
"llama4",
"llava",
"mistral_v7_tekken",
"pixtral",
"phi_35_vl",
]:
return ProcessingStrategy(
processor, chat_template, image_size, image_resize_algorithm