Feat: Rework multimodal support (mllama, llava, pixtral, qwen2, qwen25, gemma3, mistral3) (#2435)

This commit is contained in:
NanoCode012
2025-03-23 22:08:51 +07:00
committed by GitHub
parent 9f00465a5c
commit a9b0733f2c
19 changed files with 971 additions and 184 deletions

View File

@@ -586,6 +586,14 @@ resume_from_checkpoint:
# Be careful with this being turned on between different models.
auto_resume_from_checkpoints: false
## Multimodal section
# int | tuple[int, int] | None. Size to resize images to, width x height.
# If not set, the size is read from the model/processor config.
image_size:
# str. Algorithm to use for image resizing.
# One of "bilinear", "bicubic", or "lanczos". Default is "bilinear".
image_resize_algorithm: 'bilinear'
## End of multimodal section
# Don't mess with this, it's here for accelerate and torchrun
local_rank: