Add two checks to handle legacy-format interleaved multimodal datasets (#2721) [skip ci]

* add two checks to handle legacy-format interleaved datasets

* fix: add warning about multiple images when using legacy format

---------

Co-authored-by: NanoCode012 <nano@axolotl.ai>
This commit is contained in:
artem
2025-05-28 01:49:43 -07:00
committed by GitHub
parent 4a80d309e8
commit a703560a10

View File

@@ -1,5 +1,6 @@
"""Module containing ProcessingStrategy classes and its derivative for different MultiModal Model types"""
import logging
from copy import deepcopy
from typing import Optional
@@ -9,6 +10,8 @@ from torch import Tensor
from transformers import ProcessorMixin
from transformers.image_utils import load_image
LOG = logging.getLogger(__name__)
class ProcessingStrategy:
"""Base Processing Strategy class"""
@@ -112,7 +115,9 @@ class ProcessingStrategy:
)
processed_example = None
if "messages" in example: # OpenAI format
if (
"messages" in example and example["messages"] is not None
): # OpenAI format
processed_example = example
else: # Legacy format
processed_example = convert_legacy_format(example)
@@ -132,10 +137,17 @@ class ProcessingStrategy:
break
# if the image key exists, add the image to the first message
if image_key is not None:
if image_key is not None and processed_example[image_key] is not None:
# TODO: check if it's normal to be single image only for common datasets
# From observation, it's usually a list of single image but some datasets may have several columns for images
# Temporary solution: take the first image and suggest people convert their datasets to use multi-content Messages
if len(processed_example[image_key]) > 0:
LOG.warning(
f"Found {len(processed_example[image_key])} images in a sample. Using the first one."
"If you are using a dataset with multiple images per sample, please convert it to use multi-content Messages."
"See https://docs.axolotl.ai/docs/multimodal.html#dataset-format"
)
image_value = processed_example[image_key][0]
# Handle image loading (Image, url, path, base64)