diff --git a/src/axolotl/processing_strategies.py b/src/axolotl/processing_strategies.py
index 4dee4f8a2..1cb6ed064 100644
--- a/src/axolotl/processing_strategies.py
+++ b/src/axolotl/processing_strategies.py
@@ -1,5 +1,6 @@
 """Module containing ProcessingStrategy classes and its derivative for different MultiModal Model types"""
 
+import logging
 from copy import deepcopy
 from typing import Optional
 
@@ -9,6 +10,8 @@ from torch import Tensor
 from transformers import ProcessorMixin
 from transformers.image_utils import load_image
 
+LOG = logging.getLogger(__name__)
+
 
 class ProcessingStrategy:
     """Base Processing Strategy class"""
@@ -112,7 +115,9 @@ class ProcessingStrategy:
                 )
 
             processed_example = None
-            if "messages" in example:  # OpenAI format
+            if (
+                "messages" in example and example["messages"] is not None
+            ):  # OpenAI format
                 processed_example = example
             else:  # Legacy format
                 processed_example = convert_legacy_format(example)
@@ -132,10 +137,17 @@ class ProcessingStrategy:
                     break
 
             # if the image key exists, add the image to the first message
-            if image_key is not None:
+            if image_key is not None and processed_example[image_key] is not None:
                 # TODO: check if it's normal to be single image only for common datasets
                 # From observation, it's usually a list of single image but some datasets may have several columns for images
                 # Temporary solution: take the first image and suggest people convert their datasets to use multi-content Messages
+                if len(processed_example[image_key]) > 1:
+                    LOG.warning(
+                        f"Found {len(processed_example[image_key])} images in a sample. Using the first one. "
+                        "If you are using a dataset with multiple images per sample, please convert it to use multi-content Messages. "
+                        "See https://docs.axolotl.ai/docs/multimodal.html#dataset-format"
+                    )
+
                 image_value = processed_example[image_key][0]
 
                 # Handle image loading (Image, url, path, base64)