diff --git a/src/axolotl/utils/collators/mm_chat.py b/src/axolotl/utils/collators/mm_chat.py index 772fe818a..7ec9c77f9 100644 --- a/src/axolotl/utils/collators/mm_chat.py +++ b/src/axolotl/utils/collators/mm_chat.py @@ -213,9 +213,12 @@ class MultiModalChatDataCollator(DataCollatorMixin): for example in examples ] - if chat_template_type == "llava": - max_images = 1 - images = __class__.process_images(examples, max_images=max_images) + if chat_template_type != "llava": + images = __class__.process_images(examples, max_images=max_images) + else: + # LLava1.5 does not support multiple images + images = [example["images"] for example in examples] + images = [image[0] for image in images] # Tokenize the texts and process the images batch = processor(text=texts, images=images, return_tensors="pt", padding=True)