From df359c8a6e14ecdd2e1eb0049bd8143c32421952 Mon Sep 17 00:00:00 2001 From: Afrizal Hasbi Azizy Date: Sat, 12 Oct 2024 00:34:13 +0700 Subject: [PATCH] Handle image input as string paths for MMLMs (#1958) * Update mm_chat.py Handle string image (paths) * chore: lint --------- Co-authored-by: Wing Lian --- src/axolotl/utils/collators/mm_chat.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/axolotl/utils/collators/mm_chat.py b/src/axolotl/utils/collators/mm_chat.py index f49e97f37..b9b67f875 100644 --- a/src/axolotl/utils/collators/mm_chat.py +++ b/src/axolotl/utils/collators/mm_chat.py @@ -4,6 +4,7 @@ Collators for multi-modal chat messages and packing from dataclasses import dataclass from typing import Any, Dict, List, Optional, Union +from PIL import Image from transformers import PreTrainedTokenizerBase, ProcessorMixin from transformers.data.data_collator import DataCollatorMixin from transformers.utils import PaddingStrategy @@ -52,7 +53,12 @@ class MultiModalChatDataCollator(DataCollatorMixin): ) for example in examples ] - images = [example["images"] for example in examples] + images = [ + Image.open(example["images"]) + if isinstance(example["images"], str) + else example["images"] + for example in examples + ] if max_images > 0: images = [img_batch[:max_images] for img_batch in images]