ensure that batch vs. single-prompt tokenization is dispatched properly

This commit is contained in:
Wing Lian
2025-01-07 14:03:49 -05:00
parent 01896b1bde
commit 684b38291f
2 changed files with 5 additions and 2 deletions

View File

@@ -21,7 +21,7 @@ class BTChatTemplateStrategy(ChatTemplateStrategy):
Bradley-Terry reward model pairwise chat template prompt strategy.
"""
def tokenize_prompt(self, prompt):
def _tokenize_single_prompt(self, prompt):
"""
:param prompt: the actual row of data from the underlying dataset

View File

@@ -226,11 +226,14 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
# Let calling code know we can handle lists of examples
return True
def tokenize_prompt(self, prompt: dict[str, Any]) -> Dict[str, List[List[int]]]:
def tokenize_prompt(self, prompt: dict[str, Any]):
"""
Public method that can handle either a single prompt or a batch of prompts.
"""
if not all(isinstance(v, list) for v in prompt.values()):
return self._tokenize_single_prompt(prompt)
res = defaultdict(lambda: [])
feature_names = list(prompt.keys())