reward model doesn't work well with batched prompts

2025-01-07 18:19:42 -05:00
parent 01d31587fe
commit 1ffca753ca
2 changed files with 5 additions and 1 deletion
--- a/src/axolotl/prompt_strategies/bradley_terry/chat_template.py
+++ b/src/axolotl/prompt_strategies/bradley_terry/chat_template.py
@@ -21,6 +21,10 @@ class BTChatTemplateStrategy(ChatTemplateStrategy):
    Bradley-Terry reward model pairwise chat template prompt strategy.
    """

+    @property
+    def supports_batched(self) -> bool:
+        return False
+
    def _tokenize_single_prompt(self, prompt):
        """

--- a/src/axolotl/prompt_strategies/chat_template.py
+++ b/src/axolotl/prompt_strategies/chat_template.py
@@ -239,7 +239,7 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
        Public method that can handle either a single prompt or a batch of prompts.
        """

-        if not self.is_prompt_batched(prompt):
+        if not self.is_prompt_batched(prompt) or not self.supports_batched:
            return self._tokenize_single_prompt(prompt)

        res = defaultdict(lambda: [])