reward model doesn't work well with batched prompts

This commit is contained in:
Wing Lian
2025-01-07 18:19:42 -05:00
parent 27bb21c459
commit f0b6581f8c
2 changed files with 5 additions and 1 deletions

View File

@@ -21,6 +21,10 @@ class BTChatTemplateStrategy(ChatTemplateStrategy):
Bradley-Terry reward model pairwise chat template prompt strategy.
"""
# Reports whether this strategy can tokenize a batch of prompts in one call.
# Always False here: the public tokenize entry point checks this flag and,
# when it is False, routes the prompt to `_tokenize_single_prompt` even if
# the prompt is batched.
@property
def supports_batched(self) -> bool:
return False
def _tokenize_single_prompt(self, prompt):
"""

View File

@@ -239,7 +239,7 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
Public method that can handle either a single prompt or a batch of prompts.
"""
if not self.is_prompt_batched(prompt):
if not self.is_prompt_batched(prompt) or not self.supports_batched:
return self._tokenize_single_prompt(prompt)
res = defaultdict(lambda: [])