Reward model doesn't work well with batched prompts
This commit is contained in:
@@ -21,6 +21,10 @@ class BTChatTemplateStrategy(ChatTemplateStrategy):
|
|||||||
Bradley-Terry reward model pairwise chat template prompt strategy.
|
Bradley-Terry reward model pairwise chat template prompt strategy.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@property
|
||||||
|
def supports_batched(self) -> bool:
|
||||||
|
return False
|
||||||
|
|
||||||
def _tokenize_single_prompt(self, prompt):
|
def _tokenize_single_prompt(self, prompt):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|||||||
@@ -239,7 +239,7 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
|
|||||||
Public method that can handle either a single prompt or a batch of prompts.
|
Public method that can handle either a single prompt or a batch of prompts.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if not self.is_prompt_batched(prompt):
|
if not self.is_prompt_batched(prompt) or not self.supports_batched:
|
||||||
return self._tokenize_single_prompt(prompt)
|
return self._tokenize_single_prompt(prompt)
|
||||||
|
|
||||||
res = defaultdict(lambda: [])
|
res = defaultdict(lambda: [])
|
||||||
|
|||||||
Reference in New Issue
Block a user