refactor conversation plucking in sharegpt
This commit is contained in:
@@ -268,6 +268,9 @@ class AlpacaReflectionPTStrategy(ReflectionPromptTokenizingStrategy):
|
|||||||
|
|
||||||
|
|
||||||
class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
|
class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
|
||||||
|
def get_conversation_thread(self, prompt):
|
||||||
|
return prompt["conversations"]
|
||||||
|
|
||||||
def tokenize_prompt(self, prompt):
|
def tokenize_prompt(self, prompt):
|
||||||
result = {
|
result = {
|
||||||
"input_ids": [],
|
"input_ids": [],
|
||||||
@@ -279,7 +282,7 @@ class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
|
|||||||
assistant_token = self._get_assistant_token()
|
assistant_token = self._get_assistant_token()
|
||||||
try:
|
try:
|
||||||
for i, part in enumerate(
|
for i, part in enumerate(
|
||||||
self.prompter.build_prompt(prompt["conversations"])
|
self.prompter.build_prompt(self.get_conversation_thread(prompt))
|
||||||
):
|
):
|
||||||
if isinstance(part, tuple):
|
if isinstance(part, tuple):
|
||||||
if part[0] == "USER:":
|
if part[0] == "USER:":
|
||||||
|
|||||||
Reference in New Issue
Block a user