Merge pull request #255 from OpenAccess-AI-Collective/open-orca-prompts

open orca support
This commit is contained in:
Wing Lian
2023-07-01 01:11:23 -04:00
committed by GitHub
2 changed files with 27 additions and 0 deletions

View File

@@ -75,6 +75,20 @@ class SystemDataPrompter(AlpacaPrompter):
yield res
class OpenOrcaPromptTokenizingStrategy(InstructionWSystemPromptTokenizingStrategy):
    """
    Tokenizing strategy for OpenOrca datasets.

    Each row is expected to be a mapping exposing the ``question``,
    ``response`` and ``system_prompt`` keys.
    """

    def parse_instruction_fields(self, prompt) -> Tuple[str, str, str, str]:
        """
        Pull the fields used for tokenization out of a single dataset row.

        Returns a 4-tuple of ``(question, "", response, system_prompt)``.
        A missing key raises ``KeyError``.
        """
        question = prompt["question"]
        response = prompt["response"]
        system_prompt = prompt["system_prompt"]
        # The second slot is always the empty string; presumably it is the
        # optional "input" field of the parent strategy -- TODO confirm
        # against InstructionWSystemPromptTokenizingStrategy.
        return question, "", response, system_prompt
def load(tokenizer, cfg):
    """
    Build a tokenizing strategy by delegating to ``load_chat``.

    ``tokenizer`` and ``cfg`` are forwarded unchanged, and whatever
    ``load_chat`` returns is handed back to the caller.
    """
    strategy = load_chat(tokenizer, cfg)
    return strategy
@@ -95,3 +109,12 @@ def load_chat(tokenizer, cfg):
cfg.train_on_inputs,
cfg.sequence_len,
)
def load_open_orca(tokenizer, cfg):
    """
    Build the tokenizing strategy for OpenOrca datasets.

    The strategy wraps a ``SystemDataPrompter`` created with
    ``PromptStyle.INSTRUCT.value`` and is configured from ``cfg`` via its
    ``train_on_inputs`` and ``sequence_len`` attributes.
    """
    prompter = SystemDataPrompter(PromptStyle.INSTRUCT.value)
    strategy = OpenOrcaPromptTokenizingStrategy(
        prompter,
        tokenizer,
        cfg.train_on_inputs,
        cfg.sequence_len,
    )
    return strategy