Add CompletionPrompt type
This commit is contained in:
@@ -125,6 +125,25 @@ class NomicGPT4AllPromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CompletionPromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
    """Tokenizing strategy for plain-text completion datasets.

    Each dataset example is expected to carry its raw text under the
    ``"text"`` key; that text is passed straight through the prompter and
    tokenized with no instruction/response templating.
    """

    def parse_instruction_fields(self, prompt) -> str:
        # NOTE: the original annotation was ``-> (str)``, which is just
        # ``str`` — the parentheses wrongly suggested a tuple like the
        # sibling strategies return. Completion data has a single field,
        # so a bare string is returned.
        return prompt["text"]

    def tokenize_prompt(self, prompt):
        """Tokenize one dataset example into a full tokenized prompt dict."""
        text = self.parse_instruction_fields(prompt)
        full_prompt = self._build_full_prompt(text)
        return self._tokenize(full_prompt)

    def _build_full_prompt(self, text):
        # Delegate to the prompter; CompletionPrompter.build_prompt is an
        # identity pass-through for this strategy.
        return self.prompter.build_prompt(text)
||||||
class ReflectionPromptTokenizingStrategy(PromptTokenizingStrategy):
|
class ReflectionPromptTokenizingStrategy(PromptTokenizingStrategy):
|
||||||
def parse_instruction_fields(self, prompt) -> (str, str, str, str, str):
|
def parse_instruction_fields(self, prompt) -> (str, str, str, str, str):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|||||||
@@ -35,6 +35,17 @@ class JeopardyPrompter(AlpacaPrompter):
|
|||||||
prompt_input = "Below is a Jeopardy clue paired with input providing the category of the clue. Write a concise response that best answers tbe clue given the category.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
|
prompt_input = "Below is a Jeopardy clue paired with input providing the category of the clue. Write a concise response that best answers tbe clue given the category.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
|
||||||
|
|
||||||
|
|
||||||
|
class CompletionPrompter(AlpacaPrompter):
    """Prompter for completion-style data: text passes through untouched."""

    def build_prompt(self, text: str) -> str:
        """Return *text* unchanged — completion data uses no template."""
        return text

    def get_response(self, output: str) -> str:
        """Return the model output with leading/trailing whitespace removed."""
        stripped = output.strip()
        return stripped
|
|
||||||
class GPTeacherPrompter(AlpacaPrompter):
|
class GPTeacherPrompter(AlpacaPrompter):
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|||||||
@@ -11,13 +11,17 @@ from axolotl.prompt_tokenizers import (
|
|||||||
GPTeacherPromptTokenizingStrategy,
|
GPTeacherPromptTokenizingStrategy,
|
||||||
OpenAssistantPromptTokenizingStrategy,
|
OpenAssistantPromptTokenizingStrategy,
|
||||||
AlpacaReflectionPTStrategy,
|
AlpacaReflectionPTStrategy,
|
||||||
ShareGPTPromptTokenizingStrategy, JeopardyPromptTokenizingStrategy,
|
ShareGPTPromptTokenizingStrategy,
|
||||||
|
JeopardyPromptTokenizingStrategy,
|
||||||
|
CompletionPromptTokenizingStrategy,
|
||||||
)
|
)
|
||||||
from axolotl.prompters import (
|
from axolotl.prompters import (
|
||||||
AlpacaPrompter,
|
AlpacaPrompter,
|
||||||
GPTeacherPrompter,
|
GPTeacherPrompter,
|
||||||
ReflectAlpacaPrompter,
|
ReflectAlpacaPrompter,
|
||||||
ShareGPTPrompter, JeopardyPrompter,
|
ShareGPTPrompter,
|
||||||
|
JeopardyPrompter,
|
||||||
|
CompletionPrompter,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -118,6 +122,15 @@ def load_prepare_datasets(tokenizer, cfg, default_dataset_prepared_path):
|
|||||||
)
|
)
|
||||||
ds_wrapper = TokenizedPromptDataset(ds_strategy, ds["train"])
|
ds_wrapper = TokenizedPromptDataset(ds_strategy, ds["train"])
|
||||||
datasets.append(ds_wrapper)
|
datasets.append(ds_wrapper)
|
||||||
|
elif d.type == "completion":
|
||||||
|
ds_strategy = CompletionPromptTokenizingStrategy(
|
||||||
|
CompletionPrompter(),
|
||||||
|
tokenizer,
|
||||||
|
cfg.train_on_inputs,
|
||||||
|
cfg.sequence_len,
|
||||||
|
)
|
||||||
|
ds_wrapper = TokenizedPromptDataset(ds_strategy, ds["train"])
|
||||||
|
datasets.append(ds_wrapper)
|
||||||
else:
|
else:
|
||||||
logging.error(f"unhandled prompt tokenization strategy: {d.type}")
|
logging.error(f"unhandled prompt tokenization strategy: {d.type}")
|
||||||
logging.info("tokenizing, merging, and shuffling master dataset")
|
logging.info("tokenizing, merging, and shuffling master dataset")
|
||||||
|
|||||||
Reference in New Issue
Block a user