Merge pull request #155 from OpenAccess-AI-Collective/misc-fixes
new prompters, misc fixes for output dir missing using fsdp, and changing max seq len
This commit is contained in:
20
README.md
20
README.md
@@ -165,10 +165,30 @@ Have dataset(s) in one of the following format (JSONL recommended):
|
||||
```json
|
||||
{"article": "...", "summary": "..."}
|
||||
```
|
||||
- `alpaca_chat`: basic instruct for alpaca chat
|
||||
```json
|
||||
{"instruction": "...", "input": "...", "response": "..."}
|
||||
```
|
||||
- `alpaca_chat.load_qa`: question and answer for alpaca chat
|
||||
```json
|
||||
{"question": "...", "answer": "..."}
|
||||
```
|
||||
- `alpaca_chat.load_concise`: question and answer for alpaca chat, for concise answers
|
||||
```json
|
||||
{"instruction": "...", "input": "...", "response": "..."}
|
||||
```
|
||||
- `alpaca_chat.load_camel_ai`: question and answer for alpaca chat, for load_camel_ai
|
||||
```json
|
||||
{"message_1": "...", "message_2": "..."}
|
||||
```
|
||||
- `context_qa`: in context question answering from an article
|
||||
```json
|
||||
{"article": "...", "question": "...", "answer": "..."}
|
||||
```
|
||||
- `context_qa.load_404`: in context question answering from an article, with default response for no answer from context
|
||||
```json
|
||||
{"article": "...", "unanswerable_question": "..."}
|
||||
```
|
||||
- `creative_acr.load_answer`: instruction and revision
|
||||
```json
|
||||
{"instruction": "...", "revision": "..."}
|
||||
|
||||
@@ -279,6 +279,9 @@ def train(
|
||||
logging.info(
|
||||
f"Using Auto-resume functionality to start with checkpoint at {resume_from_checkpoint}"
|
||||
)
|
||||
|
||||
if not Path(cfg.output_dir).is_dir():
|
||||
os.makedirs(cfg.output_dir, exist_ok=True)
|
||||
trainer.train(resume_from_checkpoint=resume_from_checkpoint)
|
||||
|
||||
logging.info(f"Training Completed!!! Saving pre-trained model to {cfg.output_dir}")
|
||||
|
||||
@@ -18,6 +18,15 @@ def load(tokenizer, cfg):
|
||||
)
|
||||
|
||||
|
||||
class AlpacaConcisePrompter(AlpacaPrompter):
|
||||
"""
|
||||
Alpaca Prompter extending the system prompt to ask for concise answers
|
||||
"""
|
||||
|
||||
system_prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that concisely and appropriately completes the request.\n\n"
|
||||
system_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately and concisely completes the request.\n\n"
|
||||
|
||||
|
||||
class AlpacaQAPromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
|
||||
"""
|
||||
Tokenizing strategy for AlpacaQA
|
||||
@@ -31,6 +40,28 @@ class AlpacaQAPromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
|
||||
)
|
||||
|
||||
|
||||
class CamelAIPromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
|
||||
"""
|
||||
Tokenizing strategy for CamelAI datasets
|
||||
"""
|
||||
|
||||
def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]:
|
||||
return (
|
||||
prompt["message_1"],
|
||||
"",
|
||||
prompt["message_1"],
|
||||
)
|
||||
|
||||
|
||||
def load_concise(tokenizer, cfg):
|
||||
return AlpacaPromptTokenizingStrategy(
|
||||
AlpacaConcisePrompter(PromptStyle.CHAT.value),
|
||||
tokenizer,
|
||||
cfg.train_on_inputs,
|
||||
cfg.sequence_len,
|
||||
)
|
||||
|
||||
|
||||
def load_qa(tokenizer, cfg):
|
||||
return AlpacaQAPromptTokenizingStrategy(
|
||||
AlpacaPrompter(PromptStyle.CHAT.value),
|
||||
@@ -38,3 +69,12 @@ def load_qa(tokenizer, cfg):
|
||||
cfg.train_on_inputs,
|
||||
cfg.sequence_len,
|
||||
)
|
||||
|
||||
|
||||
def load_camel_ai(tokenizer, cfg):
|
||||
return CamelAIPromptTokenizingStrategy(
|
||||
AlpacaPrompter(PromptStyle.CHAT.value),
|
||||
tokenizer,
|
||||
cfg.train_on_inputs,
|
||||
cfg.sequence_len,
|
||||
)
|
||||
|
||||
67
src/axolotl/prompt_strategies/context_qa.py
Normal file
67
src/axolotl/prompt_strategies/context_qa.py
Normal file
@@ -0,0 +1,67 @@
|
||||
"""Module containing the classes for Context QA Prompt Tokenization Strategies"""
|
||||
from typing import Tuple
|
||||
|
||||
from axolotl.prompt_tokenizers import InstructionPromptTokenizingStrategy
|
||||
from axolotl.prompters import AlpacaPrompter, PromptStyle
|
||||
|
||||
|
||||
# article, unanswerable_question, question, answer
|
||||
def load_404(tokenizer, cfg):
|
||||
return AlpacaMissingInfoContextPromptTokenizingStrategy(
|
||||
AlpacaContextPrompter(PromptStyle.CHAT.value),
|
||||
tokenizer,
|
||||
cfg.train_on_inputs,
|
||||
cfg.sequence_len,
|
||||
)
|
||||
|
||||
|
||||
def load(tokenizer, cfg):
|
||||
return AlpacaContextPromptTokenizingStrategy(
|
||||
AlpacaContextPrompter(PromptStyle.CHAT.value),
|
||||
tokenizer,
|
||||
cfg.train_on_inputs,
|
||||
cfg.sequence_len,
|
||||
)
|
||||
|
||||
|
||||
class AlpacaContextPrompter(AlpacaPrompter):
|
||||
"""
|
||||
Customized system prompted for concise QA
|
||||
"""
|
||||
|
||||
system_prompt = (
|
||||
"Use the following contextual information to concisely answer the question.\n"
|
||||
)
|
||||
system_no_input_prompt = (
|
||||
"Use the following contextual information to concisely answer the question.\n"
|
||||
)
|
||||
|
||||
|
||||
class AlpacaContextPromptTokenizingStrategy(InstructionPromptTokenizingStrategy):
|
||||
"""
|
||||
Tokenization Strategy to combine in-context article with a question and answer
|
||||
"""
|
||||
|
||||
def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]:
|
||||
return (
|
||||
prompt["article"] + "\n===\n" + prompt["question"],
|
||||
"",
|
||||
prompt["answer"],
|
||||
)
|
||||
|
||||
|
||||
class AlpacaMissingInfoContextPromptTokenizingStrategy(
|
||||
InstructionPromptTokenizingStrategy
|
||||
):
|
||||
"""
|
||||
Tokenization Strategy to combine in-context article with a question that can't be answered
|
||||
from the context and a default response to that effect
|
||||
"""
|
||||
|
||||
def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]:
|
||||
return (
|
||||
prompt["article"] + "\n===\n" + prompt["unanswerable_question"],
|
||||
"",
|
||||
"The context provided does not contain any information about your inquiry. "
|
||||
"Therefore, I'm unable to answer your question based on the given context.",
|
||||
)
|
||||
@@ -234,6 +234,10 @@ def load_model(
|
||||
base_model,
|
||||
trust_remote_code=cfg.trust_remote_code or False,
|
||||
)
|
||||
# Shouldn't be a problem most of the time. will obviously error if the model doesn't support this
|
||||
# when training starts
|
||||
if config.max_seq_len and cfg.sequence_len > config.max_seq_len:
|
||||
config.max_seq_len = cfg.sequence_len
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
base_model,
|
||||
config=config,
|
||||
|
||||
Reference in New Issue
Block a user