remove fastchat and sharegpt (#2021)

* remove fastchat and sharegpt * remove imports * remove more fastchat imports * chore: remove unused functions * feat: remove sharegpt and deprecate from docs * chore: remove unused sharegpt checks * fix: remove sharegpt type from tests * feat: add sharegpt deprecation error * feat: update readme --------- Co-authored-by: NanoCode012 <nano@axolotl.ai>
2024-11-08 13:45:49 -05:00
parent 3265b7095e
commit fd3b80716a
22 changed files with 28 additions and 1804 deletions
--- a/src/axolotl/cli/preprocess.py
+++ b/src/axolotl/cli/preprocess.py
@@ -23,10 +23,6 @@ from axolotl.cli import (
 )
 from axolotl.common.cli import PreprocessCliArgs
 from axolotl.common.const import DEFAULT_DATASET_PREPARED_PATH
-from axolotl.prompt_strategies.sharegpt import (
-    register_chatml_template,
-    register_llama3_template,
-)
 from axolotl.utils.trainer import disable_datasets_caching

 LOG = logging.getLogger("axolotl.cli.preprocess")
@@ -44,23 +40,6 @@ def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
        return_remaining_strings=True
    )

-    if parsed_cfg.chat_template == "chatml":
-        if parsed_cfg.default_system_message:
-            LOG.info(
-                f"ChatML set. Adding default system message: {parsed_cfg.default_system_message}"
-            )
-            register_chatml_template(parsed_cfg.default_system_message)
-        else:
-            register_chatml_template()
-    elif parsed_cfg.chat_template == "llama3":
-        if parsed_cfg.default_system_message:
-            LOG.info(
-                f"LLaMA-3 set. Adding default system message: {parsed_cfg.default_system_message}"
-            )
-            register_llama3_template(parsed_cfg.default_system_message)
-        else:
-            register_llama3_template()
-
    if not parsed_cfg.dataset_prepared_path:
        msg = (
            Fore.RED
--- a/src/axolotl/cli/train.py
+++ b/src/axolotl/cli/train.py
@@ -19,10 +19,6 @@ from axolotl.cli import (
 )
 from axolotl.common.cli import TrainerCliArgs
 from axolotl.integrations.base import PluginManager
-from axolotl.prompt_strategies.sharegpt import (
-    register_chatml_template,
-    register_llama3_template,
-)
 from axolotl.train import train

 LOG = logging.getLogger("axolotl.cli.train")
@@ -42,21 +38,6 @@ def do_train(cfg, cli_args) -> None:
    print_axolotl_text_art()
    check_accelerate_default_config()
    check_user_token()
-    if cfg.chat_template == "chatml" and cfg.default_system_message:
-        LOG.info(
-            f"ChatML set. Adding default system message: {cfg.default_system_message}"
-        )
-        register_chatml_template(cfg.default_system_message)
-    else:
-        register_chatml_template()
-
-    if cfg.chat_template == "llama3" and cfg.default_system_message:
-        LOG.info(
-            f"LLaMA-3 set. Adding default system message: {cfg.default_system_message}"
-        )
-        register_llama3_template(cfg.default_system_message)
-    else:
-        register_llama3_template()

    if cfg.rl:  # and cfg.rl != "orpo":
        dataset_meta = load_rl_datasets(cfg=cfg, cli_args=cli_args)
--- a/src/axolotl/monkeypatch/fastchat_conversation_turns.py
+++ b/src/axolotl/monkeypatch/fastchat_conversation_turns.py
@@ -1,231 +0,0 @@
-"""
-monkeypatch to add a get_turns method
-"""
-
-import logging
-from typing import Generator, Tuple
-
-from fastchat.conversation import SeparatorStyle
-
-LOG = logging.getLogger("axolotl.monkeypatch.fastchat_conversation_turns")
-
-
-def get_prompt(self) -> str:
-    ret = ""
-    for role, msg in self.get_turns():
-        ret += role + msg
-    return ret
-
-
-def get_turns(  # pylint: disable=too-many-return-statements
-    self,
-) -> Generator[Tuple[str, str], None, None]:
-    """Get the prompt for generation."""
-    system_prompt = self.system_template.format(system_message=self.system_message)
-    if self.sep_style == SeparatorStyle.ADD_COLON_SINGLE:
-        yield "", system_prompt + self.sep
-        for role, message in self.messages:
-            if message:
-                yield role + ": ", message + self.sep
-            else:
-                yield role + ":", ""
-        return
-    if self.sep_style == SeparatorStyle.ADD_COLON_TWO:
-        seps = [self.sep, self.sep2]
-        yield "", system_prompt + seps[0]
-        for i, (role, message) in enumerate(self.messages):
-            if message:
-                yield role + ": ", message + seps[i % 2]
-            else:
-                yield role + ":", ""
-        return
-    if self.sep_style == SeparatorStyle.ADD_COLON_SPACE_SINGLE:
-        yield "", system_prompt + self.sep
-        for role, message in self.messages:
-            if message:
-                yield role + ": ", message + self.sep
-            else:
-                yield role + ": ", ""  # must be end with a space
-        return
-    if self.sep_style == SeparatorStyle.ADD_NEW_LINE_SINGLE:
-        yield "", "" if system_prompt == "" else system_prompt + self.sep
-        for role, message in self.messages:
-            if message:
-                yield role + "\n", message + self.sep
-            else:
-                yield role + "\n", ""
-        return
-    if self.sep_style == SeparatorStyle.NO_COLON_SINGLE:
-        yield "", system_prompt
-        for role, message in self.messages:
-            if message:
-                yield role, message + self.sep
-            else:
-                yield role, ""
-        return
-    if self.sep_style == SeparatorStyle.NO_COLON_TWO:
-        seps = [self.sep, self.sep2]
-        yield "", system_prompt
-        for i, (role, message) in enumerate(self.messages):
-            if message:
-                yield role, message + seps[i % 2]
-            else:
-                yield role, ""
-        return
-    if self.sep_style == SeparatorStyle.RWKV:
-        yield "", system_prompt
-        for i, (role, message) in enumerate(self.messages):
-            if message:
-                yield role + ": ", message.replace("\r\n", "\n").replace(
-                    "\n\n", "\n"
-                ) + "\n\n"
-            else:
-                yield role + ":", ""
-        return
-    if self.sep_style == SeparatorStyle.LLAMA2 and self.name != "mistral":
-        if self.system_message:
-            if self.messages:
-                # For llama, the system message is incorporated into the first human instruction
-                first_role, first_msg = self.messages[0]
-                if first_role == self.roles[0]:
-                    system_prompt += first_msg
-                    self.messages.pop(0)
-            yield "", system_prompt
-        for i, (role, message) in enumerate(self.messages):
-            if message:
-                if (i % 2 == 0 and not self.system_message) or (
-                    i % 2 != 0 and self.system_message
-                ):
-                    role = "<s> " + role
-                yield role + " ", message
-            else:
-                yield role, ""
-        return
-    if self.sep_style == SeparatorStyle.LLAMA2 and self.name == "mistral":
-        contains_sys_msg = False
-        if self.system_message:
-            contains_sys_msg = True
-            if self.messages:
-                # There is no clear guidance on how to handle system messages in Mistral so we just prepend it to the first human instruction separated by a newline
-                first_role, first_msg = self.messages[0]
-                if first_role == self.roles[0]:
-                    system_prompt = self.system_template.format(
-                        system_message=" " + self.system_message
-                    )
-                    system_prompt += first_msg
-                    self.messages.pop(0)
-            yield "", system_prompt
-        for i, (role, message) in enumerate(self.messages):
-            if message and i == 0 and not contains_sys_msg:
-                yield "", system_prompt.strip() + " " + message  # if there is no system message, we need to make sure there is the a `<s> [INST]` at the beginning of the first instruction.
-            elif message:
-                yield role + " ", message
-            else:
-                yield role, ""
-        return
-    if self.sep_style == SeparatorStyle.LLAMA3:
-        if self.system_message:
-            # For llama3, the system message is NOT incorporated into the first human instruction
-            # All messages follow <|start_header_id|>' + role + '<|end_header_id|>\n\n'+ message + '<|eot_id|>
-            yield "", system_prompt
-        for i, (role, message) in enumerate(self.messages):
-            if message:
-                yield f"<|start_header_id|>{role}<|end_header_id|>\n\n", f"{message.strip()}<|eot_id|>"
-            else:
-                yield f"<|start_header_id|>{role}<|end_header_id|>\n\n", ""
-        return
-    if self.sep_style == SeparatorStyle.GEMMA:
-        if self.system_message:
-            raise ValueError("Gemma chat template does not support system messages")
-        for i, (role, message) in enumerate(self.messages):
-            prefix = "<bos>" if i == 0 else ""
-            message_str = message if message else ""
-            yield prefix + "<start_of_turn>" + role + "\n", message_str + "<end_of_turn>\n"
-        return
-    if self.sep_style == SeparatorStyle.CHATGLM:
-        # source: https://huggingface.co/THUDM/chatglm-6b/blob/1d240ba371910e9282298d4592532d7f0f3e9f3e/modeling_chatglm.py#L1302-L1308
-        # source2: https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926
-        round_add_n = 1 if self.name == "chatglm2" else 0
-        if system_prompt:
-            yield "", system_prompt + self.sep
-
-        for i, (role, message) in enumerate(self.messages):
-            if i % 2 == 0:
-                yield "", f"[Round {i//2 + round_add_n}]{self.sep}"
-
-            if message:
-                yield f"{role}：", f"{message}{self.sep}"
-            else:
-                yield f"{role}：", ""
-        return
-    if self.sep_style == SeparatorStyle.CHATML:
-        yield "", "" if system_prompt == "" else system_prompt + self.sep + "\n"
-        for role, message in self.messages:
-            if message:
-                yield role + "\n", message + self.sep + "\n"
-            else:
-                yield role + "\n", ""
-        return
-    if self.sep_style == SeparatorStyle.CHATGLM3:
-        if self.system_message:
-            yield "", system_prompt
-        for role, message in self.messages:
-            if message:
-                yield role + "\n", " " + message
-            else:
-                yield role
-        return
-    if self.sep_style == SeparatorStyle.CHATINTERN:
-        # source: https://huggingface.co/internlm/internlm-chat-7b-8k/blob/bd546fa984b4b0b86958f56bf37f94aa75ab8831/modeling_internlm.py#L771
-        seps = [self.sep, self.sep2]
-        yield "", system_prompt
-        for i, (role, message) in enumerate(self.messages):
-            prefix = "<s>" if i % 2 == 0 else ""
-            if message:
-                yield prefix + role + ":", message + seps[i % 2] + "\n"
-            else:
-                yield role + ":", ""
-        return
-    if self.sep_style == SeparatorStyle.DOLLY:
-        seps = [self.sep, self.sep2]
-        yield "", system_prompt
-        for i, (role, message) in enumerate(self.messages):
-            if message:
-                suffix = "\n\n" if i % 2 == 1 else ""
-                yield role + ":\n", message + seps[i % 2] + suffix
-            else:
-                yield role + ":\n", ""
-        return
-    if self.sep_style == SeparatorStyle.PHOENIX:
-        yield "", system_prompt
-        for role, message in self.messages:
-            if message:
-                yield role + ": ", "<s>" + message + "</s>"
-            else:
-                yield role + ": " + "<s>", ""
-        return
-    if self.sep_style == SeparatorStyle.ROBIN:
-        yield "", system_prompt + self.sep
-        for role, message in self.messages:
-            if message:
-                yield role + ":\n", message + self.sep
-            else:
-                yield role + ":\n", ""
-        return
-    if self.sep_style == SeparatorStyle.FALCON_CHAT:
-        if self.system_message:
-            yield "", system_prompt + self.sep
-        for role, message in self.messages:
-            if message:
-                yield role + ": ", message + self.sep
-            else:
-                yield role + ":", ""
-    else:
-        raise ValueError(f"Invalid style: {self.sep_style}")
-
-
-def add_get_turns_to_conversation():
-    import fastchat.conversation
-
-    fastchat.conversation.Conversation.get_turns = get_turns
-    fastchat.conversation.Conversation.get_prompt = get_prompt
--- a/src/axolotl/prompt_strategies/instruct.py
+++ b/src/axolotl/prompt_strategies/instruct.py
@@ -1,33 +0,0 @@
-"""Module containing the InstructShareGPTPromptTokenizingStrategy class"""
-from typing import Any, Dict, Optional
-
-from axolotl.prompt_tokenizers import ShareGPTPromptTokenizingStrategy
-from axolotl.prompters import ShareGPTPrompterV2
-
-
-def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None):
-    conversation = (
-        ds_cfg["conversation"] if ds_cfg and "conversation" in ds_cfg else None
-    )
-    strategy = InstructShareGPTPromptTokenizingStrategy(
-        # pylint: disable=duplicate-code
-        ShareGPTPrompterV2(
-            conversation=conversation,
-        ),
-        tokenizer,
-        cfg.train_on_inputs,
-        cfg.sequence_len,
-    )
-    return strategy
-
-
-class InstructShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy):
-    """
-    basic sharegpt strategy to grab conversations from the sample row
-    """
-
-    def get_conversation_thread(self, prompt):
-        return [
-            {"from": "human", "value": prompt["instruction"]},
-            {"from": "gpt", "value": prompt["output"]},
-        ]
--- a/src/axolotl/prompt_strategies/llama2_chat.py
+++ b/src/axolotl/prompt_strategies/llama2_chat.py
@@ -29,7 +29,7 @@ from dataclasses import dataclass, field
 from typing import Generator, List, Sequence

 from axolotl.prompt_tokenizers import PromptTokenizingStrategy
-from axolotl.prompters import IGNORE_TOKEN_ID, SHAREGPT_ASSERTION_FAILED_ROLE
+from axolotl.prompters import ALTERNATING_ASSERTION_FAILED_ROLE, IGNORE_TOKEN_ID


@dataclass
@@ -75,7 +75,7 @@ class Llama2ChatConversation:

 class LLama2ChatTokenizingStrategy(PromptTokenizingStrategy):
    """
-    Tokenizing strategy for ShareGPT prompts.
+    Tokenizing strategy for Llama2 prompts.
    adapted from https://github.com/lm-sys/FastChat/blob/main/fastchat/train/train.py
    """

@@ -191,7 +191,7 @@ class Llama2ChatPrompter:  # pylint: disable=too-few-public-methods
        conv.messages = []  # pylint: disable=R0801
        for j, sentence in enumerate(source):
            role = roles[sentence["from"]]
-            assert role == conv.roles[j % 2], SHAREGPT_ASSERTION_FAILED_ROLE
+            assert role == conv.roles[j % 2], ALTERNATING_ASSERTION_FAILED_ROLE
            if sentence["value"]:
                conv.append_message(role, sentence["value"])
        yield conv
--- a/src/axolotl/prompt_strategies/sharegpt.py
+++ b/src/axolotl/prompt_strategies/sharegpt.py
@@ -1,223 +0,0 @@
-"""Module containing the SimpleShareGPTPromptTokenizingStrategy class"""
-
-import logging
-from typing import Any, Dict, Optional, Type
-
-from fastchat.conversation import Conversation, SeparatorStyle, register_conv_template
-
-from axolotl.prompt_tokenizers import ShareGPTPromptTokenizingStrategy
-from axolotl.prompters import ShareGPTPrompterV2
-from axolotl.utils.tokenization import (
-    chatml_to_conversation,
-    merge_consecutive_messages,
-)
-
-LOG = logging.getLogger("axolotl")
-
-
-def register_chatml_template(system_message=None):
-    system_message = system_message or "You are a helpful assistant."
-    register_conv_template(
-        Conversation(
-            name="chatml",
-            system_template="<|im_start|>system\n{system_message}",
-            system_message=system_message,
-            roles=("<|im_start|>user", "<|im_start|>assistant"),
-            sep_style=SeparatorStyle.CHATML,
-            sep="<|im_end|>",
-        )
-    )
-    register_conv_template(
-        Conversation(
-            name="chatml_glaive",
-            system_template="<|im_start|>system\n{system_message}",
-            system_message=system_message,
-            roles=("<|im_start|>user", "<|im_start|>assistant", "<|im_start|>tool"),
-            sep_style=SeparatorStyle.CHATML,
-            sep="<|im_end|>",
-        )
-    )
-
-
-def register_llama3_template(system_message=None):
-    system_message = system_message or "You are a helpful assistant."
-    register_conv_template(
-        Conversation(
-            name="llama3",
-            system_template="<|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>",
-            system_message=system_message,
-            roles=("user", "assistant"),
-            sep_style=SeparatorStyle.LLAMA3,
-            sep="",
-            stop_str="<|eot_id|>",
-            stop_token_ids=[128001, 128009],
-        )
-    )
-
-
-def build_loader(
-    tokenization_strategy_cls: Type["ShareGPTPromptTokenizingStrategy"],
-    prompter_cls: Type["ShareGPTPrompterV2"],
-    default_conversation: Optional[str] = None,
-):
-    def _load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None):
-        LOG.warning(
-            "sharegpt type support will be deprecated in the next release of Axolotl. Please use chat_template instead. https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/conversation.html#chat_template",
-        )
-        conversation = (
-            ds_cfg["conversation"]
-            if ds_cfg and "conversation" in ds_cfg
-            else default_conversation
-        )
-        field_human = (
-            ds_cfg["field_human"] if ds_cfg and "field_human" in ds_cfg else None
-        )
-        field_model = (
-            ds_cfg["field_model"] if ds_cfg and "field_model" in ds_cfg else None
-        )
-        roles = ds_cfg["roles"].to_dict() if ds_cfg and "roles" in ds_cfg else None
-        strategy = tokenization_strategy_cls(
-            prompter_cls(
-                conversation=conversation,
-                role_key_model=field_model,
-                role_key_human=field_human,
-                roles=roles,
-            ),
-            tokenizer,
-            cfg.train_on_inputs,
-            cfg.sequence_len,
-        )
-        if ds_cfg and "strict" in ds_cfg and hasattr(strategy, "strict"):
-            strategy.strict = ds_cfg["strict"]
-        if ds_cfg and "field_messages" in ds_cfg and hasattr(strategy, "messages"):
-            strategy.messages = ds_cfg["field_messages"]
-        return strategy
-
-    return _load
-
-
-class SimpleShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy):
-    """
-    basic sharegpt strategy to grab conversations from the sample row
-    """
-
-    _strict = False
-    _messages = "conversations"
-
-    @property
-    def strict(self):
-        return self._strict
-
-    @strict.setter
-    def strict(self, strict):
-        self._strict = strict
-
-    @property
-    def messages(self):
-        return self._messages
-
-    @messages.setter
-    def messages(self, messages):
-        self._messages = messages
-
-    def get_conversation_thread(self, prompt):
-        conversations = prompt[self.messages]
-        if self.strict:
-            return conversations
-        role_key = "from"
-        if "role" in conversations[0].keys():
-            role_key = "role"
-        value_key = "value"
-        if "text" in conversations[0].keys():
-            value_key = "text"
-        elif "content" in conversations[0].keys():
-            value_key = "content"
-        # remap roles - allow for assistant turn"
-        role_map = {
-            "user": "human",
-            "human": "human",
-            "assistant": "gpt",
-            "gpt": "gpt",
-            "system": "system",
-        }
-        turns = [
-            {
-                "from": (
-                    role_map[t[role_key]] if t[role_key] in role_map else t[role_key]
-                ),
-                "value": t[value_key],
-                "weight": 1
-                if "weight" not in t or t["weight"] is None
-                else t["weight"],
-            }
-            for t in conversations
-        ]
-        return turns
-
-
-class SimpleRoleShareGPTPromptTokenizingStrategy(
-    SimpleShareGPTPromptTokenizingStrategy
-):
-    """
-    basic sharegpt strategy to grab conversations from the sample row, but uses role instead of from
-    """
-
-    def get_conversation_thread(self, prompt):
-        conversations = prompt["conversations"]
-        # remap role: prompter/assistant, text: ... => from: human/gpt, value: ...
-        turns = [{"from": t["role"], "value": t["value"]} for t in conversations]
-        return turns
-
-
-class GuanacoShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy):
-    """
-    sharegpt strategy that remaps oasst data to sharegpt format
-    """
-
-    def get_conversation_thread(self, prompt):
-        conversations = prompt["conversations"]
-        # remap role: prompter/assistant, text: ... => from: human/gpt, value: ...
-        role_map = {"prompter": "human", "assistant": "gpt"}
-        turns = [
-            {"from": role_map[t["role"]], "value": t["text"]} for t in conversations
-        ]
-        return turns
-
-
-class UltrachatShareGPTPromptTokenizingStrategy(SimpleShareGPTPromptTokenizingStrategy):
-    """
-    sharegpt strategy that remaps ultrachat data to sharegpt format
-    """
-
-    def get_conversation_thread(self, prompt):
-        conversations = prompt["messages"]
-        role_map = {"user": "human", "assistant": "gpt"}
-        turns = [
-            {"from": role_map[t["role"]], "value": t["content"]} for t in conversations
-        ]
-        return turns
-
-
-class GlaiveShareGPTPromptTokenizingStrategy(SimpleShareGPTPromptTokenizingStrategy):
-    """
-    sharegpt strategy that remaps glaive data to sharegpt format
-    """
-
-    def get_conversation_thread(self, prompt):
-        conversation = chatml_to_conversation(prompt)
-        conversation = merge_consecutive_messages(conversation)
-
-        return conversation
-
-
-load = build_loader(SimpleShareGPTPromptTokenizingStrategy, ShareGPTPrompterV2)
-load_role = build_loader(SimpleRoleShareGPTPromptTokenizingStrategy, ShareGPTPrompterV2)
-load_ultrachat = build_loader(
-    UltrachatShareGPTPromptTokenizingStrategy, ShareGPTPrompterV2
-)
-load_guanaco = build_loader(GuanacoShareGPTPromptTokenizingStrategy, ShareGPTPrompterV2)
-load_glaive = build_loader(
-    GlaiveShareGPTPromptTokenizingStrategy,
-    ShareGPTPrompterV2,
-    default_conversation="chatml_glaive",
-)
--- a/src/axolotl/prompt_strategies/sharegpt_jokes.py
+++ b/src/axolotl/prompt_strategies/sharegpt_jokes.py
@@ -1,28 +0,0 @@
-"""Module for Jokes prompts using sharegpt style """
-from axolotl.prompt_tokenizers import ShareGPTPromptTokenizingStrategy
-from axolotl.prompters import ShareGPTPrompterV2
-
-
-def load(tokenizer, cfg):
-    return SimpleJokesShareGPTPromptTokenizingStrategy(
-        ShareGPTPrompterV2(),
-        tokenizer,
-        cfg.train_on_inputs,
-        cfg.sequence_len,
-    )
-
-
-class SimpleJokesShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy):
-    """
-    Tokenization strategy for asking bot to tell a joke and then explain why its funny
-    """
-
-    # title, text, explanation
-    def get_conversation_thread(self, prompt):
-        title = "" if not prompt["title"] else prompt["title"] + " "
-        return [
-            {"from": "human", "value": "Tell me a joke."},
-            {"from": "gpt", "value": title + prompt["text"]},
-            {"from": "human", "value": "Why is that joke funny?"},
-            {"from": "gpt", "value": prompt["explanation"]},
-        ]
--- a/src/axolotl/prompt_tokenizers.py
+++ b/src/axolotl/prompt_tokenizers.py
@@ -1,17 +1,12 @@
 """Module containing PromptTokenizingStrategy and Prompter classes"""

 import abc
-import copy
 import logging
 from typing import Dict, List, Tuple, Union

-from fastchat.conversation import Conversation
 from transformers import BatchEncoding, PreTrainedTokenizer

-from axolotl.monkeypatch.fastchat_conversation_turns import (
-    add_get_turns_to_conversation,
-)
-from axolotl.prompters import IGNORE_TOKEN_ID, Prompter
+from axolotl.prompters import Prompter

 LOG = logging.getLogger("axolotl")

@@ -21,8 +16,6 @@ LLAMA_DEFAULT_EOS_TOKEN = "</s>"  # nosec
 LLAMA_DEFAULT_BOS_TOKEN = "<s>"  # nosec
 LLAMA_DEFAULT_UNK_TOKEN = "<unk>"  # nosec

-add_get_turns_to_conversation()
-

 class InvalidDataException(Exception):
    """
@@ -331,154 +324,6 @@ class AlpacaReflectionPTStrategy(ReflectionPromptTokenizingStrategy):
        )


-class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
-    """
-    Tokenizing strategy for ShareGPT prompts.
-    """
-
-    def get_conversation_thread(self, prompt):
-        return prompt["conversations"]
-
-    def tokenize_prompt(self, prompt):
-        # Initial values. We will append to these as we go through the conversation.
-        result, current_len = tokenize_prompt_default()
-        conversation: Conversation = (
-            self.prompter._conversation.copy()  # pylint: disable=protected-access
-        )
-
-        input_roles = {conversation.roles[0]}
-        output_roles = {conversation.roles[1]}
-
-        if len(conversation.roles) == 3:
-            tool_role_label = conversation.roles[2]
-            input_roles.add(tool_role_label)
-
-        # Add roles from the config
-        if self.prompter.roles:
-            if "input" in self.prompter.roles and self.prompter.roles["input"]:
-                for role in self.prompter.roles["input"]:
-                    input_roles.add(role)
-
-            if "output" in self.prompter.roles and self.prompter.roles["output"]:
-                for role in self.prompter.roles["output"]:
-                    output_roles.add(role)
-
-        # support for custom roles from the dataset, only useful for vicuna style prompts/roles
-        role_remap = []
-        if (
-            conversation.name == "vicuna_v1.1"
-            and "roles" in prompt
-            and len(prompt["roles"]) >= 2
-        ):
-            role_remap = [
-                {"from": conversation.roles[0], "to": prompt["roles"][0]},
-                {"from": conversation.roles[1], "to": prompt["roles"][1]},
-            ]
-
-        try:
-            for _, part in enumerate(
-                self.prompter.build_prompt(self.get_conversation_thread(prompt))
-            ):
-                if not isinstance(part, tuple):
-                    LOG.warning(f"expected tuple, got {part}")
-                    continue
-
-                if len(part) <= 2:
-                    role, content = part
-                    weight = 1
-                else:
-                    role, content, weight = part
-
-                # Uses "in" because role contains extra characters
-                input_turn = any(r.lower() in role.lower() for r in input_roles)
-                output_turn = any(r.lower() in role.lower() for r in output_roles)
-                empty_role = role.strip() == ""
-
-                if not any([input_turn, output_turn, empty_role]):
-                    LOG.warning(f"unhandled role: {role}")
-                    continue
-
-                if input_turn:
-                    role = (
-                        role.replace(role_remap[0]["from"], role_remap[0]["to"])
-                        if role_remap
-                        else role
-                    )
-                    turn = role + content
-                    # this is still the user query, we should
-                    if not content.strip():
-                        LOG.warning(f"user turn has empty text: {prompt}")
-                    res = self._tokenize(
-                        turn,
-                        add_eos_token=False,
-                        strip_bos_token=True,
-                    )
-                    if self.train_on_inputs and weight == 1:
-                        labels = copy.deepcopy(res["input_ids"])
-                    else:
-                        # everything from this is masked out from the labels
-                        labels = [IGNORE_TOKEN_ID] * len(res["input_ids"])
-                elif output_turn:
-                    role = (
-                        role.replace(role_remap[1]["from"], role_remap[1]["to"])
-                        if role_remap
-                        else role
-                    )
-                    turn = role + content
-                    # this should be the assistant response, should end with an eos token
-                    if not content.strip():
-                        LOG.warning(f"assistant turn has empty text: {prompt}")
-                    add_eos_token = not (
-                        conversation.name == "chatml"
-                        and conversation.sep == self.tokenizer.eos_token
-                    )
-                    res = self._tokenize(
-                        turn,
-                        add_eos_token=add_eos_token,
-                        strip_bos_token=True,
-                    )
-                    role_res = self._tokenize(
-                        role.rstrip(),
-                        add_eos_token=False,
-                        strip_bos_token=True,
-                    )
-                    labels = copy.deepcopy(res["input_ids"])
-                    if not self.train_on_inputs:
-                        # mask out role tokens from the labels
-                        len_role = len(role_res["input_ids"])
-                        labels[:len_role] = [IGNORE_TOKEN_ID] * min(
-                            len_role, len(labels)
-                        )
-                    if weight == 0:
-                        # everything from this is masked out from the labels
-                        # (role is masked out too because it makes no sense if contents is masked out)
-                        labels = [IGNORE_TOKEN_ID] * len(res["input_ids"])
-
-                elif empty_role:
-                    turn = content
-                    # this is only ever the first part, should include the bos token and the user query
-                    res = self._tokenize(
-                        turn, add_eos_token=False, strip_bos_token=False
-                    )
-                    if self.train_on_inputs and weight == 1:
-                        labels = copy.deepcopy(res["input_ids"])
-                    else:
-                        # everything from this is masked out from the labels
-                        labels = [IGNORE_TOKEN_ID] * len(res["input_ids"])
-
-                # pylint: disable=duplicate-code
-                result, current_len = parse_tokenized_to_result(
-                    result,
-                    current_len,
-                    res,
-                    labels,
-                    pad_token_id=self.tokenizer.pad_token_id,
-                )
-            return result
-        except (KeyError, AssertionError, IndexError) as err:
-            raise InvalidDataException(str(err)) from err
-
-
 def tokenize_prompt_default() -> Tuple[Dict[str, List[int]], int]:
    """
    Returns the default values for the tokenize prompt function
--- a/src/axolotl/prompters.py
+++ b/src/axolotl/prompters.py
@@ -5,7 +5,6 @@ from enum import Enum
 from typing import Generator, Optional, Union

 from colorama import Fore
-from fastchat.conversation import Conversation, get_conv_template

 LOG = logging.getLogger("axolotl")
 IGNORE_TOKEN_ID = -100
@@ -262,166 +261,10 @@ class ReflectAlpacaPrompter(Prompter):
        )


-SHAREGPT_ASSERTION_FAILED_ROLE = (
+ALTERNATING_ASSERTION_FAILED_ROLE = (
    "Role did not alternate between turns (gpt and human). Please check your data."
 )

-CONVERSATION_ROLE_FORMAT = {
-    "chatml": "<|im_start|>{ROLE}",
-    "zephyr": "<|{ROLE}|>",
-    "vicuna_v1.1": "{ROLE}",
-    "llama3": "<|start_header_id|>{ROLE}<|end_header_id|>",
-}
-
-
-class ShareGPTPrompter(Prompter):  # pylint: disable=too-few-public-methods
-    """
-    A prompter that generates prompts for the ShareGPT
-    """
-
-    role_key_human = "human"
-    role_key_model = "gpt"
-    # Optional, only used for tool usage datasets.
-    role_key_tool: Optional[str] = None
-    # Optional, role input/output mapping
-    roles: Optional[dict] = None
-
-    def __init__(
-        self,
-        prompt_style=None,  # pylint: disable=unused-argument
-        conversation: Optional[Union[str, Conversation]] = None,
-        role_key_human: Optional[str] = None,
-        role_key_model: Optional[str] = None,
-        role_key_tool: Optional[str] = None,
-        roles: Optional[dict] = None,
-    ):
-        if conversation:
-            if isinstance(conversation, Conversation):
-                self._conversation = conversation
-            else:
-                self._conversation = get_conv_template(conversation)
-        else:
-            self._conversation = get_conv_template("vicuna_v1.1")
-        if role_key_human:
-            self.role_key_human = role_key_human
-        if role_key_model:
-            self.role_key_model = role_key_model
-        if role_key_tool:
-            self.role_key_tool = role_key_tool
-        if roles:
-            self.roles = roles
-
-    def _build_result(self, source):
-        if len(source) < 2:
-            # If there isn't a back and forth conversation, ignore it
-            # also happens on the data splitting leaving empty conversations
-            raise IndexError(
-                f"A conversation entry has less than 2 messages :\n{source}"
-            )
-
-        conv = self._conversation.copy()
-
-        original_source = source.copy()
-        # Add the conversation system prompt if provided, otherwise use the default one
-        if source[0]["from"] == "system":
-            conv.set_system_message(source[0]["value"])
-            source.pop(0)
-
-        roles = {self.role_key_human: conv.roles[0], self.role_key_model: conv.roles[1]}
-        if self.role_key_tool:
-            roles[self.role_key_tool] = conv.roles[2]
-
-        try:
-            # Apply prompt templates
-            if source[0]["from"] not in roles:
-                # Skip the first one if it is not from human
-                source = source[1:]
-        except IndexError as err:
-            # sometimes there is a bing or system chat
-            raise err
-
-        conv.messages = []
-        for _, sentence in enumerate(source):
-            from_role = sentence["from"]
-            if from_role in roles:
-                role = roles[from_role]
-            else:
-                if self._conversation.name not in CONVERSATION_ROLE_FORMAT:
-                    raise NotImplementedError(
-                        f"Role ({role}) not in default roles, and {self._conversation.name} does not support role remapping yet."
-                        "Please help us by creating an Issue to add support for this conversation type."
-                    )
-
-                if self._conversation.name in ["llama3"]:
-                    role = from_role
-                else:
-                    role = CONVERSATION_ROLE_FORMAT[self._conversation.name].format(
-                        ROLE=from_role
-                    )
-
-            if len(conv.messages) > 0 and ((role == conv.messages[-1][0])):
-                if (
-                    role != "assistant"
-                ):  # back to back assistant calls may be okay for tool calls
-                    LOG.warning(f"{SHAREGPT_ASSERTION_FAILED_ROLE}: {sentence}")
-
-            conv.append_message(role, sentence["value"])
-        turns = list(conv.get_turns())
-        original_source_length = len(original_source)
-        assert len(turns) in [
-            original_source_length - 1,
-            original_source_length,
-            original_source_length + 1,
-        ]
-        if len(turns) == original_source_length + 1:
-            original_source = [{"weight": None}] + original_source
-        elif len(turns) == original_source_length - 1:
-            original_source = original_source[1:]
-        return [
-            (*turn, weight)
-            for turn, weight in zip(
-                turns,
-                [
-                    1 if "weight" not in e or e["weight"] is None else e["weight"]
-                    for e in original_source
-                ],
-            )
-        ]
-
-    def build_prompt(self, source) -> Generator[str, None, None]:
-        turns = self._build_result(source)
-
-        for part in turns:
-            if part[0] and not part[1]:
-                LOG.warning(f"role with empty message: {part[0]}")
-            yield part
-
-    def __repr__(self) -> str:
-        turns = self._build_result([{"from": "{from}", "value": "{value}"}])
-        return "\n".join([REPR_TEMPLATE.format(full_prompt=part) for part in turns])
-
-
-class ShareGPTPrompterV2(ShareGPTPrompter):
-    """
-    A V2 prompter that generates prompts for the ShareGPT
-    """
-
-    def __init__(
-        self,
-        conversation: Optional[Union[str, Conversation]] = None,
-        role_key_human: Optional[str] = None,
-        role_key_model: Optional[str] = None,
-        role_key_tool: Optional[str] = None,
-        roles: Optional[dict] = None,
-    ):
-        super().__init__(
-            conversation=conversation,
-            role_key_human=role_key_human,
-            role_key_model=role_key_model,
-            role_key_tool=role_key_tool,
-            roles=roles,
-        )
-

 class UnsupportedPrompter(Prompter):
    """
--- a/src/axolotl/utils/config/init.py
+++ b/src/axolotl/utils/config/init.py
@@ -215,11 +215,6 @@ def normalize_cfg_datasets(cfg):
    if cfg.chat_template:
        if cfg.datasets:
            for idx, ds_cfg in enumerate(cfg.datasets):
-                if ds_cfg.type == "sharegpt" and not ds_cfg.conversation:
-                    LOG.info(
-                        f"updating dataset {ds_cfg.path} with `conversation: {cfg.chat_template}` to match your chat_template"
-                    )
-                    cfg.datasets[idx].conversation = cfg.chat_template
                if (
                    ds_cfg.type in ["orpo.chat_template", "chat_template"]
                    and not ds_cfg.chat_template
@@ -461,27 +456,6 @@ def legacy_validate_config(cfg):
                "`early_stopping_patience` requires that eval_steps should evenly divide save_steps."
            )

-    if cfg.datasets:
-        for idx, ds_cfg in enumerate(cfg.datasets):
-            if not ds_cfg.type:
-                continue
-            if ds_cfg.type == "sharegpt:chat":
-                LOG.warning(
-                    PendingDeprecationWarning(
-                        "`type: sharegpt:chat` will soon be deprecated. simply use `type: sharegpt` instead."
-                    )
-                )
-                cfg.datasets[idx].type = "sharegpt"
-            if "sharegpt_simple" in ds_cfg.type:
-                LOG.warning(
-                    PendingDeprecationWarning(
-                        "`type: sharegpt_simple` will soon be deprecated. simply use `type: sharegpt` instead."
-                    )
-                )
-                cfg.datasets[idx].type = cfg.datasets[idx].type.replace(
-                    "sharegpt_simple", "sharegpt"
-                )
-
    if cfg.saves_per_epoch and cfg.save_steps:
        raise ValueError(
            "save_steps and saves_per_epoch are mutually exclusive and cannot be used together."
--- a/src/axolotl/utils/config/models/input/v0_4_1/init.py
+++ b/src/axolotl/utils/config/models/input/v0_4_1/init.py
@@ -783,26 +783,16 @@ class AxolotlInputConfig(

    @field_validator("datasets", mode="before")
    @classmethod
-    def fix_sharegpt_datasets(cls, datasets):
-        for idx, ds_cfg in enumerate(datasets):
-            if not ds_cfg["type"]:
+    def deprecate_sharegpt_datasets(cls, datasets):
+        for _, ds_cfg in enumerate(datasets):
+            if not ds_cfg.get("type"):
                continue
-            if ds_cfg["type"] == "sharegpt:chat":
-                LOG.warning(
-                    PendingDeprecationWarning(
-                        "`type: sharegpt:chat` will soon be deprecated. simply use `type: sharegpt` instead."
-                    )
-                )
-                datasets[idx]["type"] = "sharegpt"
-            if "sharegpt_simple" in ds_cfg["type"]:
-                LOG.warning(
-                    PendingDeprecationWarning(
-                        "`type: sharegpt_simple` will soon be deprecated. simply use `type: sharegpt` instead."
-                    )
-                )
-                datasets[idx]["type"] = datasets[idx]["type"].replace(
-                    "sharegpt_simple", "sharegpt"
+
+            if ds_cfg["type"].startswith("sharegpt"):
+                raise ValueError(
+                    "`type: sharegpt.*` is deprecated. Please use `type: chat_template` instead."
                )
+
        return datasets

    @model_validator(mode="before")
--- a/src/axolotl/utils/tokenization.py
+++ b/src/axolotl/utils/tokenization.py
@@ -1,8 +1,6 @@
 """Module for tokenization utilities"""

 import logging
-import re
-from typing import Dict, List

 from termcolor import colored

@@ -93,65 +91,3 @@ def check_rl_example_labels(example, tokenizer, text_only=False):
    LOG.info(f"REJECTED RESPONSE: {delimiter.join(colored_rejecteds)}\n\n\n")

    return delimiter.join(colored_tokens)
-
-
-GLAIVE_ROLES = ["USER", "ASSISTANT", "FUNCTION RESPONSE"]
-GLAIVE_TO_SHAREGPT_ROLE = {
-    "SYSTEM": "system",
-    "USER": "human",
-    "ASSISTANT": "gpt",
-    "FUNCTION RESPONSE": "tool",
-}
-
-GLAIVE_MSG_REGEX = re.compile(rf"({'|'.join(GLAIVE_ROLES)}): ")
-
-
-def chatml_to_conversation(row: Dict[str, str]) -> List[Dict[str, str]]:
-    """
-    Converts a ChatML formatted row to a list of messages in ShareGPT format.
-    Initially based off https://github.com/lilacai/lilac/blob/main/notebooks/GlaiveToShareGPT.ipynb.
-    """
-
-    system_prompt = row.get("system")
-    if system_prompt:
-        system_prompt = system_prompt.removeprefix("SYSTEM: ")
-
-    chat_str = row["chat"]
-    chat_msgs = [s.strip() for s in GLAIVE_MSG_REGEX.split(chat_str) if s]
-
-    chat_msg_dicts = [
-        {"from": GLAIVE_TO_SHAREGPT_ROLE[role], "value": value}
-        for role, value in zip(chat_msgs[::2], chat_msgs[1::2])
-    ]
-
-    if system_prompt:
-        chat_msg_dicts = [
-            {"from": GLAIVE_TO_SHAREGPT_ROLE["SYSTEM"], "value": system_prompt}
-        ] + chat_msg_dicts
-
-    return chat_msg_dicts
-
-
-def merge_consecutive_messages(messages):
-    """
-    Merge consecutive messages from the same sender into a single message.
-    This can be useful with datasets that contain multiple consecutive tool calls.
-    """
-
-    merged_messages = []
-    current_from = None
-    current_message = ""
-
-    for msg in messages:
-        if current_from == msg["from"]:
-            current_message += msg["value"]
-        else:
-            if current_from is not None:
-                merged_messages.append({"from": current_from, "value": current_message})
-            current_from = msg["from"]
-            current_message = msg["value"]
-
-    if current_from is not None:
-        merged_messages.append({"from": current_from, "value": current_message})
-
-    return merged_messages