Lint prompters.py

2023-05-29 10:03:13 +09:00
parent 4c0eddb3f8
commit cb4f0e9342
1 changed files with 73 additions and 25 deletions
--- a/src/axolotl/prompters.py
+++ b/src/axolotl/prompters.py
@@ -1,28 +1,37 @@
-import copy
+"""Module containing prompters"""
+
 import dataclasses
 import logging
 from enum import auto, Enum
-from typing import List, Tuple, Any, Union, Generator
+from typing import List, Union, Generator

 IGNORE_TOKEN_ID = -100


 class PromptStyle(Enum):
-    instruct = "instruct"
-    chat = "chat"
+    """
+    Enum for prompt styles
+    """
+
+    INSTRUCT = "instruct"
+    CHAT = "chat"


 class AlpacaPrompter:
+    """
+    Base class for alpaca prompters
+    """
+
    system_prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n"
    system_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n"
    prompt_style = None

-    def __init__(self, prompt_style=PromptStyle.instruct.value):
-        self.prompt_style = prompt_style if prompt_style else PromptStyle.instruct.value
+    def __init__(self, prompt_style=PromptStyle.INSTRUCT.value):
+        self.prompt_style = prompt_style if prompt_style else PromptStyle.INSTRUCT.value
        self.match_prompt_style()

    def match_prompt_style(self):
-        if self.prompt_style == PromptStyle.instruct.value:
+        if self.prompt_style == PromptStyle.INSTRUCT.value:
            self.prompt_input = (
                self.system_prompt
                + "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
@@ -32,7 +41,7 @@ class AlpacaPrompter:
                + "### Instruction:\n{instruction}\n\n### Response:\n"
            )
            self.response_split = "### Response:"
-        if self.prompt_style == PromptStyle.chat.value:
+        if self.prompt_style == PromptStyle.CHAT.value:
            self.prompt_input = (
                self.system_prompt + "USER: {instruction}\n{input}\nASSISTANT:"
            )
@@ -44,7 +53,7 @@ class AlpacaPrompter:
    def build_prompt(
        self,
        instruction: str,
-        input: Union[None, str] = None,
+        input: Union[None, str] = None,  # pylint: disable=redefined-builtin
        output: Union[None, str] = None,
    ) -> Generator[str, None, None]:
        # returns the full prompt from instruction and optional input
@@ -62,33 +71,60 @@ class AlpacaPrompter:


 class UnpromptedPrompter(AlpacaPrompter):
+    """
+    Prompter for alpaca with no system prompt
+    """
+
    system_prompt = ""
    system_no_input_prompt = ""


 class JeopardyPrompter(AlpacaPrompter):
+    """
+    Prompter for Jeopardy
+    """
+
    prompt_input = "Below is a Jeopardy clue paired with input providing the category of the clue. Write a concise response that best answers tbe clue given the category.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"


 class MultipleChoiceExplainPrompter(AlpacaPrompter):
+    """
+    Prompter for multiple-choice questions that asks for an explanation
+    """
+
    system_prompt = (
        "Choose the answer that best answers the question. Explain your reasoning."
    )


 class MultipleChoiceConcisePrompter(AlpacaPrompter):
+    """
+    Prompter for multiple-choice questions that asks for a concise response
+    """
+
    prompt_input = "Choose the answer that best answers the question. Be concise in your response.\n\nUSER: {instruction}\n{input}\nASSISTANT:\n"


 class SummarizeTLDRPrompter(AlpacaPrompter):
+    """
+    Prompter for summarizing an article as a TL;DR
+    """
+
    prompt_no_input = (
        "USER: Summarize the following article as a TL;DR.\n{instruction}\nASSISTANT:"
    )


 class CompletionPrompter:
+    """
+    Prompter for completion
+    """
+
    def build_prompt(
-        self, instruction: str, input=None, output=None
+        self,
+        instruction: str,
+        input=None,  # pylint: disable=redefined-builtin, unused-argument
+        output=None,  # pylint: disable=unused-argument
    ) -> Generator[str, None, None]:
        yield instruction

@@ -97,14 +133,22 @@ class CompletionPrompter:


 class GPTeacherPrompter(AlpacaPrompter):
-    ...
+    """
+    Prompter for GPTeacher
+    """


 class NomicGPT4AllPrompter(AlpacaPrompter):
-    ...
+    """
+    Prompter for NomicGPT4All
+    """


 class ReflectAlpacaPrompter:
+    """
+    Prompter for ReflectAlpaca
+    """
+
    system_prompt = "Below is an instruction that describes a task, paired with an input that provides further context. You, the Assistant, should generate a response as if it were an abstract for an academic or technical paper on the query along with a methodology. Then generate an Agent Reflection where you create a long form response as if from subject matter expert, be verbose, diligent, and creative in your application of knowledge, apply it through the lens of the response generated by the assistant. Look for flawed reasoning, faulty logic, or other mistakes in the method. Finally, generate a final response and method for the user with the Assistant abstract and Reflection analysis as augmentations to the generation\n\n"
    system_no_input_prompt = "Below is an instruction that describes a task. You, the Assistant, should generate a response as if it were an abstract for an academic or technical paper on the query along with a methodology. Then generate an Agent Reflection where you create a long form response as if from subject matter expert, be verbose, diligent, and creative in your application of knowledge, apply it through the lens of the response generated by the assistant. Look for flawed reasoning, faulty logic, or other mistakes in the method. Finally, generate a final response and method for the user with the Assistant abstract and Reflection analysis as augmentations to the generation\n\n"

@@ -120,7 +164,7 @@ class ReflectAlpacaPrompter:
        self.match_prompt_style()

    def match_prompt_style(self):
-        if self.prompt_style == PromptStyle.instruct.value:
+        if self.prompt_style == PromptStyle.INSTRUCT.value:
            self.prompt_input = (
                self.system_prompt
                + "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
@@ -131,7 +175,7 @@ class ReflectAlpacaPrompter:
            )
            self.agent_label = "### Thought:\n{output}\n\n### Agent Reflection:\n{reflection}\n\n### Final Response:\n{corrected}"
            self.response_split = "### Final Response:"
-        if self.prompt_style == PromptStyle.chat.value:
+        if self.prompt_style == PromptStyle.CHAT.value:
            self.prompt_input = (
                self.system_prompt + "USER: {instruction}\n{input}\nASSISTANT:"
            )
@@ -143,10 +187,10 @@ class ReflectAlpacaPrompter:
            )
            self.response_split = "ASSISTANT:"

-    def build_prompt(
+    def build_prompt(  # pylint: disable=too-many-arguments
        self,
        instruction: str,
-        input: Union[None, str] = None,
+        input: Union[None, str] = None,  # pylint: disable=redefined-builtin
        output: Union[None, str] = None,
        reflection: Union[None, str] = None,
        corrected: Union[None, str] = None,
@@ -176,7 +220,7 @@ class SeparatorStyle(Enum):
    DOLLY = auto()


-# TODO clean this 💩 up
+# TODO clean this 💩 up  # pylint: disable=fixme
@dataclasses.dataclass
 class Conversation:
    """A class that keeps all conversation history."""
@@ -193,11 +237,11 @@ class Conversation:
        seps = [self.sep, self.sep2]
        preamble = self.system + seps[0]
        yield preamble
-        for i, (role, message) in enumerate(self.messages):
+        for _, (role, message) in enumerate(self.messages):
            if message:
                yield (role + ":", " " + message)
            else:
-                logging.warning("role with empty message: " + role)
+                logging.warning(f"role with empty message: {role}")
                yield (role + ":",)

    def copy(self):
@@ -227,10 +271,14 @@ conv_vicuna_v1_1 = Conversation(
 )


-class ShareGPTPrompter:
+class ShareGPTPrompter:  # pylint: disable=too-few-public-methods
+    """
+    A prompter that generates prompts for ShareGPT conversations
+    """
+
    def __init__(self, prompt_style=None):
-        if prompt_style != PromptStyle.chat.value:
-            raise Exception(
+        if prompt_style != PromptStyle.CHAT.value:
+            raise ValueError(
                f"unsupported prompt_style for ShareGPTPrompter({prompt_style})"
            )

@@ -240,7 +288,7 @@ class ShareGPTPrompter:
    #         self.prompt_no_input = self.system_no_input_prompt + "USER: {instruction}\nASSISTANT:"
    #         self.response_split = "ASSISTANT:"

-    def build_prompt(self, source, *args, **kwargs) -> Generator[str, None, None]:
+    def build_prompt(self, source) -> Generator[str, None, None]:
        # ignore the system prompt if provided
        if source[0]["from"] == "system":
            source.pop(0)
@@ -261,9 +309,9 @@ class ShareGPTPrompter:
            ):
                # Skip the first one if it is not from human
                source = source[1:]
-        except IndexError as e:
+        except IndexError as err:
            # sometimes there is a bing or system chat
-            raise e
+            raise err

        conv.messages = []
        for j, sentence in enumerate(source):