diff --git a/src/axolotl/datasets.py b/src/axolotl/datasets.py index 40c58bc9c..5593a8dd3 100644 --- a/src/axolotl/datasets.py +++ b/src/axolotl/datasets.py @@ -126,6 +126,7 @@ class ConstantLengthDataset(IterableDataset): buffer_len = 0 if example: + # FIXME # just going to drop data points that are too long if len(example["input_ids"]) <= self.seq_length: input_ids = example["input_ids"] diff --git a/src/axolotl/prompt_strategies/alpaca_w_system.py b/src/axolotl/prompt_strategies/alpaca_w_system.py index 88acf0d0e..aacae8739 100644 --- a/src/axolotl/prompt_strategies/alpaca_w_system.py +++ b/src/axolotl/prompt_strategies/alpaca_w_system.py @@ -21,6 +21,7 @@ class InstructionWSystemPromptTokenizingStrategy(PromptTokenizingStrategy): ) def tokenize_prompt(self, prompt): + # pylint: disable=duplicate-code ( instruction, input, # pylint: disable=redefined-builtin