From 6abb7f6a1603f4955f0e3851f7bbcd4026f76ad7 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Mon, 29 May 2023 13:54:06 +0900 Subject: [PATCH] Lint datasets --- src/axolotl/datasets.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/axolotl/datasets.py b/src/axolotl/datasets.py index c7bb9fbfe..1e72be114 100644 --- a/src/axolotl/datasets.py +++ b/src/axolotl/datasets.py @@ -1,3 +1,5 @@ +"""Module containing Dataset functionality""" + import logging from typing import List @@ -14,7 +16,14 @@ from .prompt_tokenizers import PromptTokenizingStrategy, InvalidDataException class TokenizedPromptDataset(IterableDataset): - def __init__( + """ + Iterable dataset that returns tokenized prompts from a stream of text files. + Args: + prompt_tokenizer (PromptTokenizingStrategy): The prompt tokenizing method for processing the data. + dataset (dataset.Dataset): Dataset with text files. + """ + + def __init__( # pylint: disable=super-init-not-called self, prompt_tokenizer: PromptTokenizingStrategy, dataset: IterableDataset, @@ -42,7 +51,7 @@ class ConstantLengthDataset(IterableDataset): seq_length (int): Length of token sequences to return. """ - def __init__( + def __init__( # pylint: disable=super-init-not-called self, tokenizer, datasets,