From e4e8ffd40c6d3c27a16689e0530e9dea956456e5 Mon Sep 17 00:00:00 2001
From: Dan Saunders <danjsaund@gmail.com>
Date: Wed, 20 Aug 2025 04:28:18 +0000
Subject: [PATCH] nits: tidy docstrings and comments in datasets.py

---
 src/axolotl/datasets.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/axolotl/datasets.py b/src/axolotl/datasets.py
index 99ba2522b..87f26275f 100644
--- a/src/axolotl/datasets.py
+++ b/src/axolotl/datasets.py
@@ -1,5 +1,5 @@
 """
-Module containing Dataset functionality
+Module containing dataset functionality.
 
 We want this to be a wrapper for an existing dataset that we have loaded. Lets use the
 concept of middlewares to wrap each dataset, for example:
@@ -47,6 +47,7 @@ class TokenizedPromptDataset(Dataset):
         )
 
     def process(self, dataset: Dataset | IterableDataset) -> Dataset | IterableDataset:
+        """Apply filtering and tokenization."""
         # For IterableDataset, we can't access features up front. Anyways, we don't care
         # to remove unused columns from streaming datasets.
         features = None
@@ -104,7 +105,8 @@ def wrap_dataset_for_tokenized_prompt(
     return TokenizedPromptDataset(prompt_tokenizer, dataset, **kwargs)
 
 
-# TODO this isn't the best since it can't interleave datasets
+# TODO: this isn't the best since it can't interleave datasets.
+# NOTE: this is only used in a test. Can it be deleted?
 class ConstantLengthDataset(IterableDataset):
     """Iterable dataset that returns constant length chunks of tokens from stream of
     text files.