diff --git a/docs/config.qmd b/docs/config.qmd
index ac4c3fa4f..eab8d28ca 100644
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -98,8 +98,10 @@ plugins:
   # - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
 
 # A list of one or more datasets to finetune the model with
+# See https://docs.axolotl.ai/docs/dataset_loading.html for a guide on loading datasets
+# See https://docs.axolotl.ai/docs/dataset-formats/ for a guide on dataset formats
 datasets:
-  # HuggingFace dataset repo | s3://,gs:// path | "json" for local dataset, make sure to fill data_files
+  # HuggingFace dataset repo | s3:// | gs:// | path to local file or directory
   - path: vicgalle/alpaca-gpt4
     # The type of prompt to use for training. [alpaca, gpteacher, oasst, reflection]
     type: alpaca # format | format:<prompt_style> (chat/instruct) | <prompt_style>.load_<load_fn>
@@ -221,7 +223,7 @@ datasets:
 # The same applies to the `test_datasets` option and the `pretraining_dataset` option. Default is true.
 shuffle_merged_datasets: true
 
-Deduplicates datasets and test_datasets with identical entries.
+# Deduplicates datasets and test_datasets with identical entries.
 dataset_exact_deduplication: true
 
 # A list of one or more datasets to eval the model with.
diff --git a/docs/dataset-formats/index.qmd b/docs/dataset-formats/index.qmd
index 9898bbc9b..a0113db07 100644
--- a/docs/dataset-formats/index.qmd
+++ b/docs/dataset-formats/index.qmd
@@ -36,10 +36,6 @@ It is typically recommended to save your dataset as `.jsonl` due to its flexibil
 
 Axolotl supports loading from a Hugging Face hub repo or from local files.
 
-::: {.callout-important}
-For pre-training only, Axolotl would split texts if it exceeds the context length into multiple smaller prompts.
-:::
-
 ### Pre-training from Hugging Face hub datasets
 
 As an example, to train using a Hugging Face dataset `hf_org/name`, you can pass the following config:
@@ -77,18 +73,21 @@ datasets:
     type: completion
 ```
 
-From local files (either example works):
+From local files:
 
 ```yaml
 datasets:
   - path: A.jsonl
     type: completion
-  - path: json
-    data_files: ["A.jsonl", "B.jsonl", "C.jsonl"]
+  - path: B.jsonl
     type: completion
 ```
 
 
+::: {.callout-important}
+For `completion` datasets only, Axolotl will split texts that exceed the context length into multiple smaller prompts. If you would like this for `pretraining_dataset` too, please let us know or help make a PR!
+:::
+
 ### Pre-training dataset configuration tips
 
 #### Setting max_steps
diff --git a/docs/dataset_loading.qmd b/docs/dataset_loading.qmd
index 09c8b0098..b78f86a98 100644
--- a/docs/dataset_loading.qmd
+++ b/docs/dataset_loading.qmd
@@ -54,7 +54,7 @@ datasets:
 
 #### Files
 
-Usually, to load a JSON file, you would do something like this:
+To load a JSON file, you would do something like this:
 
 ```python
 from datasets import load_dataset
@@ -66,20 +66,12 @@ Which translates to the following config:
 
 ```yaml
 datasets:
-  - path: json
-    data_files: /path/to/your/file.jsonl
-```
-
-However, to make things easier, we have added a few shortcuts for loading local dataset files.
-
-You can just point the `path` to the file or directory along with the `ds_type` to load the dataset. The below example shows for a JSON file:
-
-```yaml
-datasets:
-  - path: /path/to/your/file.jsonl
+  - path: data.json
     ds_type: json
 ```
 
+As shown above, you can simply point `path` at the file or directory and set `ds_type` to load the dataset.
+
 This works for CSV, JSON, Parquet, and Arrow files.
 
 ::: {.callout-tip}
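
Taken together, the documentation changes above describe pointing each entry's `path` either at a Hugging Face Hub repo or directly at a local file, with `ds_type` selecting the loader. Below is a minimal sketch of a config combining only options that appear in the diff; the local file name `data.jsonl` is a hypothetical placeholder, not something from the patch:

```yaml
datasets:
  # Hugging Face Hub repo, as in the existing config.qmd example
  - path: vicgalle/alpaca-gpt4
    type: alpaca
  # Local file loaded via the path + ds_type shortcut described above
  # (data.jsonl is a hypothetical file name)
  - path: data.jsonl
    ds_type: json
    type: alpaca

# Dataset-level options documented in the config.qmd hunk above
shuffle_merged_datasets: true
dataset_exact_deduplication: true
```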