fix: force train split for json,csv,txt for test_datasets and misc doc changes (#3226)

* fix: force train split for json,csv,txt for test_datasets
* feat(doc): add info on mixing datasets for VLM
* feat(doc): max memory
* fix(doc): clarify lr groups
* fix: add info on vision not being dropped
* feat: add qwen3-vl to multimodal docs
* fix: add moe blocks to arch list
* feat(doc): improve mistral docs
* chore: add helpful link [skip-e2e]
* fix: add vram usage for mistral small
* Update link in docs/faq.qmd

Co-authored-by: salman <salman.mohammadi@outlook.com>

---------

Co-authored-by: Wing Lian <wing@axolotl.ai>
Co-authored-by: salman <salman.mohammadi@outlook.com>
2025-10-23 05:23:20 +07:00
parent 3750fdcf79
commit 243620394a
9 changed files with 88 additions and 4 deletions
--- a/src/axolotl/common/architectures.py
+++ b/src/axolotl/common/architectures.py
@@ -12,7 +12,9 @@ MOE_ARCH_BLOCK = {
    "mixtral": "MixtralSparseMoeBlock",
    "qwen2_moe": "Qwen2MoeSparseMoeBlock",
    "qwen3_moe": "Qwen3MoeSparseMoeBlock",
+    "qwen3_vl_moe": "Qwen3VLMoeTextSparseMoeBlock",
    "deepseek_v2": "DeepseekV2MoE",
+    "deepseek_v3": "DeepseekV3MoE",
    "gpt_oss": "GptOssDecoderLayer",
    "lfm2_moe": "Lfm2MoeSparseMoeBlock",
 }
--- a/src/axolotl/utils/data/shared.py
+++ b/src/axolotl/utils/data/shared.py
@@ -239,6 +239,11 @@ def _load_from_local_path(
            return load_dataset(dataset_config.path, **load_dataset_kwargs)
    elif local_path.is_file():
        dataset_type = get_dataset_type(dataset_config)
+
+        # For single file datasets, HF always creates only a "train" split
+        if dataset_type in ("json", "csv", "text"):
+            load_dataset_kwargs["split"] = "train"
+
        return load_dataset(
            dataset_type,
            data_files=dataset_config.path,