Post release fixes (#2581)

* fix missing kwarg on child
* make the runpod test shorter
* update docs
* rename runpod test json file
* typing fixes and ordering of doc
2025-04-29 10:01:38 -04:00
parent fedbcc0254
commit 80b4edb4a7
4 changed files with 27 additions and 20 deletions
--- a/.runpod/test-input.json
+++ b/.runpod/test-input.json
@@ -12,22 +12,22 @@
      "base_model": "HuggingFaceTB/SmolLM2-135M",
      "model_type": "AutoModelForCausalLM",
      "tokenizer_type": "AutoTokenizer",
-      "load_in_8bit": true,
-      "load_in_4bit": false,
+      "load_in_4bit": true,
      "strict": false,
      "datasets": [
        {
          "path": "mhenrichsen/alpaca_2k_test",
-          "type": "alpaca"
+          "type": "alpaca",
+          "split": "train[:10%]"
        }
      ],
-      "val_set_size": 0.05,
+      "val_set_size": 0.02,
      "output_dir": "./outputs/lora-out",
      "sequence_len": 4096,
      "sample_packing": true,
      "eval_sample_packing": false,
      "pad_to_sequence_len": true,
-      "adapter": "lora",
+      "adapter": "qlora",
      "lora_r": 32,
      "lora_alpha": 64,
      "lora_dropout": 0.05,
@@ -36,8 +36,8 @@
        "embed_tokens",
        "lm_head"
      ],
-      "gradient_accumulation_steps": 4,
-      "micro_batch_size": 2,
+      "gradient_accumulation_steps": 2,
+      "micro_batch_size": 1,
      "num_epochs": 1,
      "optimizer": "adamw_torch_fused",
      "lr_scheduler": "cosine",
@@ -56,7 +56,8 @@
      "weight_decay": 0.0,
      "special_tokens": {
        "pad_token": "<|endoftext|>"
-      }
+      },
+      "max_steps": 20
    },
    "timeout": 100000
  },