Support device_map=sequential & max_memory config parameters (#903)

* Support device_map sequential (and others). Support max_memory in cfg.
* Update documentation in README accordingly.
* Update README.md

---------

Co-authored-by: Wing Lian <wing.lian@gmail.com>
2023-12-04 06:29:21 -08:00
parent a1da39cd48
commit 992e742cdc
3 changed files with 8 additions and 1 deletions
--- a/README.md
+++ b/README.md
@@ -612,6 +612,12 @@ eval_sample_packing:
 sample_packing_eff_est:
 total_num_tokens:

+# Passed through to transformers when loading the model, if launched without accelerate.
+# Use `sequential` when training with model parallelism to limit memory usage.
+device_map:
+# Defines the maximum memory usage per GPU on the system. Passed through to transformers when loading the model.
+max_memory:
+
 # If you want to use 'lora' or 'qlora' or leave blank to train all parameters in original model
 adapter: lora
 # If you already have a lora model trained that you want to load, put that here.
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -27,7 +27,7 @@ def choose_device(cfg):

    cfg.device = get_device()
    if cfg.world_size == 1:
-        cfg.device_map = "auto"
+        cfg.device_map = cfg.device_map or "auto"
    else:
        if cfg.device.startswith("cuda"):
            cfg.device_map = {"": torch.cuda.current_device()}
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -216,6 +216,7 @@ def load_model(
    model_kwargs = {}

    model_kwargs["device_map"] = cfg.device_map
+    model_kwargs["max_memory"] = cfg.max_memory
    model_kwargs["torch_dtype"] = cfg.torch_dtype

    if cfg.model_revision: