diff --git a/docs/config.qmd b/docs/config.qmd
index 4686b8b3a..07a37db01 100644
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -78,8 +78,8 @@
 tf32: true # require >=ampere
 bfloat16: true # require >=ampere
 float16: true
-# Tensor parallel
-tp_size: 1 # should be set to the number of cuda devices available
+# Use tensor parallelism
+tensor_parallel: true # requires multi-GPU
 
 # Limit the memory for all available GPUs to this amount (if an integer, expressed in gigabytes); default: unset
 gpu_memory_limit: 20GiB
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 3e59730bd..382bff2d9 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -762,10 +762,8 @@ class ModelLoader:
             return hf_ds_cfg
 
         skip_move_to_device = False
-        if self.cfg.tp_size is not None:
-            # self.model_kwargs["tp_plan"] = "auto"
+        if self.cfg.tensor_parallel:
             del self.model_kwargs["device_map"]
-            # skip_move_to_device = True
 
         if (  # pylint: disable=condition-evals-to-constant)
             (self.cfg.fsdp and self.cfg.fsdp_config.fsdp_cpu_ram_efficient_loading)
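For anyone migrating an existing config, a minimal sketch of the change in YAML terms: the integer `tp_size` knob is replaced by the boolean `tensor_parallel` flag. Only the two keys shown here come from the diff above; the surrounding layout is illustrative, not a complete config.

```yaml
# Before this change (old key, explicit degree):
#   tp_size: 1  # should be set to the number of cuda devices available

# After this change (boolean flag; requires multiple GPUs):
tensor_parallel: true
```

When the flag is set, the loader also stops passing `device_map` to the model constructor (see the `ModelLoader` hunk above), since device placement is handled by the tensor-parallel path instead.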