diff --git a/docs/config.qmd b/docs/config.qmd
index 4686b8b3a..07a37db01 100644
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -78,8 +78,8 @@
 tf32: true # require >=ampere
 bfloat16: true # require >=ampere
 float16: true
-# Tensor parallel
-tp_size: 1 # should be set to the number of cuda devices available
+# Use tensor parallelism
+tensor_parallel: true # requires multi-GPU
 
 # Limit the memory for all available GPUs to this amount (if an integer, expressed in gigabytes); default: unset
 gpu_memory_limit: 20GiB
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 3e59730bd..382bff2d9 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -762,10 +762,8 @@ class ModelLoader:
             return hf_ds_cfg
 
         skip_move_to_device = False
-        if self.cfg.tp_size is not None:
-            # self.model_kwargs["tp_plan"] = "auto"
+        if self.cfg.tensor_parallel:
             del self.model_kwargs["device_map"]
-            # skip_move_to_device = True
 
         if (  # pylint: disable=condition-evals-to-constant)
             (self.cfg.fsdp and self.cfg.fsdp_config.fsdp_cpu_ram_efficient_loading)
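For anyone migrating an existing config, a minimal sketch of the change in YAML terms: the integer `tp_size` knob is replaced by the boolean `tensor_parallel` flag. Only the two keys shown here come from the diff above; the surrounding layout is illustrative, not a complete config.

```yaml
# Before this change (old key, explicit degree):
#   tp_size: 1  # should be set to the number of cuda devices available

# After this change (boolean flag; requires multiple GPUs):
tensor_parallel: true
```

When the flag is set, the loader also stops passing `device_map` to the model constructor (see the `ModelLoader` hunk above), since device placement is handled by the tensor-parallel path instead.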