From eb13054672531015650e48abb1d3c5104e0e1ea5 Mon Sep 17 00:00:00 2001
From: NanoCode012 <nano@axolotl.ai>
Date: Wed, 25 Feb 2026 14:49:46 +0700
Subject: [PATCH] fix: apply state_dict CPU copy only in context parallel (CP) mode

---
 src/axolotl/core/trainers/base.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/axolotl/core/trainers/base.py b/src/axolotl/core/trainers/base.py
index 354177baf..d055608dd 100644
--- a/src/axolotl/core/trainers/base.py
+++ b/src/axolotl/core/trainers/base.py
@@ -720,10 +720,14 @@ class AxolotlTrainer(
         os.makedirs(output_dir, exist_ok=True)
         LOG.info(f"Saving model checkpoint to {output_dir}")
 
-        # fix for Context Parallel save
-        if state_dict is None:
-            state_dict = self.accelerator.get_state_dict(self.model)
-        if state_dict is not None:
+        # fix for Context Parallel save: CP eval invalidates tensor storage
+        # pointers, so clone to CPU to get fresh valid storage for safetensors
+        if (
+            state_dict is not None
+            and self.axolotl_cfg
+            and self.axolotl_cfg.context_parallel_size
+            and self.axolotl_cfg.context_parallel_size > 1
+        ):
             state_dict = {
                 k: v.detach().cpu() if isinstance(v, torch.Tensor) else v
                 for k, v in state_dict.items()