diff --git a/src/axolotl/monkeypatch/moe_quant.py b/src/axolotl/monkeypatch/moe_quant.py
index 702a5004e..be3b3e1e1 100644
--- a/src/axolotl/monkeypatch/moe_quant.py
+++ b/src/axolotl/monkeypatch/moe_quant.py
@@ -117,8 +117,10 @@ def quantize_moe_expert_params(model, quant_type=None, compress_statistics=None)
             quant_type=quant_type,
         )
         count += 1
+        # Free the bf16 → 4-bit conversion buffers after each parameter
+        # to avoid accumulating peak reserved VRAM.
+        torch.cuda.empty_cache()
 
-    torch.cuda.empty_cache()
     LOG.info(
         "Quantized %d MoE expert parameters to 4-bit (quant_type=%s, compress_statistics=%s)",
         count,