From 64345e7707e5fab6e07542df3afe2c0db98c5f1e Mon Sep 17 00:00:00 2001 From: Dan Saunders Date: Fri, 19 Sep 2025 12:58:58 -0400 Subject: [PATCH] recurse fix --- src/axolotl/kernels/moe/torch_grouped.py | 9 ++++++++- src/axolotl/monkeypatch/moe_grouped.py | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/axolotl/kernels/moe/torch_grouped.py b/src/axolotl/kernels/moe/torch_grouped.py index 3ea7b9045..68ff631bc 100644 --- a/src/axolotl/kernels/moe/torch_grouped.py +++ b/src/axolotl/kernels/moe/torch_grouped.py @@ -286,7 +286,14 @@ def moe_ffn_forward_grouped( ) return None, None - parent_block = getattr(experts_module, "_ax_parent_block", None) + parent_block = None + parent_ref = getattr(experts_module, "_ax_parent_block_ref", None) + if parent_ref is not None: + try: + parent_block = parent_ref() + except TypeError: + parent_block = None + expert_container = getattr(experts_module, "experts", experts_module) expert_impls = _iter_expert_impls(expert_container) diff --git a/src/axolotl/monkeypatch/moe_grouped.py b/src/axolotl/monkeypatch/moe_grouped.py index 55eeffa89..7bfcd4d59 100644 --- a/src/axolotl/monkeypatch/moe_grouped.py +++ b/src/axolotl/monkeypatch/moe_grouped.py @@ -1,4 +1,5 @@ import logging +import weakref from functools import wraps import torch @@ -78,7 +79,7 @@ def apply_grouped_to_moe_blocks(cfg=None) -> None: bsz, seqlen, hdim = hidden_states.shape # expose parent block so grouped backend can access shared expert context try: - self.experts._ax_parent_block = self + self.experts._ax_parent_block_ref = weakref.ref(self) except Exception: pass y, router_logits = _tg.moe_ffn_forward_grouped(