From ac80a1b8b365845823319200c063c4225a6a2abb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BA=8E=E7=AC=91=E9=A2=9C?= Date: Tue, 24 Feb 2026 16:11:50 +0800 Subject: [PATCH] fix(r3,vlm): remove orphaned RoutingReplay from decoder rebuild. --- slime/backends/megatron_utils/model.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/slime/backends/megatron_utils/model.py b/slime/backends/megatron_utils/model.py index 45627e47ce..7740710b31 100644 --- a/slime/backends/megatron_utils/model.py +++ b/slime/backends/megatron_utils/model.py @@ -109,6 +109,20 @@ def setup_model_and_optimizer( wrap_model_provider_with_freeze(get_model_provider_func(args, role), args), ModelType.encoder_or_decoder ) + # Some models (e.g., Qwen3VLGPTModel) rebuild the decoder in __init__, + # which causes duplicate RoutingReplay registrations. Rebuild the list from + # the actual model modules to remove orphaned entries. + if os.environ.get("ENABLE_ROUTING_REPLAY", "0") == "1": + + from slime.utils.routing_replay import RoutingReplay + active_replays = [] + for model_chunk in model: + for module in model_chunk.modules(): + if hasattr(module, "routing_replay") and isinstance(module.routing_replay, RoutingReplay): + active_replays.append(module.routing_replay) + if active_replays: + RoutingReplay.all_routing_replays = active_replays + # Optimizer kwargs = {} for f in dataclasses.fields(OptimizerConfig):