ROCm · carlushuang · May 9, 2026 · May 9, 2026 · May 9, 2026 · May 10, 2026
diff --git a/atom/config.py b/atom/config.py
@@ -512,6 +512,7 @@ def _remap_layer_name(name: str) -> list[str]:
     "kimi_k25": "text_config",
     "qwen3_5": "text_config",
     "qwen3_5_moe": "text_config",
+    "mistral3": "text_config",
 }
 
 # multimodal models fully supported by plugin mode

diff --git a/atom/model_engine/model_runner.py b/atom/model_engine/model_runner.py
@@ -67,6 +67,8 @@
     "KimiK25ForConditionalGeneration": "atom.models.kimi_k25.KimiK25ForCausalLM",
     "MiniMaxM2ForCausalLM": "atom.models.minimax_m2.MiniMaxM2ForCausalLM",
     "MiMoV2FlashForCausalLM": "atom.models.mimo_v2_flash.MiMoV2FlashForCausalLM",
+    "Mistral3ForConditionalGeneration": "atom.models.mistral3.Mistral3TextOnly",
+    "MistralForCausalLM": "atom.models.mistral3.Mistral3ForCausalLM",
 }
 # seed = 34567
 # np.random.seed(seed)

diff --git a/atom/model_ops/activation.py b/atom/model_ops/activation.py
@@ -2,17 +2,28 @@
 # Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
 
 import torch
-from typing import Optional
-from torch import nn
 import torch.nn.functional as F
-from aiter import silu_and_mul
-from atom.config import QuantizationConfig
-from atom.quant_spec import LayerQuantConfig
-from aiter.jit.utils.torch_guard import torch_compile_guard
-
 from aiter import (
     QuantType,
+    silu_and_mul,
 )
+from aiter.jit.utils.torch_guard import torch_compile_guard
+from atom.config import QuantizationConfig
+from atom.quant_spec import LayerQuantConfig
+from torch import nn
+from typing import Optional
+
+
+def _detect_gfx1201() -> bool:
+    """Return True when device 0 reports a gfx1201 (RDNA4) arch name."""
+    try:
+        arch_name = torch.cuda.get_device_properties(0).gcnArchName or ""
+        return arch_name.startswith("gfx1201")
+    except Exception:  # best-effort probe: any failure means "not gfx1201"
+        return False
+
+
+_IS_GFX1201: bool = _detect_gfx1201()  # probed once, when this module is imported
 
 
 def mxfp4_act_mul_quant_fuse_fake(
@@ -84,6 +95,19 @@ def forward_native(
     def forward(
         self, x: torch.Tensor, x_scale: Optional[torch.Tensor] = None
     ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
+        # gfx1201 (RDNA4): aiter prebuilt silu_and_mul HIP kernel has no
+        # gfx1201 code object (CDNA-only v_pk_mul_f32). Use the portable
+        # triton silu_and_mul added in aiter PR #3168 (which mirrors the
+        # HIP signature out=fn(x)).
+        if _IS_GFX1201:
+            from aiter.ops.triton.activation import (
+                silu_and_mul as _aiter_silu_mul_triton,
+            )
+
+            half = x.shape[-1] // 2
+            out = torch.empty((*x.shape[:-1], half), dtype=x.dtype, device=x.device)
+            _aiter_silu_mul_triton(out, x)
+            return out
         # fp8 quantization
         if x_scale is not None and self.fused_quant:
             from aiter.ops.triton.fused_fp8_quant import (