Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions amd/comgr/src/hotswap/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ add_library(hotswap-transpiler OBJECT
raiser.cpp
code_object_utils.cpp
mc_state.cpp
canonical_op.cpp
)

if(NOT TARGET hotswap::transpiler)
Expand Down
80 changes: 80 additions & 0 deletions amd/comgr/src/hotswap/amdgpu_formats.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
//===- amdgpu_formats.h - Hotswap transpiler ------------------------------===//
//
// Part of Comgr, under the Apache License v2.0 with LLVM Exceptions. See
// amd/comgr/LICENSE.TXT in this repository for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef HOTSWAP_TRANSPILER_AMDGPU_FORMATS_H
#define HOTSWAP_TRANSPILER_AMDGPU_FORMATS_H

#include <cstdint>

// Source tree: lib/Target/AMDGPU/SIDefines.h — target-private but exposed
// through the LLVM build tree via our CMake include path. Provides the
// authoritative `SIInstrFlags` enum and `AMDGPU::OPERAND_INPUT_MODS` operand
// type used by the disassembler's TSFlags / OperandType fields.
#include "SIDefines.h"

#include "Utils/AMDGPUBaseInfo.h" // AMDGPU::isVOPD

namespace COMGR::hotswap {

// Alias `COMGR::hotswap::SIInstrFlags` to the LLVM namespace so existing call
// sites (`SIInstrFlags::SOPP`, `SIInstrFlags::FLAT`, etc.) keep compiling.
// NOTE(review): in LLVM's SIDefines.h `SIInstrFlags` is a namespace wrapping
// an unscoped enum, which is what makes this namespace alias legal.
namespace SIInstrFlags = llvm::SIInstrFlags;

// AMDGPU target-specific operand type for VOP3 source modifiers (abs, neg).
// Defined in llvm::AMDGPU::OperandType from SIDefines.h. Mirrored here as a
// plain constant so callers need not spell out the nested LLVM name.
constexpr unsigned OPERAND_INPUT_MODS = llvm::AMDGPU::OPERAND_INPUT_MODS;

// Human-readable encoding-family label for diagnostics. No runtime dispatch
// keys off this string — it feeds error messages in the decoder only.
//
// Test order is significant and mirrors LLVM's own decoder:
//   * VOPD has no dedicated TSFlags bit (LLVM's VOPD3 bit varies across
//     versions), so it is recognized by opcode via `AMDGPU::isVOPD` before
//     any flag test.
//   * `IsMAI` is a VOP3 subclass — tested ahead of VOP3/VOP3P.
//   * `DPP` / `SDWA` are orthogonal encoding bits that coexist with
//     VOP1/VOP2/VOPC — tested first so such ops are not labeled VOP1/2.
//   * `VOP3P` coexists with `VOP3` on some subtargets — VOP3P first.
//   * The gfx1250 TENSOR pseudos (`tensor_load_to_lds_d{2,4}`,
//     `tensor_store_from_lds_d{2,4}`, MIMGInstructions.td:2049-2113) extend
//     `InstSI` directly and only set `let VALU = 1` / `let TENSOR_CNT = 1`,
//     so their `VIMAGE` field stays 0; they are caught via the `TENSOR_CNT`
//     bit instead. The only other `TENSOR_CNT` user, `s_wait_tensorcnt`, is
//     SOPP and matches earlier. Mapping both arms to "VIMAGE" lets
//     `kerneldex`/`raise_cli` bucket cross-target failures as
//     `[format=VIMAGE]` rather than `[format=Unknown]`, which the handler
//     refusal contract is keyed on.
inline const char *formatName(uint64_t flags, unsigned opc) {
  // Opcode-based check first: VOPD has no reliable TSFlags bit.
  if (llvm::AMDGPU::isVOPD(opc))
    return "VOPD";

  struct FlagLabel {
    uint64_t Mask;
    const char *Label;
  };
  // Order matters — see the precedence notes in the header comment.
  static constexpr FlagLabel Table[] = {
      {SIInstrFlags::IsMAI, "MFMA"},
      {SIInstrFlags::DPP, "DPP"},
      {SIInstrFlags::SDWA, "SDWA"},
      {SIInstrFlags::SOPP, "SOPP"},
      {SIInstrFlags::SOPC, "SOPC"},
      {SIInstrFlags::SOP1, "SOP1"},
      {SIInstrFlags::SOP2, "SOP2"},
      {SIInstrFlags::SOPK, "SOPK"},
      {SIInstrFlags::VOPC, "VOPC"},
      {SIInstrFlags::VOP3P, "VOP3P"},
      {SIInstrFlags::VOP3, "VOP3"},
      {SIInstrFlags::VOP2, "VOP2"},
      {SIInstrFlags::VOP1, "VOP1"},
      {SIInstrFlags::SMRD, "SMEM"},
      {SIInstrFlags::FLAT, "FLAT"},
      {SIInstrFlags::MUBUF, "MUBUF"},
      {SIInstrFlags::DS, "DS"},
      {SIInstrFlags::VIMAGE, "VIMAGE"},
      {SIInstrFlags::TENSOR_CNT, "VIMAGE"}, // gfx1250 TENSOR pseudos
  };

  for (const FlagLabel &Entry : Table)
    if (flags & Entry.Mask)
      return Entry.Label;
  return "Unknown";
}

} // namespace COMGR::hotswap

#endif
270 changes: 270 additions & 0 deletions amd/comgr/src/hotswap/canonical_op.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
//===- canonical_op.cpp - Hotswap transpiler ------------------------------===//
//
// Part of Comgr, under the Apache License v2.0 with LLVM Exceptions. See
// amd/comgr/LICENSE.TXT in this repository for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "canonical_op.h"

namespace COMGR::hotswap {

// canonicalOpName — map a `CanonicalOp` enumerator to its spelling, for use
// in diagnostics and logs. Purely mechanical: the local `S(N)` macro expands
// each case to `case CanonicalOp::N: return "N";`.
//
// The cases are kept in alphabetical-ish groups to match the layout of the
// `CanonicalOp` enum in `canonical_op.h`. Because the switch has no
// `default`, a newly added enumerator with a missing case here is flagged by
// `-Wswitch` — NOTE(review): `-Wswitch` is part of `-Wall`, not enabled by
// default; this assumes the build passes `-Wall` (or `-Wswitch`) — confirm
// against the CMake flags.
const char *canonicalOpName(CanonicalOp Op) {
#define S(N) case CanonicalOp::N: return #N;
switch (Op) {
S(Unknown)
// SOPP / control flow
S(S_ENDPGM) S(S_NOP) S(S_BRANCH) S(S_CODE_END)
S(S_CBRANCH_SCC0) S(S_CBRANCH_SCC1)
S(S_CBRANCH_VCCZ) S(S_CBRANCH_VCCNZ)
S(S_CBRANCH_EXECZ) S(S_CBRANCH_EXECNZ)
S(S_WAITCNT) S(S_WAIT_LOADCNT) S(S_WAIT_KMCNT) S(S_WAIT_DSCNT) S(S_WAIT_XCNT)
S(S_WAIT_ASYNCCNT) S(S_WAIT_TENSORCNT)
S(S_WAIT_LOADCNT_DSCNT) S(S_WAIT_ALU)
S(S_CLAUSE) S(S_DELAY_ALU) S(S_SET_GPR_IDX_ON) S(S_SET_GPR_IDX_OFF) S(S_SETVSKIP)
S(S_BARRIER) S(S_BARRIER_WAIT) S(S_BARRIER_SIGNAL)
// SMEM
S(S_LOAD_B32) S(S_LOAD_B64) S(S_LOAD_B96) S(S_LOAD_B128)
S(S_LOAD_B256) S(S_LOAD_B512)
S(S_LOAD_U8) S(S_LOAD_I8) S(S_LOAD_U16) S(S_LOAD_I16)
S(S_STORE_B32) S(S_STORE_B64) S(S_STORE_B128)
// SOPC
S(S_CMP_EQ_U32) S(S_CMP_LG_U32) S(S_CMP_GT_U32) S(S_CMP_GE_U32)
S(S_CMP_LT_U32) S(S_CMP_LE_U32)
S(S_CMP_EQ_U64) S(S_CMP_LG_U64)
S(S_CMP_EQ_I32) S(S_CMP_LG_I32) S(S_CMP_GT_I32) S(S_CMP_GE_I32)
S(S_CMP_LT_I32) S(S_CMP_LE_I32)
S(S_CMP_EQ_F32) S(S_CMP_LG_F32) S(S_CMP_GT_F32) S(S_CMP_GE_F32)
S(S_CMP_LT_F32) S(S_CMP_LE_F32) S(S_CMP_NEQ_F32)
S(S_CMP_NGT_F32) S(S_CMP_NGE_F32) S(S_CMP_NLT_F32) S(S_CMP_NLE_F32) S(S_CMP_NLG_F32)
S(S_CMP_EQ_F16) S(S_CMP_LG_F16) S(S_CMP_GT_F16) S(S_CMP_GE_F16)
S(S_CMP_LT_F16) S(S_CMP_LE_F16) S(S_CMP_NEQ_F16)
S(S_CMP_NGT_F16) S(S_CMP_NGE_F16) S(S_CMP_NLT_F16) S(S_CMP_NLE_F16) S(S_CMP_NLG_F16)
// SOPK
S(S_MOVK_I32) S(S_ADDK_I32) S(S_MULK_I32)
S(S_CMPK_GE_I32) S(S_CMPK_GT_I32) S(S_CMPK_LE_I32) S(S_CMPK_LT_I32)
S(S_CMPK_GE_U32) S(S_CMPK_GT_U32) S(S_CMPK_LE_U32) S(S_CMPK_LT_U32)
S(S_CMPK_EQ_I32) S(S_CMPK_EQ_U32) S(S_CMPK_LG_I32) S(S_CMPK_LG_U32)
S(S_GETREG_B32) S(S_SETREG_B32) S(S_SETREG_IMM32_B32)
// SOP1
S(S_MOV_B32) S(S_MOV_B64) S(S_NOT_B32) S(S_NOT_B64)
S(S_BREV_B32) S(S_FF1_I32_B32) S(S_FF1_I32_B64)
S(S_FF0_I32_B32) S(S_FF0_I32_B64)
S(S_FLBIT_I32_B32) S(S_FLBIT_I32_B64) S(S_FLBIT_I32) S(S_FLBIT_I32_I64)
S(S_SEXT_I32_I8) S(S_SEXT_I32_I16)
S(S_CVT_F32_U32) S(S_CVT_F32_I32) S(S_CVT_U32_F32) S(S_CVT_I32_F32)
S(S_AND_SAVEEXEC_B32) S(S_OR_SAVEEXEC_B32) S(S_XOR_SAVEEXEC_B32)
S(S_ANDN2_SAVEEXEC_B32) S(S_ORN2_SAVEEXEC_B32)
S(S_GETPC_B64)
S(S_SET_PC_I64)
S(S_SWAP_PC_I64)
S(S_ABS_I32)
S(S_SET_VGPR_MSB)
S(S_BITSET0_B32) S(S_BITSET1_B32)
S(S_BITSET0_B64) S(S_BITSET1_B64)
S(S_BITCMP0_B32) S(S_BITCMP1_B32)
S(S_BITCMP0_B64) S(S_BITCMP1_B64)
S(S_CMOV_B32) S(S_CMOV_B64)
// SOP2
S(S_ADD_U32) S(S_ADDC_U32) S(S_SUB_U32) S(S_SUBB_U32)
S(S_AND_B32) S(S_AND_B64) S(S_OR_B32) S(S_OR_B64) S(S_XOR_B32) S(S_XOR_B64)
S(S_ANDN2_B32) S(S_ANDN2_B64) S(S_ORN2_B32) S(S_ORN2_B64)
S(S_NAND_B32) S(S_NAND_B64) S(S_NOR_B32) S(S_NOR_B64)
S(S_XNOR_B32) S(S_XNOR_B64)
S(S_ABSDIFF_I32)
S(S_LSHL_B32) S(S_LSHL_B64) S(S_LSHR_B32) S(S_LSHR_B64) S(S_ASHR_I32) S(S_ASHR_I64)
S(S_MUL_I32) S(S_MUL_HI_U32) S(S_MUL_HI_I32) S(S_MUL_U64) S(S_MUL_F32) S(S_ADD_F32) S(S_SUB_F32)
S(S_FMAC_F32)
S(S_MAX_NUM_F32) S(S_MIN_NUM_F32)
S(S_BFE_U32) S(S_BFE_I32) S(S_BFM_B32) S(S_BFM_B64)
S(S_CSELECT_B32) S(S_CSELECT_B64)
S(S_MIN_I32) S(S_MIN_U32) S(S_MAX_I32) S(S_MAX_U32)
S(S_PACK_LL_B32_B16) S(S_PACK_LH_B32_B16)
S(S_LSHL1_ADD_U32) S(S_LSHL2_ADD_U32) S(S_LSHL3_ADD_U32) S(S_LSHL4_ADD_U32)
S(S_ADD_NC_U64) S(S_SUB_NC_U64)
// VOP1
S(V_MOV_B32) S(V_MOV_B64) S(V_NOP) S(V_NOT_B32) S(V_BFREV_B32)
S(V_SWAP_B32)
S(V_CVT_F32_I32) S(V_CVT_F32_U32) S(V_CVT_I32_F32) S(V_CVT_U32_F32)
S(V_CVT_F16_F32) S(V_CVT_F32_F16) S(V_CVT_F32_BF16)
S(V_CVT_F32_UBYTE0) S(V_CVT_F32_UBYTE1) S(V_CVT_F32_UBYTE2) S(V_CVT_F32_UBYTE3)
S(V_CVT_F64_U32) S(V_CVT_F64_I32) S(V_CVT_U32_F64)
S(V_RCP_IFLAG_F32) S(V_RCP_F32) S(V_RSQ_F32) S(V_SQRT_F32)
S(V_EXP_F32) S(V_LOG_F32)
S(V_S_EXP_F32) S(V_S_LOG_F32) S(V_S_RCP_F32) S(V_S_RSQ_F32) S(V_S_SQRT_F32)
S(V_LDEXP_F32)
S(V_FLOOR_F32) S(V_CEIL_F32) S(V_TRUNC_F32) S(V_FRACT_F32)
S(V_READFIRSTLANE_B32)
S(V_FFBH_U32) S(V_FFBL_B32) S(V_FFBH_I32)
S(V_CVT_PK_F32_FP8) S(V_CVT_PK_F32_BF8)
S(V_CVT_F32_FP8) S(V_CVT_F32_BF8)
S(V_CVT_SCALE_PK8_BF16_FP4)
// VOP2 / VOP3
S(V_ADD_F32) S(V_SUB_F32) S(V_SUBREV_F32) S(V_MUL_F32)
S(V_FMAC_F32) S(V_FMA_F32) S(V_FMAMK_F32) S(V_FMAAK_F32)
S(V_MAX_F32) S(V_MIN_F32)
S(V_ADD_NC_U32) S(V_SUB_NC_U32) S(V_SUBREV_NC_U32)
S(V_ADD_CO_U32) S(V_ADD_CO_CI_U32)
S(V_SUB_CO_U32) S(V_SUBREV_CO_U32) S(V_SUB_CO_CI_U32) S(V_SUBREV_CO_CI_U32)
S(V_AND_B32) S(V_OR_B32) S(V_XOR_B32) S(V_XNOR_B32)
S(V_LSHLREV_B32) S(V_LSHRREV_B32) S(V_ASHRREV_I32)
S(V_CNDMASK_B32)
S(V_MUL_LO_U32) S(V_MUL_HI_U32) S(V_MUL_HI_I32)
S(V_MUL_I32_I24) S(V_MUL_U32_U24) S(V_MUL_HI_U32_U24) S(V_MUL_HI_I32_I24)
S(V_MAD_U32_U24) S(V_MAD_U32)
S(V_ADD3_U32) S(V_LSHL_ADD_U32) S(V_ADD_LSHL_U32)
S(V_LSHL_OR_B32) S(V_AND_OR_B32) S(V_OR3_B32) S(V_XAD_U32) S(V_XOR3_B32)
S(V_ALIGNBIT_B32)
S(V_ADD_NC_U16)
S(V_BFE_U32) S(V_BFE_I32) S(V_BFI_B32) S(V_PERM_B32)
S(V_MBCNT_LO_U32_B32) S(V_MBCNT_HI_U32_B32)
S(V_READLANE_B32) S(V_WRITELANE_B32)
S(V_MED3_F32) S(V_MAX3_F32) S(V_MIN3_F32) S(V_MAX3_NUM_F32)
S(V_MAX3_U32) S(V_MED3_I32) S(V_MINMAX_NUM_F32)
S(V_MAX_NUM_F32) S(V_MIN_NUM_F32)
S(V_MAXIMUM_F32) S(V_MINIMUM_F32)
S(V_DIV_FIXUP_F32) S(V_DIV_FMAS_F32) S(V_DIV_SCALE_F32)
S(V_FMA_MIX_F32) S(V_FMA_MIX_F32_BF16) S(V_FMA_MIXLO_BF16)
S(V_ADD_F16) S(V_MUL_F16) S(V_SUB_F16) S(V_SUBREV_F16)
S(V_MAC_F16) S(V_FMAC_F16) S(V_MADMK_F16) S(V_MADAK_F16)
S(V_MAX_F16) S(V_MIN_F16) S(V_LDEXP_F16) S(V_FLOOR_F16)
S(V_CVT_F16_U16) S(V_CVT_U16_F16)
S(V_ASHRREV_I16) S(V_LSHRREV_B16) S(V_LSHLREV_B16)
S(V_MAX_U16) S(V_MIN_U16) S(V_MAX_I16) S(V_MIN_I16)
S(V_ADD_U16) S(V_SUB_U16) S(V_SUBREV_U16) S(V_MUL_LO_U16)
S(V_DOT2C_I32_I16) S(V_DOT4C_I32_I8) S(V_DOT8C_I32_I4)
S(V_PK_FMAC_F16)
S(V_PACK_B32_F16)
S(V_CVT_PK_BF16_F32) S(V_CVT_PK_BF8_F32) S(V_CVT_PK_FP8_F32)
S(V_CVT_PKRTZ_F16_F32) S(V_CVT_PK_F16_F32)
S(V_CVT_SCALEF32_PK_FP4_F32)
S(V_BFM_B32)
// VOP2/VOP3 FP64
S(V_ADD_F64) S(V_MUL_F64) S(V_FMA_F64) S(V_FMAC_F64)
S(V_RCP_F64)
S(V_MAX_U32) S(V_MIN_U32) S(V_MAX_I32) S(V_MIN_I32)
S(V_PERMLANE16_B32) S(V_PERMLANEX16_B32) S(V_PERMLANE64_B32)
S(V_PERMLANE16_SWAP_B32) S(V_PERMLANE32_SWAP_B32)
// VOPC
S(V_CMP) S(V_CMPX)
// VOP3P
S(V_PK_ADD_F32) S(V_PK_MUL_F32) S(V_PK_FMA_F32)
S(V_PK_MAX_F32) S(V_PK_MIN_F32) S(V_PK_MOV_B32)
S(V_PK_ADD_U16) S(V_PK_LSHLREV_B16)
S(V_BITOP3_B32) S(V_BITOP3_B16)
S(V_ADD_I32) S(V_SUB_I32)
S(V_LSHLREV_B64) S(V_LSHRREV_B64) S(V_ASHRREV_I64)
S(V_LSHL_ADD_U64) S(V_ADD_NC_U64) S(V_SUB_NC_U64)
S(V_MAX_I64) S(V_MAX_U64) S(V_MIN_I64) S(V_MIN_U64)
S(V_MUL_U64)
S(V_MAD_U64_U32) S(V_MAD_CO_U64_U32)
S(V_MAD_NC_U64_U32) S(V_MAD_NC_I64_I32)
// FLAT / GLOBAL
S(FLAT_LOAD_UBYTE) S(FLAT_LOAD_SBYTE) S(FLAT_LOAD_USHORT) S(FLAT_LOAD_SSHORT)
S(FLAT_LOAD_DWORD) S(FLAT_LOAD_DWORDX2) S(FLAT_LOAD_DWORDX3) S(FLAT_LOAD_DWORDX4)
S(FLAT_STORE_BYTE) S(FLAT_STORE_SHORT) S(FLAT_STORE_SHORT_D16_HI)
S(FLAT_STORE_DWORD) S(FLAT_STORE_DWORDX2) S(FLAT_STORE_DWORDX3) S(FLAT_STORE_DWORDX4)
S(GLOBAL_LOAD_UBYTE) S(GLOBAL_LOAD_SBYTE) S(GLOBAL_LOAD_USHORT) S(GLOBAL_LOAD_SSHORT)
S(GLOBAL_LOAD_SHORT_D16_HI)
S(GLOBAL_LOAD_DWORD) S(GLOBAL_LOAD_DWORDX2) S(GLOBAL_LOAD_DWORDX3) S(GLOBAL_LOAD_DWORDX4)
S(GLOBAL_STORE_BYTE) S(GLOBAL_STORE_SHORT) S(GLOBAL_STORE_SHORT_D16_HI)
S(GLOBAL_STORE_DWORD) S(GLOBAL_STORE_DWORDX2) S(GLOBAL_STORE_DWORDX3) S(GLOBAL_STORE_DWORDX4)
S(SCRATCH_LOAD_DWORD) S(SCRATCH_LOAD_DWORDX2) S(SCRATCH_LOAD_DWORDX3) S(SCRATCH_LOAD_DWORDX4)
S(SCRATCH_STORE_DWORD) S(SCRATCH_STORE_DWORDX2) S(SCRATCH_STORE_DWORDX3) S(SCRATCH_STORE_DWORDX4)
// FLAT atomics
S(FLAT_ATOMIC_ADD) S(FLAT_ATOMIC_SUB)
S(FLAT_ATOMIC_AND) S(FLAT_ATOMIC_OR) S(FLAT_ATOMIC_XOR)
S(FLAT_ATOMIC_SMIN) S(FLAT_ATOMIC_SMAX) S(FLAT_ATOMIC_UMIN) S(FLAT_ATOMIC_UMAX)
S(FLAT_ATOMIC_SWAP) S(FLAT_ATOMIC_CMPSWAP)
S(FLAT_ATOMIC_ADD_F32)
// GLOBAL atomics
S(GLOBAL_ATOMIC_ADD) S(GLOBAL_ATOMIC_SUB)
S(GLOBAL_ATOMIC_AND) S(GLOBAL_ATOMIC_OR) S(GLOBAL_ATOMIC_XOR)
S(GLOBAL_ATOMIC_SMIN) S(GLOBAL_ATOMIC_SMAX) S(GLOBAL_ATOMIC_UMIN) S(GLOBAL_ATOMIC_UMAX)
S(GLOBAL_ATOMIC_SWAP) S(GLOBAL_ATOMIC_CMPSWAP)
S(GLOBAL_ATOMIC_ADD_F32)
S(GLOBAL_ATOMIC_PK_ADD_BF16) S(GLOBAL_ATOMIC_PK_ADD_F16)
// SMEM atomics
S(S_ATOMIC_SWAP)
S(S_ATOMIC_DEC)
// DS
S(DS_LOAD_TR16_B128)
S(DS_READ_B64_TR_B16)
S(DS_READ_B64_TR_B8)
S(DS_LOAD_TR8_B64)
S(DS_READ_B32) S(DS_READ_B64) S(DS_READ_B96) S(DS_READ_B128)
S(DS_READ2_B32) S(DS_READ2_B64)
S(DS_READ2ST64_B32) S(DS_READ2ST64_B64)
S(DS_READ_U16) S(DS_READ_I16) S(DS_READ_U8) S(DS_READ_I8)
S(DS_WRITE_B32) S(DS_WRITE_B64) S(DS_WRITE_B96) S(DS_WRITE_B128)
S(DS_WRITE2_B32) S(DS_WRITE2_B64)
S(DS_WRITE2ST64_B32) S(DS_WRITE2ST64_B64)
S(DS_WRITE_B16) S(DS_WRITE_B8)
S(DS_WRITE_B16_D16_HI) S(DS_WRITE_B8_D16_HI)
S(DS_BPERMUTE_B32)
S(DS_SWIZZLE_B32)
// MUBUF
S(BUFFER_LOAD_DWORD) S(BUFFER_LOAD_DWORDX2) S(BUFFER_LOAD_DWORDX3) S(BUFFER_LOAD_DWORDX4)
S(BUFFER_LOAD_UBYTE) S(BUFFER_LOAD_SBYTE) S(BUFFER_LOAD_USHORT) S(BUFFER_LOAD_SSHORT)
S(BUFFER_LOAD_SHORT_D16) S(BUFFER_LOAD_SHORT_D16_HI)
S(BUFFER_LOAD_UBYTE_D16) S(BUFFER_LOAD_UBYTE_D16_HI)
S(BUFFER_LOAD_SBYTE_D16) S(BUFFER_LOAD_SBYTE_D16_HI)
S(BUFFER_LOAD_DWORD_LDS) S(BUFFER_LOAD_DWORDX2_LDS)
S(BUFFER_LOAD_DWORDX4_LDS) S(BUFFER_STORE_DWORDX4_LDS)
S(BUFFER_STORE_DWORD) S(BUFFER_STORE_DWORDX2) S(BUFFER_STORE_DWORDX3) S(BUFFER_STORE_DWORDX4)
S(BUFFER_STORE_BYTE) S(BUFFER_STORE_SHORT)
// MUBUF atomics
S(BUFFER_ATOMIC_ADD) S(BUFFER_ATOMIC_SUB)
S(BUFFER_ATOMIC_AND) S(BUFFER_ATOMIC_OR) S(BUFFER_ATOMIC_XOR)
S(BUFFER_ATOMIC_SWAP) S(BUFFER_ATOMIC_CMPSWAP)
S(BUFFER_ATOMIC_ADD_F32)
S(BUFFER_ATOMIC_PK_ADD_BF16) S(BUFFER_ATOMIC_PK_ADD_F16)
// MFMA
S(V_MFMA_F32_16x16x128_F8F6F4) S(V_MFMA_SCALE_F32_16x16x128_F8F6F4)
S(V_MFMA_F32_32x32x64_F8F6F4) S(V_MFMA_SCALE_F32_32x32x64_F8F6F4)
S(V_MFMA_F32_16x16x16_F16) S(V_MFMA_F32_32x32x8_F16)
S(V_MFMA_F32_16x16x4_F32) S(V_MFMA_F32_32x32x1_F32) S(V_MFMA_F32_32x32x2_F32)
S(V_MFMA_F32_4x4x1_F32) S(V_MFMA_F32_16x16x1_F32)
S(V_MFMA_F32_32x32x4_F16) S(V_MFMA_F32_16x16x4_F16) S(V_MFMA_F32_4x4x4_F16)
S(V_MFMA_I32_16x16x32_I8) S(V_MFMA_I32_32x32x16_I8)
S(V_MFMA_I32_32x32x4_I8) S(V_MFMA_I32_16x16x4_I8) S(V_MFMA_I32_4x4x4_I8)
S(V_MFMA_F32_16x16x8_XF32) S(V_MFMA_F32_32x32x4_XF32)
S(V_MFMA_F32_32x32x2_BF16) S(V_MFMA_F32_16x16x2_BF16) S(V_MFMA_F32_4x4x2_BF16)
S(V_MFMA_F32_16x16x16_BF16_1K) S(V_MFMA_F32_32x32x8_BF16_1K)
S(V_MFMA_F32_16x16x32_BF16) S(V_MFMA_F32_32x32x16_BF16)
S(V_MFMA_F32_16x16x32_F16)
S(V_MFMA_F32_16x16x32_FP8_FP8) S(V_MFMA_F32_16x16x32_FP8_BF8)
S(V_MFMA_F32_16x16x32_BF8_FP8) S(V_MFMA_F32_16x16x32_BF8_BF8)
S(V_MFMA_F32_32x32x16_FP8_FP8) S(V_MFMA_F32_32x32x16_FP8_BF8)
S(V_MFMA_F32_32x32x16_BF8_FP8) S(V_MFMA_F32_32x32x16_BF8_BF8)
// WMMA
S(V_WMMA_F32_16x16x32_F16) S(V_WMMA_F32_16x16x32_BF16)
S(V_WMMA_F32_16x16x4_F32)
S(V_WMMA_F32_16x16x64_FP8_FP8) S(V_WMMA_F32_16x16x64_FP8_BF8)
S(V_WMMA_F32_16x16x64_BF8_FP8) S(V_WMMA_F32_16x16x64_BF8_BF8)
S(V_WMMA_I32_16x16x64_IU8)
S(V_WMMA_SCALE_F32_16x16x128_F8F6F4)
// VOPD
S(VOPD_GENERIC)
// VIMAGE TENSOR (gfx1250-only)
S(TENSOR_LOAD_TO_LDS) S(TENSOR_STORE_FROM_LDS)
// FLAT async global → LDS (gfx1250-only)
S(GLOBAL_LOAD_ASYNC_TO_LDS_B8) S(GLOBAL_LOAD_ASYNC_TO_LDS_B32)
S(GLOBAL_LOAD_ASYNC_TO_LDS_B64) S(GLOBAL_LOAD_ASYNC_TO_LDS_B128)
// FLAT VMEM prefetch (gfx1250-only, hint-class)
S(GLOBAL_PREFETCH_B8)
// AGPR
S(V_ACCVGPR_READ_B32) S(V_ACCVGPR_WRITE_B32)

// Sentinel: the enum's element count, not a real operation.
case CanonicalOp::CanonicalOp_COUNT: return "<CanonicalOp_COUNT>";
}
#undef S
// Unreachable for any in-range enumerator (every case above returns);
// kept so an out-of-range value cast to CanonicalOp still yields a
// string, and to satisfy -Wreturn-type.
return "<unknown CanonicalOp>";
}

} // namespace COMGR::hotswap
Loading