From a70e90e9dc2d79571f8ba0d2e6797dfa546f708d Mon Sep 17 00:00:00 2001 From: xintin Date: Mon, 11 May 2026 23:16:49 +0000 Subject: [PATCH 1/3] hotswap dispatcher vtable + .def registry, fix Windows support Signed-off-by: xintin --- amd/comgr/src/comgr-hotswap-b0a0.cpp | 99 ++++++++++++------- amd/comgr/src/comgr-hotswap-internal.h | 55 +++++++++-- amd/comgr/src/comgr-hotswap-patch-inplace.cpp | 15 +-- .../src/comgr-hotswap-patch-trampoline.cpp | 15 +-- .../src/comgr-hotswap-patch-vop3px2-src2.cpp | 10 +- .../src/comgr-hotswap-patch-wmma-hazard.cpp | 10 +- amd/comgr/src/comgr-hotswap-patches.def | 35 +++++++ amd/comgr/test-unit/CMakeLists.txt | 3 + amd/comgr/test-unit/HotswapMCTest.cpp | 68 +++++++++++++ 9 files changed, 240 insertions(+), 70 deletions(-) create mode 100644 amd/comgr/src/comgr-hotswap-patches.def diff --git a/amd/comgr/src/comgr-hotswap-b0a0.cpp b/amd/comgr/src/comgr-hotswap-b0a0.cpp index a7352fe4714c9..92b4648e434d9 100644 --- a/amd/comgr/src/comgr-hotswap-b0a0.cpp +++ b/amd/comgr/src/comgr-hotswap-b0a0.cpp @@ -10,9 +10,15 @@ /// retargetCodeObjectB0A0 orchestrator that drives the full pipeline: /// decode -> patch -> trampoline growth -> DWARF update. /// -/// Patch entry points are declared as weak symbols returning 0. Each -/// comgr-hotswap-patch-*.cpp file provides a strong override, allowing -/// patches to land as independent PRs with no merge conflicts. +/// Patch passes are dispatched through HotswapPatchVTable. The membership +/// list lives in comgr-hotswap-patches.def; each entry corresponds to one +/// slot on the vtable and one register*Patch function in a sibling +/// comgr-hotswap-patch-*.cpp. installHotswapPatches() walks the .def to +/// bind every slot; a std::call_once at the top of retargetCodeObjectB0A0 +/// drives the install exactly once per process. This replaces the prior +/// LLVM_ATTRIBUTE_WEAK + `#if !defined(_MSC_VER)` override pattern, which +/// silently disabled hotswap on Windows because PE/COFF does not honour +/// weak the way ELF does (issue ROCm/llvm-project#2479). /// //===----------------------------------------------------------------------===// @@ -21,6 +27,8 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Compiler.h" +#include + using namespace llvm; namespace COMGR { @@ -59,18 +67,14 @@ static RewriteConfig makeGfx1250B0A0Config() { return Config; } -// -- Forward declarations for patch/liveness/DWARF stubs ---------------------- +// -- Forward declarations for liveness/DWARF stubs ---------------------------- // -// These have weak default definitions below; patch .cpp files may provide -// strong overrides at link time so patches can land as independent PRs. -// Patch entry points are declared in comgr-hotswap-internal.h. - -uint32_t applyInPlacePatches(PatchContext &, size_t); -uint32_t applyTrampolinePatches(PatchContext &, size_t); -uint32_t applyWmmaHazardPatch(PatchContext &); -uint32_t applyVop3px2Src2Fix(PatchContext &); -uint32_t applyWmmaSplitPatches(PatchContext &, size_t); -uint32_t applyScratchPatches(PatchContext &, size_t); +// These have weak default definitions below; future patch .cpp files may +// provide strong overrides at link time. The patch family (apply*) has +// migrated to the HotswapPatchVTable contract above; these helpers stay +// on the old pattern until they grow their first strong override, at +// which point they migrate the same way (one .def entry + register*). + CFG buildCfg(ArrayRef Decoded, const MCInstrInfo &); LivenessInfo computeLiveness(ArrayRef Decoded, const CFG &, const MCInstrInfo &, const MCRegisterInfo &, @@ -93,21 +97,25 @@ void patchDebugInfo(uint8_t *Elf, size_t ElfSize, uint64_t TextAddr, void patchDebugFrame(uint8_t *Elf, size_t ElfSize, uint64_t TextAddr, uint64_t TextSizeBefore, uint64_t TrampTotal); -// -- Weak-symbol patch stubs -------------------------------------------------- - -LLVM_ATTRIBUTE_WEAK uint32_t applyInPlacePatches(PatchContext &, size_t) { - return 0; -} -LLVM_ATTRIBUTE_WEAK uint32_t applyTrampolinePatches(PatchContext &, size_t) { - return 0; -} -LLVM_ATTRIBUTE_WEAK uint32_t applyWmmaHazardPatch(PatchContext &) { return 0; } -LLVM_ATTRIBUTE_WEAK uint32_t applyVop3px2Src2Fix(PatchContext &) { return 0; } -LLVM_ATTRIBUTE_WEAK uint32_t applyWmmaSplitPatches(PatchContext &, size_t) { - return 0; +// -- HotswapPatchVTable plumbing ---------------------------------------------- +// +// Patch-module forward declarations live in comgr-hotswap-internal.h +// (driven off the same comgr-hotswap-patches.def), so libamd_comgr and +// the unit tests share one prototype source. Here we just supply the +// singleton accessor and the installer that walks the .def to invoke +// each register*Patch. A missing register*Patch produces a link error +// at libamd_comgr link time -- the loud-failure shape the weak-symbol +// pattern lacked. + +HotswapPatchVTable &getHotswapPatchVTable() { + static HotswapPatchVTable VT; + return VT; } -LLVM_ATTRIBUTE_WEAK uint32_t applyScratchPatches(PatchContext &, size_t) { - return 0; + +void installHotswapPatches(HotswapPatchVTable &VT) { +#define HOTSWAP_PATCH(Name) register##Name##Patch(VT); +#include "comgr-hotswap-patches.def" +#undef HOTSWAP_PATCH } // -- Weak-symbol liveness stubs ----------------------------------------------- @@ -283,6 +291,16 @@ buildNopSledMap(ArrayRef Decoded, const LLVMState &LS) { // -- applyGfx1250B0toA0Rules -------------------------------------------------- +/// Per-instruction patch-pass trampoline: invokes \p Fn with (\p Ctx, +/// \p Idx) if it is non-null, or returns 0 otherwise. nullptr means +/// the corresponding pass family has no implementation linked in +/// (e.g. wmma-split / scratch today), which the dispatcher treats as +/// a no-op slot. +static uint32_t runPerInstPass(uint32_t (*Fn)(PatchContext &, size_t), + PatchContext &Ctx, size_t Idx) { + return Fn ? Fn(Ctx, Idx) : 0; +} + /// Main per-instruction dispatcher for the GFX1250 B0-to-A0 rewrite. /// Builds the NOP sled map, CFG, and VGPR liveness for the decoded stream, /// then walks each decoded instruction and runs the patch passes in order @@ -320,28 +338,30 @@ applyGfx1250B0toA0Rules(std::vector &Decoded, OutTrampolines, Sleds, Elf, Liveness, KernelStats, OutScratchPatches}; + const HotswapPatchVTable &VT = getHotswapPatchVTable(); + for (size_t Idx = 0, E = Decoded.size(); Idx < E; ++Idx) { const InternalDecodedInst &DI = Decoded[Idx]; if (DI.Mnemonic == "") continue; uint32_t P = 0; - P += applyInPlacePatches(Ctx, Idx); + P += runPerInstPass(VT.applyInPlacePatches, Ctx, Idx); if (P) { Patched += P; continue; } - P += applyTrampolinePatches(Ctx, Idx); + P += runPerInstPass(VT.applyTrampolinePatches, Ctx, Idx); if (P) { Patched += P; continue; } - P += applyWmmaSplitPatches(Ctx, Idx); + P += runPerInstPass(VT.applyWmmaSplitPatches, Ctx, Idx); if (P) { Patched += P; continue; } - P += applyScratchPatches(Ctx, Idx); + P += runPerInstPass(VT.applyScratchPatches, Ctx, Idx); if (P) { Patched += P; continue; @@ -359,8 +379,10 @@ applyGfx1250B0toA0Rules(std::vector &Decoded, // treat a branch as WMMA/VALU/VOP3PX2. // If a future patch family changes instruction boundaries, the Decoded // stream must be rebuilt before these passes run. - Patched += applyWmmaHazardPatch(Ctx); - Patched += applyVop3px2Src2Fix(Ctx); + if (VT.applyWmmaHazardPatch) + Patched += VT.applyWmmaHazardPatch(Ctx); + if (VT.applyVop3px2Src2Fix) + Patched += VT.applyVop3px2Src2Fix(Ctx); for (const llvm::StringMapEntry &KV : KernelStats) { StringRef KName = KV.first(); @@ -483,6 +505,15 @@ static void runScratchVerification(WritableMemoryBuffer &OutBuf, amd_comgr_status_t retargetCodeObjectB0A0(const void *ElfData, size_t ElfSize, const TargetIdentifier &TargetIdent, std::unique_ptr &Out) { + // Bind every patch module's implementation into the singleton vtable. + // std::call_once gives us a single, deterministic install per process + // with no inter-TU static-init order contract. Cost is one atomic check + // on every entry after the first; the first call runs the .def-driven + // register*Patch chain in declaration order. + static std::once_flag InstallOnce; + std::call_once(InstallOnce, + [] { installHotswapPatches(getHotswapPatchVTable()); }); + // Take a working copy so the input is preserved and we have a mutable // buffer to parse / patch. std::vector Buf(static_cast(ElfData), diff --git a/amd/comgr/src/comgr-hotswap-internal.h b/amd/comgr/src/comgr-hotswap-internal.h index 29e132c55adde..4129d4520f1c9 100644 --- a/amd/comgr/src/comgr-hotswap-internal.h +++ b/amd/comgr/src/comgr-hotswap-internal.h @@ -509,13 +509,56 @@ struct PatchContext { uint32_t InstSize, llvm::ArrayRef Replacement); -// -- Patch entry points (weak stubs in comgr-hotswap-b0a0.cpp) ---------------- +// -- Patch dispatch vtable ---------------------------------------------------- +// +// Function-pointer dispatch table that replaces the prior LLVM_ATTRIBUTE_WEAK +// + `#if !defined(_MSC_VER)` override pattern. PE/COFF does not honour weak +// the way ELF does, so on Windows the weak stubs silently won every patch +// call and the feature was a no-op (issue ROCm/llvm-project#2479). +// +// Patch modules supply their implementations through register*Patch +// functions invoked by installHotswapPatches(). The membership list is +// comgr-hotswap-patches.def; each entry there corresponds to one slot +// below and one register*Patch function in a sibling +// comgr-hotswap-patch-*.cpp. nullptr slots are treated as no-op by the +// dispatcher, so an unmigrated pass family (e.g. wmma-split, scratch) is +// safe to leave unbound until its first strong override lands. + +struct HotswapPatchVTable { + // Per-instruction passes: called in declaration order; first non-zero + // return wins for an instruction (matches the pre-vtable dispatcher + // behaviour in applyGfx1250B0toA0Rules). + uint32_t (*applyInPlacePatches)(PatchContext &, size_t) = nullptr; + uint32_t (*applyTrampolinePatches)(PatchContext &, size_t) = nullptr; + uint32_t (*applyWmmaSplitPatches)(PatchContext &, size_t) = nullptr; + uint32_t (*applyScratchPatches)(PatchContext &, size_t) = nullptr; + + // Whole-kernel passes: called once per kernel after the per-instruction + // loop completes. + uint32_t (*applyWmmaHazardPatch)(PatchContext &) = nullptr; + uint32_t (*applyVop3px2Src2Fix)(PatchContext &) = nullptr; +}; -uint32_t applyInPlacePatches(PatchContext &Ctx, size_t Idx); -uint32_t applyTrampolinePatches(PatchContext &Ctx, size_t Idx); -uint32_t applyWmmaHazardPatch(PatchContext &Ctx); -uint32_t applyWmmaSplitPatches(PatchContext &Ctx, size_t Idx); -uint32_t applyScratchPatches(PatchContext &Ctx, size_t Idx); +/// Process-wide HotswapPatchVTable singleton (Meyers-style; thread-safe +/// under C++11). Populated once by installHotswapPatches(); the dispatcher +/// gates that install on a std::call_once at the top of +/// retargetCodeObjectB0A0 so no inter-TU static-init contract is required. +HotswapPatchVTable &getHotswapPatchVTable(); + +/// Walk comgr-hotswap-patches.def and bind every patch module's +/// implementation into \p VT by calling its register*Patch function. +/// A missing register*Patch produces a link error, which is the +/// loud-failure shape the weak-symbol pattern lacked. +void installHotswapPatches(HotswapPatchVTable &VT); + +// Forward-declare every patch module's installer from the central .def +// registry. Patch modules define these in their comgr-hotswap-patch-*.cpp; +// installHotswapPatches() consumes them; unit tests under test-unit/ also +// invoke them directly. A patches.def line with no matching definition +// produces a libamd_comgr / HotswapMCTests link error. +#define HOTSWAP_PATCH(Name) void register##Name##Patch(HotswapPatchVTable &); +#include "comgr-hotswap-patches.def" +#undef HOTSWAP_PATCH // -- Function declarations (B0-to-A0 policy layer) ---------------------------- diff --git a/amd/comgr/src/comgr-hotswap-patch-inplace.cpp b/amd/comgr/src/comgr-hotswap-patch-inplace.cpp index ccfa18401d438..473d5b57bc60c 100644 --- a/amd/comgr/src/comgr-hotswap-patch-inplace.cpp +++ b/amd/comgr/src/comgr-hotswap-patch-inplace.cpp @@ -23,13 +23,6 @@ #include "comgr-hotswap-internal.h" -// MSVC does not support weak symbols; LLVM_ATTRIBUTE_WEAK expands to nothing, -// so the stub in comgr-hotswap-b0a0.cpp becomes a regular definition and -// this file would produce a duplicate-symbol link error (LNK2005). Guard -// the strong override until a proper registration mechanism replaces the -// weak-symbol pattern on Windows (tracked in #2294 / #2285). -#if !defined(_MSC_VER) - #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCCodeEmitter.h" @@ -99,7 +92,7 @@ bool swapOpcode(InternalDecodedInst &DI, uint8_t *Text, const LLVMState &LS, } // anonymous namespace -uint32_t applyInPlacePatches(PatchContext &Ctx, size_t Idx) { +static uint32_t applyInPlacePatchesImpl(PatchContext &Ctx, size_t Idx) { InternalDecodedInst &DI = Ctx.Decoded[Idx]; StringRef Mnemonic(DI.Mnemonic); @@ -162,7 +155,9 @@ uint32_t applyInPlacePatches(PatchContext &Ctx, size_t Idx) { return 0; } +void registerInPlacePatch(HotswapPatchVTable &VT) { + VT.applyInPlacePatches = &applyInPlacePatchesImpl; +} + } // namespace hotswap } // namespace COMGR - -#endif // !defined(_MSC_VER) diff --git a/amd/comgr/src/comgr-hotswap-patch-trampoline.cpp b/amd/comgr/src/comgr-hotswap-patch-trampoline.cpp index 2698f2a418f72..0d6c8281bc9be 100644 --- a/amd/comgr/src/comgr-hotswap-patch-trampoline.cpp +++ b/amd/comgr/src/comgr-hotswap-patch-trampoline.cpp @@ -15,13 +15,6 @@ #include "comgr-hotswap-internal.h" -// MSVC does not support weak symbols; LLVM_ATTRIBUTE_WEAK expands to nothing, -// so the stub in comgr-hotswap-b0a0.cpp becomes a regular definition and -// this file would produce a duplicate-symbol link error (LNK2005). Guard -// the strong override until a proper registration mechanism replaces the -// weak-symbol pattern on Windows (tracked in #2294 / #2285). -#if !defined(_MSC_VER) - #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCCodeEmitter.h" @@ -361,7 +354,7 @@ static bool patchDs2AddrStride64(PatchContext &Ctx, size_t Idx) { // // ds_*_2addr_stride64_* -> split into two single-address DS ops -uint32_t applyTrampolinePatches(PatchContext &Ctx, size_t Idx) { +static uint32_t applyTrampolinePatchesImpl(PatchContext &Ctx, size_t Idx) { StringRef Mnem(Ctx.Decoded[Idx].Mnemonic); if (!getDs2AddrReplacement(Mnem).empty()) @@ -370,7 +363,9 @@ uint32_t applyTrampolinePatches(PatchContext &Ctx, size_t Idx) { return 0; } +void registerTrampolinePatch(HotswapPatchVTable &VT) { + VT.applyTrampolinePatches = &applyTrampolinePatchesImpl; +} + } // namespace hotswap } // namespace COMGR - -#endif // !defined(_MSC_VER) diff --git a/amd/comgr/src/comgr-hotswap-patch-vop3px2-src2.cpp b/amd/comgr/src/comgr-hotswap-patch-vop3px2-src2.cpp index 98f287bd7908b..69a2f16ac8a76 100644 --- a/amd/comgr/src/comgr-hotswap-patch-vop3px2-src2.cpp +++ b/amd/comgr/src/comgr-hotswap-patch-vop3px2-src2.cpp @@ -33,8 +33,6 @@ #include "comgr-hotswap-internal.h" -#if !defined(_MSC_VER) - #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" @@ -90,7 +88,7 @@ bool patchScaleSrc2(uint8_t *InstBytes) { // This only fires on the B0-to-A0 rewrite path (applyGfx1250B0toA0Rules). // A0-native binaries are compiled with an A0-targeted Clang that sets the // field correctly at codegen time, so they do not need hotswap rewriting. -uint32_t applyVop3px2Src2Fix(PatchContext &Ctx) { +static uint32_t applyVop3px2Src2FixImpl(PatchContext &Ctx) { uint32_t Patched = 0; unsigned Scanned = 0; @@ -112,7 +110,9 @@ uint32_t applyVop3px2Src2Fix(PatchContext &Ctx) { return Patched; } +void registerVop3px2Src2Patch(HotswapPatchVTable &VT) { + VT.applyVop3px2Src2Fix = &applyVop3px2Src2FixImpl; +} + } // namespace hotswap } // namespace COMGR - -#endif // !defined(_MSC_VER) diff --git a/amd/comgr/src/comgr-hotswap-patch-wmma-hazard.cpp b/amd/comgr/src/comgr-hotswap-patch-wmma-hazard.cpp index 336cc15bbd25f..893806fcfdd73 100644 --- a/amd/comgr/src/comgr-hotswap-patch-wmma-hazard.cpp +++ b/amd/comgr/src/comgr-hotswap-patch-wmma-hazard.cpp @@ -15,8 +15,6 @@ #include "comgr-hotswap-internal.h" -#if !defined(_MSC_VER) - #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" @@ -172,7 +170,7 @@ std::vector findWmmaCoexecHazards(const PatchContext &Ctx) { } // anonymous namespace -uint32_t applyWmmaHazardPatch(PatchContext &Ctx) { +static uint32_t applyWmmaHazardPatchImpl(PatchContext &Ctx) { std::vector Hazards = findWmmaCoexecHazards(Ctx); if (Hazards.empty()) return 0; @@ -207,7 +205,9 @@ uint32_t applyWmmaHazardPatch(PatchContext &Ctx) { return Patched; } +void registerWmmaHazardPatch(HotswapPatchVTable &VT) { + VT.applyWmmaHazardPatch = &applyWmmaHazardPatchImpl; +} + } // namespace hotswap } // namespace COMGR - -#endif // !defined(_MSC_VER) diff --git a/amd/comgr/src/comgr-hotswap-patches.def b/amd/comgr/src/comgr-hotswap-patches.def new file mode 100644 index 0000000000000..c7d0a5bf07225 --- /dev/null +++ b/amd/comgr/src/comgr-hotswap-patches.def @@ -0,0 +1,35 @@ +//===- comgr-hotswap-patches.def - HotSwap patch registry ----------------===// +// +// Part of Comgr, under the Apache License v2.0 with LLVM Exceptions. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// X-macro list of HotSwap patch modules. One line per patch; each entry +/// names the module suffix used to form (a) the strong implementation +/// symbol applyXxx (declared in comgr-hotswap-internal.h) and (b) the +/// installer registerXxxPatch(HotswapPatchVTable&) defined in the patch +/// module's .cpp file. +/// +/// New patches add exactly one HOTSWAP_PATCH() line here (alpha-ordered) +/// plus a sibling comgr-hotswap-patch-.cpp file that defines its +/// registerXxxPatch function. The dispatcher reads this file twice from +/// comgr-hotswap-b0a0.cpp: once to forward-declare every registerXxxPatch, +/// and once to call them all from installHotswapPatches(). +/// +/// HotswapPatchVTable slots default to no-op stubs, so an entry without +/// a matching .cpp registrar at link time produces a link error rather +/// than a silent disable -- which is the entire point of replacing the +/// LLVM_ATTRIBUTE_WEAK pattern (see issue ROCm/llvm-project#2479). +/// +//===----------------------------------------------------------------------===// + +#ifndef HOTSWAP_PATCH +#error "comgr-hotswap-patches.def included without HOTSWAP_PATCH defined" +#endif + +HOTSWAP_PATCH(InPlace) +HOTSWAP_PATCH(Trampoline) +HOTSWAP_PATCH(Vop3px2Src2) +HOTSWAP_PATCH(WmmaHazard) diff --git a/amd/comgr/test-unit/CMakeLists.txt b/amd/comgr/test-unit/CMakeLists.txt index 7e11d495ca764..0983fe8356d95 100644 --- a/amd/comgr/test-unit/CMakeLists.txt +++ b/amd/comgr/test-unit/CMakeLists.txt @@ -98,8 +98,11 @@ comgr_configure_test_target(HotswapElfTests) add_executable(HotswapMCTests HotswapMCTest.cpp + ../src/comgr-hotswap-b0a0.cpp ../src/comgr-hotswap-elf.cpp ../src/comgr-hotswap-llvm.cpp + ../src/comgr-hotswap-patch-inplace.cpp + ../src/comgr-hotswap-patch-trampoline.cpp ../src/comgr-hotswap-patch-vop3px2-src2.cpp ../src/comgr-hotswap-patch-wmma-hazard.cpp ../src/comgr-env.cpp) diff --git a/amd/comgr/test-unit/HotswapMCTest.cpp b/amd/comgr/test-unit/HotswapMCTest.cpp index 7b490d7228fb3..b751f595a677e 100644 --- a/amd/comgr/test-unit/HotswapMCTest.cpp +++ b/amd/comgr/test-unit/HotswapMCTest.cpp @@ -428,3 +428,71 @@ TEST(PatchScaleSrc2, PreservesNonScaleSrc2Bits) { EXPECT_EQ(Inst[6] & 0xFC, 0x00); EXPECT_EQ(Inst[7] & 0x07, 0x04); } + +// -- HotswapPatchVTable ------------------------------------------------------- +// +// Tests for the .def-driven patch registry that replaced the +// LLVM_ATTRIBUTE_WEAK override pattern (issue ROCm/llvm-project#2479). +// +// Coverage strategy: link errors already catch missing register*Patch +// definitions and missing comgr-hotswap-patches.def entries, so we only +// test what the linker cannot: +// 1. One canonical per-installer "binds only its own slot" check, +// kept as a worked example for future patch authors. Wrong-slot +// bugs in the other register*Patch functions are caught via the +// install end-to-end test below. +// 2. End-to-end install: defaults are null, .def entries get bound, +// slots without a .def entry stay null (the dispatcher's no-op +// contract). +// 3. The Meyers singleton accessor is a real singleton and bindings +// persist on it (production install path in retargetCodeObjectB0A0 +// relies on this). + +TEST(HotswapPatchVTable, RegisterInPlaceBindsOnlyInPlaceSlot) { + HotswapPatchVTable VT; + registerInPlacePatch(VT); + EXPECT_NE(VT.applyInPlacePatches, nullptr); + EXPECT_EQ(VT.applyTrampolinePatches, nullptr); + EXPECT_EQ(VT.applyWmmaHazardPatch, nullptr); + EXPECT_EQ(VT.applyVop3px2Src2Fix, nullptr); +} + +TEST(HotswapPatchVTable, InstallBindsRegisteredAndLeavesUnregisteredNull) { + HotswapPatchVTable VT; + + // Defaults: every slot null (no patch implementation linked yet). + EXPECT_EQ(VT.applyInPlacePatches, nullptr); + EXPECT_EQ(VT.applyTrampolinePatches, nullptr); + EXPECT_EQ(VT.applyWmmaHazardPatch, nullptr); + EXPECT_EQ(VT.applyVop3px2Src2Fix, nullptr); + EXPECT_EQ(VT.applyWmmaSplitPatches, nullptr); + EXPECT_EQ(VT.applyScratchPatches, nullptr); + + installHotswapPatches(VT); + + // Slots backed by a comgr-hotswap-patches.def entry get bound. If a + // register*Patch fails to set its slot (or sets the wrong one), one + // of these EXPECT_NEs catches it. + EXPECT_NE(VT.applyInPlacePatches, nullptr); + EXPECT_NE(VT.applyTrampolinePatches, nullptr); + EXPECT_NE(VT.applyWmmaHazardPatch, nullptr); + EXPECT_NE(VT.applyVop3px2Src2Fix, nullptr); + + // Slots without a .def entry stay null; the dispatcher relies on + // this to treat unimplemented pass families (wmma-split, scratch + // today) as no-op. + EXPECT_EQ(VT.applyWmmaSplitPatches, nullptr); + EXPECT_EQ(VT.applyScratchPatches, nullptr); +} + +TEST(HotswapPatchVTable, ProcessSingletonIdentityAndInstallPersists) { + HotswapPatchVTable &VT1 = getHotswapPatchVTable(); + HotswapPatchVTable &VT2 = getHotswapPatchVTable(); + EXPECT_EQ(&VT1, &VT2); + + installHotswapPatches(VT1); + EXPECT_NE(VT2.applyInPlacePatches, nullptr); + EXPECT_NE(VT2.applyTrampolinePatches, nullptr); + EXPECT_NE(VT2.applyWmmaHazardPatch, nullptr); + EXPECT_NE(VT2.applyVop3px2Src2Fix, nullptr); +} From 2276113fd67c9b02e902ee0a27c3f25b575965c8 Mon Sep 17 00:00:00 2001 From: xintin Date: Mon, 11 May 2026 23:56:00 +0000 Subject: [PATCH 2/3] updated agent rules Signed-off-by: xintin --- amd/comgr/AGENT_CONVENTIONS.md | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/amd/comgr/AGENT_CONVENTIONS.md b/amd/comgr/AGENT_CONVENTIONS.md index 81c266cd1f6a9..76915d4a307e0 100644 --- a/amd/comgr/AGENT_CONVENTIONS.md +++ b/amd/comgr/AGENT_CONVENTIONS.md @@ -52,9 +52,28 @@ fix upstream. Do not implement a parallel version inside Comgr. - Apply upstream LLVM code review standards (small focused commits, meaningful commit messages, no unrelated changes). - **Avoid Windows-hostile code**: - - Use `LLVM_ATTRIBUTE_WEAK` (from `llvm/Support/Compiler.h`), not - `__attribute__((weak))`. MSVC does not understand the GCC syntax - and will fail to build. + - **Do not rely on `LLVM_ATTRIBUTE_WEAK` for cross-platform symbol + overriding.** PE/COFF does not honour weak symbols the way ELF + does (the MSVC expansion is empty), so a `LLVM_ATTRIBUTE_WEAK` + default in one TU plus a strong override in another silently + becomes a duplicate-definition link error on Windows -- or, + worse, a silent feature disable if the override is guarded out + with `#if !defined(_MSC_VER)`. This is exactly the failure mode + reported in + [issue ROCm/llvm-project#2479](https://github.com/ROCm/llvm-project/issues/2479). + - **Use the HotswapPatchVTable + `.def` registry contract for any + new `apply*`-style hotswap pass.** Each patch ships a + `register*Patch(HotswapPatchVTable&)` function in its + `comgr-hotswap-patch-*.cpp`, an alpha-ordered line in + `comgr-hotswap-patches.def`, and a slot on `HotswapPatchVTable`. + A missing registrar produces a link error at libamd_comgr link + time -- the loud-failure shape the weak-symbol pattern lacked. + See `comgr-hotswap-internal.h` and `comgr-hotswap-b0a0.cpp` for + the dispatcher / install flow. + - `LLVM_ATTRIBUTE_WEAK` remains acceptable only for the few + liveness / DWARF stubs that currently have no strong override + anywhere in tree; they migrate to the vtable contract the first + time a real implementation lands. - No GCC/Clang-only attributes without an LLVM-portable wrapper. - All assembly / disassembly goes through the MC layer (e.g., `assembleSingleInst`, `parseAsmToMCInsts`). **No hardcoded From 657b312475cfa531de65b3797633fe423961886a Mon Sep 17 00:00:00 2001 From: xintin Date: Tue, 12 May 2026 18:49:04 +0000 Subject: [PATCH 3/3] addressed review comments Signed-off-by: xintin --- amd/comgr/AGENT_CONVENTIONS.md | 25 +---- amd/comgr/src/comgr-hotswap-b0a0.cpp | 91 ++++++++++--------- amd/comgr/src/comgr-hotswap-internal.h | 29 ++++-- .../src/comgr-hotswap-patch-wmma-split.cpp | 18 ++-- amd/comgr/src/comgr-hotswap-patches.def | 15 ++- amd/comgr/test-unit/CMakeLists.txt | 1 + amd/comgr/test-unit/HotswapMCTest.cpp | 36 +++++--- 7 files changed, 109 insertions(+), 106 deletions(-) diff --git a/amd/comgr/AGENT_CONVENTIONS.md b/amd/comgr/AGENT_CONVENTIONS.md index 76915d4a307e0..81c266cd1f6a9 100644 --- a/amd/comgr/AGENT_CONVENTIONS.md +++ b/amd/comgr/AGENT_CONVENTIONS.md @@ -52,28 +52,9 @@ fix upstream. Do not implement a parallel version inside Comgr. - Apply upstream LLVM code review standards (small focused commits, meaningful commit messages, no unrelated changes). - **Avoid Windows-hostile code**: - - **Do not rely on `LLVM_ATTRIBUTE_WEAK` for cross-platform symbol - overriding.** PE/COFF does not honour weak symbols the way ELF - does (the MSVC expansion is empty), so a `LLVM_ATTRIBUTE_WEAK` - default in one TU plus a strong override in another silently - becomes a duplicate-definition link error on Windows -- or, - worse, a silent feature disable if the override is guarded out - with `#if !defined(_MSC_VER)`. This is exactly the failure mode - reported in - [issue ROCm/llvm-project#2479](https://github.com/ROCm/llvm-project/issues/2479). - - **Use the HotswapPatchVTable + `.def` registry contract for any - new `apply*`-style hotswap pass.** Each patch ships a - `register*Patch(HotswapPatchVTable&)` function in its - `comgr-hotswap-patch-*.cpp`, an alpha-ordered line in - `comgr-hotswap-patches.def`, and a slot on `HotswapPatchVTable`. - A missing registrar produces a link error at libamd_comgr link - time -- the loud-failure shape the weak-symbol pattern lacked. - See `comgr-hotswap-internal.h` and `comgr-hotswap-b0a0.cpp` for - the dispatcher / install flow. - - `LLVM_ATTRIBUTE_WEAK` remains acceptable only for the few - liveness / DWARF stubs that currently have no strong override - anywhere in tree; they migrate to the vtable contract the first - time a real implementation lands. + - Use `LLVM_ATTRIBUTE_WEAK` (from `llvm/Support/Compiler.h`), not + `__attribute__((weak))`. MSVC does not understand the GCC syntax + and will fail to build. - No GCC/Clang-only attributes without an LLVM-portable wrapper. - All assembly / disassembly goes through the MC layer (e.g., `assembleSingleInst`, `parseAsmToMCInsts`). **No hardcoded diff --git a/amd/comgr/src/comgr-hotswap-b0a0.cpp b/amd/comgr/src/comgr-hotswap-b0a0.cpp index 92b4648e434d9..a99c933e152f4 100644 --- a/amd/comgr/src/comgr-hotswap-b0a0.cpp +++ b/amd/comgr/src/comgr-hotswap-b0a0.cpp @@ -14,11 +14,16 @@ /// list lives in comgr-hotswap-patches.def; each entry corresponds to one /// slot on the vtable and one register*Patch function in a sibling /// comgr-hotswap-patch-*.cpp. installHotswapPatches() walks the .def to -/// bind every slot; a std::call_once at the top of retargetCodeObjectB0A0 -/// drives the install exactly once per process. This replaces the prior -/// LLVM_ATTRIBUTE_WEAK + `#if !defined(_MSC_VER)` override pattern, which -/// silently disabled hotswap on Windows because PE/COFF does not honour -/// weak the way ELF does (issue ROCm/llvm-project#2479). +/// bind every slot. The vtable is exposed through getHotswapPatchVTable(), +/// a Meyers singleton whose initializer eagerly runs installHotswapPatches +/// on its private storage; C++11 [stmt.dcl]/4 guarantees this happens +/// exactly once and is safe under concurrent first access, so the +/// dispatcher and the amd_comgr_hotswap_rewrite entry point can fetch the +/// fully-bound vtable with no explicit synchronization. +/// This replaces the prior LLVM_ATTRIBUTE_WEAK + `#if !defined(_MSC_VER)` +/// override pattern, which silently disabled hotswap on Windows because +/// PE/COFF does not honour weak the way ELF does +/// (issue ROCm/llvm-project#2479). /// //===----------------------------------------------------------------------===// @@ -27,8 +32,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Compiler.h" -#include - using namespace llvm; namespace COMGR { @@ -69,11 +72,10 @@ static RewriteConfig makeGfx1250B0A0Config() { // -- Forward declarations for liveness/DWARF stubs ---------------------------- // -// These have weak default definitions below; future patch .cpp files may -// provide strong overrides at link time. The patch family (apply*) has -// migrated to the HotswapPatchVTable contract above; these helpers stay -// on the old pattern until they grow their first strong override, at -// which point they migrate the same way (one .def entry + register*). +// These have weak default definitions below. The apply* patch families use +// HotswapPatchVTable dispatch; these lower-level helpers stay on weak stubs +// until a real implementation lands, at which point they should migrate to +// an explicit registration contract as well. CFG buildCfg(ArrayRef Decoded, const MCInstrInfo &); LivenessInfo computeLiveness(ArrayRef Decoded, const CFG &, @@ -101,16 +103,19 @@ void patchDebugFrame(uint8_t *Elf, size_t ElfSize, uint64_t TextAddr, // // Patch-module forward declarations live in comgr-hotswap-internal.h // (driven off the same comgr-hotswap-patches.def), so libamd_comgr and -// the unit tests share one prototype source. Here we just supply the +// the unit tests share one prototype source. Here we supply the // singleton accessor and the installer that walks the .def to invoke -// each register*Patch. A missing register*Patch produces a link error -// at libamd_comgr link time -- the loud-failure shape the weak-symbol -// pattern lacked. - -HotswapPatchVTable &getHotswapPatchVTable() { - static HotswapPatchVTable VT; - return VT; -} +// each register*Patch. A .def entry without a matching register*Patch +// definition produces a link error at libamd_comgr link time. +// +// installHotswapPatches() is exposed in the header so unit tests can +// bind a local HotswapPatchVTable for fixture-style coverage. Production +// code never calls it directly: getHotswapPatchVTable()'s initializer +// invokes it eagerly on the singleton's private storage, which the C++11 +// magic-static rule guarantees runs exactly once even under concurrent +// first access. That removes both the explicit std::call_once at the +// retargetCodeObjectB0A0 entry point and any inter-TU static-init order +// dependency on the patch modules. void installHotswapPatches(HotswapPatchVTable &VT) { #define HOTSWAP_PATCH(Name) register##Name##Patch(VT); @@ -118,6 +123,15 @@ void installHotswapPatches(HotswapPatchVTable &VT) { #undef HOTSWAP_PATCH } +HotswapPatchVTable &getHotswapPatchVTable() { + static HotswapPatchVTable VT = [] { + HotswapPatchVTable Tmp; + installHotswapPatches(Tmp); + return Tmp; + }(); + return VT; +} + // -- Weak-symbol liveness stubs ----------------------------------------------- // // Conservative defaults: all VGPRs reported live. ScratchAllocator will @@ -294,8 +308,7 @@ buildNopSledMap(ArrayRef Decoded, const LLVMState &LS) { /// Per-instruction patch-pass trampoline: invokes \p Fn with (\p Ctx, /// \p Idx) if it is non-null, or returns 0 otherwise. nullptr means /// the corresponding pass family has no implementation linked in -/// (e.g. wmma-split / scratch today), which the dispatcher treats as -/// a no-op slot. +/// (e.g. scratch today), which the dispatcher treats as a no-op slot. static uint32_t runPerInstPass(uint32_t (*Fn)(PatchContext &, size_t), PatchContext &Ctx, size_t Idx) { return Fn ? Fn(Ctx, Idx) : 0; @@ -340,29 +353,29 @@ applyGfx1250B0toA0Rules(std::vector &Decoded, const HotswapPatchVTable &VT = getHotswapPatchVTable(); + // Skip undecoded slots produced by the decoder for bytes it could not + // classify as a valid instruction; the dispatcher has nothing to match + // against on these and we must not invoke the patch passes for them. + constexpr StringLiteral UnknownMnemonic = ""; + for (size_t Idx = 0, E = Decoded.size(); Idx < E; ++Idx) { const InternalDecodedInst &DI = Decoded[Idx]; - if (DI.Mnemonic == "") + if (DI.Mnemonic == UnknownMnemonic) continue; - uint32_t P = 0; - P += runPerInstPass(VT.applyInPlacePatches, Ctx, Idx); - if (P) { + if (uint32_t P = runPerInstPass(VT.applyInPlacePatches, Ctx, Idx)) { Patched += P; continue; } - P += runPerInstPass(VT.applyTrampolinePatches, Ctx, Idx); - if (P) { + if (uint32_t P = runPerInstPass(VT.applyTrampolinePatches, Ctx, Idx)) { Patched += P; continue; } - P += runPerInstPass(VT.applyWmmaSplitPatches, Ctx, Idx); - if (P) { + if (uint32_t P = runPerInstPass(VT.applyWmmaSplitPatches, Ctx, Idx)) { Patched += P; continue; } - P += runPerInstPass(VT.applyScratchPatches, Ctx, Idx); - if (P) { + if (uint32_t P = runPerInstPass(VT.applyScratchPatches, Ctx, Idx)) { Patched += P; continue; } @@ -505,14 +518,10 @@ static void runScratchVerification(WritableMemoryBuffer &OutBuf, amd_comgr_status_t retargetCodeObjectB0A0(const void *ElfData, size_t ElfSize, const TargetIdentifier &TargetIdent, std::unique_ptr &Out) { - // Bind every patch module's implementation into the singleton vtable. - // std::call_once gives us a single, deterministic install per process - // with no inter-TU static-init order contract. Cost is one atomic check - // on every entry after the first; the first call runs the .def-driven - // register*Patch chain in declaration order. - static std::once_flag InstallOnce; - std::call_once(InstallOnce, - [] { installHotswapPatches(getHotswapPatchVTable()); }); + // The dispatcher fetches the patch vtable lazily via + // getHotswapPatchVTable() inside applyGfx1250B0toA0Rules; the singleton's + // initializer binds every register*Patch slot on first access, so no + // explicit install step is needed here. // Take a working copy so the input is preserved and we have a mutable // buffer to parse / patch. diff --git a/amd/comgr/src/comgr-hotswap-internal.h b/amd/comgr/src/comgr-hotswap-internal.h index 4129d4520f1c9..604fcaf86f5fc 100644 --- a/amd/comgr/src/comgr-hotswap-internal.h +++ b/amd/comgr/src/comgr-hotswap-internal.h @@ -521,8 +521,13 @@ struct PatchContext { // comgr-hotswap-patches.def; each entry there corresponds to one slot // below and one register*Patch function in a sibling // comgr-hotswap-patch-*.cpp. nullptr slots are treated as no-op by the -// dispatcher, so an unmigrated pass family (e.g. wmma-split, scratch) is -// safe to leave unbound until its first strong override lands. +// dispatcher, so an unmigrated pass family (e.g. scratch) is safe to +// leave unbound until its first strong override lands. +// +// The singleton accessor below eagerly installs every registered slot in +// its own initializer, so production callers never observe an empty +// vtable. installHotswapPatches() is still exported for unit tests that +// want to drive the install against a local HotswapPatchVTable. struct HotswapPatchVTable { // Per-instruction passes: called in declaration order; first non-zero @@ -539,18 +544,24 @@ struct HotswapPatchVTable { uint32_t (*applyVop3px2Src2Fix)(PatchContext &) = nullptr; }; -/// Process-wide HotswapPatchVTable singleton (Meyers-style; thread-safe -/// under C++11). Populated once by installHotswapPatches(); the dispatcher -/// gates that install on a std::call_once at the top of -/// retargetCodeObjectB0A0 so no inter-TU static-init contract is required. -HotswapPatchVTable &getHotswapPatchVTable(); - /// Walk comgr-hotswap-patches.def and bind every patch module's /// implementation into \p VT by calling its register*Patch function. /// A missing register*Patch produces a link error, which is the -/// loud-failure shape the weak-symbol pattern lacked. +/// loud-failure shape the weak-symbol pattern lacked. Production code +/// never calls this directly; it runs inside getHotswapPatchVTable()'s +/// initializer. Exposed here so unit tests can drive the install against +/// a local HotswapPatchVTable. void installHotswapPatches(HotswapPatchVTable &VT); +/// Process-wide HotswapPatchVTable singleton (Meyers-style). The +/// initializer eagerly calls installHotswapPatches() on its own storage, +/// so every reference returned here is to a fully bound vtable. C++11 +/// [stmt.dcl]/4 guarantees the initializer runs exactly once and is safe +/// under concurrent first access, which removes the need for an explicit +/// std::call_once at the entry point and any inter-TU static-init order +/// contract on the patch modules. +HotswapPatchVTable &getHotswapPatchVTable(); + // Forward-declare every patch module's installer from the central .def // registry. Patch modules define these in their comgr-hotswap-patch-*.cpp; // installHotswapPatches() consumes them; unit tests under test-unit/ also diff --git a/amd/comgr/src/comgr-hotswap-patch-wmma-split.cpp b/amd/comgr/src/comgr-hotswap-patch-wmma-split.cpp index e3bdd463f9268..9c763ebf25c1d 100644 --- a/amd/comgr/src/comgr-hotswap-patch-wmma-split.cpp +++ b/amd/comgr/src/comgr-hotswap-patch-wmma-split.cpp @@ -6,7 +6,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// Strong-symbol override for applyWmmaSplitPatches. Decomposes WMMA +/// Patch module bound to HotswapPatchVTable::applyWmmaSplitPatches via +/// registerWmmaSplitPatch (see comgr-hotswap-patches.def). Decomposes WMMA /// variants present on GFX1250 B0 but not on A0 into pairs of narrower WMMAs /// that exist on both steppings, emitted as trampolines appended to .text: /// @@ -62,13 +63,6 @@ #include "comgr-hotswap-internal.h" -// MSVC does not support weak symbols; LLVM_ATTRIBUTE_WEAK expands to nothing, -// so the stub in comgr-hotswap-b0a0.cpp becomes a regular definition and -// this file would produce a duplicate-symbol link error (LNK2005). Guard -// the strong override until a proper registration mechanism replaces the -// weak-symbol pattern on Windows (tracked in #2294 / #2285). -#if !defined(_MSC_VER) - #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -594,7 +588,7 @@ std::vector buildSplit32x16Asm(StringRef Replacement, // log() (so the failure is at least visible when AMD_COMGR_EMIT_VERBOSE_LOGS // is set) and returns 0. The early "did not match" path returns 0 // without logging. -uint32_t applyWmmaSplitPatches(PatchContext &Ctx, size_t Idx) { +static uint32_t applyWmmaSplitPatchesImpl(PatchContext &Ctx, size_t Idx) { InternalDecodedInst &DI = Ctx.Decoded[Idx]; std::optional Match = lookupSplitRule(DI.Mnemonic); @@ -679,7 +673,9 @@ uint32_t applyWmmaSplitPatches(PatchContext &Ctx, size_t Idx) { return 1; } +void registerWmmaSplitPatch(HotswapPatchVTable &VT) { + VT.applyWmmaSplitPatches = &applyWmmaSplitPatchesImpl; +} + } // namespace hotswap } // namespace COMGR - -#endif // !defined(_MSC_VER) diff --git a/amd/comgr/src/comgr-hotswap-patches.def b/amd/comgr/src/comgr-hotswap-patches.def index c7d0a5bf07225..c169e36905d5d 100644 --- a/amd/comgr/src/comgr-hotswap-patches.def +++ b/amd/comgr/src/comgr-hotswap-patches.def @@ -7,10 +7,9 @@ /// /// \file /// X-macro list of HotSwap patch modules. One line per patch; each entry -/// names the module suffix used to form (a) the strong implementation -/// symbol applyXxx (declared in comgr-hotswap-internal.h) and (b) the -/// installer registerXxxPatch(HotswapPatchVTable&) defined in the patch -/// module's .cpp file. +/// names the module suffix used to form the installer symbol +/// registerXxxPatch(HotswapPatchVTable&) defined in the patch module's .cpp +/// file. /// /// New patches add exactly one HOTSWAP_PATCH() line here (alpha-ordered) /// plus a sibling comgr-hotswap-patch-.cpp file that defines its @@ -18,10 +17,9 @@ /// comgr-hotswap-b0a0.cpp: once to forward-declare every registerXxxPatch, /// and once to call them all from installHotswapPatches(). /// -/// HotswapPatchVTable slots default to no-op stubs, so an entry without -/// a matching .cpp registrar at link time produces a link error rather -/// than a silent disable -- which is the entire point of replacing the -/// LLVM_ATTRIBUTE_WEAK pattern (see issue ROCm/llvm-project#2479). +/// HotswapPatchVTable slots default to nullptr. An entry here without a +/// matching .cpp registrar produces a link error rather than leaving the +/// patch silently unbound. /// //===----------------------------------------------------------------------===// @@ -33,3 +31,4 @@ HOTSWAP_PATCH(InPlace) HOTSWAP_PATCH(Trampoline) HOTSWAP_PATCH(Vop3px2Src2) HOTSWAP_PATCH(WmmaHazard) +HOTSWAP_PATCH(WmmaSplit) diff --git a/amd/comgr/test-unit/CMakeLists.txt b/amd/comgr/test-unit/CMakeLists.txt index 0983fe8356d95..fde76bf88f314 100644 --- a/amd/comgr/test-unit/CMakeLists.txt +++ b/amd/comgr/test-unit/CMakeLists.txt @@ -105,6 +105,7 @@ add_executable(HotswapMCTests ../src/comgr-hotswap-patch-trampoline.cpp ../src/comgr-hotswap-patch-vop3px2-src2.cpp ../src/comgr-hotswap-patch-wmma-hazard.cpp + ../src/comgr-hotswap-patch-wmma-split.cpp ../src/comgr-env.cpp) llvm_map_components_to_libnames(COMGR_TEST_UNIT_MC_LIBS diff --git a/amd/comgr/test-unit/HotswapMCTest.cpp b/amd/comgr/test-unit/HotswapMCTest.cpp index b751f595a677e..f875542895bf0 100644 --- a/amd/comgr/test-unit/HotswapMCTest.cpp +++ b/amd/comgr/test-unit/HotswapMCTest.cpp @@ -441,12 +441,13 @@ TEST(PatchScaleSrc2, PreservesNonScaleSrc2Bits) { // kept as a worked example for future patch authors. Wrong-slot // bugs in the other register*Patch functions are caught via the // install end-to-end test below. -// 2. End-to-end install: defaults are null, .def entries get bound, -// slots without a .def entry stay null (the dispatcher's no-op -// contract). -// 3. The Meyers singleton accessor is a real singleton and bindings -// persist on it (production install path in retargetCodeObjectB0A0 -// relies on this). +// 2. End-to-end install: a default-constructed vtable has null slots, +// installHotswapPatches() binds every .def entry, and slots without +// a .def entry stay null (the dispatcher's no-op contract). +// 3. The production singleton accessor returns the same fully-bound +// vtable on every call -- the initializer eagerly runs the install +// under the C++11 magic-static rule, so production code never sees +// an empty vtable. TEST(HotswapPatchVTable, RegisterInPlaceBindsOnlyInPlaceSlot) { HotswapPatchVTable VT; @@ -477,22 +478,27 @@ TEST(HotswapPatchVTable, InstallBindsRegisteredAndLeavesUnregisteredNull) { EXPECT_NE(VT.applyTrampolinePatches, nullptr); EXPECT_NE(VT.applyWmmaHazardPatch, nullptr); EXPECT_NE(VT.applyVop3px2Src2Fix, nullptr); + EXPECT_NE(VT.applyWmmaSplitPatches, nullptr); // Slots without a .def entry stay null; the dispatcher relies on - // this to treat unimplemented pass families (wmma-split, scratch - // today) as no-op. - EXPECT_EQ(VT.applyWmmaSplitPatches, nullptr); + // this to treat unimplemented pass families (scratch today) as no-op. EXPECT_EQ(VT.applyScratchPatches, nullptr); } -TEST(HotswapPatchVTable, ProcessSingletonIdentityAndInstallPersists) { +TEST(HotswapPatchVTable, ProcessSingletonIdentityAndEagerInstall) { HotswapPatchVTable &VT1 = getHotswapPatchVTable(); HotswapPatchVTable &VT2 = getHotswapPatchVTable(); EXPECT_EQ(&VT1, &VT2); - installHotswapPatches(VT1); - EXPECT_NE(VT2.applyInPlacePatches, nullptr); - EXPECT_NE(VT2.applyTrampolinePatches, nullptr); - EXPECT_NE(VT2.applyWmmaHazardPatch, nullptr); - EXPECT_NE(VT2.applyVop3px2Src2Fix, nullptr); + // The singleton's initializer runs installHotswapPatches() on first + // access, so every .def-backed slot is already bound by the time the + // first reference is handed out. Pinning this contract here keeps the + // dispatcher safe to call getHotswapPatchVTable() without any explicit + // install step at the entry point. + EXPECT_NE(VT1.applyInPlacePatches, nullptr); + EXPECT_NE(VT1.applyTrampolinePatches, nullptr); + EXPECT_NE(VT1.applyWmmaHazardPatch, nullptr); + EXPECT_NE(VT1.applyVop3px2Src2Fix, nullptr); + EXPECT_NE(VT1.applyWmmaSplitPatches, nullptr); + EXPECT_EQ(VT1.applyScratchPatches, nullptr); }