diff --git a/amd/comgr/src/comgr-hotswap-b0a0.cpp b/amd/comgr/src/comgr-hotswap-b0a0.cpp index a7352fe4714c9..a99c933e152f4 100644 --- a/amd/comgr/src/comgr-hotswap-b0a0.cpp +++ b/amd/comgr/src/comgr-hotswap-b0a0.cpp @@ -10,9 +10,20 @@ /// retargetCodeObjectB0A0 orchestrator that drives the full pipeline: /// decode -> patch -> trampoline growth -> DWARF update. /// -/// Patch entry points are declared as weak symbols returning 0. Each -/// comgr-hotswap-patch-*.cpp file provides a strong override, allowing -/// patches to land as independent PRs with no merge conflicts. +/// Patch passes are dispatched through HotswapPatchVTable. The membership +/// list lives in comgr-hotswap-patches.def; each entry corresponds to one +/// slot on the vtable and one register*Patch function in a sibling +/// comgr-hotswap-patch-*.cpp. installHotswapPatches() walks the .def to +/// bind every slot. The vtable is exposed through getHotswapPatchVTable(), +/// a Meyers singleton whose initializer eagerly runs installHotswapPatches +/// on its private storage; C++11 [stmt.dcl]/4 guarantees this happens +/// exactly once and is safe under concurrent first access, so the +/// dispatcher and the amd_comgr_hotswap_rewrite entry point can fetch the +/// fully-bound vtable with no explicit synchronization. +/// This replaces the prior LLVM_ATTRIBUTE_WEAK + `#if !defined(_MSC_VER)` +/// override pattern, which silently disabled hotswap on Windows because +/// PE/COFF does not honour weak the way ELF does +/// (issue ROCm/llvm-project#2479). /// //===----------------------------------------------------------------------===// @@ -59,18 +70,13 @@ static RewriteConfig makeGfx1250B0A0Config() { return Config; } -// -- Forward declarations for patch/liveness/DWARF stubs ---------------------- +// -- Forward declarations for liveness/DWARF stubs ---------------------------- // -// These have weak default definitions below; patch .cpp files may provide -// strong overrides at link time so patches can land as independent PRs. -// Patch entry points are declared in comgr-hotswap-internal.h. - -uint32_t applyInPlacePatches(PatchContext &, size_t); -uint32_t applyTrampolinePatches(PatchContext &, size_t); -uint32_t applyWmmaHazardPatch(PatchContext &); -uint32_t applyVop3px2Src2Fix(PatchContext &); -uint32_t applyWmmaSplitPatches(PatchContext &, size_t); -uint32_t applyScratchPatches(PatchContext &, size_t); +// These have weak default definitions below. The apply* patch families use +// HotswapPatchVTable dispatch; these lower-level helpers stay on weak stubs +// until a real implementation lands, at which point they should migrate to +// an explicit registration contract as well. + CFG buildCfg(ArrayRef Decoded, const MCInstrInfo &); LivenessInfo computeLiveness(ArrayRef Decoded, const CFG &, const MCInstrInfo &, const MCRegisterInfo &, @@ -93,21 +99,37 @@ void patchDebugInfo(uint8_t *Elf, size_t ElfSize, uint64_t TextAddr, void patchDebugFrame(uint8_t *Elf, size_t ElfSize, uint64_t TextAddr, uint64_t TextSizeBefore, uint64_t TrampTotal); -// -- Weak-symbol patch stubs -------------------------------------------------- - -LLVM_ATTRIBUTE_WEAK uint32_t applyInPlacePatches(PatchContext &, size_t) { - return 0; -} -LLVM_ATTRIBUTE_WEAK uint32_t applyTrampolinePatches(PatchContext &, size_t) { - return 0; -} -LLVM_ATTRIBUTE_WEAK uint32_t applyWmmaHazardPatch(PatchContext &) { return 0; } -LLVM_ATTRIBUTE_WEAK uint32_t applyVop3px2Src2Fix(PatchContext &) { return 0; } -LLVM_ATTRIBUTE_WEAK uint32_t applyWmmaSplitPatches(PatchContext &, size_t) { - return 0; +// -- HotswapPatchVTable plumbing ---------------------------------------------- +// +// Patch-module forward declarations live in comgr-hotswap-internal.h +// (driven off the same comgr-hotswap-patches.def), so libamd_comgr and +// the unit tests share one prototype source. Here we supply the +// singleton accessor and the installer that walks the .def to invoke +// each register*Patch. A .def entry without a matching register*Patch +// definition produces a link error at libamd_comgr link time. +// +// installHotswapPatches() is exposed in the header so unit tests can +// bind a local HotswapPatchVTable for fixture-style coverage. Production +// code never calls it directly: getHotswapPatchVTable()'s initializer +// invokes it eagerly on the singleton's private storage, which the C++11 +// magic-static rule guarantees runs exactly once even under concurrent +// first access. That removes both the explicit std::call_once at the +// retargetCodeObjectB0A0 entry point and any inter-TU static-init order +// dependency on the patch modules. + +void installHotswapPatches(HotswapPatchVTable &VT) { +#define HOTSWAP_PATCH(Name) register##Name##Patch(VT); +#include "comgr-hotswap-patches.def" +#undef HOTSWAP_PATCH } -LLVM_ATTRIBUTE_WEAK uint32_t applyScratchPatches(PatchContext &, size_t) { - return 0; + +HotswapPatchVTable &getHotswapPatchVTable() { + static HotswapPatchVTable VT = [] { + HotswapPatchVTable Tmp; + installHotswapPatches(Tmp); + return Tmp; + }(); + return VT; } // -- Weak-symbol liveness stubs ----------------------------------------------- @@ -283,6 +305,15 @@ buildNopSledMap(ArrayRef Decoded, const LLVMState &LS) { // -- applyGfx1250B0toA0Rules -------------------------------------------------- +/// Per-instruction patch-pass trampoline: invokes \p Fn with (\p Ctx, +/// \p Idx) if it is non-null, or returns 0 otherwise. nullptr means +/// the corresponding pass family has no implementation linked in +/// (e.g. scratch today), which the dispatcher treats as a no-op slot. +static uint32_t runPerInstPass(uint32_t (*Fn)(PatchContext &, size_t), + PatchContext &Ctx, size_t Idx) { + return Fn ? Fn(Ctx, Idx) : 0; +} + /// Main per-instruction dispatcher for the GFX1250 B0-to-A0 rewrite. /// Builds the NOP sled map, CFG, and VGPR liveness for the decoded stream, /// then walks each decoded instruction and runs the patch passes in order @@ -320,29 +351,31 @@ applyGfx1250B0toA0Rules(std::vector &Decoded, OutTrampolines, Sleds, Elf, Liveness, KernelStats, OutScratchPatches}; + const HotswapPatchVTable &VT = getHotswapPatchVTable(); + + // Skip undecoded slots produced by the decoder for bytes it could not + // classify as a valid instruction; the dispatcher has nothing to match + // against on these and we must not invoke the patch passes for them. + constexpr StringLiteral UnknownMnemonic = ""; + for (size_t Idx = 0, E = Decoded.size(); Idx < E; ++Idx) { const InternalDecodedInst &DI = Decoded[Idx]; - if (DI.Mnemonic == "") + if (DI.Mnemonic == UnknownMnemonic) continue; - uint32_t P = 0; - P += applyInPlacePatches(Ctx, Idx); - if (P) { + if (uint32_t P = runPerInstPass(VT.applyInPlacePatches, Ctx, Idx)) { Patched += P; continue; } - P += applyTrampolinePatches(Ctx, Idx); - if (P) { + if (uint32_t P = runPerInstPass(VT.applyTrampolinePatches, Ctx, Idx)) { Patched += P; continue; } - P += applyWmmaSplitPatches(Ctx, Idx); - if (P) { + if (uint32_t P = runPerInstPass(VT.applyWmmaSplitPatches, Ctx, Idx)) { Patched += P; continue; } - P += applyScratchPatches(Ctx, Idx); - if (P) { + if (uint32_t P = runPerInstPass(VT.applyScratchPatches, Ctx, Idx)) { Patched += P; continue; } @@ -359,8 +392,10 @@ applyGfx1250B0toA0Rules(std::vector &Decoded, // treat a branch as WMMA/VALU/VOP3PX2. // If a future patch family changes instruction boundaries, the Decoded // stream must be rebuilt before these passes run. - Patched += applyWmmaHazardPatch(Ctx); - Patched += applyVop3px2Src2Fix(Ctx); + if (VT.applyWmmaHazardPatch) + Patched += VT.applyWmmaHazardPatch(Ctx); + if (VT.applyVop3px2Src2Fix) + Patched += VT.applyVop3px2Src2Fix(Ctx); for (const llvm::StringMapEntry &KV : KernelStats) { StringRef KName = KV.first(); @@ -483,6 +518,11 @@ static void runScratchVerification(WritableMemoryBuffer &OutBuf, amd_comgr_status_t retargetCodeObjectB0A0(const void *ElfData, size_t ElfSize, const TargetIdentifier &TargetIdent, std::unique_ptr &Out) { + // The dispatcher fetches the patch vtable lazily via + // getHotswapPatchVTable() inside applyGfx1250B0toA0Rules; the singleton's + // initializer binds every register*Patch slot on first access, so no + // explicit install step is needed here. + // Take a working copy so the input is preserved and we have a mutable // buffer to parse / patch. std::vector Buf(static_cast(ElfData), diff --git a/amd/comgr/src/comgr-hotswap-internal.h b/amd/comgr/src/comgr-hotswap-internal.h index 29e132c55adde..604fcaf86f5fc 100644 --- a/amd/comgr/src/comgr-hotswap-internal.h +++ b/amd/comgr/src/comgr-hotswap-internal.h @@ -509,13 +509,67 @@ struct PatchContext { uint32_t InstSize, llvm::ArrayRef Replacement); -// -- Patch entry points (weak stubs in comgr-hotswap-b0a0.cpp) ---------------- +// -- Patch dispatch vtable ---------------------------------------------------- +// +// Function-pointer dispatch table that replaces the prior LLVM_ATTRIBUTE_WEAK +// + `#if !defined(_MSC_VER)` override pattern. PE/COFF does not honour weak +// the way ELF does, so on Windows the weak stubs silently won every patch +// call and the feature was a no-op (issue ROCm/llvm-project#2479). +// +// Patch modules supply their implementations through register*Patch +// functions invoked by installHotswapPatches(). The membership list is +// comgr-hotswap-patches.def; each entry there corresponds to one slot +// below and one register*Patch function in a sibling +// comgr-hotswap-patch-*.cpp. nullptr slots are treated as no-op by the +// dispatcher, so an unmigrated pass family (e.g. scratch) is safe to +// leave unbound until its first strong override lands. +// +// The singleton accessor below eagerly installs every registered slot in +// its own initializer, so production callers never observe an empty +// vtable. installHotswapPatches() is still exported for unit tests that +// want to drive the install against a local HotswapPatchVTable. + +struct HotswapPatchVTable { + // Per-instruction passes: called in declaration order; first non-zero + // return wins for an instruction (matches the pre-vtable dispatcher + // behaviour in applyGfx1250B0toA0Rules). + uint32_t (*applyInPlacePatches)(PatchContext &, size_t) = nullptr; + uint32_t (*applyTrampolinePatches)(PatchContext &, size_t) = nullptr; + uint32_t (*applyWmmaSplitPatches)(PatchContext &, size_t) = nullptr; + uint32_t (*applyScratchPatches)(PatchContext &, size_t) = nullptr; + + // Whole-kernel passes: called once per kernel after the per-instruction + // loop completes. + uint32_t (*applyWmmaHazardPatch)(PatchContext &) = nullptr; + uint32_t (*applyVop3px2Src2Fix)(PatchContext &) = nullptr; +}; -uint32_t applyInPlacePatches(PatchContext &Ctx, size_t Idx); -uint32_t applyTrampolinePatches(PatchContext &Ctx, size_t Idx); -uint32_t applyWmmaHazardPatch(PatchContext &Ctx); -uint32_t applyWmmaSplitPatches(PatchContext &Ctx, size_t Idx); -uint32_t applyScratchPatches(PatchContext &Ctx, size_t Idx); +/// Walk comgr-hotswap-patches.def and bind every patch module's +/// implementation into \p VT by calling its register*Patch function. +/// A missing register*Patch produces a link error, which is the +/// loud-failure shape the weak-symbol pattern lacked. Production code +/// never calls this directly; it runs inside getHotswapPatchVTable()'s +/// initializer. Exposed here so unit tests can drive the install against +/// a local HotswapPatchVTable. +void installHotswapPatches(HotswapPatchVTable &VT); + +/// Process-wide HotswapPatchVTable singleton (Meyers-style). The +/// initializer eagerly calls installHotswapPatches() on its own storage, +/// so every reference returned here is to a fully bound vtable. C++11 +/// [stmt.dcl]/4 guarantees the initializer runs exactly once and is safe +/// under concurrent first access, which removes the need for an explicit +/// std::call_once at the entry point and any inter-TU static-init order +/// contract on the patch modules. +HotswapPatchVTable &getHotswapPatchVTable(); + +// Forward-declare every patch module's installer from the central .def +// registry. Patch modules define these in their comgr-hotswap-patch-*.cpp; +// installHotswapPatches() consumes them; unit tests under test-unit/ also +// invoke them directly. A patches.def line with no matching definition +// produces a libamd_comgr / HotswapMCTests link error. +#define HOTSWAP_PATCH(Name) void register##Name##Patch(HotswapPatchVTable &); +#include "comgr-hotswap-patches.def" +#undef HOTSWAP_PATCH // -- Function declarations (B0-to-A0 policy layer) ---------------------------- diff --git a/amd/comgr/src/comgr-hotswap-patch-inplace.cpp b/amd/comgr/src/comgr-hotswap-patch-inplace.cpp index ccfa18401d438..473d5b57bc60c 100644 --- a/amd/comgr/src/comgr-hotswap-patch-inplace.cpp +++ b/amd/comgr/src/comgr-hotswap-patch-inplace.cpp @@ -23,13 +23,6 @@ #include "comgr-hotswap-internal.h" -// MSVC does not support weak symbols; LLVM_ATTRIBUTE_WEAK expands to nothing, -// so the stub in comgr-hotswap-b0a0.cpp becomes a regular definition and -// this file would produce a duplicate-symbol link error (LNK2005). Guard -// the strong override until a proper registration mechanism replaces the -// weak-symbol pattern on Windows (tracked in #2294 / #2285). -#if !defined(_MSC_VER) - #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCCodeEmitter.h" @@ -99,7 +92,7 @@ bool swapOpcode(InternalDecodedInst &DI, uint8_t *Text, const LLVMState &LS, } // anonymous namespace -uint32_t applyInPlacePatches(PatchContext &Ctx, size_t Idx) { +static uint32_t applyInPlacePatchesImpl(PatchContext &Ctx, size_t Idx) { InternalDecodedInst &DI = Ctx.Decoded[Idx]; StringRef Mnemonic(DI.Mnemonic); @@ -162,7 +155,9 @@ uint32_t applyInPlacePatches(PatchContext &Ctx, size_t Idx) { return 0; } +void registerInPlacePatch(HotswapPatchVTable &VT) { + VT.applyInPlacePatches = &applyInPlacePatchesImpl; +} + } // namespace hotswap } // namespace COMGR - -#endif // !defined(_MSC_VER) diff --git a/amd/comgr/src/comgr-hotswap-patch-trampoline.cpp b/amd/comgr/src/comgr-hotswap-patch-trampoline.cpp index 2698f2a418f72..0d6c8281bc9be 100644 --- a/amd/comgr/src/comgr-hotswap-patch-trampoline.cpp +++ b/amd/comgr/src/comgr-hotswap-patch-trampoline.cpp @@ -15,13 +15,6 @@ #include "comgr-hotswap-internal.h" -// MSVC does not support weak symbols; LLVM_ATTRIBUTE_WEAK expands to nothing, -// so the stub in comgr-hotswap-b0a0.cpp becomes a regular definition and -// this file would produce a duplicate-symbol link error (LNK2005). Guard -// the strong override until a proper registration mechanism replaces the -// weak-symbol pattern on Windows (tracked in #2294 / #2285). -#if !defined(_MSC_VER) - #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCCodeEmitter.h" @@ -361,7 +354,7 @@ static bool patchDs2AddrStride64(PatchContext &Ctx, size_t Idx) { // // ds_*_2addr_stride64_* -> split into two single-address DS ops -uint32_t applyTrampolinePatches(PatchContext &Ctx, size_t Idx) { +static uint32_t applyTrampolinePatchesImpl(PatchContext &Ctx, size_t Idx) { StringRef Mnem(Ctx.Decoded[Idx].Mnemonic); if (!getDs2AddrReplacement(Mnem).empty()) @@ -370,7 +363,9 @@ uint32_t applyTrampolinePatches(PatchContext &Ctx, size_t Idx) { return 0; } +void registerTrampolinePatch(HotswapPatchVTable &VT) { + VT.applyTrampolinePatches = &applyTrampolinePatchesImpl; +} + } // namespace hotswap } // namespace COMGR - -#endif // !defined(_MSC_VER) diff --git a/amd/comgr/src/comgr-hotswap-patch-vop3px2-src2.cpp b/amd/comgr/src/comgr-hotswap-patch-vop3px2-src2.cpp index 98f287bd7908b..69a2f16ac8a76 100644 --- a/amd/comgr/src/comgr-hotswap-patch-vop3px2-src2.cpp +++ b/amd/comgr/src/comgr-hotswap-patch-vop3px2-src2.cpp @@ -33,8 +33,6 @@ #include "comgr-hotswap-internal.h" -#if !defined(_MSC_VER) - #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" @@ -90,7 +88,7 @@ bool patchScaleSrc2(uint8_t *InstBytes) { // This only fires on the B0-to-A0 rewrite path (applyGfx1250B0toA0Rules). // A0-native binaries are compiled with an A0-targeted Clang that sets the // field correctly at codegen time, so they do not need hotswap rewriting. -uint32_t applyVop3px2Src2Fix(PatchContext &Ctx) { +static uint32_t applyVop3px2Src2FixImpl(PatchContext &Ctx) { uint32_t Patched = 0; unsigned Scanned = 0; @@ -112,7 +110,9 @@ uint32_t applyVop3px2Src2Fix(PatchContext &Ctx) { return Patched; } +void registerVop3px2Src2Patch(HotswapPatchVTable &VT) { + VT.applyVop3px2Src2Fix = &applyVop3px2Src2FixImpl; +} + } // namespace hotswap } // namespace COMGR - -#endif // !defined(_MSC_VER) diff --git a/amd/comgr/src/comgr-hotswap-patch-wmma-hazard.cpp b/amd/comgr/src/comgr-hotswap-patch-wmma-hazard.cpp index 336cc15bbd25f..893806fcfdd73 100644 --- a/amd/comgr/src/comgr-hotswap-patch-wmma-hazard.cpp +++ b/amd/comgr/src/comgr-hotswap-patch-wmma-hazard.cpp @@ -15,8 +15,6 @@ #include "comgr-hotswap-internal.h" -#if !defined(_MSC_VER) - #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" @@ -172,7 +170,7 @@ std::vector findWmmaCoexecHazards(const PatchContext &Ctx) { } // anonymous namespace -uint32_t applyWmmaHazardPatch(PatchContext &Ctx) { +static uint32_t applyWmmaHazardPatchImpl(PatchContext &Ctx) { std::vector Hazards = findWmmaCoexecHazards(Ctx); if (Hazards.empty()) return 0; @@ -207,7 +205,9 @@ uint32_t applyWmmaHazardPatch(PatchContext &Ctx) { return Patched; } +void registerWmmaHazardPatch(HotswapPatchVTable &VT) { + VT.applyWmmaHazardPatch = &applyWmmaHazardPatchImpl; +} + } // namespace hotswap } // namespace COMGR - -#endif // !defined(_MSC_VER) diff --git a/amd/comgr/src/comgr-hotswap-patch-wmma-split.cpp b/amd/comgr/src/comgr-hotswap-patch-wmma-split.cpp index e3bdd463f9268..9c763ebf25c1d 100644 --- a/amd/comgr/src/comgr-hotswap-patch-wmma-split.cpp +++ b/amd/comgr/src/comgr-hotswap-patch-wmma-split.cpp @@ -6,7 +6,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// Strong-symbol override for applyWmmaSplitPatches. Decomposes WMMA +/// Patch module bound to HotswapPatchVTable::applyWmmaSplitPatches via +/// registerWmmaSplitPatch (see comgr-hotswap-patches.def). Decomposes WMMA /// variants present on GFX1250 B0 but not on A0 into pairs of narrower WMMAs /// that exist on both steppings, emitted as trampolines appended to .text: /// @@ -62,13 +63,6 @@ #include "comgr-hotswap-internal.h" -// MSVC does not support weak symbols; LLVM_ATTRIBUTE_WEAK expands to nothing, -// so the stub in comgr-hotswap-b0a0.cpp becomes a regular definition and -// this file would produce a duplicate-symbol link error (LNK2005). Guard -// the strong override until a proper registration mechanism replaces the -// weak-symbol pattern on Windows (tracked in #2294 / #2285). -#if !defined(_MSC_VER) - #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -594,7 +588,7 @@ std::vector buildSplit32x16Asm(StringRef Replacement, // log() (so the failure is at least visible when AMD_COMGR_EMIT_VERBOSE_LOGS // is set) and returns 0. The early "did not match" path returns 0 // without logging. -uint32_t applyWmmaSplitPatches(PatchContext &Ctx, size_t Idx) { +static uint32_t applyWmmaSplitPatchesImpl(PatchContext &Ctx, size_t Idx) { InternalDecodedInst &DI = Ctx.Decoded[Idx]; std::optional Match = lookupSplitRule(DI.Mnemonic); @@ -679,7 +673,9 @@ uint32_t applyWmmaSplitPatches(PatchContext &Ctx, size_t Idx) { return 1; } +void registerWmmaSplitPatch(HotswapPatchVTable &VT) { + VT.applyWmmaSplitPatches = &applyWmmaSplitPatchesImpl; +} + } // namespace hotswap } // namespace COMGR - -#endif // !defined(_MSC_VER) diff --git a/amd/comgr/src/comgr-hotswap-patches.def b/amd/comgr/src/comgr-hotswap-patches.def new file mode 100644 index 0000000000000..c169e36905d5d --- /dev/null +++ b/amd/comgr/src/comgr-hotswap-patches.def @@ -0,0 +1,34 @@ +//===- comgr-hotswap-patches.def - HotSwap patch registry ----------------===// +// +// Part of Comgr, under the Apache License v2.0 with LLVM Exceptions. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// X-macro list of HotSwap patch modules. One line per patch; each entry +/// names the module suffix used to form the installer symbol +/// registerXxxPatch(HotswapPatchVTable&) defined in the patch module's .cpp +/// file. +/// +/// New patches add exactly one HOTSWAP_PATCH() line here (alpha-ordered) +/// plus a sibling comgr-hotswap-patch-.cpp file that defines its +/// registerXxxPatch function. The dispatcher reads this file twice from +/// comgr-hotswap-b0a0.cpp: once to forward-declare every registerXxxPatch, +/// and once to call them all from installHotswapPatches(). +/// +/// HotswapPatchVTable slots default to nullptr. An entry here without a +/// matching .cpp registrar produces a link error rather than leaving the +/// patch silently unbound. +/// +//===----------------------------------------------------------------------===// + +#ifndef HOTSWAP_PATCH +#error "comgr-hotswap-patches.def included without HOTSWAP_PATCH defined" +#endif + +HOTSWAP_PATCH(InPlace) +HOTSWAP_PATCH(Trampoline) +HOTSWAP_PATCH(Vop3px2Src2) +HOTSWAP_PATCH(WmmaHazard) +HOTSWAP_PATCH(WmmaSplit) diff --git a/amd/comgr/test-unit/CMakeLists.txt b/amd/comgr/test-unit/CMakeLists.txt index 7e11d495ca764..fde76bf88f314 100644 --- a/amd/comgr/test-unit/CMakeLists.txt +++ b/amd/comgr/test-unit/CMakeLists.txt @@ -98,10 +98,14 @@ comgr_configure_test_target(HotswapElfTests) add_executable(HotswapMCTests HotswapMCTest.cpp + ../src/comgr-hotswap-b0a0.cpp ../src/comgr-hotswap-elf.cpp ../src/comgr-hotswap-llvm.cpp + ../src/comgr-hotswap-patch-inplace.cpp + ../src/comgr-hotswap-patch-trampoline.cpp ../src/comgr-hotswap-patch-vop3px2-src2.cpp ../src/comgr-hotswap-patch-wmma-hazard.cpp + ../src/comgr-hotswap-patch-wmma-split.cpp ../src/comgr-env.cpp) llvm_map_components_to_libnames(COMGR_TEST_UNIT_MC_LIBS diff --git a/amd/comgr/test-unit/HotswapMCTest.cpp b/amd/comgr/test-unit/HotswapMCTest.cpp index 7b490d7228fb3..f875542895bf0 100644 --- a/amd/comgr/test-unit/HotswapMCTest.cpp +++ b/amd/comgr/test-unit/HotswapMCTest.cpp @@ -428,3 +428,77 @@ TEST(PatchScaleSrc2, PreservesNonScaleSrc2Bits) { EXPECT_EQ(Inst[6] & 0xFC, 0x00); EXPECT_EQ(Inst[7] & 0x07, 0x04); } + +// -- HotswapPatchVTable ------------------------------------------------------- +// +// Tests for the .def-driven patch registry that replaced the +// LLVM_ATTRIBUTE_WEAK override pattern (issue ROCm/llvm-project#2479). +// +// Coverage strategy: link errors already catch missing register*Patch +// definitions and missing comgr-hotswap-patches.def entries, so we only +// test what the linker cannot: +// 1. One canonical per-installer "binds only its own slot" check, +// kept as a worked example for future patch authors. Wrong-slot +// bugs in the other register*Patch functions are caught via the +// install end-to-end test below. +// 2. End-to-end install: a default-constructed vtable has null slots, +// installHotswapPatches() binds every .def entry, and slots without +// a .def entry stay null (the dispatcher's no-op contract). +// 3. The production singleton accessor returns the same fully-bound +// vtable on every call -- the initializer eagerly runs the install +// under the C++11 magic-static rule, so production code never sees +// an empty vtable. + +TEST(HotswapPatchVTable, RegisterInPlaceBindsOnlyInPlaceSlot) { + HotswapPatchVTable VT; + registerInPlacePatch(VT); + EXPECT_NE(VT.applyInPlacePatches, nullptr); + EXPECT_EQ(VT.applyTrampolinePatches, nullptr); + EXPECT_EQ(VT.applyWmmaHazardPatch, nullptr); + EXPECT_EQ(VT.applyVop3px2Src2Fix, nullptr); +} + +TEST(HotswapPatchVTable, InstallBindsRegisteredAndLeavesUnregisteredNull) { + HotswapPatchVTable VT; + + // Defaults: every slot null (no patch implementation linked yet). + EXPECT_EQ(VT.applyInPlacePatches, nullptr); + EXPECT_EQ(VT.applyTrampolinePatches, nullptr); + EXPECT_EQ(VT.applyWmmaHazardPatch, nullptr); + EXPECT_EQ(VT.applyVop3px2Src2Fix, nullptr); + EXPECT_EQ(VT.applyWmmaSplitPatches, nullptr); + EXPECT_EQ(VT.applyScratchPatches, nullptr); + + installHotswapPatches(VT); + + // Slots backed by a comgr-hotswap-patches.def entry get bound. If a + // register*Patch fails to set its slot (or sets the wrong one), one + // of these EXPECT_NEs catches it. + EXPECT_NE(VT.applyInPlacePatches, nullptr); + EXPECT_NE(VT.applyTrampolinePatches, nullptr); + EXPECT_NE(VT.applyWmmaHazardPatch, nullptr); + EXPECT_NE(VT.applyVop3px2Src2Fix, nullptr); + EXPECT_NE(VT.applyWmmaSplitPatches, nullptr); + + // Slots without a .def entry stay null; the dispatcher relies on + // this to treat unimplemented pass families (scratch today) as no-op. + EXPECT_EQ(VT.applyScratchPatches, nullptr); +} + +TEST(HotswapPatchVTable, ProcessSingletonIdentityAndEagerInstall) { + HotswapPatchVTable &VT1 = getHotswapPatchVTable(); + HotswapPatchVTable &VT2 = getHotswapPatchVTable(); + EXPECT_EQ(&VT1, &VT2); + + // The singleton's initializer runs installHotswapPatches() on first + // access, so every .def-backed slot is already bound by the time the + // first reference is handed out. Pinning this contract here keeps the + // dispatcher safe to call getHotswapPatchVTable() without any explicit + // install step at the entry point. + EXPECT_NE(VT1.applyInPlacePatches, nullptr); + EXPECT_NE(VT1.applyTrampolinePatches, nullptr); + EXPECT_NE(VT1.applyWmmaHazardPatch, nullptr); + EXPECT_NE(VT1.applyVop3px2Src2Fix, nullptr); + EXPECT_NE(VT1.applyWmmaSplitPatches, nullptr); + EXPECT_EQ(VT1.applyScratchPatches, nullptr); +}