Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 80 additions & 40 deletions amd/comgr/src/comgr-hotswap-b0a0.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,20 @@
/// retargetCodeObjectB0A0 orchestrator that drives the full pipeline:
/// decode -> patch -> trampoline growth -> DWARF update.
///
/// Patch entry points are declared as weak symbols returning 0. Each
/// comgr-hotswap-patch-*.cpp file provides a strong override, allowing
/// patches to land as independent PRs with no merge conflicts.
/// Patch passes are dispatched through HotswapPatchVTable. The membership
/// list lives in comgr-hotswap-patches.def; each entry corresponds to one
/// slot on the vtable and one register*Patch function in a sibling
/// comgr-hotswap-patch-*.cpp. installHotswapPatches() walks the .def to
/// bind every slot. The vtable is exposed through getHotswapPatchVTable(),
/// a Meyers singleton whose initializer eagerly runs installHotswapPatches
/// on its private storage; C++11 [stmt.dcl]/4 guarantees this happens
/// exactly once and is safe under concurrent first access, so the
/// dispatcher and the amd_comgr_hotswap_rewrite entry point can fetch the
/// fully-bound vtable with no explicit synchronization.
/// This replaces the prior LLVM_ATTRIBUTE_WEAK + `#if !defined(_MSC_VER)`
/// override pattern, which silently disabled hotswap on Windows because
/// PE/COFF does not honour weak the way ELF does
/// (issue ROCm/llvm-project#2479).
///
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -59,18 +70,13 @@ static RewriteConfig makeGfx1250B0A0Config() {
return Config;
}

// -- Forward declarations for patch/liveness/DWARF stubs ----------------------
// -- Forward declarations for liveness/DWARF stubs ----------------------------
//
// These have weak default definitions below; patch .cpp files may provide
// strong overrides at link time so patches can land as independent PRs.
// Patch entry points are declared in comgr-hotswap-internal.h.

uint32_t applyInPlacePatches(PatchContext &, size_t);
uint32_t applyTrampolinePatches(PatchContext &, size_t);
uint32_t applyWmmaHazardPatch(PatchContext &);
uint32_t applyVop3px2Src2Fix(PatchContext &);
uint32_t applyWmmaSplitPatches(PatchContext &, size_t);
uint32_t applyScratchPatches(PatchContext &, size_t);
// These have weak default definitions below. The apply* patch families use
// HotswapPatchVTable dispatch; these lower-level helpers stay on weak stubs
// until a real implementation lands, at which point they should migrate to
// an explicit registration contract as well.

CFG buildCfg(ArrayRef<InternalDecodedInst> Decoded, const MCInstrInfo &);
LivenessInfo computeLiveness(ArrayRef<InternalDecodedInst> Decoded, const CFG &,
const MCInstrInfo &, const MCRegisterInfo &,
Expand All @@ -93,21 +99,37 @@ void patchDebugInfo(uint8_t *Elf, size_t ElfSize, uint64_t TextAddr,
void patchDebugFrame(uint8_t *Elf, size_t ElfSize, uint64_t TextAddr,
uint64_t TextSizeBefore, uint64_t TrampTotal);

// -- Weak-symbol patch stubs --------------------------------------------------

LLVM_ATTRIBUTE_WEAK uint32_t applyInPlacePatches(PatchContext &, size_t) {
return 0;
}
LLVM_ATTRIBUTE_WEAK uint32_t applyTrampolinePatches(PatchContext &, size_t) {
return 0;
}
LLVM_ATTRIBUTE_WEAK uint32_t applyWmmaHazardPatch(PatchContext &) { return 0; }
LLVM_ATTRIBUTE_WEAK uint32_t applyVop3px2Src2Fix(PatchContext &) { return 0; }
LLVM_ATTRIBUTE_WEAK uint32_t applyWmmaSplitPatches(PatchContext &, size_t) {
return 0;
// -- HotswapPatchVTable plumbing ----------------------------------------------
//
// Patch-module forward declarations live in comgr-hotswap-internal.h
// (driven off the same comgr-hotswap-patches.def), so libamd_comgr and
// the unit tests share one prototype source. Here we supply the
// singleton accessor and the installer that walks the .def to invoke
// each register*Patch. A .def entry without a matching register*Patch
// definition produces a link error at libamd_comgr link time.
//
// installHotswapPatches() is exposed in the header so unit tests can
// bind a local HotswapPatchVTable for fixture-style coverage. Production
// code never calls it directly: getHotswapPatchVTable()'s initializer
// invokes it eagerly on the singleton's private storage, which the C++11
// magic-static rule guarantees runs exactly once even under concurrent
// first access. That removes both the explicit std::call_once at the
// retargetCodeObjectB0A0 entry point and any inter-TU static-init order
// dependency on the patch modules.

void installHotswapPatches(HotswapPatchVTable &VT) {
#define HOTSWAP_PATCH(Name) register##Name##Patch(VT);
#include "comgr-hotswap-patches.def"
#undef HOTSWAP_PATCH
}
LLVM_ATTRIBUTE_WEAK uint32_t applyScratchPatches(PatchContext &, size_t) {
return 0;

HotswapPatchVTable &getHotswapPatchVTable() {
static HotswapPatchVTable VT = [] {
HotswapPatchVTable Tmp;
installHotswapPatches(Tmp);
return Tmp;
}();
return VT;
}

// -- Weak-symbol liveness stubs -----------------------------------------------
Expand Down Expand Up @@ -283,6 +305,15 @@ buildNopSledMap(ArrayRef<InternalDecodedInst> Decoded, const LLVMState &LS) {

// -- applyGfx1250B0toA0Rules --------------------------------------------------

/// Per-instruction patch-pass trampoline: invokes \p Fn with (\p Ctx,
/// \p Idx) if it is non-null, or returns 0 otherwise. nullptr means
/// the corresponding pass family has no implementation linked in
/// (e.g. scratch today), which the dispatcher treats as a no-op slot.
static uint32_t runPerInstPass(uint32_t (*Fn)(PatchContext &, size_t),
PatchContext &Ctx, size_t Idx) {
return Fn ? Fn(Ctx, Idx) : 0;
}

/// Main per-instruction dispatcher for the GFX1250 B0-to-A0 rewrite.
/// Builds the NOP sled map, CFG, and VGPR liveness for the decoded stream,
/// then walks each decoded instruction and runs the patch passes in order
Expand Down Expand Up @@ -320,29 +351,31 @@ applyGfx1250B0toA0Rules(std::vector<InternalDecodedInst> &Decoded,
OutTrampolines, Sleds, Elf, Liveness, KernelStats,
OutScratchPatches};

const HotswapPatchVTable &VT = getHotswapPatchVTable();

// Skip undecoded slots produced by the decoder for bytes it could not
// classify as a valid instruction; the dispatcher has nothing to match
// against on these and we must not invoke the patch passes for them.
constexpr StringLiteral UnknownMnemonic = "<unknown>";

for (size_t Idx = 0, E = Decoded.size(); Idx < E; ++Idx) {
const InternalDecodedInst &DI = Decoded[Idx];
if (DI.Mnemonic == "<unknown>")
if (DI.Mnemonic == UnknownMnemonic)
continue;

uint32_t P = 0;
P += applyInPlacePatches(Ctx, Idx);
if (P) {
if (uint32_t P = runPerInstPass(VT.applyInPlacePatches, Ctx, Idx)) {
Patched += P;
continue;
}
P += applyTrampolinePatches(Ctx, Idx);
if (P) {
if (uint32_t P = runPerInstPass(VT.applyTrampolinePatches, Ctx, Idx)) {
Patched += P;
continue;
}
P += applyWmmaSplitPatches(Ctx, Idx);
if (P) {
if (uint32_t P = runPerInstPass(VT.applyWmmaSplitPatches, Ctx, Idx)) {
Patched += P;
continue;
}
P += applyScratchPatches(Ctx, Idx);
if (P) {
if (uint32_t P = runPerInstPass(VT.applyScratchPatches, Ctx, Idx)) {
Patched += P;
continue;
}
Expand All @@ -359,8 +392,10 @@ applyGfx1250B0toA0Rules(std::vector<InternalDecodedInst> &Decoded,
// treat a branch as WMMA/VALU/VOP3PX2.
// If a future patch family changes instruction boundaries, the Decoded
// stream must be rebuilt before these passes run.
Patched += applyWmmaHazardPatch(Ctx);
Patched += applyVop3px2Src2Fix(Ctx);
if (VT.applyWmmaHazardPatch)
Patched += VT.applyWmmaHazardPatch(Ctx);
if (VT.applyVop3px2Src2Fix)
Patched += VT.applyVop3px2Src2Fix(Ctx);

for (const llvm::StringMapEntry<KernelPatchStats> &KV : KernelStats) {
StringRef KName = KV.first();
Expand Down Expand Up @@ -483,6 +518,11 @@ static void runScratchVerification(WritableMemoryBuffer &OutBuf,
amd_comgr_status_t retargetCodeObjectB0A0(const void *ElfData, size_t ElfSize,
const TargetIdentifier &TargetIdent,
std::unique_ptr<MemoryBuffer> &Out) {
// The dispatcher fetches the patch vtable lazily via
// getHotswapPatchVTable() inside applyGfx1250B0toA0Rules; the singleton's
// initializer binds every register*Patch slot on first access, so no
// explicit install step is needed here.

// Take a working copy so the input is preserved and we have a mutable
// buffer to parse / patch.
std::vector<uint8_t> Buf(static_cast<const uint8_t *>(ElfData),
Expand Down
66 changes: 60 additions & 6 deletions amd/comgr/src/comgr-hotswap-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -509,13 +509,67 @@ struct PatchContext {
uint32_t InstSize,
llvm::ArrayRef<uint8_t> Replacement);

// -- Patch entry points (weak stubs in comgr-hotswap-b0a0.cpp) ----------------
// -- Patch dispatch vtable ----------------------------------------------------
//
// Function-pointer dispatch table that replaces the prior LLVM_ATTRIBUTE_WEAK
// + `#if !defined(_MSC_VER)` override pattern. PE/COFF does not honour weak
// the way ELF does, so on Windows the weak stubs silently won every patch
// call and the feature was a no-op (issue ROCm/llvm-project#2479).
//
// Patch modules supply their implementations through register*Patch
// functions invoked by installHotswapPatches(). The membership list is
// comgr-hotswap-patches.def; each entry there corresponds to one slot
// below and one register*Patch function in a sibling
// comgr-hotswap-patch-*.cpp. nullptr slots are treated as no-op by the
// dispatcher, so an unmigrated pass family (e.g. scratch) is safe to
// leave unbound until its first strong override lands.
//
// The singleton accessor below eagerly installs every registered slot in
// its own initializer, so production callers never observe an empty
// vtable. installHotswapPatches() is still exported for unit tests that
// want to drive the install against a local HotswapPatchVTable.

struct HotswapPatchVTable {
// Per-instruction passes: called in declaration order; first non-zero
// return wins for an instruction (matches the pre-vtable dispatcher
// behaviour in applyGfx1250B0toA0Rules).
uint32_t (*applyInPlacePatches)(PatchContext &, size_t) = nullptr;
uint32_t (*applyTrampolinePatches)(PatchContext &, size_t) = nullptr;
uint32_t (*applyWmmaSplitPatches)(PatchContext &, size_t) = nullptr;
uint32_t (*applyScratchPatches)(PatchContext &, size_t) = nullptr;

// Whole-kernel passes: called once per kernel after the per-instruction
// loop completes.
uint32_t (*applyWmmaHazardPatch)(PatchContext &) = nullptr;
uint32_t (*applyVop3px2Src2Fix)(PatchContext &) = nullptr;
};

uint32_t applyInPlacePatches(PatchContext &Ctx, size_t Idx);
uint32_t applyTrampolinePatches(PatchContext &Ctx, size_t Idx);
uint32_t applyWmmaHazardPatch(PatchContext &Ctx);
uint32_t applyWmmaSplitPatches(PatchContext &Ctx, size_t Idx);
uint32_t applyScratchPatches(PatchContext &Ctx, size_t Idx);
/// Walk comgr-hotswap-patches.def and bind every patch module's
/// implementation into \p VT by calling its register*Patch function.
/// A missing register*Patch produces a link error, which is the
/// loud-failure shape the weak-symbol pattern lacked. Production code
/// never calls this directly; it runs inside getHotswapPatchVTable()'s
/// initializer. Exposed here so unit tests can drive the install against
/// a local HotswapPatchVTable.
void installHotswapPatches(HotswapPatchVTable &VT);

/// Process-wide HotswapPatchVTable singleton (Meyers-style). The
/// initializer eagerly calls installHotswapPatches() on its own storage,
/// so every reference returned here is to a fully bound vtable. C++11
/// [stmt.dcl]/4 guarantees the initializer runs exactly once and is safe
/// under concurrent first access, which removes the need for an explicit
/// std::call_once at the entry point and any inter-TU static-init order
/// contract on the patch modules.
HotswapPatchVTable &getHotswapPatchVTable();

// Forward-declare every patch module's installer from the central .def
// registry. Patch modules define these in their comgr-hotswap-patch-*.cpp;
// installHotswapPatches() consumes them; unit tests under test-unit/ also
// invoke them directly. A patches.def line with no matching definition
// produces a libamd_comgr / HotswapMCTests link error.
#define HOTSWAP_PATCH(Name) void register##Name##Patch(HotswapPatchVTable &);
#include "comgr-hotswap-patches.def"
#undef HOTSWAP_PATCH

// -- Function declarations (B0-to-A0 policy layer) ----------------------------

Expand Down
15 changes: 5 additions & 10 deletions amd/comgr/src/comgr-hotswap-patch-inplace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,6 @@

#include "comgr-hotswap-internal.h"

// MSVC does not support weak symbols; LLVM_ATTRIBUTE_WEAK expands to nothing,
// so the stub in comgr-hotswap-b0a0.cpp becomes a regular definition and
// this file would produce a duplicate-symbol link error (LNK2005). Guard
// the strong override until a proper registration mechanism replaces the
// weak-symbol pattern on Windows (tracked in #2294 / #2285).
#if !defined(_MSC_VER)

#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCCodeEmitter.h"
Expand Down Expand Up @@ -99,7 +92,7 @@ bool swapOpcode(InternalDecodedInst &DI, uint8_t *Text, const LLVMState &LS,

} // anonymous namespace

uint32_t applyInPlacePatches(PatchContext &Ctx, size_t Idx) {
static uint32_t applyInPlacePatchesImpl(PatchContext &Ctx, size_t Idx) {
InternalDecodedInst &DI = Ctx.Decoded[Idx];
StringRef Mnemonic(DI.Mnemonic);

Expand Down Expand Up @@ -162,7 +155,9 @@ uint32_t applyInPlacePatches(PatchContext &Ctx, size_t Idx) {
return 0;
}

void registerInPlacePatch(HotswapPatchVTable &VT) {
VT.applyInPlacePatches = &applyInPlacePatchesImpl;
}

} // namespace hotswap
} // namespace COMGR

#endif // !defined(_MSC_VER)
15 changes: 5 additions & 10 deletions amd/comgr/src/comgr-hotswap-patch-trampoline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,6 @@

#include "comgr-hotswap-internal.h"

// MSVC does not support weak symbols; LLVM_ATTRIBUTE_WEAK expands to nothing,
// so the stub in comgr-hotswap-b0a0.cpp becomes a regular definition and
// this file would produce a duplicate-symbol link error (LNK2005). Guard
// the strong override until a proper registration mechanism replaces the
// weak-symbol pattern on Windows (tracked in #2294 / #2285).
#if !defined(_MSC_VER)

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCCodeEmitter.h"
Expand Down Expand Up @@ -361,7 +354,7 @@ static bool patchDs2AddrStride64(PatchContext &Ctx, size_t Idx) {
//
// ds_*_2addr_stride64_* -> split into two single-address DS ops

uint32_t applyTrampolinePatches(PatchContext &Ctx, size_t Idx) {
static uint32_t applyTrampolinePatchesImpl(PatchContext &Ctx, size_t Idx) {
StringRef Mnem(Ctx.Decoded[Idx].Mnemonic);

if (!getDs2AddrReplacement(Mnem).empty())
Expand All @@ -370,7 +363,9 @@ uint32_t applyTrampolinePatches(PatchContext &Ctx, size_t Idx) {
return 0;
}

void registerTrampolinePatch(HotswapPatchVTable &VT) {
VT.applyTrampolinePatches = &applyTrampolinePatchesImpl;
}

} // namespace hotswap
} // namespace COMGR

#endif // !defined(_MSC_VER)
10 changes: 5 additions & 5 deletions amd/comgr/src/comgr-hotswap-patch-vop3px2-src2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@

#include "comgr-hotswap-internal.h"

#if !defined(_MSC_VER)

#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"

Expand Down Expand Up @@ -90,7 +88,7 @@ bool patchScaleSrc2(uint8_t *InstBytes) {
// This only fires on the B0-to-A0 rewrite path (applyGfx1250B0toA0Rules).
// A0-native binaries are compiled with an A0-targeted Clang that sets the
// field correctly at codegen time, so they do not need hotswap rewriting.
uint32_t applyVop3px2Src2Fix(PatchContext &Ctx) {
static uint32_t applyVop3px2Src2FixImpl(PatchContext &Ctx) {
uint32_t Patched = 0;
unsigned Scanned = 0;

Expand All @@ -112,7 +110,9 @@ uint32_t applyVop3px2Src2Fix(PatchContext &Ctx) {
return Patched;
}

void registerVop3px2Src2Patch(HotswapPatchVTable &VT) {
VT.applyVop3px2Src2Fix = &applyVop3px2Src2FixImpl;
}

} // namespace hotswap
} // namespace COMGR

#endif // !defined(_MSC_VER)
10 changes: 5 additions & 5 deletions amd/comgr/src/comgr-hotswap-patch-wmma-hazard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@

#include "comgr-hotswap-internal.h"

#if !defined(_MSC_VER)

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"

Expand Down Expand Up @@ -172,7 +170,7 @@ std::vector<WmmaHazard> findWmmaCoexecHazards(const PatchContext &Ctx) {

} // anonymous namespace

uint32_t applyWmmaHazardPatch(PatchContext &Ctx) {
static uint32_t applyWmmaHazardPatchImpl(PatchContext &Ctx) {
std::vector<WmmaHazard> Hazards = findWmmaCoexecHazards(Ctx);
if (Hazards.empty())
return 0;
Expand Down Expand Up @@ -207,7 +205,9 @@ uint32_t applyWmmaHazardPatch(PatchContext &Ctx) {
return Patched;
}

void registerWmmaHazardPatch(HotswapPatchVTable &VT) {
VT.applyWmmaHazardPatch = &applyWmmaHazardPatchImpl;
}

} // namespace hotswap
} // namespace COMGR

#endif // !defined(_MSC_VER)
Loading
Loading