From 742bfe928a66807b355c1030fc4de71eb9cfdac5 Mon Sep 17 00:00:00 2001 From: Yuri Iozzelli Date: Tue, 12 May 2026 17:44:13 +0200 Subject: [PATCH 1/3] PartialExecuter: rewrite SCC visit as iterative Replace the recursive BasicBlockGroupNode::recursiveVisit with an explicit stack-based driver, and split the responsibilities of the old class in two: - BasicBlockNode owns the implicit tree (parentNode pointer, child storage, blockToGroupMap) and the per-node visit state (start, from, isReachable, isMultiHead, BBProgress, SCCProgress, isLoopCopy, visitingAll, ...). It knows how to push its children onto an externally-owned stack via splitIntoSCCs. It is a plain struct: all fields are accessed directly by the driver. - BasicBlockGroupNode is the driver: it owns the work stack, holds the FunctionData reference, and runs iterativeVisit until the stack drains. A processed node is not popped immediately after splitIntoSCCs pushes its children: it sets a clearAllStorage sentinel and stays on the stack. Because the stack is LIFO, the node only becomes the top again after every descendant has been drained -- at which point its child storage can be safely freed without invalidating any parentNode or blockToGroupMap pointer. This keeps live memory proportional to the current root-to-leaf path, mirroring what the recursive version got from the call stack. splitIntoSCCs is also tightened to avoid push/pop churn: in the N=1 case no extra "self-as-child" node is created (self-loops are evaluated naturally), and in the N>1 case the final SCC (the one that contains start) is intentionally never pushed, since the caller runs start inline immediately after the split. The isLoopCopy flag replaces the implicit childrenNodes.size()==1 check that previously identified the loop replica. BBProgress is promoted from a bool& plumbed through the visit helpers to a member of the node. The redundant subsets / subsetIndex parallel data structures are removed; cleanUp now looks the owning child up through blockToGroupMap directly. --- llvm/lib/CheerpWriter/PartialExecuter.cpp | 415 ++++++++++++---------- 1 file changed, 224 insertions(+), 191 deletions(-) diff --git a/llvm/lib/CheerpWriter/PartialExecuter.cpp b/llvm/lib/CheerpWriter/PartialExecuter.cpp index c7a28e1c2bb9..8fc892111c30 100644 --- a/llvm/lib/CheerpWriter/PartialExecuter.cpp +++ b/llvm/lib/CheerpWriter/PartialExecuter.cpp @@ -53,6 +53,7 @@ #include #include #include +#include #include "llvm/Cheerp/Utility.h" #include "llvm/Cheerp/BuiltinInstructions.h" @@ -74,6 +75,7 @@ const uint32_t MAX_CALL_SITES = 100u; class FunctionData; class ModuleData; +struct BasicBlockNode; class BasicBlockGroupNode; class PartialInterpreter : public llvm::Interpreter { @@ -304,7 +306,7 @@ class PartialInterpreter : public llvm::Interpreter { } return nullptr; } - llvm::BasicBlock* visitBasicBlock(FunctionData& data, BasicBlockGroupNode& BBGN, llvm::BasicBlock& BB, bool& BBProgress); + llvm::BasicBlock* visitBasicBlock(FunctionData& data, BasicBlockNode& BBNode, llvm::BasicBlock& BB); explicit PartialInterpreter(std::unique_ptr M) : llvm::Interpreter(std::move(M), /*preExecute*/false) { @@ -641,7 +643,7 @@ class PartialInterpreter : public llvm::Interpreter { } return nullptr; } - void visitOuter(FunctionData& data, BasicBlockGroupNode& BBGN, llvm::Instruction& I, bool& BBProgress); + void visitOuter(FunctionData& data, BasicBlockNode& BBN, llvm::Instruction& I); bool replaceKnownCEs() { if(fullyKnownCEs.empty()) @@ -796,7 +798,7 @@ static void removeEdgeBetweenBlocks(llvm::BasicBlock* from, llvm::BasicBlock* to } } -llvm::BasicBlock* PartialInterpreter::visitBasicBlock(FunctionData& data, BasicBlockGroupNode& BBGN, llvm::BasicBlock& BB, bool& BBProgress) +llvm::BasicBlock* PartialInterpreter::visitBasicBlock(FunctionData& data, BasicBlockNode& BBNode, llvm::BasicBlock& BB) { ExecutionContext& executionContext = getTopCallFrame(); executionContext.CurBB = &BB; @@ -807,7 +809,7 @@ llvm::BasicBlock* PartialInterpreter::visitBasicBlock(FunctionData& data, BasicB // Note that here we could also execute a Call, and that implies adding a CallFrame // executing there (possibly also in depth) // So getTopCallFrame() has to be called since it will possibly change - visitOuter(data, BBGN, *getTopCallFrame().CurInst++, BBProgress); + visitOuter(data, BBNode, *getTopCallFrame().CurInst++); } // Find (if there are enough information) the next BB to be visited @@ -1362,68 +1364,82 @@ template <> struct GraphTraits { namespace cheerp { -// This class represent a Node part of a tree of BasicBlockGroupNodes -// parentNode is null IFF we are in the root, otherwise points to the parent -// childrenNodes points to the childrens of a given node +// This setup represents an implicit tree of BasicBlockNodes managed via a stack. +// `parentNode` is null IFF we are the root, otherwise points to the parent. // -// Metadata associated with every node is: -// isReachable -> whether this set is actually reachable -// isMultiHead -> whether there are more than one entry block -// iff isReachable && !isMultiHead -> start is the unique entry BasicBlock (entry from inside of this set, 'back-edges', are valid) -// iff from -> from is the BasicBlock we are coming from (so PHINodes can be set) -// blocks is a set of BasicBlocks that are part of this group +// Metadata/state associated with every BasicBlockNode includes: +// blocks -> Immutable set of BasicBlocks that belong to this group/component. +// isReachable -> True if there is a known valid path reaching this node. +// isMultiHead -> True if there is more than one entry block into this component. +// (If true, we cannot safely interpret, so we mark it as visitAll). +// isLoopCopy -> True if this node is a replica of all blocks in a SCC. +// BBProgress -> True if interpreting the single `start` BasicBlock of this node yielded +// a newly discovered information like outgoing edge (`registerEdge` returned true) or +// computed a new value. This bubbles up to set `SCCProgress`. +// SCCProgress -> True if during the execution of SCC, BBprogress was marked as true. +// visitingAll -> True when we stop partially interpreting this node dynamically (e.g., if +// limits are hit, it has multiple heads, or loops reach a fixed point). +// All instructions are statically marked skipped, and all CFG successors +// are eagerly registered. +// start -> The unique entry BasicBlock (valid IFF isReachable && !isMultiHead). +// Entries from inside of this set ('back-edges') are valid. +// from -> The BasicBlock we are coming from (used to resolve PHINodes). Valid only +// if there is exactly one external predecessor. // -// For a given node: -// union(start, children[0].blocks, children[1].blocks, ...) is always equal to the blocks of the node itself +// For a given node, its blocks always represent the same set of blocks as the union +// of the blocks of all the SCC subcomponents it gets split into. // -// The tree is not static but will be modifyied during execution. -// Starting state is a single node containing all of a Function's BBs, start = Function's entry point, isReachable = true, from = nulltpr, isMultiHead = false +// Execution structure is iterative and uses a stack (`BasicBlockGroupNode::stack`). +// The starting state is a single root node containing all of a Function's BBs, +// where `start` = Function's entry point, `isReachable` = true, `from` = nullptr. // -// Then given a reachable node with a valid start: -// 1. We add a children with the same set of nodes, call it COPY -// 2. We create (implicitly) a graph were all edges going back to start from the inner blocks will point to COPY's start, all other edges remain the same -// 3. Now start will NOT have anymore any incoming edge but only outgoing -// 4. We split this graph into SCC, start is guaranteed to be in it's own component -// 5. We visit the start BasicBlock, taking notes of outgoing edges -// (they can either point to some of the blocks, possibly the copy of start, or point to sibling's SCC, in that case we will walk the tree via notifySuccessor) -// 6. We split each SCC into its own node, and do the recursion +// When popping and evaluating a reachable, single-head node from the stack: +// 1. We process it by splitting it into Strongly Connected Components (SCCs). +// 2. First, we implicitly create a "loop copy" of the node (containing the same +// blocks). Any edges looping back to `start` from inner blocks will point +// to this loop copy's start. +// 3. We push this "loop copy" to the stack first (meaning it is visited last) to +// catch any repeated iterations of this graph component. +// 4. Then, we find the SCCs of the remaining blocks (excluding `start`) and push them +// to the stack in post-order, ensuring dependencies are visited in the correct flow. +// 5. The `start` block (which is separated into its own SCC component effectively) is +// finally visited. We interpret the block and note outgoing edges. +// (These edges can point to inner blocks, the "loop copy", or to a sibling/parent +// SCC. Edges to outer scopes are propagated upwards using `notifySuccessor`). // -// Terminating conditions is that a given node will never be visited more than a fixed number of times. +// Terminating conditions is that a given node will never be visited more than a fixed number of times, which +// in this case is a certain number of executed instructions per BB as well as per SCC. // // Note that llvm::Instruction, BasicBlock or Function are taken as non-const pointers (or reference). // While this stage DO NOT actually modify them, we are reliant here on the Interpreter infrastructure // and there values are stored as NON-const. So while logically we will not modify structure of the function // we would either have to do const_casts at the boundaries or just remove constness -class BasicBlockGroupNode + +using ReverseMapBBToGroup = llvm::DenseMap; + +struct BasicBlockNode { // Implicit tree structure - BasicBlockGroupNode* parentNode; - std::list childrenNodes; - - // Other metadata - FunctionData& data; + BasicBlockNode* parentNode; const DeterministicBBSet ownedBlocks; - public: + std::list blocksStorage; + + ReverseMapBBToGroup blockToGroupMap; + const DeterministicBBSet& blocks; bool isMultiHead; bool isReachable; bool SCCProgress; + bool isLoopCopy; + bool BBProgress; + bool visitingAll; + bool clearAllStorage; llvm::BasicBlock* start; llvm::BasicBlock* from; uint32_t currIter; uint32_t sccInstructionCounter; llvm::DenseMap minVisitIndex; - // TODO(carlo): an optmization might be having from be a set, conserving the phi that are equals - typedef llvm::DenseMap ReverseMapBBToGroup; - - // reverseMappingBBToGroup will be populated alongside childrenNodes, and for each BasicBlock reverseMappingBBToGroup[BB] - // will be the pointer of the SCC component BB is part of - ReverseMapBBToGroup reverseMappingBBToGroup; - llvm::DenseMap subsetIndex; - std::vector subsets; - - bool visitingAll; static const DeterministicBBSet getAllBasicBlocks(llvm::Function& F) { DeterministicBBSet set; @@ -1433,22 +1449,20 @@ class BasicBlockGroupNode } return set; } - void splitIntoSCCs(std::list& queueToBePopulated, ReverseMapBBToGroup& blockToGroupMap); -public: - BasicBlockGroupNode(FunctionData& data, BasicBlockGroupNode* parentBBGNode, const DeterministicBBSet& OWNEDblocks, llvm::BasicBlock* start = nullptr) - : parentNode(parentBBGNode), data(data), ownedBlocks(OWNEDblocks), blocks(ownedBlocks), isMultiHead(false), isReachable(parentNode == nullptr), SCCProgress(false), start(start), from(nullptr), sccInstructionCounter(0), visitingAll(false) + + BasicBlockNode(BasicBlockNode* parentBBNode, const DeterministicBBSet& OWNEDblocks, llvm::BasicBlock* start = nullptr) + : parentNode(parentBBNode), ownedBlocks(OWNEDblocks), blocks(ownedBlocks), isMultiHead(false), isReachable(parentNode == nullptr), SCCProgress(false), isLoopCopy(false), BBProgress(false), visitingAll(false), clearAllStorage(false), start(start), from(nullptr), sccInstructionCounter(0) { - if (start) - assert(start->getParent() == data.getFunction()); } - BasicBlockGroupNode(FunctionData& data) - : BasicBlockGroupNode(data, /*parentBBGD*/nullptr, getAllBasicBlocks(*data.getFunction()), &data.getFunction()->getEntryBlock()) + BasicBlockNode(FunctionData& data) + : BasicBlockNode(/*parentBBNode*/ nullptr, getAllBasicBlocks(*data.getFunction()), &data.getFunction()->getEntryBlock()) { } - BasicBlockGroupNode(BasicBlockGroupNode& BBGNode) - : parentNode(&BBGNode), data(BBGNode.data), blocks(BBGNode.blocks), isMultiHead(false), isReachable(false), SCCProgress(false), start(nullptr), from(nullptr), sccInstructionCounter(0), visitingAll(false) + BasicBlockNode(BasicBlockNode& BBNode) + : parentNode(&BBNode), blocks(BBNode.blocks), isMultiHead(false), isReachable(false), SCCProgress(false), isLoopCopy(true), BBProgress(false), visitingAll(false), clearAllStorage(false), start(nullptr), from (nullptr), sccInstructionCounter(0) { } + void splitIntoSCCs(std::list& stack); void addIncomingEdge(llvm::BasicBlock* comingFrom, uint32_t currIter, llvm::BasicBlock* target) { isReachable = true; @@ -1474,38 +1488,10 @@ class BasicBlockGroupNode minVisitIndex[comingFrom] = currIter; } } - // Do the visit of the BB, with 'from' (possibly nullptr if unknown) as predecessor - // Loop backs will be directed to another BBgroup - // The visit will return the set of reachable BBs, to be added into visitNext - void runVisitBasicBlock(FunctionData& data, llvm::BasicBlock& BB, llvm::SmallVectorImpl& visitNext, bool& BBProgress) - { - assert(visitNext.empty()); - - PartialInterpreter& interpreter = data.getInterpreter(); - interpreter.incomingBB = from; - BasicBlock* ret = interpreter.visitBasicBlock(data, *this, BB, BBProgress); - - if (ret) - { - visitNext.push_back(ret); - } - else - { - llvm::DenseSet setSuccessors; - for (auto* bb : successors(&BB)) - { - if (setSuccessors.insert(bb).second) - visitNext.push_back(bb); - } - } - } // notifySuccessor takes care of propagating the information 'we are visiting node from, and we have a terminator that goes to succ' // IFF succ is not in the currently visited set of nodes, it should be in one of the siblings, so we notify the parent that // will itself (possibly recursively) propagate the information - // otherwise, it's a matter of finding what children holds the succ node, and add the edge from->succ as incoming - // - // when visitingAll is set, childrens data structure is not in place since we don't have enough information to proceed - // (but we need still to propagate to parent) + // otherwise, it's a matter of finding what BBNode holds the succ node, and add the edge from->succ as incoming void notifySuccessor(llvm::BasicBlock* from, const uint32_t iter, llvm::BasicBlock* succ) { if (blocks.count(succ) == 0) @@ -1521,13 +1507,13 @@ class BasicBlockGroupNode } else { - auto it = reverseMappingBBToGroup.find(succ); - assert(it != reverseMappingBBToGroup.end()); - BasicBlockGroupNode* ptr = it->second; + auto it = blockToGroupMap.find(succ); + assert(it != blockToGroupMap.end()); + BasicBlockNode* ptr = it->second; ptr->addIncomingEdge(from, iter, succ); } } - void visitAll() + void visitAll(FunctionData& data) { visitingAll = true; PartialInterpreter& interpreter = data.getInterpreter(); @@ -1542,18 +1528,19 @@ class BasicBlockGroupNode } for (llvm::BasicBlock* succ : successors(bb)) - registerEdge(bb, succ); + registerEdge(data, bb, succ); } } - bool registerEdge(llvm::BasicBlock* from, llvm::BasicBlock* to) + bool registerEdge(FunctionData& data, llvm::BasicBlock* from, llvm::BasicBlock* to) { bool is_inserted = data.registerEdge(from, to); notifySuccessor(from, currIter, to); return is_inserted; } - void cleanUp(llvm::BasicBlock* block) + void cleanUp(FunctionData& data, llvm::BasicBlock* block) { - const DeterministicBBSet& subset = subsets[subsetIndex[block]]; + BasicBlockNode* childNode = blockToGroupMap[block]; + const DeterministicBBSet& subset = childNode->blocks; PartialInterpreter& interpreter = data.getInterpreter(); for (llvm::BasicBlock* bb : subset) { @@ -1561,10 +1548,6 @@ class BasicBlockGroupNode interpreter.removeFromMaps(&I); } } - BasicBlockGroupNode* getParent() - { - return parentNode; - } void incrementSCCInstructionCounter() { sccInstructionCounter++; @@ -1583,146 +1566,196 @@ class BasicBlockGroupNode if (parentNode) parentNode->resetSCCInstructionCounter(); } - // Visit the tree of BasicBlockGroupNodes, starting from the root and visiting children depth-first - bool recursiveVisit() + void clearAllStorages() { - if (isMultiHead) - { - // There are multiple BasicBlock that are reacheable from outside - // --> Mark everything as reachable - visitAll(); - return false; - } + blocksStorage.clear(); + blockToGroupMap.clear(); + } +}; - assert(start); //isReachable && !isMultiHead implies start being defined - currIter = data.getVisitCounter(start); +class BasicBlockGroupNode +{ + FunctionData& data; + std::list stack; - if (data.getFunctionInstructionCounter() >= MAX_INSTRUCTIONS_PER_FUNCTION) - { - visitAll(); - return false; - } +public: + BasicBlockGroupNode(FunctionData& data): data(data) + { + } + + // Do the visit of the BB, with 'from' (possibly nullptr if unknown) as predecessor + // Loop backs will be directed to another BBgroup + // The visit will return the set of reachable BBs, to be added into visitNext + void runVisitBasicBlock(BasicBlockNode& BBNode, llvm::BasicBlock& BB, llvm::SmallVectorImpl& visitNext) + { + assert(visitNext.empty()); - if (parentNode && parentNode->getSCCInstructionCounter() >= MAX_INSTRUCTIONS_PER_SCC) + PartialInterpreter& interpreter = data.getInterpreter(); + interpreter.incomingBB = BBNode.from; + BasicBlock* ret = interpreter.visitBasicBlock(data, BBNode, BB); + + if (ret) { - visitAll(); - resetSCCInstructionCounter(); - return false; + visitNext.push_back(ret); } - - if (parentNode) + else { - for (auto& p : minVisitIndex) + llvm::DenseSet setSuccessors; + for (auto* bb : successors(&BB)) { - if (data.getVisitCounter(p.first) > p.second+1) - parentNode->cleanUp(p.first); + if (setSuccessors.insert(bb).second) + visitNext.push_back(bb); } } - data.incrementVisitCounter(start); + } + void iterativeVisit() + { + BasicBlockNode rootNode(data); + stack.emplace_back(&rootNode); + while(!stack.empty()) + { + BasicBlockNode* currNode = stack.back(); - splitIntoSCCs(childrenNodes, reverseMappingBBToGroup); //These should be partially ordered with the last one possibly being the replica of the current one + if (currNode->clearAllStorage) + { + currNode->clearAllStorages(); + stack.pop_back(); + continue; + } + if (!currNode->isReachable) + { + stack.pop_back(); + continue; + } + if (currNode->isMultiHead) + { + currNode->visitAll(data); + stack.pop_back(); + continue; + } - llvm::SmallVector visitNext; + currNode->currIter = data.getVisitCounter(currNode->start); - bool BBProgress = false; - runVisitBasicBlock(data, *start, visitNext, BBProgress); - for (llvm::BasicBlock* succ : visitNext){ - if (registerEdge(start, succ)){ - BBProgress = true; + if (data.getFunctionInstructionCounter() >= MAX_INSTRUCTIONS_PER_FUNCTION) + { + currNode->visitAll(data); + stack.pop_back(); + continue; } - } - - // The first SCC (start) has already been visited - // This updates SCCProgress in case progress was made while visiting the start - if (BBProgress) - SCCProgress = true; - childrenNodes.pop_back(); - while (!childrenNodes.empty()) - { - auto& child = childrenNodes.back(); - if (child.isReachable) + if (currNode->parentNode && currNode->parentNode->getSCCInstructionCounter() >= MAX_INSTRUCTIONS_PER_SCC) { - // If childrenNodes size is 1, that means that we are visiting the copy of all basic blocks in the curret set of SCC - // Because the copy is reachable, it means that we are in a loop - // By this point we have run and visit all the SCC of a parent. If no progress was found there is no point to continue with the loop - if (childrenNodes.size() == 1 && !SCCProgress) + currNode->visitAll(data); + currNode->resetSCCInstructionCounter(); + stack.pop_back(); + continue; + } + if (currNode->isLoopCopy && !currNode->parentNode->SCCProgress) + { + currNode->visitAll(data); + stack.pop_back(); + continue; + } + if (currNode->parentNode) + { + for (auto& p : currNode->minVisitIndex) { - visitAll(); - return false; + if (data.getVisitCounter(p.first) > p.second+1) + currNode->parentNode->cleanUp(data, p.first); } - if (child.recursiveVisit()) - SCCProgress = true; } - childrenNodes.pop_back(); + + data.incrementVisitCounter(currNode->start); + + currNode->splitIntoSCCs(stack); + + currNode->clearAllStorage = true; + + llvm::SmallVector visitNext; + + runVisitBasicBlock(*currNode, *currNode->start, visitNext); + for (llvm::BasicBlock* succ : visitNext) { + if (currNode->registerEdge(data, currNode->start, succ)) { + currNode->BBProgress = true; + } + } + + if (currNode->BBProgress) + { + currNode->SCCProgress = true; + if (currNode->parentNode) + currNode->parentNode->SCCProgress = true; + } } - return BBProgress; } }; -void BasicBlockGroupNode::splitIntoSCCs(std::list& queueToBePopulated, ReverseMapBBToGroup& blockToGroupMap) +void BasicBlockNode::splitIntoSCCs(std::list& stack) { - assert(queueToBePopulated.empty()); + assert(blocksStorage.empty()); assert(blockToGroupMap.empty()); - assert(subsetIndex.empty()); - assert(subsets.empty()); - // We begin with N nodes, remove 'start', and we find the SCCs of the remaining N-1 nodes. - // - // For N = 1, it means 0 nodes remaining -> no SCCs - // For N > 1, it means > 0 nodes remaining, we divide them in 1 or more SCCs - // - // Then iff there are any edges going back to start, we add all nodes again as a single SCC to the end - // - // During the actual visit we might discover that we eventually will not loop back to start (so the recursion terminate) or we stop since we reached the maximum iteration number - - //No nodes remaining, no need to splitIntoSCCS when N=1 + // We are separating a group of N basic blocks into Strongly Connected Components (SCCs). + // Since we pop from a stack, we push items in reverse execution order: + // 1. First, push a 'loop copy' of all N blocks to handle potential back-edges returning to the start. + // 2. Next, calculate the SCCs of the blocks and push them in post-order. + + // If there is exactly 1 block, there are no intermediate blocks to isolate into SCCs. + // Furthermore, self-loops (edges from 'start' back to 'start') are evaluated naturally + // while processing the single block itself. Creating a separate 'loop copy' for only one block + // is redundant, so we skip the split entirely in this case. if (blocks.size() == 1) { - queueToBePopulated.emplace_back(*this); - BasicBlock* bb = *blocks.begin(); - subsetIndex[bb] = 0; - subsets.push_back(blocks); - queueToBePopulated.emplace_back(data, this, blocks); - blockToGroupMap[bb] = &queueToBePopulated.back(); - blockToGroupMap[start] = &queueToBePopulated.front(); + blocksStorage.emplace_back(*this); + stack.push_back(&blocksStorage.back()); + blockToGroupMap[start] = &blocksStorage.back(); return; } SubGraph SG(start, blocks); - queueToBePopulated.emplace_back(*this); + blocksStorage.emplace_back(*this); + BasicBlockNode* loopCopyPtr = &blocksStorage.back(); + stack.push_back(&blocksStorage.back()); + + // We delay pushing to the stack by one iteration to intentionally drop the final SCC. + // The last SCC iterated always corresponds to the 'start' basic block of this group. + // Since the caller implicitly processes 'start' immediately after this function returns, + // we omit it from the stack rather than pushing it and immediately popping it back off. + DeterministicBBSet pendingSubset; + bool hasPending = false; - uint32_t nextId = 0; for (auto& SCC: make_range(scc_begin(&SG), scc_end(&SG))) { - DeterministicBBSet subset; - for (auto& GN : SCC) + if (hasPending) { - BasicBlock* bb = GN->BB; - subset.insert(bb); - subsetIndex[bb] = nextId; + blocksStorage.emplace_back(this, pendingSubset); + BasicBlockNode* childPtr = &blocksStorage.back(); + stack.push_back(childPtr); + for (BasicBlock* bb : childPtr->blocks) + { + blockToGroupMap[bb] = childPtr; + } } - subsets.push_back(std::move(subset)); - nextId++; - queueToBePopulated.emplace_back(data, this, subsets.back()); - for (BasicBlock* bb : subsets.back()) + + pendingSubset.clear(); + for (auto& GN : SCC) { - blockToGroupMap[bb] = &queueToBePopulated.back(); + BasicBlock* bb = GN->BB; + pendingSubset.insert(bb); } + hasPending = true; } - blockToGroupMap[start] = &queueToBePopulated.front(); - subsetIndex[start] = nextId; - subsets.push_back(blocks); + blockToGroupMap[start] = loopCopyPtr; } void FunctionData::actualVisit() { BasicBlockGroupNode groupData(*this); - groupData.recursiveVisit(); + groupData.iterativeVisit(); } -void PartialInterpreter::visitOuter(FunctionData& data, BasicBlockGroupNode& BBGN, llvm::Instruction& I, bool& BBProgress) +void PartialInterpreter::visitOuter(FunctionData& data, BasicBlockNode& BBNode, llvm::Instruction& I) { data.incrementFunctionInstructionCounter(); - BBGN.incrementSCCInstructionCounter(); + BBNode.incrementSCCInstructionCounter(); if (PHINode* phi = dyn_cast(&I)) { @@ -1736,7 +1769,7 @@ void PartialInterpreter::visitOuter(FunctionData& data, BasicBlockGroupNode& BBG if (isValueComputed(incomingVal)) { computedPhisValues.push_back({phi, incomingVal}); - BBProgress = true; + BBNode.BBProgress = true; return; } } @@ -1802,7 +1835,7 @@ void PartialInterpreter::visitOuter(FunctionData& data, BasicBlockGroupNode& BBG { BasicBlock* next = findNextBasicBlock(I); - if (next && BBGN.getParent() && BBGN.getParent()->getSCCInstructionCounter() < MAX_INSTRUCTIONS_PER_SCC) + if (next && BBNode.parentNode && BBNode.parentNode->getSCCInstructionCounter() < MAX_INSTRUCTIONS_PER_SCC) { data.incrementVisitCounter(next); @@ -1811,7 +1844,7 @@ void PartialInterpreter::visitOuter(FunctionData& data, BasicBlockGroupNode& BBG getTopCallFrame().CurBB = next; getTopCallFrame().CurInst = getTopCallFrame().CurBB->begin(); - BBProgress = true; + BBNode.BBProgress = true; return; } skip = true; @@ -1858,7 +1891,7 @@ void PartialInterpreter::visitOuter(FunctionData& data, BasicBlockGroupNode& BBG if(isInitialCallFrame()) { if (data.registerValueForInst(I, getOperandValue(&I), strongBits)) - BBProgress = true; + BBNode.BBProgress = true; } if (!isa(I)) From 379effce7ee664856e4c078e2908a00a6b79cbda Mon Sep 17 00:00:00 2001 From: Yuri Iozzelli Date: Fri, 22 May 2026 17:48:15 +0200 Subject: [PATCH 2/3] PartialExecuter: tighten iterative SCC visitor invariants Initialize BasicBlockNode iteration state, avoid accidental DenseMap insertion during cleanup, and assert that the delayed SCC split drops only the start component. Also remove a redundant storage lookup and fix stale comments and whitespace around the iterative SCC visitor. --- llvm/lib/CheerpWriter/PartialExecuter.cpp | 55 ++++++++++++----------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/llvm/lib/CheerpWriter/PartialExecuter.cpp b/llvm/lib/CheerpWriter/PartialExecuter.cpp index 8fc892111c30..f716fa82d782 100644 --- a/llvm/lib/CheerpWriter/PartialExecuter.cpp +++ b/llvm/lib/CheerpWriter/PartialExecuter.cpp @@ -1373,38 +1373,38 @@ namespace cheerp // isMultiHead -> True if there is more than one entry block into this component. // (If true, we cannot safely interpret, so we mark it as visitAll). // isLoopCopy -> True if this node is a replica of all blocks in a SCC. -// BBProgress -> True if interpreting the single `start` BasicBlock of this node yielded +// BBProgress -> True if interpreting the single `start` BasicBlock of this node yielded // a newly discovered information like outgoing edge (`registerEdge` returned true) or // computed a new value. This bubbles up to set `SCCProgress`. // SCCProgress -> True if during the execution of SCC, BBprogress was marked as true. -// visitingAll -> True when we stop partially interpreting this node dynamically (e.g., if -// limits are hit, it has multiple heads, or loops reach a fixed point). -// All instructions are statically marked skipped, and all CFG successors +// visitingAll -> True when we stop partially interpreting this node dynamically (e.g., if +// limits are hit, it has multiple heads, or loops reach a fixed point). +// All instructions are statically marked skipped, and all CFG successors // are eagerly registered. // start -> The unique entry BasicBlock (valid IFF isReachable && !isMultiHead). // Entries from inside of this set ('back-edges') are valid. -// from -> The BasicBlock we are coming from (used to resolve PHINodes). Valid only +// from -> The BasicBlock we are coming from (used to resolve PHINodes). Valid only // if there is exactly one external predecessor. // // For a given node, its blocks always represent the same set of blocks as the union // of the blocks of all the SCC subcomponents it gets split into. // // Execution structure is iterative and uses a stack (`BasicBlockGroupNode::stack`). -// The starting state is a single root node containing all of a Function's BBs, +// The starting state is a single root node containing all of a Function's BBs, // where `start` = Function's entry point, `isReachable` = true, `from` = nullptr. // // When popping and evaluating a reachable, single-head node from the stack: // 1. We process it by splitting it into Strongly Connected Components (SCCs). -// 2. First, we implicitly create a "loop copy" of the node (containing the same -// blocks). Any edges looping back to `start` from inner blocks will point +// 2. First, we implicitly create a "loop copy" of the node (containing the same +// blocks). Any edges looping back to `start` from inner blocks will point // to this loop copy's start. // 3. We push this "loop copy" to the stack first (meaning it is visited last) to // catch any repeated iterations of this graph component. -// 4. Then, we find the SCCs of the remaining blocks (excluding `start`) and push them +// 4. Then, we find the SCCs of the remaining blocks (excluding `start`) and push them // to the stack in post-order, ensuring dependencies are visited in the correct flow. // 5. The `start` block (which is separated into its own SCC component effectively) is // finally visited. We interpret the block and note outgoing edges. -// (These edges can point to inner blocks, the "loop copy", or to a sibling/parent +// (These edges can point to inner blocks, the "loop copy", or to a sibling/parent // SCC. Edges to outer scopes are propagated upwards using `notifySuccessor`). // // Terminating conditions is that a given node will never be visited more than a fixed number of times, which @@ -1440,7 +1440,7 @@ struct BasicBlockNode uint32_t sccInstructionCounter; llvm::DenseMap minVisitIndex; - static const DeterministicBBSet getAllBasicBlocks(llvm::Function& F) + static DeterministicBBSet getAllBasicBlocks(llvm::Function& F) { DeterministicBBSet set; for (llvm::BasicBlock& bb : F) @@ -1451,7 +1451,7 @@ struct BasicBlockNode } BasicBlockNode(BasicBlockNode* parentBBNode, const DeterministicBBSet& OWNEDblocks, llvm::BasicBlock* start = nullptr) - : parentNode(parentBBNode), ownedBlocks(OWNEDblocks), blocks(ownedBlocks), isMultiHead(false), isReachable(parentNode == nullptr), SCCProgress(false), isLoopCopy(false), BBProgress(false), visitingAll(false), clearAllStorage(false), start(start), from(nullptr), sccInstructionCounter(0) + : parentNode(parentBBNode), ownedBlocks(OWNEDblocks), blocks(ownedBlocks), isMultiHead(false), isReachable(parentNode == nullptr), SCCProgress(false), isLoopCopy(false), BBProgress(false), visitingAll(false), clearAllStorage(false), start(start), from(nullptr), currIter(0), sccInstructionCounter(0) { } BasicBlockNode(FunctionData& data) @@ -1459,7 +1459,7 @@ struct BasicBlockNode { } BasicBlockNode(BasicBlockNode& BBNode) - : parentNode(&BBNode), blocks(BBNode.blocks), isMultiHead(false), isReachable(false), SCCProgress(false), isLoopCopy(true), BBProgress(false), visitingAll(false), clearAllStorage(false), start(nullptr), from (nullptr), sccInstructionCounter(0) + : parentNode(&BBNode), blocks(BBNode.blocks), isMultiHead(false), isReachable(false), SCCProgress(false), isLoopCopy(true), BBProgress(false), visitingAll(false), clearAllStorage(false), start(nullptr), from(nullptr), currIter(0), sccInstructionCounter(0) { } void splitIntoSCCs(std::list& stack); @@ -1533,13 +1533,15 @@ struct BasicBlockNode } bool registerEdge(FunctionData& data, llvm::BasicBlock* from, llvm::BasicBlock* to) { - bool is_inserted = data.registerEdge(from, to); + bool isInserted = data.registerEdge(from, to); notifySuccessor(from, currIter, to); - return is_inserted; + return isInserted; } void cleanUp(FunctionData& data, llvm::BasicBlock* block) { - BasicBlockNode* childNode = blockToGroupMap[block]; + auto it = blockToGroupMap.find(block); + assert(it != blockToGroupMap.end()); + BasicBlockNode* childNode = it->second; const DeterministicBBSet& subset = childNode->blocks; PartialInterpreter& interpreter = data.getInterpreter(); for (llvm::BasicBlock* bb : subset) @@ -1550,9 +1552,9 @@ struct BasicBlockNode } void incrementSCCInstructionCounter() { - sccInstructionCounter++; + sccInstructionCounter++; if (parentNode) - parentNode->incrementSCCInstructionCounter(); + parentNode->incrementSCCInstructionCounter(); } uint32_t getSCCInstructionCounter() { @@ -1700,20 +1702,21 @@ void BasicBlockNode::splitIntoSCCs(std::list& stack) // If there is exactly 1 block, there are no intermediate blocks to isolate into SCCs. // Furthermore, self-loops (edges from 'start' back to 'start') are evaluated naturally - // while processing the single block itself. Creating a separate 'loop copy' for only one block - // is redundant, so we skip the split entirely in this case. + // while processing the single block itself. We still create the loop-copy node used by + // the normal scheduling logic, but there are no child SCC nodes to push. if (blocks.size() == 1) { blocksStorage.emplace_back(*this); - stack.push_back(&blocksStorage.back()); - blockToGroupMap[start] = &blocksStorage.back(); + BasicBlockNode* loopCopyPtr = &blocksStorage.back(); + stack.push_back(loopCopyPtr); + blockToGroupMap[start] = loopCopyPtr; return; } SubGraph SG(start, blocks); blocksStorage.emplace_back(*this); BasicBlockNode* loopCopyPtr = &blocksStorage.back(); - stack.push_back(&blocksStorage.back()); + stack.push_back(loopCopyPtr); // We delay pushing to the stack by one iteration to intentionally drop the final SCC. // The last SCC iterated always corresponds to the 'start' basic block of this group. @@ -1743,6 +1746,8 @@ void BasicBlockNode::splitIntoSCCs(std::list& stack) } hasPending = true; } + assert(hasPending); + assert(pendingSubset.size() == 1 && pendingSubset.count(start)); blockToGroupMap[start] = loopCopyPtr; } @@ -1756,7 +1761,7 @@ void PartialInterpreter::visitOuter(FunctionData& data, BasicBlockNode& BBNode, { data.incrementFunctionInstructionCounter(); BBNode.incrementSCCInstructionCounter(); - + if (PHINode* phi = dyn_cast(&I)) { // PHI have to be execute concurrently (since they may cross-reference themselves) @@ -1934,7 +1939,7 @@ static void processFunction(const llvm::Function& F, ModuleData& moduleData) const CallBase* CS = cast(FU); if (CS->isCallee(&U)) { - data.visitCallBase(CS); + data.visitCallBase(CS); } else { From af7a48db0b675652c101ad66e4e4eb07377ee490 Mon Sep 17 00:00:00 2001 From: Yuri Iozzelli Date: Fri, 22 May 2026 17:55:26 +0200 Subject: [PATCH 3/3] PartialExecuter: make SCC visit stack phases explicit Replace the node-owned cleanup sentinel with an explicit stack frame phase. This keeps the non-recursive scheduling order unchanged while making the parent-storage cleanup step part of the stack protocol instead of mutable node state. --- llvm/lib/CheerpWriter/PartialExecuter.cpp | 51 ++++++++++++++--------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/llvm/lib/CheerpWriter/PartialExecuter.cpp b/llvm/lib/CheerpWriter/PartialExecuter.cpp index f716fa82d782..3d88e084a408 100644 --- a/llvm/lib/CheerpWriter/PartialExecuter.cpp +++ b/llvm/lib/CheerpWriter/PartialExecuter.cpp @@ -1417,6 +1417,23 @@ namespace cheerp using ReverseMapBBToGroup = llvm::DenseMap; +enum class BasicBlockFramePhase +{ + Visit, + Cleanup, +}; + +struct BasicBlockFrame +{ + BasicBlockNode* node; + BasicBlockFramePhase phase; + + BasicBlockFrame(BasicBlockNode* node, BasicBlockFramePhase phase) + : node(node), phase(phase) + { + } +}; + struct BasicBlockNode { // Implicit tree structure @@ -1433,7 +1450,6 @@ struct BasicBlockNode bool isLoopCopy; bool BBProgress; bool visitingAll; - bool clearAllStorage; llvm::BasicBlock* start; llvm::BasicBlock* from; uint32_t currIter; @@ -1451,7 +1467,7 @@ struct BasicBlockNode } BasicBlockNode(BasicBlockNode* parentBBNode, const DeterministicBBSet& OWNEDblocks, llvm::BasicBlock* start = nullptr) - : parentNode(parentBBNode), ownedBlocks(OWNEDblocks), blocks(ownedBlocks), isMultiHead(false), isReachable(parentNode == nullptr), SCCProgress(false), isLoopCopy(false), BBProgress(false), visitingAll(false), clearAllStorage(false), start(start), from(nullptr), currIter(0), sccInstructionCounter(0) + : parentNode(parentBBNode), ownedBlocks(OWNEDblocks), blocks(ownedBlocks), isMultiHead(false), isReachable(parentNode == nullptr), SCCProgress(false), isLoopCopy(false), BBProgress(false), visitingAll(false), start(start), from(nullptr), currIter(0), sccInstructionCounter(0) { } BasicBlockNode(FunctionData& data) @@ -1459,10 +1475,10 @@ struct BasicBlockNode { } BasicBlockNode(BasicBlockNode& BBNode) - : parentNode(&BBNode), blocks(BBNode.blocks), isMultiHead(false), isReachable(false), SCCProgress(false), isLoopCopy(true), BBProgress(false), visitingAll(false), clearAllStorage(false), start(nullptr), from(nullptr), currIter(0), sccInstructionCounter(0) + : parentNode(&BBNode), blocks(BBNode.blocks), isMultiHead(false), isReachable(false), SCCProgress(false), isLoopCopy(true), BBProgress(false), visitingAll(false), start(nullptr), from(nullptr), currIter(0), sccInstructionCounter(0) { } - void splitIntoSCCs(std::list& stack); + void splitIntoSCCs(std::vector& stack); void addIncomingEdge(llvm::BasicBlock* comingFrom, uint32_t currIter, llvm::BasicBlock* target) { isReachable = true; @@ -1578,7 +1594,7 @@ struct BasicBlockNode class BasicBlockGroupNode { FunctionData& data; - std::list stack; + std::vector stack; public: BasicBlockGroupNode(FunctionData& data): data(data) @@ -1613,26 +1629,25 @@ class BasicBlockGroupNode void iterativeVisit() { BasicBlockNode rootNode(data); - stack.emplace_back(&rootNode); + stack.emplace_back(&rootNode, BasicBlockFramePhase::Visit); while(!stack.empty()) { - BasicBlockNode* currNode = stack.back(); + BasicBlockFrame currFrame = stack.back(); + stack.pop_back(); + BasicBlockNode* currNode = currFrame.node; - if (currNode->clearAllStorage) + if (currFrame.phase == BasicBlockFramePhase::Cleanup) { currNode->clearAllStorages(); - stack.pop_back(); continue; } if (!currNode->isReachable) { - stack.pop_back(); continue; } if (currNode->isMultiHead) { currNode->visitAll(data); - stack.pop_back(); continue; } @@ -1641,20 +1656,17 @@ class BasicBlockGroupNode if (data.getFunctionInstructionCounter() >= MAX_INSTRUCTIONS_PER_FUNCTION) { currNode->visitAll(data); - stack.pop_back(); continue; } if (currNode->parentNode && currNode->parentNode->getSCCInstructionCounter() >= MAX_INSTRUCTIONS_PER_SCC) { currNode->visitAll(data); currNode->resetSCCInstructionCounter(); - stack.pop_back(); continue; } if (currNode->isLoopCopy && !currNode->parentNode->SCCProgress) { currNode->visitAll(data); - stack.pop_back(); continue; } if (currNode->parentNode) @@ -1668,10 +1680,9 @@ class BasicBlockGroupNode data.incrementVisitCounter(currNode->start); + stack.emplace_back(currNode, BasicBlockFramePhase::Cleanup); currNode->splitIntoSCCs(stack); - currNode->clearAllStorage = true; - llvm::SmallVector visitNext; runVisitBasicBlock(*currNode, *currNode->start, visitNext); @@ -1691,7 +1702,7 @@ class BasicBlockGroupNode } }; -void BasicBlockNode::splitIntoSCCs(std::list& stack) +void BasicBlockNode::splitIntoSCCs(std::vector& stack) { assert(blocksStorage.empty()); assert(blockToGroupMap.empty()); @@ -1708,7 +1719,7 @@ void BasicBlockNode::splitIntoSCCs(std::list& stack) { blocksStorage.emplace_back(*this); BasicBlockNode* loopCopyPtr = &blocksStorage.back(); - stack.push_back(loopCopyPtr); + stack.emplace_back(loopCopyPtr, BasicBlockFramePhase::Visit); blockToGroupMap[start] = loopCopyPtr; return; } @@ -1716,7 +1727,7 @@ void BasicBlockNode::splitIntoSCCs(std::list& stack) SubGraph SG(start, blocks); blocksStorage.emplace_back(*this); BasicBlockNode* loopCopyPtr = &blocksStorage.back(); - stack.push_back(loopCopyPtr); + stack.emplace_back(loopCopyPtr, BasicBlockFramePhase::Visit); // We delay pushing to the stack by one iteration to intentionally drop the final SCC. // The last SCC iterated always corresponds to the 'start' basic block of this group. @@ -1731,7 +1742,7 @@ void BasicBlockNode::splitIntoSCCs(std::list& stack) { blocksStorage.emplace_back(this, pendingSubset); BasicBlockNode* childPtr = &blocksStorage.back(); - stack.push_back(childPtr); + stack.emplace_back(childPtr, BasicBlockFramePhase::Visit); for (BasicBlock* bb : childPtr->blocks) { blockToGroupMap[bb] = childPtr;