diff --git a/lib/include/rocRoller/CodeGen/Instruction.hpp b/lib/include/rocRoller/CodeGen/Instruction.hpp index 6440ef9d..bca4b8b2 100644 --- a/lib/include/rocRoller/CodeGen/Instruction.hpp +++ b/lib/include/rocRoller/CodeGen/Instruction.hpp @@ -92,8 +92,9 @@ namespace rocRoller static Instruction Wait(WaitCount const& wait); static Instruction Wait(WaitCount&& wait); - static Instruction Lock(Scheduling::Dependency const& dependency, std::string comment); - static Instruction Unlock(std::string comment); + static Instruction Lock(Scheduling::Dependency dependency, std::string comment = ""); + static Instruction Unlock(Scheduling::Dependency dependency, std::string comment = ""); + static Instruction Unlock(std::string comment = ""); /** * This instruction will be considered to write `reg` even though it @@ -167,8 +168,8 @@ namespace rocRoller void toStream(std::ostream&, LogLevel level) const; std::string toString(LogLevel level) const; - constexpr int getLockValue() const; - Scheduling::Dependency getDependency() const; + constexpr Scheduling::LockOperation getLockValue() const; + constexpr Scheduling::Dependency getDependency() const; std::string const& getOpCode() const; std::array getModifiers() const; @@ -178,8 +179,9 @@ namespace rocRoller return m_nopCount; } - Instruction lock(Scheduling::Dependency const& dependency, std::string comment); - Instruction unlock(std::string comment); + Instruction& lock(Scheduling::Dependency dependency, std::string comment = ""); + Instruction& unlock(Scheduling::Dependency dependency, std::string comment = ""); + Instruction& unlock(std::string comment = ""); void addControlOp(int id); std::vector const& controlOps() const; @@ -273,7 +275,8 @@ namespace rocRoller std::string m_opcode; - Scheduling::Dependency m_dependency = Scheduling::Dependency::None; + Scheduling::LockOperation m_lockOp = Scheduling::LockOperation::None; + Scheduling::Dependency m_dependency = Scheduling::Dependency::None; std::array m_inoutDsts; 
std::array m_dst; diff --git a/lib/include/rocRoller/CodeGen/Instruction_impl.hpp b/lib/include/rocRoller/CodeGen/Instruction_impl.hpp index 8e3447e7..038aea5a 100644 --- a/lib/include/rocRoller/CodeGen/Instruction_impl.hpp +++ b/lib/include/rocRoller/CodeGen/Instruction_impl.hpp @@ -249,24 +249,22 @@ namespace rocRoller return rv; } - inline Instruction Instruction::Lock(Scheduling::Dependency const& dependency, - std::string comment = "") + inline Instruction Instruction::Lock(Scheduling::Dependency dependency, std::string comment) { - AssertFatal(dependency != Scheduling::Dependency::Unlock - && dependency != Scheduling::Dependency::Count, - "Can not create lock instruction with Unlock or Count dependency"); - Instruction rv; - rv.m_dependency = dependency; - rv.addComment(comment); + rv.lock(dependency, std::move(comment)); return rv; } - inline Instruction Instruction::Unlock(std::string comment = "") + inline Instruction Instruction::Unlock(std::string comment) + { + return Unlock(Scheduling::Dependency::None, std::move(comment)); + } + + inline Instruction Instruction::Unlock(Scheduling::Dependency dependency, std::string comment) { Instruction rv; - rv.m_dependency = Scheduling::Dependency::Unlock; - rv.addComment(comment); + rv.unlock(dependency, std::move(comment)); return rv; } @@ -355,7 +353,8 @@ namespace rocRoller && m_label.empty() && m_waitCount == WaitCount() && m_opcode.empty() - && m_dependency == Scheduling::Dependency::None; + && m_dependency == Scheduling::Dependency::None + && m_lockOp == Scheduling::LockOperation::None; // clang-format on } @@ -608,42 +607,44 @@ namespace rocRoller throw std::runtime_error("Too many allocations!"); } - inline Instruction Instruction::lock(Scheduling::Dependency const& dependency, - std::string comment = "") + // Marks this instruction as a Lock on `dependency` (must not be None or Count); an instruction may lock or unlock only once. + inline Instruction& Instruction::lock(Scheduling::Dependency dependency, std::string comment) { - AssertFatal(dependency != Scheduling::Dependency::Unlock + AssertFatal(m_lockOp == 
Scheduling::LockOperation::None, + "An instruction can only lock or unlock once."); + + AssertFatal(dependency != Scheduling::Dependency::None && dependency != Scheduling::Dependency::Count, - "Can not create lock instruction with Unlock or Count dependency"); + "Can not create lock instruction with None or Count dependency"); + m_lockOp = Scheduling::LockOperation::Lock; m_dependency = dependency; - addComment(comment); + addComment(std::move(comment)); return *this; } - inline Instruction Instruction::unlock(std::string comment = "") + inline Instruction& Instruction::unlock(std::string comment) { - m_dependency = Scheduling::Dependency::Unlock; - addComment(comment); + return unlock(Scheduling::Dependency::None, std::move(comment)); + } + + inline Instruction& Instruction::unlock(Scheduling::Dependency dependency, std::string comment) + { + AssertFatal(m_lockOp == Scheduling::LockOperation::None, + "An instruction can only lock or unlock once."); + + m_lockOp = Scheduling::LockOperation::Unlock; + m_dependency = dependency; + addComment(std::move(comment)); return *this; } - inline constexpr int Instruction::getLockValue() const + inline constexpr Scheduling::LockOperation Instruction::getLockValue() const { - if(m_dependency == Scheduling::Dependency::Unlock) - { - return -1; - } - else if(m_dependency == Scheduling::Dependency::None) - { - return 0; - } - else - { - return 1; - } + return m_lockOp; } - inline Scheduling::Dependency Instruction::getDependency() const + inline constexpr Scheduling::Dependency Instruction::getDependency() const { return m_dependency; } diff --git a/lib/include/rocRoller/Scheduling/Costs/Cost.hpp b/lib/include/rocRoller/Scheduling/Costs/Cost.hpp index d0e5862b..057c5f1e 100644 --- a/lib/include/rocRoller/Scheduling/Costs/Cost.hpp +++ b/lib/include/rocRoller/Scheduling/Costs/Cost.hpp @@ -73,6 +73,8 @@ namespace rocRoller */ Result operator()(std::vector::iterator>&) const; + float operator()(Instruction const& inst) const; + /** * @brief Gets the cost of one generator for the given iteration * diff 
--git a/lib/include/rocRoller/Scheduling/Scheduler.hpp b/lib/include/rocRoller/Scheduling/Scheduler.hpp index 4640020d..e50f0e5e 100644 --- a/lib/include/rocRoller/Scheduling/Scheduler.hpp +++ b/lib/include/rocRoller/Scheduling/Scheduler.hpp @@ -26,6 +26,7 @@ #pragma once +#include #include #include @@ -40,15 +41,58 @@ namespace rocRoller { namespace Scheduling { + constexpr bool isNonPreemptibleDependency(Dependency dep); + + /** + * Locking Rules + * + * A scheduler has a number of streams which each will yield a sequence of instructions. + * The job of the scheduler is to pick (i.e. schedule) the instruction from the beginning of one of the streams, and then repeat until there are no more streams with any instructions left. + * + * - If a scheduler schedules an exclusive lock, it must continue to + * select instructions from that same stream until that lock has been + * unlocked. + * - That stream might include further lock/unlock instructions which + * must occur in a last-in, first-out order, those should be treated + * as a stack to track when the original lock has been unlocked. + * + * - If a stream yields any kind of lock, it cannot yield a lower-ranked + * lock until it releases the higher-ranked lock. + * - If a scheduler schedules a non-exclusive lock, it cannot schedule + * the same kind of lock from any other stream until that lock is + * released. + * Example: + * 1. Stream 0 locks M0. + * 2. Stream 1 locks VCC. + * 3. Stream 0 tries to lock VCC. It must wait until Stream 1 unlocks VCC. + * 4. Stream 1 unlocks VCC. + * 5. Stream 0 locks VCC. + * 6. Stream 1 locks SCC. Pull from Stream 1 until SCC is unlocked. + * 7. Stream 0 locks SCC. Pull from Stream 0 until SCC is unlocked. + * 8. Stream 0 unlocks VCC. + * 9. Stream 0 unlocks M0. + * + * - If a scheduler schedules a non-exclusive lock, it cannot schedule a + * lower-ranked exclusive lock from any stream until that lock is + * released. 
+ * Examples: + * - If stream 0 locks M0, and then we see stream 3 try to lock + * Branch, we can't pull from stream 3 until stream 0 + * releases M0. + * - If stream 2 locks VCC, stream 1 can lock SCC. We will + * then have to pull from stream 1 until SCC is released. + * + */ class LockState { public: explicit LockState(ContextPtr ctx); LockState(ContextPtr ctx, Dependency dependency); - void add(Instruction const& instr); - bool isLocked() const; - void isValid(bool locked = false) const; + void add(Instruction const& instr, int streamId); + bool isNonPreemptibleStream(int streamId) const; + bool isSchedulable(Instruction const& instr, int streamId) const; + bool isLocked(Dependency dependency, int streamId) const; /** * @brief Extra checks to verify lock state integrity. @@ -56,15 +100,22 @@ namespace rocRoller * Note: disabled in Release mode. * * @param instr The instruction to verify + * @param streamId The instruction's stream ID */ - void lockCheck(Instruction const& instr); + void lockCheck(Instruction const& instr, int streamId) const; - Dependency getDependency() const; - int getLockDepth() const; + Dependency getTopDependency(int streamId) const; + int getLockDepth(int streamId) const; private: - int m_lockdepth; - Dependency m_dependency; + void lock(Dependency dep, int streamId); + void unlock(Dependency dep, int streamId); + + std::map> m_stack; + std::map m_stream; + std::unordered_multiset m_locks; + std::optional m_nonPreemptibleStream; + std::weak_ptr m_ctx; }; @@ -118,7 +169,8 @@ namespace rocRoller * - At least one instruction * - If that first instruction locks the stream, yields until the stream is unlocked. */ - Generator yieldFromStream(Generator::iterator& iter); + Generator yieldFromStream(Generator::iterator& iter, + int streamId); /** * @brief Handles new nodes being added to the instruction streams being scheduled. 
@@ -132,8 +184,9 @@ namespace rocRoller std::vector::iterator>& iterators); }; - std::ostream& operator<<(std::ostream&, SchedulerProcedure const&); - std::ostream& operator<<(std::ostream&, Dependency const&); + std::ostream& operator<<(std::ostream&, SchedulerProcedure proc); + std::ostream& operator<<(std::ostream&, Dependency dep); + std::ostream& operator<<(std::ostream& stream, LockOperation lockOp); } } diff --git a/lib/include/rocRoller/Scheduling/Scheduler_fwd.hpp b/lib/include/rocRoller/Scheduling/Scheduler_fwd.hpp index a53d4882..b0990a80 100644 --- a/lib/include/rocRoller/Scheduling/Scheduler_fwd.hpp +++ b/lib/include/rocRoller/Scheduling/Scheduler_fwd.hpp @@ -41,21 +41,30 @@ namespace rocRoller Count }; - enum class Dependency + enum class Dependency : int + { + None = 0, //< Temporary. Should only be used for unlocking. + Branch, //< Exclusive: Loops and ConditionalOp + M0, //< Non-exclusive: The M0 special-purpose register + VCC, //< Non-exclusive: The VCC special-purpose register + SCC, //< Exclusive: The SCC special-purpose register, which is + // implicitly written by many instructions. + Count + }; + + enum class LockOperation : int { None = 0, - SCC, - VCC, - Branch, + Lock, Unlock, - M0, Count }; class Scheduler; class LockState; - std::string toString(SchedulerProcedure const&); - std::string toString(Dependency const&); + std::string toString(SchedulerProcedure); + std::string toString(Dependency); + std::string toString(LockOperation); } } diff --git a/lib/source/CodeGen/LowerFromKernelGraph.cpp b/lib/source/CodeGen/LowerFromKernelGraph.cpp index 964a1b1b..278401c5 100644 --- a/lib/source/CodeGen/LowerFromKernelGraph.cpp +++ b/lib/source/CodeGen/LowerFromKernelGraph.cpp @@ -173,6 +173,7 @@ namespace rocRoller // Generate code for all the nodes we found. 
+ // vector of instruction streams std::vector> generators; for(auto tag : nodes) { @@ -180,11 +181,6 @@ namespace rocRoller generators.push_back(call(tag, op, coords)); } - if(generators.size() == 1) - { - co_yield std::move(generators[0]); - } - else { co_yield Instruction::Comment( concatenate("BEGIN Scheduler for operations ", nodes)); @@ -192,14 +188,14 @@ namespace rocRoller auto cost = Settings::getInstance()->get(Settings::SchedulerCost); auto scheduler = Component::GetNew(proc, cost, m_context); + auto generator = (*scheduler)(generators); - if(!scheduler->supportsAddingStreams()) + if(generators.size() == 1 || !scheduler->supportsAddingStreams()) { - co_yield (*scheduler)(generators); + co_yield std::move(generator); } else { - auto generator = (*scheduler)(generators); auto numCompletedNodes = m_completedControlNodes.size(); for(auto iter = generator.begin(); iter != generator.end(); ++iter) diff --git a/lib/source/Costs/Cost.cpp b/lib/source/Costs/Cost.cpp index 2f358521..553070a5 100644 --- a/lib/source/Costs/Cost.cpp +++ b/lib/source/Costs/Cost.cpp @@ -90,11 +90,16 @@ namespace rocRoller return retval; } - float Cost::operator()(Generator::iterator& iter) const + float Cost::operator()(Instruction const& inst) const { - auto const& inst = *iter; - auto status = m_ctx.lock()->peek(inst); + auto status = m_ctx.lock()->peek(inst); return cost(inst, status); } + + float Cost::operator()(Generator::iterator& iter) const + { + auto const& inst = *iter; + return (*this)(inst); + } } } diff --git a/lib/source/Expression_generate.cpp 
b/lib/source/Expression_generate.cpp index 9080fc19..1601275b 100644 --- a/lib/source/Expression_generate.cpp +++ b/lib/source/Expression_generate.cpp @@ -190,13 +190,13 @@ namespace rocRoller * value has been consumed. */ Generator prepareSourceOperands(std::vector& results, - bool& schedulerLocked, + int& schedulerLocked, std::vector exprs) { std::vector done(exprs.size(), false); std::vector resultTypes(exprs.size()); results = std::vector(exprs.size(), nullptr); - schedulerLocked = false; + schedulerLocked = 0; auto sprUses = [] { std::unordered_map m; @@ -279,7 +279,7 @@ namespace rocRoller } sprStores--; - schedulerLocked = true; + schedulerLocked++; switch(regType) { @@ -308,7 +308,7 @@ namespace rocRoller { auto sccExprIdx = maybeSccExprIdx.value(); sprStores--; - schedulerLocked = true; + schedulerLocked++; co_yield Instruction::Lock(Scheduling::Dependency::SCC, "Expression temporary in special register (SCC)"); co_yield call(results[sccExprIdx], exprs[sccExprIdx]); @@ -519,7 +519,7 @@ namespace rocRoller operator()(Register::ValuePtr& dest, T const& expr) { co_yield Instruction::Comment(toString(expr)); - bool schedulerLocked = false; + int schedulerLocked = 0; std::vector results; std::vector subExprs{expr.lhs, expr.rhs}; @@ -535,8 +535,11 @@ namespace rocRoller co_yield generateArithmeticBinary(dest, expr, results[0], results[1], resType); - if(schedulerLocked) + while(schedulerLocked > 0) + { + schedulerLocked--; co_yield Instruction::Unlock("Expression temporary in special register"); + } } /* @@ -556,7 +559,7 @@ namespace rocRoller T, SRConvert> || std::is_same_v>); - bool schedulerLocked = false; + int schedulerLocked = 0; std::vector results; std::vector subExprs{expr.lhs, expr.rhs}; @@ -576,8 +579,11 @@ namespace rocRoller dest->element({i}), results[0]->element({i}), results[1]->element({0})); } - if(schedulerLocked) + while(schedulerLocked > 0) + { + schedulerLocked--; co_yield Instruction::Unlock("Expression temporary in special register"); + } 
} template @@ -586,7 +592,7 @@ namespace rocRoller operator()(Register::ValuePtr& dest, T const& expr) { co_yield Instruction::Comment(toString(expr)); - bool schedulerLocked = false; + int schedulerLocked = 0; std::vector results; std::vector subExprs{expr.lhs, expr.rhs}; @@ -603,8 +609,11 @@ namespace rocRoller co_yield generateComparisonOrLogicalBinary( dest, expr, results[0], results[1], resType); - if(schedulerLocked) + while(schedulerLocked > 0) + { + schedulerLocked--; co_yield Instruction::Unlock("Expression temporary in special register"); + } } template @@ -612,7 +621,7 @@ namespace rocRoller !CTernaryMixed && CKernelExecuteTime) Generator operator()(Register::ValuePtr& dest, Operation const& expr) { - bool schedulerLocked = false; + int schedulerLocked = 0; std::vector results; std::vector subExprs{expr.lhs, expr.r1hs, expr.r2hs}; @@ -658,15 +667,18 @@ namespace rocRoller co_yield generateOp(dest->element({k}), lhsVal, r1hsVal, r2hsVal); } - if(schedulerLocked) + while(schedulerLocked > 0) + { + schedulerLocked--; co_yield Instruction::Unlock("Expression temporary in special register"); + } } template requires CKernelExecuteTime Generator operator()(Register::ValuePtr& dest, Operation const& expr) { - bool schedulerLocked = false; + int schedulerLocked = 0; std::vector results; std::vector subExprs{expr.lhs, expr.r1hs, expr.r2hs}; @@ -697,13 +709,16 @@ namespace rocRoller //If dest, results have multiple elements, handled inside generateOp co_yield generateOp(dest, results[0], results[1], results[2]); - if(schedulerLocked) + while(schedulerLocked > 0) + { + schedulerLocked--; co_yield Instruction::Unlock("Expression temporary in special register"); + } } Generator operator()(Register::ValuePtr& dest, Conditional const& expr) { - bool schedulerLocked = false; + int schedulerLocked = 0; std::vector results; std::vector subExprs{expr.lhs, expr.r1hs, expr.r2hs}; @@ -736,15 +751,18 @@ namespace rocRoller dest->element({k}), cond->element({k}), lhsVal, rhsVal, 
expr); } - if(schedulerLocked) + while(schedulerLocked > 0) + { + schedulerLocked--; co_yield Instruction::Unlock("Expression temporary in special register"); + } } template requires CKernelExecuteTime Generator operator()(Register::ValuePtr& dest, Operation const& expr) { - bool schedulerLocked = false; + int schedulerLocked = 0; std::vector results; std::vector subExprs{expr.arg}; @@ -845,8 +863,11 @@ namespace rocRoller } } - if(schedulerLocked) + while(schedulerLocked > 0) + { + schedulerLocked--; co_yield Instruction::Unlock("Expression temporary in special register"); + } } Generator operator()(Register::ValuePtr& dest, MatrixMultiply expr) diff --git a/lib/source/Scheduler.cpp b/lib/source/Scheduler.cpp index f8dd18d4..926be9f0 100644 --- a/lib/source/Scheduler.cpp +++ b/lib/source/Scheduler.cpp @@ -33,7 +33,7 @@ namespace rocRoller { RegisterComponentBase(Scheduler); - std::string toString(SchedulerProcedure const& proc) + std::string toString(SchedulerProcedure proc) { switch(proc) { @@ -54,27 +54,25 @@ namespace rocRoller Throw("Invalid Scheduler Procedure: ", ShowValue(static_cast(proc))); } - std::ostream& operator<<(std::ostream& stream, SchedulerProcedure const& proc) + std::ostream& operator<<(std::ostream& stream, SchedulerProcedure proc) { return stream << toString(proc); } - std::string toString(Dependency const& dep) + std::string toString(Dependency dep) { switch(dep) { case Dependency::None: return "None"; - case Dependency::SCC: - return "SCC"; - case Dependency::VCC: - return "VCC"; case Dependency::Branch: return "Branch"; - case Dependency::Unlock: - return "Unlock"; case Dependency::M0: return "M0"; + case Dependency::VCC: + return "VCC"; + case Dependency::SCC: + return "SCC"; default: break; } @@ -82,122 +80,282 @@ namespace rocRoller Throw("Invalid Dependency: ", ShowValue(static_cast(dep))); } - std::ostream& operator<<(std::ostream& stream, Dependency const& dep) + std::ostream& operator<<(std::ostream& stream, Dependency dep) { 
return stream << toString(dep); } + std::string toString(LockOperation lockOp) + { + switch(lockOp) + { + case LockOperation::None: + return "None"; + case LockOperation::Lock: + return "Lock"; + case LockOperation::Unlock: + return "Unlock"; + default: + break; + } + + Throw("Invalid LockOperation: ", ShowValue(static_cast(lockOp))); + } + + std::ostream& operator<<(std::ostream& stream, LockOperation lockOp) + { + return stream << toString(lockOp); + } + + constexpr bool isNonPreemptibleDependency(Dependency dep) + { + return dep != Dependency::M0 && dep != Dependency::VCC; + } + LockState::LockState(ContextPtr ctx) - : m_dependency(Dependency::None) - , m_lockdepth(0) - , m_ctx(ctx) + : m_ctx(ctx) { } LockState::LockState(ContextPtr ctx, Dependency dependency) - : m_dependency(dependency) - , m_ctx(ctx) + : m_ctx(ctx) + { + lock(dependency, 0); + } + + void LockState::lock(Dependency dep, int streamId) { - AssertFatal(m_dependency != Scheduling::Dependency::Count - && m_dependency != Scheduling::Dependency::Unlock, - "Can not instantiate LockState with Count or Unlock dependency"); + AssertFatal(dep != Dependency::Count && dep != Dependency::None); + + auto topDep = getTopDependency(streamId); + AssertFatal(topDep <= dep, + "Out of order dependency lock can't be acquired.", + ShowValue(topDep), + ShowValue(dep)); + + // Can a stream acquire the same lock (single resource, just the top) multiple times? 
yes + // VCC -> SCC -> SCC -> SCC + if(m_stream.contains(dep)) + { + AssertFatal( + topDep == dep && m_stream.at(dep) == streamId, + "Only the same stream can acquire the top dependency lock multiple times.", + ShowValue(dep), + ShowValue(m_stream[dep]), + ShowValue(streamId)); + } - m_lockdepth = 1; + m_stack[streamId].push(dep); + m_stream[dep] = streamId; + m_locks.insert(dep); + + if(isNonPreemptibleDependency(dep)) + m_nonPreemptibleStream = streamId; } - void LockState::add(Instruction const& instruction) + void LockState::unlock(Dependency dep, int streamId) { - lockCheck(instruction); + AssertFatal(streamId >= 0); + AssertFatal(m_stack.contains(streamId)); + AssertFatal(m_stack[streamId].size() > 0); + AssertFatal(dep != Dependency::Count); - int inst_lockvalue = instruction.getLockValue(); + // LIFO + { + auto topDep = getTopDependency(streamId); + if(dep != Dependency::None) + AssertFatal(topDep == dep, "locks can only be released in the LIFO order"); + else + dep = topDep; + } - // Instruction does not lock or unlock, do nothing - if(inst_lockvalue == 0) { - return; + auto iter = m_stream.find(dep); + AssertFatal(iter != m_stream.end() && iter->second == streamId, + ShowValue(dep), + ShowValue(streamId)); } - // Instruction can only lock (1) or unlock (-1) - if(inst_lockvalue != -1 && inst_lockvalue != 1) + // pop the stack top + m_stack[streamId].pop(); + + // erase one instance of dep from the multiset. + // if that's the last instance, then erase its streamID mapping. { - Throw("Invalid instruction lockstate: ", ShowValue(inst_lockvalue)); + auto iter = m_locks.find(dep); + AssertFatal(iter != m_locks.end()); + m_locks.erase(iter); + + if(!m_locks.contains(dep)) + m_stream.erase(dep); } - // Instruction trying to unlock when there is no lock - if(m_lockdepth == 0 && inst_lockvalue == -1) + // update m_nonPreemptibleStream state if needed + // Example: when a stream holds multiple non-preemptible + // locks like Branch -> VCC -> SCC. 
+ m_nonPreemptibleStream.reset(); + auto tempDep = getTopDependency(streamId); + while(tempDep != Dependency::None) { - Throw("Trying to unlock when not locked"); + if(m_stream.contains(tempDep) && m_stream.at(tempDep) == streamId + && isNonPreemptibleDependency(tempDep)) + { + m_nonPreemptibleStream = streamId; + break; + } + + auto depVal = static_cast(tempDep); + tempDep = static_cast(--depVal); } + } + + bool LockState::isSchedulable(Instruction const& instr, int streamId) const + { + auto dep = instr.getDependency(); + AssertFatal(dep != Dependency::Count && streamId >= 0); + + auto topDep = getTopDependency(streamId); + // check that dep is not ranked below the stream's current top lock + AssertFatal(dep == Dependency::None || topDep <= dep, + "Out of order dependency lock can't be acquired.", + ShowValue(topDep), + ShowValue(dep)); + + // if no lock has been recorded for any stream yet, it's schedulable. + if(m_stack.empty()) + return true; + + // if the stream itself is non-preemptible, it's schedulable. + if(isNonPreemptibleStream(streamId)) + return true; + // if another stream is non-preemptible, + // it's not schedulable. + else if(m_nonPreemptibleStream.has_value()) + return false; + + auto lockOp = instr.getLockValue(); + // if the given instr is not a lock instruction, + // it's schedulable. + if(lockOp != LockOperation::Lock) + return true; + + // if the dependency is already locked and is being + // scheduled by the same stream again, it's schedulable. + if(m_locks.contains(dep)) + return m_stream.at(dep) == streamId; - // Instruction initializes the lockstate - if(m_lockdepth == 0) + // If the given stream tries to acquire a non-preemptible lock + // and another stream currently holds a higher-ranked preemptible lock, + // the scheduler cannot schedule this lower-ranked non-preemptible + // lock from streamId until the higher-ranked preemptible lock is released + // by the other stream. 
+ if(isNonPreemptibleDependency(dep)) { - m_dependency = instruction.getDependency(); + auto depVal = static_cast(dep); + auto tempDep = static_cast(++depVal); + while(tempDep != Dependency::Count) + { + if(m_locks.contains(tempDep)) + return false; + + depVal = static_cast(tempDep); + tempDep = static_cast(++depVal); + } } - m_lockdepth += inst_lockvalue; + return true; + } + + void LockState::add(Instruction const& instruction, int streamId) + { + //lockCheck(instruction, streamId); + + AssertFatal(isSchedulable(instruction, streamId), + "cannot add any instruction from this stream at this point"); + + auto lockOp = instruction.getLockValue(); - // Instruction releases lock - if(m_lockdepth == 0) + switch(lockOp) { - m_dependency = Scheduling::Dependency::None; + case LockOperation::None: + break; + + case LockOperation::Lock: + lock(instruction.getDependency(), streamId); + break; + + case LockOperation::Unlock: + unlock(instruction.getDependency(), streamId); + break; + + case LockOperation::Count: + Throw("Invalid LockOperation ", static_cast(lockOp)); } } - bool LockState::isLocked() const + bool LockState::isNonPreemptibleStream(int streamId) const { - return m_lockdepth > 0; + return m_nonPreemptibleStream.has_value() && streamId == m_nonPreemptibleStream.value(); } - // Will grow into a function that accepts args and checks the lock is in a valid state against those args - void LockState::isValid(bool locked) const + bool LockState::isLocked(Dependency dep, int streamId) const { - AssertFatal(isLocked() == locked, "Lock in invalid state"); + return m_stream.contains(dep) && m_stream.at(dep) == streamId; } - void LockState::lockCheck(Instruction const& instruction) + void LockState::lockCheck(Instruction const& instruction, int streamId) const { auto context = m_ctx.lock(); const auto& architecture = context->targetArchitecture(); GPUInstructionInfo info = architecture.GetInstructionInfo(instruction.getOpCode()); AssertFatal( - !info.isBranch() || 
isLocked(), + !info.isBranch() || isLocked(Dependency::Branch, streamId), concatenate(instruction.getOpCode(), " is a branch instruction, it should only be used within a lock.")); AssertFatal( - !info.hasImplicitAccess() || isLocked(), + !info.hasImplicitAccess() || isLocked(Dependency::M0, streamId) + || isLocked(Dependency::VCC, streamId) || isLocked(Dependency::SCC, streamId), concatenate(instruction.getOpCode(), " implicitly reads a register, it should only be used within a lock.")); AssertFatal( - !instruction.readsSpecialRegisters() || isLocked(), + !instruction.readsSpecialRegisters() || isLocked(Dependency::M0, streamId) + || isLocked(Dependency::VCC, streamId) || isLocked(Dependency::SCC, streamId), concatenate(instruction.getOpCode(), " reads a special register, it should only be used within a lock.")); } - Dependency LockState::getDependency() const + Dependency LockState::getTopDependency(int streamId) const { - return m_dependency; + if(m_stack.contains(streamId) && !(m_stack.at(streamId).empty())) + return m_stack.at(streamId).top(); + + return Dependency::None; } - int LockState::getLockDepth() const + int LockState::getLockDepth(int streamId) const { - return m_lockdepth; + if(m_stack.contains(streamId)) + return m_stack.at(streamId).size(); + + return 0; } - Generator Scheduler::yieldFromStream(Generator::iterator& iter) + Generator Scheduler::yieldFromStream(Generator::iterator& iter, + int streamId) { do { AssertFatal(iter != std::default_sentinel_t{}, - "End of instruction stream reached without unlocking"); - m_lockstate.add(*iter); + "End of instruction stream reached without unlocking", + ShowValue(streamId)); + m_lockstate.add(*iter, streamId); co_yield *iter; ++iter; co_yield consumeComments(iter, std::default_sentinel_t{}); - } while(m_lockstate.isLocked()); + } while(m_lockstate.isNonPreemptibleStream(streamId)); } bool Scheduler::supportsAddingStreams() const diff --git a/lib/source/Schedulers/CooperativeScheduler.cpp 
b/lib/source/Schedulers/CooperativeScheduler.cpp index 2b19ca4b..c81e6d71 100644 --- a/lib/source/Schedulers/CooperativeScheduler.cpp +++ b/lib/source/Schedulers/CooperativeScheduler.cpp @@ -74,23 +74,29 @@ namespace rocRoller size_t numSeqs = 0; size_t idx = 0; - float currentCost; while(true) { co_yield handleNewNodes(seqs, iterators); numSeqs = seqs.size(); + float currentCost = 0; + bool lockedOut = false; + if(iterators[idx] != seqs[idx].end()) { - currentCost = (*m_cost)(iterators[idx]); + auto const& inst = *iterators[idx]; + + lockedOut = !m_lockstate.isSchedulable(inst, idx); + currentCost = (*m_cost)(inst); } - if(iterators[idx] == seqs[idx].end() || currentCost > 0) + + if(iterators[idx] == seqs[idx].end() || lockedOut || currentCost > 0) { size_t origIdx = idx; float minCost = std::numeric_limits::max(); int minCostIdx = -1; - if(iterators[idx] != seqs[idx].end()) + if(iterators[idx] != seqs[idx].end() && !lockedOut) { minCost = currentCost; minCostIdx = idx; @@ -102,8 +108,10 @@ namespace rocRoller { if(iterators[idx] != seqs[idx].end()) { - currentCost = (*m_cost)(iterators[idx]); - if(currentCost < minCost) + auto const& inst = *iterators[idx]; + lockedOut = !m_lockstate.isSchedulable(inst, idx); + currentCost = (*m_cost)(inst); + if(!lockedOut && currentCost < minCost) { minCost = currentCost; minCostIdx = idx; @@ -121,10 +129,8 @@ namespace rocRoller idx = minCostIdx; } - co_yield yieldFromStream(iterators[idx]); + co_yield yieldFromStream(iterators[idx], idx); } - - m_lockstate.isValid(false); } } } diff --git a/lib/source/Schedulers/PriorityScheduler.cpp b/lib/source/Schedulers/PriorityScheduler.cpp index 024b083f..ccb62f03 100644 --- a/lib/source/Schedulers/PriorityScheduler.cpp +++ b/lib/source/Schedulers/PriorityScheduler.cpp @@ -87,7 +87,12 @@ namespace rocRoller if(iterators[idx] == seqs[idx].end()) continue; - float myCost = (*m_cost)(iterators[idx]); + auto const& instr = *iterators[idx]; + + if(!m_lockstate.isSchedulable(instr, idx)) + 
continue; + + float myCost = (*m_cost)(instr); if(myCost < minCost) { @@ -101,12 +106,10 @@ namespace rocRoller if(minCostIdx >= 0) { - co_yield yieldFromStream(iterators[minCostIdx]); + co_yield yieldFromStream(iterators[minCostIdx], minCostIdx); } } while(minCostIdx >= 0); - - m_lockstate.isValid(false); } } } diff --git a/lib/source/Schedulers/RandomScheduler.cpp b/lib/source/Schedulers/RandomScheduler.cpp index 5380a50c..7365ffe8 100644 --- a/lib/source/Schedulers/RandomScheduler.cpp +++ b/lib/source/Schedulers/RandomScheduler.cpp @@ -68,7 +68,16 @@ namespace rocRoller if(seqs.empty()) co_return; - auto random = m_ctx.lock()->random(); + auto random = m_ctx.lock()->random(); + auto random_from + = [&random](std::vector const& vec) -> std::tuple { + AssertFatal(!vec.empty()); + + auto max = vec.size() - 1; + auto i_rand = random->next(0, max); + return {vec.at(i_rand), i_rand}; + }; + std::vector::iterator> iterators; co_yield handleNewNodes(seqs, iterators); @@ -78,33 +87,44 @@ namespace rocRoller while(!validIterIndexes.empty()) { - size_t idx; + auto [idx, rand] = random_from(validIterIndexes); + + auto validIdx = [&](size_t idx) -> bool { + if(iterators[idx] == seqs[idx].end()) + return true; + + auto const& inst = *iterators[idx]; + + if(!m_lockstate.isSchedulable(inst, idx)) + return false; + + auto status = m_ctx.lock()->peek(inst); - // Try to find a stream that doesn't cause an out of register error. - std::vector iterIndexesToSearch = validIterIndexes; - while(!iterIndexesToSearch.empty()) + if(status.outOfRegisters.count() != 0) + return false; + + return true; + }; + + // Try to find a stream that neither runs out of registers nor violates the + // locking rules; if none qualifies, fall back to the last candidate tried. 
+ if(!validIdx(idx)) { - size_t i_rand = random->next(0, iterIndexesToSearch.size() - 1); - idx = iterIndexesToSearch[i_rand]; + auto iterIndexesToSearch = validIterIndexes; + iterIndexesToSearch.erase(iterIndexesToSearch.begin() + rand); - if(iterators[idx] != seqs[idx].end()) - { - auto status = m_ctx.lock()->peek(*iterators[idx]); - if(status.outOfRegisters.count() == 0) - { - break; - } - } - else + while(!iterIndexesToSearch.empty() && !validIdx(idx)) { - break; - } - iterIndexesToSearch.erase(iterIndexesToSearch.begin() + i_rand); + auto [myIdx, myRand] = random_from(iterIndexesToSearch); + idx = myIdx; + + iterIndexesToSearch.erase(iterIndexesToSearch.begin() + myRand); + } } if(iterators[idx] != seqs[idx].end()) { - co_yield yieldFromStream(iterators[idx]); + co_yield yieldFromStream(iterators[idx], idx); } else { @@ -120,8 +140,6 @@ namespace rocRoller validIterIndexes.push_back(i); } } - - m_lockstate.isValid(false); } } } diff --git a/lib/source/Schedulers/RoundRobinScheduler.cpp b/lib/source/Schedulers/RoundRobinScheduler.cpp index 30a34297..67ae8ef1 100644 --- a/lib/source/Schedulers/RoundRobinScheduler.cpp +++ b/lib/source/Schedulers/RoundRobinScheduler.cpp @@ -81,13 +81,14 @@ namespace rocRoller { if(iterators[i] != seqs[i].end()) { + if(!m_lockstate.isSchedulable(*iterators[i], i)) + continue; + yield_seq = true; - co_yield yieldFromStream(iterators[i]); + co_yield yieldFromStream(iterators[i], i); } } } - - m_lockstate.isValid(false); } } } diff --git a/lib/source/Schedulers/SequentialScheduler.cpp b/lib/source/Schedulers/SequentialScheduler.cpp index 6123625e..6fc0b4ba 100644 --- a/lib/source/Schedulers/SequentialScheduler.cpp +++ b/lib/source/Schedulers/SequentialScheduler.cpp @@ -67,6 +67,7 @@ namespace rocRoller { bool yieldedAny = false; + // a vector of instruction streams std::vector::iterator> iterators; co_yield handleNewNodes(seqs, iterators); @@ -76,10 +77,13 @@ namespace rocRoller for(size_t i = 0; i < seqs.size(); i++) { - while(iterators[i] != seqs[i].end()) { - 
co_yield yieldFromStream(iterators[i]); + auto const& instr = *iterators[i]; + if(!m_lockstate.isSchedulable(instr, i)) + break; + for(auto const& inst : yieldFromStream(iterators[i], i)) + co_yield inst; yieldedAny = true; } diff --git a/test/unit/DependencyTest.cpp b/test/unit/DependencyTest.cpp index ea99c946..1a654017 100644 --- a/test/unit/DependencyTest.cpp +++ b/test/unit/DependencyTest.cpp @@ -169,6 +169,7 @@ namespace rocRollerTest // Double the input value. co_yield Expression::generate( v_value, v_value->expression() + v_value->expression(), m_context); + co_yield Instruction::Lock(Scheduling::Dependency::VCC); // Compare against the stop value. co_yield Expression::generate( s_condition, v_value->expression() < v_target->expression(), m_context); @@ -176,6 +177,7 @@ namespace rocRollerTest co_yield m_context->brancher()->branchIfNonZero( loop_start, s_condition, "// Conditionally branching to the label register."); + co_yield Instruction::Unlock("unlock VCC"); co_yield Instruction::Unlock("Loop end"); co_yield m_context->mem()->storeGlobal(v_ptr, v_value, 0, 4); @@ -574,7 +576,8 @@ namespace rocRollerTest co_yield m_context->brancher()->branch(end); co_yield Instruction::Label(label); co_yield m_context->copier()->copy(v_value, Register::Value::Literal(10)); - co_yield Instruction::Label(end).unlock(); + co_yield Instruction::Label(end); + co_yield Instruction::Unlock("unlock VCC"); co_yield m_context->mem()->storeGlobal(v_ptr, v_value, 0, 4); }; @@ -604,8 +607,10 @@ namespace rocRollerTest co_yield Expression::generate( v_res2, v_res2->expression() * v_res2->expression(), m_context); + co_yield Instruction::Lock(Scheduling::Dependency::VCC); co_yield Expression::generate( vcc, v_lhs2->expression() == v_rhs2->expression(), m_context); + co_yield Instruction::Unlock("unlock VCC"); }; sequences.push_back(set_vcc()); diff --git a/test/unit/EnumToStringTest.cpp b/test/unit/EnumToStringTest.cpp index 4168d1c9..869a3633 100644 --- 
a/test/unit/EnumToStringTest.cpp +++ b/test/unit/EnumToStringTest.cpp @@ -303,7 +303,6 @@ TEST(EnumToStringTest, ALL) {Dependency::SCC, "SCC"}, {Dependency::VCC, "VCC"}, {Dependency::Branch, "Branch"}, - {Dependency::Unlock, "Unlock"}, {Dependency::M0, "M0"}, }); } diff --git a/test/unit/LockTest.cpp b/test/unit/LockTest.cpp index b143b058..adc03239 100644 --- a/test/unit/LockTest.cpp +++ b/test/unit/LockTest.cpp @@ -41,47 +41,148 @@ namespace rocRollerTest TEST_F(LockStateTest, Basic) { - auto none_lock = Scheduling::LockState(m_context); - auto scc_lock = Scheduling::LockState(m_context, Scheduling::Dependency::SCC); - auto vcc_lock = Scheduling::LockState(m_context, Scheduling::Dependency::VCC); + auto lock_inst = Instruction::Lock(Scheduling::Dependency::SCC, "Lock Instruction"); + auto unlock_inst = Instruction::Unlock("Unlock Instruction"); + auto comment_inst = Instruction::Comment("Comment Instruction"); - EXPECT_EQ(none_lock.isLocked(), false); - EXPECT_EQ(scc_lock.isLocked(), true); - EXPECT_EQ(vcc_lock.isLocked(), true); + auto lock_m0_inst = Instruction::Lock(Scheduling::Dependency::M0, "Lock M0"); + auto unlock_m0_inst = Instruction::Unlock(Scheduling::Dependency::M0, "Lock M0"); - EXPECT_EQ(none_lock.getLockDepth(), 0); - EXPECT_EQ(scc_lock.getLockDepth(), 1); - EXPECT_EQ(vcc_lock.getLockDepth(), 1); + { + auto none_lock = Scheduling::LockState(m_context); + EXPECT_EQ(none_lock.isNonPreemptibleStream(0), false); + EXPECT_EQ(none_lock.getLockDepth(0), 0); - EXPECT_EQ(none_lock.getDependency(), Scheduling::Dependency::None); - EXPECT_EQ(scc_lock.getDependency(), Scheduling::Dependency::SCC); - EXPECT_EQ(vcc_lock.getDependency(), Scheduling::Dependency::VCC); + EXPECT_EQ(none_lock.getTopDependency(0), Scheduling::Dependency::None); - auto lock_inst = Instruction::Lock(Scheduling::Dependency::SCC, "Lock Instruction"); - none_lock.add(lock_inst); + // stream0:scc + none_lock.add(lock_inst, 0); - EXPECT_EQ(none_lock.isLocked(), true); - 
EXPECT_EQ(none_lock.getLockDepth(), 1); + // trying to unlock the type of lock not held by stream0 + EXPECT_THROW(none_lock.add(unlock_m0_inst, 0), FatalError); + // trying to lock out of order: m0 after scc + EXPECT_THROW(none_lock.add(lock_m0_inst, 0), FatalError); - auto unlock_inst = Instruction::Unlock("Unlock Instruction"); - auto comment_isntr = Instruction::Comment("Comment Instruction"); - scc_lock.add(unlock_inst); - vcc_lock.add(comment_isntr); + // scc is a non-preemptible lock + EXPECT_EQ(none_lock.isNonPreemptibleStream(0), true); + EXPECT_EQ(none_lock.getLockDepth(0), 1); - EXPECT_EQ(scc_lock.isLocked(), false); - EXPECT_EQ(vcc_lock.isLocked(), true); - EXPECT_EQ(scc_lock.getDependency(), Scheduling::Dependency::None); - EXPECT_EQ(scc_lock.getLockDepth(), 0); + // stream0: + none_lock.add(unlock_inst, 0); + EXPECT_EQ(none_lock.isNonPreemptibleStream(0), false); + EXPECT_EQ(none_lock.getLockDepth(0), 0); - vcc_lock.add(unlock_inst); - EXPECT_EQ(vcc_lock.isLocked(), false); + // stream0:m0 + none_lock.add(lock_m0_inst, 0); + EXPECT_EQ(none_lock.getLockDepth(0), 1); - EXPECT_THROW(scc_lock.add(unlock_inst), FatalError); - EXPECT_THROW(vcc_lock.isValid(true), FatalError); - EXPECT_NO_THROW(vcc_lock.isValid(false)); + // m0 is not a non-preemptible lock + EXPECT_EQ(none_lock.isNonPreemptibleStream(0), false); + + EXPECT_EQ(none_lock.isSchedulable(comment_inst, 0), true); + EXPECT_EQ(none_lock.isSchedulable(comment_inst, 1), true); + + EXPECT_EQ(none_lock.isSchedulable(lock_m0_inst, 0), true); + // stream1 can't lock m0 as it is held by stream0 + EXPECT_EQ(none_lock.isSchedulable(lock_m0_inst, 1), false); + + // stream0:m0,m0 + none_lock.add(lock_m0_inst, 0); + EXPECT_EQ(none_lock.getLockDepth(0), 2); + + EXPECT_EQ(none_lock.isNonPreemptibleStream(0), false); + + EXPECT_EQ(none_lock.isSchedulable(comment_inst, 0), true); + EXPECT_EQ(none_lock.isSchedulable(comment_inst, 1), true); + + EXPECT_EQ(none_lock.isSchedulable(lock_m0_inst, 0), true); + 
EXPECT_EQ(none_lock.isSchedulable(lock_m0_inst, 1), false); + + // stream0:m0 + none_lock.add(unlock_inst, 0); + EXPECT_EQ(none_lock.getLockDepth(0), 1); + + EXPECT_EQ(none_lock.isNonPreemptibleStream(0), false); + + EXPECT_EQ(none_lock.isSchedulable(comment_inst, 0), true); + EXPECT_EQ(none_lock.isSchedulable(comment_inst, 1), true); + + EXPECT_EQ(none_lock.isSchedulable(lock_m0_inst, 0), true); + // stream1 can't acquire m0 as it is held by stream0 + EXPECT_EQ(none_lock.isSchedulable(lock_m0_inst, 1), false); + + //stream1:scc + EXPECT_EQ(none_lock.isSchedulable(lock_inst, 1), true); + none_lock.add(lock_inst, 1); + + EXPECT_THROW(none_lock.add(lock_inst, 0), FatalError); + + EXPECT_EQ(none_lock.getLockDepth(0), 1); + EXPECT_EQ(none_lock.getLockDepth(1), 1); + + // scc is a non-preemptible lock + EXPECT_EQ(none_lock.isNonPreemptibleStream(1), true); + + EXPECT_EQ(none_lock.isSchedulable(comment_inst, 0), false); + EXPECT_EQ(none_lock.isSchedulable(comment_inst, 1), true); + + EXPECT_EQ(none_lock.isSchedulable(lock_m0_inst, 0), false); + EXPECT_EQ(none_lock.isSchedulable(lock_inst, 0), false); + // can't lock out of order: m0 after scc + EXPECT_THROW(none_lock.isSchedulable(lock_m0_inst, 1), FatalError); + + // stream0:m0 + // can't add any instruction from stream0 while another stream holds a non-preemptible lock.
+ EXPECT_THROW(none_lock.add(unlock_inst, 0), FatalError); + EXPECT_EQ(none_lock.getLockDepth(0), 1); + + EXPECT_EQ(none_lock.isNonPreemptibleStream(1), true); + + EXPECT_EQ(none_lock.isSchedulable(comment_inst, 0), false); + EXPECT_EQ(none_lock.isSchedulable(comment_inst, 1), true); + + EXPECT_EQ(none_lock.isSchedulable(lock_m0_inst, 0), false); + EXPECT_EQ(none_lock.isSchedulable(lock_inst, 0), false); + EXPECT_THROW(none_lock.isSchedulable(lock_m0_inst, 1), FatalError); + + none_lock.add(unlock_inst, 1); + EXPECT_EQ(none_lock.getLockDepth(1), 0); + + EXPECT_EQ(none_lock.isNonPreemptibleStream(1), false); + + none_lock.add(unlock_inst, 0); + EXPECT_EQ(none_lock.getLockDepth(0), 0); + + EXPECT_EQ(none_lock.isSchedulable(comment_inst, 0), true); + EXPECT_EQ(none_lock.isSchedulable(comment_inst, 1), true); + + EXPECT_EQ(none_lock.isSchedulable(lock_m0_inst, 0), true); + EXPECT_EQ(none_lock.isSchedulable(lock_m0_inst, 1), true); + } + + { + auto scc_lock = Scheduling::LockState(m_context, Scheduling::Dependency::SCC); + EXPECT_EQ(scc_lock.isNonPreemptibleStream(0), true); + EXPECT_EQ(scc_lock.getLockDepth(0), 1); + EXPECT_EQ(scc_lock.getTopDependency(0), Scheduling::Dependency::SCC); + scc_lock.add(unlock_inst, 0); + EXPECT_EQ(scc_lock.isNonPreemptibleStream(0), false); + EXPECT_EQ(scc_lock.getTopDependency(0), Scheduling::Dependency::None); + EXPECT_EQ(scc_lock.getLockDepth(0), 0); + EXPECT_THROW(scc_lock.add(unlock_inst, 0), FatalError); + } + + { + auto vcc_lock = Scheduling::LockState(m_context, Scheduling::Dependency::VCC); + EXPECT_EQ(vcc_lock.isNonPreemptibleStream(0), false); + EXPECT_EQ(vcc_lock.getLockDepth(0), 1); + EXPECT_EQ(vcc_lock.getTopDependency(0), Scheduling::Dependency::VCC); + vcc_lock.add(comment_inst, 0); + EXPECT_EQ(vcc_lock.isNonPreemptibleStream(0), false); + vcc_lock.add(unlock_inst, 0); + EXPECT_EQ(vcc_lock.isNonPreemptibleStream(0), false); + } - EXPECT_THROW({ auto l = Scheduling::LockState(m_context, Scheduling::Dependency::Unlock); }, 
- FatalError); EXPECT_THROW({ auto l = Scheduling::LockState(m_context, Scheduling::Dependency::Count); }, FatalError); } diff --git a/test/unit/SchedulerTest.cpp b/test/unit/SchedulerTest.cpp index fb08987f..709f4098 100644 --- a/test/unit/SchedulerTest.cpp +++ b/test/unit/SchedulerTest.cpp @@ -409,58 +409,67 @@ namespace rocRollerTest }; auto ifBlock = [&]() -> Generator { + EXPECT_EQ(schedulerA->getLockState().getTopDependency(1), Scheduling::Dependency::VCC); + EXPECT_EQ(schedulerB->getLockState().getTopDependency(0), Scheduling::Dependency::VCC); + EXPECT_EQ(schedulerC->getLockState().getTopDependency(1), Scheduling::Dependency::None); + co_yield( Inst("(C) If Begin").lock(Scheduling::Dependency::SCC, "(C) Scheduler C Lock")); - EXPECT_EQ(schedulerA->getLockState().getDependency(), Scheduling::Dependency::Branch); - EXPECT_EQ(schedulerB->getLockState().getDependency(), Scheduling::Dependency::VCC); - EXPECT_EQ(schedulerC->getLockState().getDependency(), Scheduling::Dependency::SCC); + EXPECT_EQ(schedulerA->getLockState().getTopDependency(1), Scheduling::Dependency::SCC); + EXPECT_EQ(schedulerB->getLockState().getTopDependency(0), Scheduling::Dependency::SCC); + EXPECT_EQ(schedulerC->getLockState().getTopDependency(1), Scheduling::Dependency::SCC); - co_yield(Inst("+++ Scheduler A Lock Depth: " - + std::to_string(schedulerA->getLockState().getLockDepth()))); - co_yield(Inst("+++ Scheduler B Lock Depth: " - + std::to_string(schedulerB->getLockState().getLockDepth()))); - co_yield(Inst("+++ Scheduler C Lock Depth: " - + std::to_string(schedulerC->getLockState().getLockDepth()))); + co_yield(Inst("+++ Scheduler A Stream 1 Lock Depth: " + + std::to_string(schedulerA->getLockState().getLockDepth(1)))); + co_yield(Inst("+++ Scheduler B Stream 0 Lock Depth: " + + std::to_string(schedulerB->getLockState().getLockDepth(0)))); + co_yield(Inst("+++ Scheduler C Stream 1 Lock Depth: " + + std::to_string(schedulerC->getLockState().getLockDepth(1)))); co_yield(Inst("(C) If 
Instruction")); co_yield(Inst("(C) If End").unlock("(C) Scheduler C Unlock")); - EXPECT_EQ(schedulerA->getLockState().getDependency(), Scheduling::Dependency::Branch); - EXPECT_EQ(schedulerB->getLockState().getDependency(), Scheduling::Dependency::VCC); - EXPECT_EQ(schedulerC->getLockState().getDependency(), Scheduling::Dependency::None); + EXPECT_EQ(schedulerA->getLockState().getTopDependency(1), Scheduling::Dependency::VCC); + EXPECT_EQ(schedulerB->getLockState().getTopDependency(0), Scheduling::Dependency::VCC); + EXPECT_EQ(schedulerC->getLockState().getTopDependency(1), Scheduling::Dependency::None); - co_yield(Inst("+++ Scheduler A Lock Depth: " - + std::to_string(schedulerA->getLockState().getLockDepth()))); - co_yield(Inst("+++ Scheduler B Lock Depth: " - + std::to_string(schedulerB->getLockState().getLockDepth()))); - co_yield(Inst("+++ Scheduler C Lock Depth: " - + std::to_string(schedulerC->getLockState().getLockDepth()))); + co_yield(Inst("+++ Scheduler A Stream 1 Lock Depth: " + + std::to_string(schedulerA->getLockState().getLockDepth(1)))); + co_yield(Inst("+++ Scheduler B Stream 0 Lock Depth: " + + std::to_string(schedulerB->getLockState().getLockDepth(0)))); + co_yield(Inst("+++ Scheduler C Stream 1 Lock Depth: " + + std::to_string(schedulerC->getLockState().getLockDepth(1)))); }; c_sequences.push_back(opB()); c_sequences.push_back(ifBlock()); auto unroll0 = [&]() -> Generator { + EXPECT_EQ(schedulerA->getLockState().getTopDependency(1), + Scheduling::Dependency::Branch); + EXPECT_EQ(schedulerB->getLockState().getTopDependency(0), Scheduling::Dependency::None); + co_yield(Inst("(B) Unroll 0 Begin") .lock(Scheduling::Dependency::VCC, "(B) Scheduler B Lock")); - EXPECT_EQ(schedulerA->getLockState().getDependency(), Scheduling::Dependency::Branch); - EXPECT_EQ(schedulerB->getLockState().getDependency(), Scheduling::Dependency::VCC); + EXPECT_EQ(schedulerA->getLockState().getTopDependency(1), Scheduling::Dependency::VCC); + 
EXPECT_EQ(schedulerB->getLockState().getTopDependency(0), Scheduling::Dependency::VCC); - co_yield(Inst("+++ Scheduler A Lock Depth: " - + std::to_string(schedulerA->getLockState().getLockDepth()))); - co_yield(Inst("+++ Scheduler B Lock Depth: " - + std::to_string(schedulerB->getLockState().getLockDepth()))); + co_yield(Inst("+++ Scheduler A Stream 1 Lock Depth: " + + std::to_string(schedulerA->getLockState().getLockDepth(1)))); + co_yield(Inst("+++ Scheduler B Stream 0 Lock Depth: " + + std::to_string(schedulerB->getLockState().getLockDepth(0)))); co_yield((*schedulerC)(c_sequences)); co_yield(Inst("(B) Unroll 0 End")).unlock("(B) Scheduler B Unlock"); - EXPECT_EQ(schedulerA->getLockState().getDependency(), Scheduling::Dependency::Branch); - EXPECT_EQ(schedulerB->getLockState().getDependency(), Scheduling::Dependency::None); + EXPECT_EQ(schedulerA->getLockState().getTopDependency(1), + Scheduling::Dependency::Branch); + EXPECT_EQ(schedulerB->getLockState().getTopDependency(0), Scheduling::Dependency::None); - co_yield(Inst("+++ Scheduler A Lock Depth: " - + std::to_string(schedulerA->getLockState().getLockDepth()))); - co_yield(Inst("+++ Scheduler B Lock Depth: " - + std::to_string(schedulerB->getLockState().getLockDepth()))); + co_yield(Inst("+++ Scheduler A Stream 1 Lock Depth: " + + std::to_string(schedulerA->getLockState().getLockDepth(1)))); + co_yield(Inst("+++ Scheduler B Stream 0 Lock Depth: " + + std::to_string(schedulerB->getLockState().getLockDepth(0)))); }; auto unroll1 = [&]() -> Generator { @@ -482,16 +491,17 @@ namespace rocRollerTest co_yield(Inst("(A) For Loop Begin") .lock(Scheduling::Dependency::Branch, "(A) Scheduler A Lock")); - EXPECT_EQ(schedulerA->getLockState().getDependency(), Scheduling::Dependency::Branch); + EXPECT_EQ(schedulerA->getLockState().getTopDependency(1), + Scheduling::Dependency::Branch); - co_yield(Inst("+++ Scheduler A Lock Depth: " - + std::to_string(schedulerA->getLockState().getLockDepth()))); + co_yield(Inst("+++ 
Scheduler A Stream 1 Lock Depth: " + + std::to_string(schedulerA->getLockState().getLockDepth(1)))); co_yield((*schedulerB)(b_sequences)); co_yield(Inst("(A) For Loop End").unlock("(A) Scheduler A Unlock")); - co_yield(Inst("+++ Scheduler A Lock Depth: " - + std::to_string(schedulerA->getLockState().getLockDepth()))); + co_yield(Inst("+++ Scheduler A Stream 1 Lock Depth: " + + std::to_string(schedulerA->getLockState().getLockDepth(1)))); - EXPECT_EQ(schedulerA->getLockState().getDependency(), Scheduling::Dependency::None); + EXPECT_EQ(schedulerA->getLockState().getTopDependency(1), Scheduling::Dependency::None); }; a_sequences.push_back(opA()); @@ -500,32 +510,32 @@ namespace rocRollerTest m_context->schedule((*schedulerA)(a_sequences)); std::string expected = R"( (A) Op A Begin - (A) For Loop Begin // (A) Scheduler A Lock - +++ Scheduler A Lock Depth: 1 - (B) Unroll 0 Begin // (B) Scheduler B Lock - +++ Scheduler A Lock Depth: 2 - +++ Scheduler B Lock Depth: 1 + (A) For Loop Begin // (A) Scheduler A Lock + +++ Scheduler A Stream 1 Lock Depth: 1 + (B) Unroll 0 Begin // (B) Scheduler B Lock + (B) Unroll 1 Begin + +++ Scheduler A Stream 1 Lock Depth: 2 + (B) Unroll 1 Instruction + +++ Scheduler B Stream 0 Lock Depth: 1 + (B) Unroll 1 End (C) Op B Begin - (C) If Begin // (C) Scheduler C Lock - +++ Scheduler A Lock Depth: 3 - +++ Scheduler B Lock Depth: 2 - +++ Scheduler C Lock Depth: 1 + (C) If Begin // (C) Scheduler C Lock + +++ Scheduler A Stream 1 Lock Depth: 3 + +++ Scheduler B Stream 0 Lock Depth: 2 + +++ Scheduler C Stream 1 Lock Depth: 1 (C) If Instruction - (C) If End // (C) Scheduler C Unlock + (C) If End // (C) Scheduler C Unlock (C) Op B Instruction - +++ Scheduler A Lock Depth: 2 + +++ Scheduler A Stream 1 Lock Depth: 2 (C) Op B End - +++ Scheduler B Lock Depth: 1 - +++ Scheduler C Lock Depth: 0 - (B) Unroll 0 End // (B) Scheduler B Unlock - (B) Unroll 1 Begin - +++ Scheduler A Lock Depth: 1 - (B) Unroll 1 Instruction - +++ Scheduler B Lock Depth: 0 - (B) 
Unroll 1 End - (A) For Loop End // (A) Scheduler A Unlock + +++ Scheduler B Stream 0 Lock Depth: 1 + +++ Scheduler C Stream 1 Lock Depth: 0 + (B) Unroll 0 End // (B) Scheduler B Unlock + +++ Scheduler A Stream 1 Lock Depth: 1 + +++ Scheduler B Stream 0 Lock Depth: 0 + (A) For Loop End // (A) Scheduler A Unlock (A) Op A Instruction - +++ Scheduler A Lock Depth: 0 + +++ Scheduler A Stream 1 Lock Depth: 0 (A) Op A End )"; diff --git a/test/unit/ScopeTest.cpp b/test/unit/ScopeTest.cpp index 56e35ba1..74ab3a26 100644 --- a/test/unit/ScopeTest.cpp +++ b/test/unit/ScopeTest.cpp @@ -105,10 +105,12 @@ namespace ScopeTest auto kexpected = R"( // CodeGeneratorVisitor::generate() begin // generate({1}) + // BEGIN Scheduler for operations {1} // Kernel(1) BEGIN // (op 1) generate({}) // (op 1) end: generate({}) // (op 1) generate({4}) + // (op 1) BEGIN Scheduler for operations {4} // (op 1) Assign VGPR 11:U32(4) BEGIN // (op 4) Assign dim(1) = 11:U32 // tag 1: v**UNALLOCATED** @@ -116,12 +118,16 @@ namespace ScopeTest // Allocated DataFlowTag1: 1 VGPR (Value: UInt32) (op 4): v0 v_mov_b32 v0, 11 // (op 4) call() // (op 1) Assign VGPR 11:U32(4) END + // (op 1) END Scheduler for operations {4} + // (op 1) BEGIN Scheduler for operations {2} // (op 1) Scope(2) BEGIN // (op 2) Lock Scope 2 // (op 2) generate({3}) + // (op 2) BEGIN Scheduler for operations {3} // (op 2) Scope(3) BEGIN // (op 3) Lock Scope 3 // (op 3) generate({6}) + // (op 3) BEGIN Scheduler for operations {6} // (op 3) Assign VGPR 33:U32(6) BEGIN // (op 6) Assign dim(3) = 33:U32 // tag 3: v**UNALLOCATED** @@ -129,16 +135,22 @@ namespace ScopeTest // Allocated DataFlowTag3: 1 VGPR (Value: UInt32) (op 6): v1 v_mov_b32 v1, 33 // (op 6) call() // (op 3) Assign VGPR 33:U32(6) END + // (op 3) END Scheduler for operations {6} + // (op 3) BEGIN Scheduler for operations {7} // (op 3) Assign VGPR 44:U32(7) BEGIN // (op 7) Assign dim(1) = 44:U32 // (op 7) Generate 44:U32 into DataFlowTag1: VGPR Value: UInt32 x 1: v0 v_mov_b32 
v0, 44 // (op 7) call() // (op 3) Assign VGPR 44:U32(7) END + // (op 3) END Scheduler for operations {7} // (op 3) end: generate({6}) // Freeing DataFlowTag3: 1 VGPR (Value: UInt32) (op 6): v1 + // Deleting tag 3 // (op 3) Unlock Scope 3 // (op 2) Scope(3) END + // (op 2) END Scheduler for operations {3} + // (op 2) BEGIN Scheduler for operations {5} // (op 2) Assign VGPR 22:U32(5) BEGIN // (op 5) Assign dim(2) = 22:U32 // tag 2: v**UNALLOCATED** @@ -146,15 +158,20 @@ namespace ScopeTest // Allocated DataFlowTag2: 1 VGPR (Value: UInt32) (op 5): v1 v_mov_b32 v1, 22 // (op 5) call() // (op 2) Assign VGPR 22:U32(5) END + // (op 2) END Scheduler for operations {5} // (op 2) end: generate({3}) // Freeing DataFlowTag2: 1 VGPR (Value: UInt32) (op 5): v1 + // Deleting tag 2 // (op 2) Unlock Scope 2 // (op 1) Scope(2) END + // (op 1) END Scheduler for operations {2} // (op 1) end: generate({4}) // Freeing DataFlowTag1: 1 VGPR (Value: UInt32) (op 4): v0 + // Deleting tag 1 // Kernel(1) END + // END Scheduler for operations {1} // end: generate({1}) // CodeGeneratorVisitor::generate() end )";