From 09564e99495eef95e4c0618d16ae44bac419dc4d Mon Sep 17 00:00:00 2001 From: Shiran Guo Date: Fri, 30 Jan 2026 09:45:13 +0800 Subject: [PATCH 1/3] feat(taskflow): enhance CGRA placement with critical path priority and SARA scoring This commit implements: 1. MCT dependency analysis for SSA and memory (RAW, WAR, WAW). 2. Critical path prioritization for task placement using ALAP levels. 3. SARA-style scoring heuristic for CGRA placement. 4. Memory mapping with SRAM assignment and direct wire configuration for fusion candidates. 5. Simplified and updated tests for placement verification. --- include/TaskflowDialect/TaskflowPasses.h | 2 + include/TaskflowDialect/TaskflowPasses.td | 25 + .../Transforms/AnalyzeMCTDependencyPass.cpp | 342 +++++++++++ lib/TaskflowDialect/Transforms/CMakeLists.txt | 2 + .../Transforms/PlaceMCTOnCGRAPass.cpp | 539 ++++++++++++++++++ .../irregular-loop/irregular-loop.mlir | 15 + .../taskflow/multi-nested/multi-nested.mlir | 20 +- .../parallel-nested/parallel-nested.mlir | 14 +- 8 files changed, 957 insertions(+), 2 deletions(-) create mode 100644 lib/TaskflowDialect/Transforms/AnalyzeMCTDependencyPass.cpp create mode 100644 lib/TaskflowDialect/Transforms/PlaceMCTOnCGRAPass.cpp diff --git a/include/TaskflowDialect/TaskflowPasses.h b/include/TaskflowDialect/TaskflowPasses.h index 50f28d0e..0fddc037 100644 --- a/include/TaskflowDialect/TaskflowPasses.h +++ b/include/TaskflowDialect/TaskflowPasses.h @@ -17,6 +17,8 @@ namespace taskflow { #include "TaskflowDialect/TaskflowPasses.h.inc" std::unique_ptr createConstructHyperblockFromTaskPass(); std::unique_ptr createCanonicalizeTaskPass(); +std::unique_ptr createAnalyzeMCTDependencyPass(); +std::unique_ptr createPlaceMCTOnCGRAPass(); #define GEN_PASS_REGISTRATION #include "TaskflowDialect/TaskflowPasses.h.inc" diff --git a/include/TaskflowDialect/TaskflowPasses.td b/include/TaskflowDialect/TaskflowPasses.td index 4728f138..5ed79f03 100644 --- a/include/TaskflowDialect/TaskflowPasses.td +++ b/include/TaskflowDialect/TaskflowPasses.td @@ -29,4 +29,29 @@ def CanonicalizeTask: Pass<"canonicalize-task", "func::FuncOp">{ }]; let constructor = "taskflow::createCanonicalizeTaskPass()"; } + +def AnalyzeMCTDependency : Pass<"analyze-mct-dependency", "func::FuncOp"> { + let summary = "Analyzes dependencies between MCTs for multi-CGRA mapping"; + let description = [{ + This pass analyzes Minimized Canonicalized Tasks (MCTs) to identify: + 1. Memory dependencies (RAW, WAR, WAW) between tasks + 2. Same-header task pairs that are fusion candidates + 3. Counter chain (loop header) information for each task + + Used for multi-CGRA mapping optimization. + }]; + let constructor = "taskflow::createAnalyzeMCTDependencyPass()"; +} + +def PlaceMCTOnCGRA : Pass<"place-mct-on-cgra", "func::FuncOp"> { + let summary = "Places MCTs onto a 2D CGRA grid with adjacency optimization"; + let description = [{ + This pass places Minimized Canonicalized Tasks (MCTs) onto a 2D CGRA grid. + Fusion candidates (same-header SSA dependencies) are placed on adjacent + CGRAs to enable direct data forwarding. + + Uses a default 4x4 CGRA grid. 
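+
+    A typical invocation (mirroring the RUN lines in the placement tests)
+    runs the pass after task construction and canonicalization:
+
+      mlir-neura-opt input.mlir --convert-affine-to-taskflow \
+        --construct-hyperblock-from-task --canonicalize-task \
+        --place-mct-on-cgra
+
+    The pass records the chosen placement on each task via the cgra_row,
+    cgra_col, and cgra_count attributes.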
+ }]; + let constructor = "taskflow::createPlaceMCTOnCGRAPass()"; +} #endif // TASKFLOW_PASSES_TD \ No newline at end of file diff --git a/lib/TaskflowDialect/Transforms/AnalyzeMCTDependencyPass.cpp b/lib/TaskflowDialect/Transforms/AnalyzeMCTDependencyPass.cpp new file mode 100644 index 00000000..798c3909 --- /dev/null +++ b/lib/TaskflowDialect/Transforms/AnalyzeMCTDependencyPass.cpp @@ -0,0 +1,342 @@ +//===- AnalyzeMCTDependencyPass.cpp - MCT Dependency Analysis Pass --------===// +// +// This pass analyzes dependencies between Minimized Canonicalized Tasks (MCTs) +// for multi-CGRA mapping optimization. +// +// Architecture context: +// - Our architecture can combine multiple CGRAs into one logical CGRA. +// - Task dependencies: SSA use-def AND memory access (RAW, WAR, WAW). +// +// This pass identifies: +// 1. SSA dependencies: Task output → Task input (data flow). +// 2. Memory dependencies: RAW, WAR, WAW via shared memrefs. +// 3. Same-header pairs: Fusion candidates for data forwarding. +// +//===----------------------------------------------------------------------===// + +#include "TaskflowDialect/TaskflowDialect.h" +#include "TaskflowDialect/TaskflowOps.h" +#include "TaskflowDialect/TaskflowPasses.h" + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Value.h" +#include "mlir/Pass/Pass.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" + +using namespace mlir; +using namespace mlir::taskflow; + +namespace { + +//===----------------------------------------------------------------------===// +// Counter Chain Info +//===----------------------------------------------------------------------===// +/// Represents the counter chain (loop header bounds) of an MCT. +struct CounterChainInfo { + SmallVector bounds; // e.g., {4, 8, 6} for 0→4, 0→8, 0→6. + + bool operator==(const CounterChainInfo &other) const { + return bounds == other.bounds; + } + + /// Prints the counter chain in readable format. + void print(llvm::raw_ostream &os) const { + os << "("; + for (size_t i = 0; i < bounds.size(); ++i) { + if (i > 0) + os << "-"; + os << bounds[i]; + } + os << ")"; + } +}; + +//===----------------------------------------------------------------------===// +// MCT Info +//===----------------------------------------------------------------------===// +/// Stores analysis results for an MCT. +struct MCTInfo { + TaskflowTaskOp task; + StringRef task_name; + CounterChainInfo counter_chain; + SetVector source_memref_reads; // Source memrefs (function args or task outputs). + SetVector source_memref_writes; // Source memrefs that are written. + + /// Analyzes the task and resolves block args to source memrefs. + void analyze() { + task_name = task.getTaskName(); + + // Builds block arg to source mapping. + Block *body = &task.getBody().front(); + auto mem_inputs = task.getMemoryInputs(); + auto mem_args = body->getArguments().take_front(mem_inputs.size()); + + DenseMap block_arg_to_source; + for (auto [input, arg] : llvm::zip(mem_inputs, mem_args)) { + block_arg_to_source[arg] = input; + } + + // Collects memory accesses and resolves to source. 
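+    // For example, if the task's first memory input is function argument
+    // %arg0, then loads and stores that go through the matching block
+    // argument are recorded against %arg0 itself, so tasks touching the
+    // same source memref can later be compared directly.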
+ task.walk([&](Operation *op) { + if (auto load = dyn_cast(op)) { + Value memref = load.getMemRef(); + auto it = block_arg_to_source.find(memref); + if (it != block_arg_to_source.end()) { + source_memref_reads.insert(it->second); + } + } else if (auto store = dyn_cast(op)) { + Value memref = store.getMemRef(); + auto it = block_arg_to_source.find(memref); + if (it != block_arg_to_source.end()) { + source_memref_writes.insert(it->second); + } + } + }); + + // Extracts counter chain. + task.walk([&](TaskflowCounterOp counter) { + if (!counter.getParentIndex()) { + collectCounterChain(counter); + } + }); + } + +private: + /// Recursively collects counter chain bounds from root to leaf. + void collectCounterChain(TaskflowCounterOp counter) { + auto upper = counter.getUpperBound(); + counter_chain.bounds.push_back(upper.getSExtValue()); + + for (Operation *user : counter.getResult().getUsers()) { + if (auto child = dyn_cast(user)) { + collectCounterChain(child); + break; + } + } + } +}; + +//===----------------------------------------------------------------------===// +// Memory Dependency Types +//===----------------------------------------------------------------------===// +enum class DepType { SSA, RAW, WAR, WAW }; + +/// Represents a dependency between two MCTs. +struct Dependency { + DepType type; + size_t producer_idx; + size_t consumer_idx; + bool same_header; + Value via_memref; // The memref/SSA value that creates the dependency. +}; + +//===----------------------------------------------------------------------===// +// MCT Dependency Analyzer +//===----------------------------------------------------------------------===// +/// Analyzes dependencies between MCTs for multi-CGRA mapping. +class MCTDependencyAnalyzer { +public: + /// Analyzes all tasks in the function and reports dependencies. + void analyze(func::FuncOp func) { + SmallVector tasks; + func.walk([&](TaskflowTaskOp task) { tasks.push_back(task); }); + + if (tasks.empty()) { + llvm::errs() << "No taskflow.task operations found.\n"; + return; + } + + llvm::outs() << "=== MCT Dependency Analysis ===\n"; + llvm::outs() << "Found " << tasks.size() << " MCTs.\n\n"; + + // Analyzes each task. + SmallVector mct_infos; + DenseMap output_to_producer; // Maps task output to producer index. + + for (size_t idx = 0; idx < tasks.size(); ++idx) { + TaskflowTaskOp task = tasks[idx]; + MCTInfo info; + info.task = task; + info.analyze(); + mct_infos.push_back(info); + + // Records outputs for SSA dependency tracking. + for (Value output : task.getMemoryOutputs()) { + output_to_producer[output] = idx; + } + + // Prints task info. + llvm::outs() << "MCT " << idx << ": " << info.task_name << "\n"; + llvm::outs() << " Counter Chain: "; + info.counter_chain.print(llvm::outs()); + llvm::outs() << "\n"; + llvm::outs() << " Source Reads: "; + for (Value v : info.source_memref_reads) { + if (auto arg = dyn_cast(v)) { + llvm::outs() << "func_arg" << arg.getArgNumber() << " "; + } else { + llvm::outs() << v << " "; + } + } + llvm::outs() << "\n"; + llvm::outs() << " Source Writes: "; + for (Value v : info.source_memref_writes) { + if (auto arg = dyn_cast(v)) { + llvm::outs() << "func_arg" << arg.getArgNumber() << " "; + } else { + llvm::outs() << v << " "; + } + } + llvm::outs() << "\n\n"; + } + + // Detects dependencies. 
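+    // Given tasks T_j and T_i with j < i in walk order, the checks below
+    // classify edges as:
+    //   SSA: T_i consumes a memory input produced as another task's output.
+    //   RAW: T_j writes a source memref that T_i reads.
+    //   WAR: T_j reads a source memref that T_i writes.
+    //   WAW: T_j and T_i both write the same source memref.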
+ llvm::outs() << "=== Dependencies ===\n"; + SmallVector deps; + + for (size_t i = 0; i < mct_infos.size(); ++i) { + TaskflowTaskOp task = mct_infos[i].task; + + // Checks SSA dependencies: if this task's input is another task's output. + for (Value input : task.getMemoryInputs()) { + auto it = output_to_producer.find(input); + if (it != output_to_producer.end()) { + size_t producer_idx = it->second; + bool same_header = mct_infos[producer_idx].counter_chain == + mct_infos[i].counter_chain; + deps.push_back({DepType::SSA, producer_idx, i, same_header, input}); + llvm::outs() << mct_infos[producer_idx].task_name << " → " + << mct_infos[i].task_name << " : SSA"; + if (same_header) { + llvm::outs() << " [SAME HEADER - FUSION CANDIDATE]"; + } + llvm::outs() << "\n"; + } + } + + // Checks RAW dependencies via shared function arguments. + for (size_t j = 0; j < i; ++j) { + for (Value w : mct_infos[j].source_memref_writes) { + if (mct_infos[i].source_memref_reads.contains(w)) { + bool same_header = + mct_infos[j].counter_chain == mct_infos[i].counter_chain; + deps.push_back({DepType::RAW, j, i, same_header, w}); + llvm::outs() << mct_infos[j].task_name << " → " + << mct_infos[i].task_name << " : RAW on "; + if (auto arg = dyn_cast(w)) { + llvm::outs() << "func_arg" << arg.getArgNumber(); + } else { + llvm::outs() << w; + } + if (same_header) { + llvm::outs() << " [SAME HEADER - FUSION CANDIDATE]"; + } + llvm::outs() << "\n"; + } + } + + // Checks WAR: j reads, i writes same memref. + for (Value r : mct_infos[j].source_memref_reads) { + if (mct_infos[i].source_memref_writes.contains(r)) { + bool same_header = + mct_infos[j].counter_chain == mct_infos[i].counter_chain; + deps.push_back({DepType::WAR, j, i, same_header, r}); + llvm::outs() << mct_infos[j].task_name << " → " + << mct_infos[i].task_name << " : WAR on "; + if (auto arg = dyn_cast(r)) { + llvm::outs() << "func_arg" << arg.getArgNumber(); + } else { + llvm::outs() << r; + } + if (same_header) { + llvm::outs() << " [SAME HEADER]"; + } + llvm::outs() << "\n"; + } + } + + // Checks WAW: j writes, i writes same memref. + for (Value w : mct_infos[j].source_memref_writes) { + if (mct_infos[i].source_memref_writes.contains(w)) { + bool same_header = + mct_infos[j].counter_chain == mct_infos[i].counter_chain; + deps.push_back({DepType::WAW, j, i, same_header, w}); + llvm::outs() << mct_infos[j].task_name << " → " + << mct_infos[i].task_name << " : WAW on "; + if (auto arg = dyn_cast(w)) { + llvm::outs() << "func_arg" << arg.getArgNumber(); + } else { + llvm::outs() << w; + } + if (same_header) { + llvm::outs() << " [SAME HEADER]"; + } + llvm::outs() << "\n"; + } + } + } + } + + // Prints summary by type. + size_t ssa_count = 0, raw_count = 0, war_count = 0, waw_count = 0; + size_t fusion_candidates = 0; + for (const auto &dep : deps) { + switch (dep.type) { + case DepType::SSA: ssa_count++; break; + case DepType::RAW: raw_count++; break; + case DepType::WAR: war_count++; break; + case DepType::WAW: waw_count++; break; + } + // Only SSA and RAW are considered fusion candidates because they involve + // data flow dependencies (producer outputs data that consumer needs). + // WAR/WAW are ordering dependencies without data forwarding opportunity. 
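+      // E.g., a same-header SSA edge Task_0 -> Task_1 counts as a fusion
+      // candidate, while a same-header WAW edge between the same tasks does not.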
+ if (dep.same_header && (dep.type == DepType::SSA || dep.type == DepType::RAW)) { + fusion_candidates++; + } + } + llvm::outs() << "\n=== Summary ===\n"; + llvm::outs() << "Total dependencies: " << deps.size() << "\n"; + llvm::outs() << " SSA: " << ssa_count << ", RAW: " << raw_count + << ", WAR: " << war_count << ", WAW: " << waw_count << "\n"; + llvm::outs() << "Fusion candidates (same-header SSA/RAW): " << fusion_candidates + << "\n"; + } +}; + +//===----------------------------------------------------------------------===// +// Pass Definition +//===----------------------------------------------------------------------===// +struct AnalyzeMCTDependencyPass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(AnalyzeMCTDependencyPass) + + StringRef getArgument() const override { return "analyze-mct-dependency"; } + + StringRef getDescription() const override { + return "Analyzes dependencies between MCTs for multi-CGRA mapping."; + } + + void runOnOperation() override { + func::FuncOp func = getOperation(); + MCTDependencyAnalyzer analyzer; + analyzer.analyze(func); + } +}; + +} // namespace + +namespace mlir { +namespace taskflow { + +std::unique_ptr createAnalyzeMCTDependencyPass() { + return std::make_unique(); +} + +} // namespace taskflow +} // namespace mlir diff --git a/lib/TaskflowDialect/Transforms/CMakeLists.txt b/lib/TaskflowDialect/Transforms/CMakeLists.txt index ab118c89..912a738d 100644 --- a/lib/TaskflowDialect/Transforms/CMakeLists.txt +++ b/lib/TaskflowDialect/Transforms/CMakeLists.txt @@ -3,6 +3,8 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) add_mlir_library(MLIRTaskflowTransforms ConstructHyperblockFromTaskPass.cpp CanonicalizeTaskPass.cpp + AnalyzeMCTDependencyPass.cpp + PlaceMCTOnCGRAPass.cpp DEPENDS MLIRTaskflowTransformsIncGen diff --git a/lib/TaskflowDialect/Transforms/PlaceMCTOnCGRAPass.cpp b/lib/TaskflowDialect/Transforms/PlaceMCTOnCGRAPass.cpp new file mode 100644 index 00000000..ef9ceb68 --- /dev/null +++ b/lib/TaskflowDialect/Transforms/PlaceMCTOnCGRAPass.cpp @@ -0,0 +1,539 @@ +//===- PlaceMCTOnCGRAPass.cpp - MCT to CGRA Placement Pass ----------------===// +// +// This pass places Minimized Canonicalized Tasks (MCTs) onto a 2D CGRA grid: +// 1. SSA use-def placement: Tasks with SSA dependencies placed on adjacent CGRAs. +// 2. Memory mapping: Assigns memrefs to SRAMs (single-SRAM constraint per data). +// +//===----------------------------------------------------------------------===// + +#include "TaskflowDialect/TaskflowDialect.h" +#include "TaskflowDialect/TaskflowOps.h" +#include "TaskflowDialect/TaskflowPasses.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/Builders.h" +#include "mlir/Pass/Pass.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" + +using namespace mlir; +using namespace mlir::taskflow; + +namespace { + +//===----------------------------------------------------------------------===// +// CGRA Grid Position +//===----------------------------------------------------------------------===// +/// Represents a position on the 2D CGRA grid. +struct CGRAPosition { + int row; + int col; + + bool operator==(const CGRAPosition &other) const { + return row == other.row && col == other.col; + } + + /// Computes Manhattan distance to another position. 
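+  /// For example, (0,0) and (2,1) are at distance 3, while (0,0) and (0,1)
+  /// are at distance 1 and therefore adjacent.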
+ int manhattanDistance(const CGRAPosition &other) const { + return std::abs(row - other.row) + std::abs(col - other.col); + } + + /// Checks if adjacent (Manhattan distance = 1). + bool isAdjacent(const CGRAPosition &other) const { + return manhattanDistance(other) == 1; + } +}; + +//===----------------------------------------------------------------------===// +// Task Placement Info +//===----------------------------------------------------------------------===// +/// Stores placement info for a task: can span multiple combined CGRAs. +struct TaskPlacement { + SmallVector cgra_positions; // CGRAs assigned to this task. + + /// Returns the primary (first) position. + CGRAPosition primary() const { + return cgra_positions.empty() ? CGRAPosition{-1, -1} : cgra_positions[0]; + } + + /// Returns the number of CGRAs assigned. + size_t cgraCount() const { return cgra_positions.size(); } + + /// Checks if any CGRA in this task is adjacent to any in other task. + bool hasAdjacentCGRA(const TaskPlacement &other) const { + for (const auto &pos : cgra_positions) { + for (const auto &other_pos : other.cgra_positions) { + if (pos.isAdjacent(other_pos)) { + return true; + } + } + } + return false; + } +}; + +//===----------------------------------------------------------------------===// +// Counter Chain Info +//===----------------------------------------------------------------------===// +/// Stores counter chain bounds for same-header comparison. +struct CounterChainInfo { + SmallVector bounds; + + bool operator==(const CounterChainInfo &other) const { + return bounds == other.bounds; + } + + static CounterChainInfo extract(TaskflowTaskOp task) { + CounterChainInfo info; + task.walk([&](TaskflowCounterOp counter) { + if (!counter.getParentIndex()) { + info.collectChain(counter); + } + }); + return info; + } + +private: + void collectChain(TaskflowCounterOp counter) { + bounds.push_back(counter.getUpperBound().getSExtValue()); + for (Operation *user : counter.getResult().getUsers()) { + if (auto child = dyn_cast(user)) { + collectChain(child); + break; + } + } + } +}; + +//===----------------------------------------------------------------------===// +// SSA Dependency +//===----------------------------------------------------------------------===// +struct SSADependency { + size_t producer_idx; + size_t consumer_idx; + bool same_header; +}; + +//===----------------------------------------------------------------------===// +// Memory Mapping +//===----------------------------------------------------------------------===// +/// Assigns memrefs to SRAMs. Single-SRAM constraint: each memref can only +/// reside in one SRAM (but can be accessed from DRAM dynamically). +struct MemoryMapper { + DenseMap memref_to_sram; // Maps memref to SRAM ID. + + /// Direct wire connections: For fusion candidates on adjacent CGRAs, + /// data can bypass SRAM and flow directly through interconnect. + /// Stores: (producer_task_idx, consumer_task_idx, via_value). + struct DirectWire { + size_t producer_idx; + size_t consumer_idx; + Value via_value; + }; + SmallVector direct_wires; + + /// Assigns a memref to the closest SRAM near the given task position. + int assignSRAM(Value memref, const TaskPlacement &placement) { + auto it = memref_to_sram.find(memref); + if (it != memref_to_sram.end()) { + return it->second; // Already assigned. + } + + // Assigns to a new SRAM near the task's primary CGRA. + // In baseline, SRAM ID corresponds to CGRA position for locality. 
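+    // For example, a task whose primary CGRA sits at (row 1, col 2) maps a
+    // not-yet-assigned memref to SRAM_102.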
+ auto pos = placement.primary(); + int sram_id = pos.row * 100 + pos.col; // Simple encoding: row*100 + col. + memref_to_sram[memref] = sram_id; + return sram_id; + } + + /// Configures direct wire for adjacent fusion candidates. + /// Producer output goes directly to consumer without SRAM roundtrip. + void configureDirectWire(size_t producer_idx, size_t consumer_idx, + Value via_value) { + direct_wires.push_back({producer_idx, consumer_idx, via_value}); + } + + /// Prints memory mapping summary. + void printMapping() const { + llvm::outs() << "\n=== Memory Mapping ===\n"; + for (const auto &entry : memref_to_sram) { + llvm::outs() << " "; + if (auto arg = dyn_cast(entry.first)) { + llvm::outs() << "func_arg" << arg.getArgNumber(); + } else { + entry.first.print(llvm::outs()); + } + llvm::outs() << " -> SRAM_" << entry.second << "\n"; + } + + if (!direct_wires.empty()) { + llvm::outs() << "\n=== Direct Wires (bypass SRAM) ===\n"; + for (const auto &dw : direct_wires) { + llvm::outs() << " Task_" << dw.producer_idx << " -> Task_" + << dw.consumer_idx << " (direct)\n"; + } + } + } +}; + +//===----------------------------------------------------------------------===// +// CGRA Placer +//===----------------------------------------------------------------------===// +/// Places MCTs onto a 2D CGRA grid with memory mapping. +class CGRAPlacer { +public: + CGRAPlacer(int grid_rows, int grid_cols) + : grid_rows_(grid_rows), grid_cols_(grid_cols) { + occupied_.resize(grid_rows_); + for (auto &row : occupied_) { + row.resize(grid_cols_, false); + } + } + + /// Places all tasks and performs memory mapping. + void place(func::FuncOp func) { + SmallVector tasks; + func.walk([&](TaskflowTaskOp task) { tasks.push_back(task); }); + + if (tasks.empty()) { + llvm::errs() << "No tasks to place.\n"; + return; + } + + + // Extracts counter chains and builds dependency graph. + SmallVector counter_chains; + DenseMap output_to_producer; + SmallVector deps; + + for (size_t idx = 0; idx < tasks.size(); ++idx) { + counter_chains.push_back(CounterChainInfo::extract(tasks[idx])); + for (Value output : tasks[idx].getMemoryOutputs()) { + output_to_producer[output] = idx; + } + } + + for (size_t idx = 0; idx < tasks.size(); ++idx) { + for (Value input : tasks[idx].getMemoryInputs()) { + auto it = output_to_producer.find(input); + if (it != output_to_producer.end()) { + size_t producer_idx = it->second; + bool same_header = counter_chains[producer_idx] == counter_chains[idx]; + deps.push_back({producer_idx, idx, same_header}); + } + } + } + + // Critical path priority placement: + // 1. Computes ALAP level for each task (longest path to sink). + // 2. Sorts tasks by: (a) ALAP level, (b) criticality, (c) degree. + // 3. Places tasks in sorted order with heuristic scoring. + SmallVector placements(tasks.size()); + SmallVector placement_order = computePlacementOrder(tasks, deps); + + for (size_t idx : placement_order) { + // Reads cgra_count from task attribute (default: 1). + // Assumes the fusion pass will set this attribute for tasks needing multiple CGRAs. + // TODO: Rewrite this after the fusion pass is updated. + int cgra_count = 1; + if (auto attr = tasks[idx]->getAttrOfType("cgra_count")) { + cgra_count = attr.getInt(); + } + + TaskPlacement placement = findBestPlacement(idx, cgra_count, placements, deps); + placements[idx] = placement; + + // Marks occupied. 
+ for (const auto &pos : placement.cgra_positions) { + occupied_[pos.row][pos.col] = true; + } + + + // Checks adjacency to dependent tasks and configures direct wires. + for (const auto &dep : deps) { + if (dep.consumer_idx == idx && placements[dep.producer_idx].cgraCount() > 0) { + if (placement.hasAdjacentCGRA(placements[dep.producer_idx])) { + // llvm::outs() << " [ADJACENT TO " << tasks[dep.producer_idx].getTaskName() << "]"; + + // Direct wire for same-header fusion candidates on adjacent CGRAs. + // Data bypasses SRAM and flows directly through interconnect. + if (dep.same_header) { + // Gets the SSA value connecting producer output to consumer input. + for (Value input : tasks[idx].getMemoryInputs()) { + for (Value output : tasks[dep.producer_idx].getMemoryOutputs()) { + if (input == output) { + memory_mapper_.configureDirectWire(dep.producer_idx, idx, input); + // llvm::outs() << " [DIRECT WIRE]"; + } + } + } + } + } + } + } + llvm::outs() << "\n"; + + // Memory mapping: Assigns input/output memrefs to SRAMs. + for (Value input : tasks[idx].getMemoryInputs()) { + memory_mapper_.assignSRAM(input, placement); + } + for (Value output : tasks[idx].getMemoryOutputs()) { + memory_mapper_.assignSRAM(output, placement); + } + } + + // Annotates tasks with placement info. + OpBuilder builder(func.getContext()); + for (size_t idx = 0; idx < tasks.size(); ++idx) { + const auto &placement = placements[idx]; + auto pos = placement.primary(); + tasks[idx]->setAttr("cgra_row", builder.getI32IntegerAttr(pos.row)); + tasks[idx]->setAttr("cgra_col", builder.getI32IntegerAttr(pos.col)); + tasks[idx]->setAttr("cgra_count", + builder.getI32IntegerAttr(placement.cgraCount())); + } + + // llvm::outs() << "\n=== Placement Summary ===\n"; + // printGrid(tasks, placements); + // memory_mapper_.printMapping(); + } + +private: + /// Finds best placement for a task requiring cgra_count CGRAs. + TaskPlacement findBestPlacement(size_t task_idx, int cgra_count, + const SmallVector &placements, + const SmallVector &deps) { + int best_score = INT_MIN; + TaskPlacement best_placement; + + // Baseline: For cgra_count=1, finds single best position. + for (int r = 0; r < grid_rows_; ++r) { + for (int c = 0; c < grid_cols_; ++c) { + if (occupied_[r][c]) + continue; + + TaskPlacement candidate; + candidate.cgra_positions.push_back({r, c}); + + int score = computeScore(task_idx, candidate, placements, deps); + if (score > best_score) { + best_score = score; + best_placement = candidate; + } + } + } + + return best_placement; + } + + /// Computes placement score. + /// + /// Formula: Award(Task, CGRA) = α·Pneigh + β·Psib - γ·Comm + δ·Bal + Adj + /// + /// Pneigh: Proximity to placed neighbors (count of adjacent dependencies). + /// Psib: Proximity to same-header siblings (fusion candidates). + /// Comm: Communication cost = Σ wj · Dist(C, posj). + /// Bal: Bonus for under-utilized CGRAs (load balancing). + /// + /// Adjacency bonus (Adj): + /// if adjacent: +100 (same_header) or +50 (different_header) + /// else: -distance * 10 (same_header) or -distance (different) + /// + /// Weights: α=50, β=100, γ=10, δ=20 (tunable). + int computeScore(size_t task_idx, const TaskPlacement &placement, + const SmallVector &placements, + const SmallVector &deps) { + // Weight constants (tunable). + constexpr int kAlpha = 50; // Pneigh weight. + constexpr int kBeta = 100; // Psib weight (same-header bonus). + constexpr int kGamma = 10; // Comm weight (distance penalty). + constexpr int kDelta = 20; // Bal weight (load balance bonus). 
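+    // Worked example: with only the producer of a same-header dependency
+    // placed at (0,0) and candidate position (0,1), the terms are
+    // Pneigh = 1, Psib = 1, Comm = 2 (weight 2 * distance 1), Adj = +100,
+    // and Bal = (4 - 1) + (4 - 0) = 7 on an otherwise empty 4x4 grid, so
+    // Award = 50*1 + 100*1 - 10*2 + 20*7 + 100 = 370.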
+ + int pneigh = 0; // Proximity to placed neighbors. + int psib = 0; // Proximity to same-header siblings. + int comm = 0; // Communication cost. + int adj = 0; // Original adjacency bonus. + + for (const auto &dep : deps) { + // Checks if this task is consumer. + if (dep.consumer_idx == task_idx) { + const auto &producer = placements[dep.producer_idx]; + if (producer.cgraCount() == 0) + continue; + + int dist = placement.primary().manhattanDistance(producer.primary()); + + if (placement.hasAdjacentCGRA(producer)) { + // Adjacent neighbor bonus (SARA factor). + pneigh += 1; + if (dep.same_header) { + psib += 1; // Extra bonus for fusion candidates. + } + // Original adjacency bonus. + adj += dep.same_header ? 100 : 50; + } else { + // Distance penalty (original formula). + adj -= dep.same_header ? dist * 10 : dist; + } + + // Communication cost (weighted by same_header priority). + int weight = dep.same_header ? 2 : 1; + comm += weight * dist; + } + + // Checks if this task is producer. + if (dep.producer_idx == task_idx) { + const auto &consumer = placements[dep.consumer_idx]; + if (consumer.cgraCount() == 0) + continue; + + int dist = placement.primary().manhattanDistance(consumer.primary()); + + if (placement.hasAdjacentCGRA(consumer)) { + pneigh += 1; + if (dep.same_header) { + psib += 1; + } + adj += dep.same_header ? 100 : 50; + } else { + adj -= dep.same_header ? dist * 10 : dist; + } + + int weight = dep.same_header ? 2 : 1; + comm += weight * dist; + } + } + + // Load balance bonus: Prefer under-utilized CGRAs. + // Counts tasks already placed in same row/column. + int bal = 0; + auto pos = placement.primary(); + int row_count = 0, col_count = 0; + for (int c = 0; c < grid_cols_; ++c) { + if (occupied_[pos.row][c]) row_count++; + } + for (int r = 0; r < grid_rows_; ++r) { + if (occupied_[r][pos.col]) col_count++; + } + // Bonus for less crowded positions. + bal = (grid_cols_ - row_count) + (grid_rows_ - col_count); + + // Final score: Award = α·Pneigh + β·Psib - γ·Comm + δ·Bal + Adj + int score = kAlpha * pneigh + kBeta * psib - kGamma * comm + kDelta * bal + adj; + return score; + } + + /// Computes placement order using critical path priority. + /// Priority: (1) ALAP level, (2) degree (connectivity), (3) original order. + SmallVector computePlacementOrder( + const SmallVector &tasks, + const SmallVector &deps) { + size_t n = tasks.size(); + SmallVector alap_level(n, 0); + SmallVector degree(n, 0); + + // Builds adjacency for ALAP computation. + SmallVector> successors(n); + for (const auto &dep : deps) { + successors[dep.producer_idx].push_back(dep.consumer_idx); + } + + // Computes ALAP level (longest path to any sink). + // Process in reverse topological order. + for (int i = n - 1; i >= 0; --i) { + int level = 0; + for (size_t succ : successors[i]) { + level = std::max(level, alap_level[succ] + 1); + } + alap_level[i] = level; + } + + // Computes degree (number of dependencies). + for (const auto &dep : deps) { + degree[dep.producer_idx]++; + degree[dep.consumer_idx]++; + } + + // Creates sorted order. + SmallVector order(n); + for (size_t i = 0; i < n; ++i) order[i] = i; + + std::sort(order.begin(), order.end(), [&](size_t a, size_t b) { + // Priority 1: Higher ALAP level first (critical path). + if (alap_level[a] != alap_level[b]) + return alap_level[a] > alap_level[b]; + // Priority 2: Higher degree first. + if (degree[a] != degree[b]) + return degree[a] > degree[b]; + // Priority 3: Original order (stability). 
+ return a < b; + }); + + return order; + } + + /// Prints the placement grid. + void printGrid(const SmallVector &tasks, + const SmallVector &placements) { + std::vector> grid( + grid_rows_, std::vector(grid_cols_, " . ")); + + for (size_t idx = 0; idx < tasks.size(); ++idx) { + for (const auto &pos : placements[idx].cgra_positions) { + grid[pos.row][pos.col] = " T" + std::to_string(idx) + " "; + } + } + + for (int r = 0; r < grid_rows_; ++r) { + for (int c = 0; c < grid_cols_; ++c) { + llvm::outs() << grid[r][c]; + } + llvm::outs() << "\n"; + } + } + + int grid_rows_; + int grid_cols_; + std::vector> occupied_; + MemoryMapper memory_mapper_; +}; + +//===----------------------------------------------------------------------===// +// Pass Definition +//===----------------------------------------------------------------------===// +struct PlaceMCTOnCGRAPass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(PlaceMCTOnCGRAPass) + + PlaceMCTOnCGRAPass() = default; + + StringRef getArgument() const override { return "place-mct-on-cgra"; } + + StringRef getDescription() const override { + return "Places MCTs onto a 2D CGRA grid with adjacency optimization and " + "memory mapping."; + } + + void runOnOperation() override { + func::FuncOp func = getOperation(); + constexpr int kDefaultGridRows = 4; + constexpr int kDefaultGridCols = 4; + CGRAPlacer placer(kDefaultGridRows, kDefaultGridCols); + placer.place(func); + } +}; + +} // namespace + +namespace mlir { +namespace taskflow { + +std::unique_ptr createPlaceMCTOnCGRAPass() { + return std::make_unique(); +} + +} // namespace taskflow +} // namespace mlir diff --git a/test/multi-cgra/taskflow/irregular-loop/irregular-loop.mlir b/test/multi-cgra/taskflow/irregular-loop/irregular-loop.mlir index 6ce8e5e6..6fddd434 100644 --- a/test/multi-cgra/taskflow/irregular-loop/irregular-loop.mlir +++ b/test/multi-cgra/taskflow/irregular-loop/irregular-loop.mlir @@ -13,6 +13,13 @@ // RUN: -o %t.canonicalized.mlir // RUN: FileCheck %s --input-file=%t.canonicalized.mlir --check-prefixes=CANONICALIZE +// RUN: mlir-neura-opt %s --convert-affine-to-taskflow \ +// RUN: --construct-hyperblock-from-task \ +// RUN: --canonicalize-task \ +// RUN: --place-mct-on-cgra \ +// RUN: -o %t.placement.mlir +// RUN: FileCheck %s --input-file=%t.placement.mlir --check-prefixes=PLACEMENT + #set = affine_set<(d0, d1) : (d0 - 3 == 0, d1 - 7 == 0)> module attributes {} { func.func @_Z21irregularLoopExample1v() -> i32 attributes {llvm.linkage = #llvm.linkage} { @@ -151,6 +158,8 @@ module attributes {} { // HYPERBLOCK-NEXT: } // HYPERBLOCK-NEXT: } + + // CANONICALIZE: module { // CANONICALIZE-NEXT: func.func @_Z21irregularLoopExample1v() -> i32 attributes {llvm.linkage = #llvm.linkage} { // CANONICALIZE-NEXT: %c2_i32 = arith.constant 2 : i32 @@ -214,3 +223,9 @@ module attributes {} { // CANONICALIZE-NEXT: } // CANONICALIZE-NEXT: } +// PLACEMENT: task_name = "Task_0" +// PLACEMENT: cgra_col = 2 : i32, cgra_count = 1 : i32, cgra_row = 1 : i32 +// PLACEMENT: task_name = "Task_1" +// PLACEMENT: cgra_col = 0 : i32, cgra_count = 1 : i32, cgra_row = 0 : i32 +// PLACEMENT: task_name = "Task_2" +// PLACEMENT: cgra_col = 1 : i32, cgra_count = 1 : i32, cgra_row = 0 : i32 diff --git a/test/multi-cgra/taskflow/multi-nested/multi-nested.mlir b/test/multi-cgra/taskflow/multi-nested/multi-nested.mlir index c5f75f28..d9695ce7 100644 --- a/test/multi-cgra/taskflow/multi-nested/multi-nested.mlir +++ b/test/multi-cgra/taskflow/multi-nested/multi-nested.mlir @@ -13,6 +13,13 @@ // RUN: 
-o %t.canonicalized.mlir // RUN: FileCheck %s --input-file=%t.canonicalized.mlir --check-prefixes=CANONICALIZE +// RUN: mlir-neura-opt %s --convert-affine-to-taskflow \ +// RUN: --construct-hyperblock-from-task \ +// RUN: --canonicalize-task \ +// RUN: --place-mct-on-cgra \ +// RUN: -o %t.placement.mlir +// RUN: FileCheck %s --input-file=%t.placement.mlir --check-prefixes=PLACEMENT + module attributes {} { func.func @_Z21pureNestedLoopExamplePA8_A6_iPA8_A5_iS4_PA7_iPA9_iPiS9_S9_S9_S9_(%arg0: memref, %arg1: memref, %arg2: memref, %arg3: memref, %arg4: memref, %arg5: memref, %arg6: memref, %arg7: memref, %arg8: memref, %arg9: memref) -> i32 attributes {llvm.linkage = #llvm.linkage} { affine.for %arg10 = 0 to 4 { @@ -231,4 +238,15 @@ module attributes {} { // CANONICALIZE-NEXT: %0 = affine.load %memory_outputs_1[0] : memref // CANONICALIZE-NEXT: return %0 : i32 // CANONICALIZE-NEXT: } -// CANONICALIZE-NEXT: } \ No newline at end of file +// CANONICALIZE-NEXT: } + +// PLACEMENT: task_name = "Task_0" +// PLACEMENT: cgra_col = 0 : i32, cgra_count = 1 : i32, cgra_row = 0 : i32 +// PLACEMENT: task_name = "Task_1" +// PLACEMENT: cgra_col = 1 : i32, cgra_count = 1 : i32, cgra_row = 1 : i32 +// PLACEMENT: task_name = "Task_2" +// PLACEMENT: cgra_col = 1 : i32, cgra_count = 1 : i32, cgra_row = 0 : i32 +// PLACEMENT: task_name = "Task_3" +// PLACEMENT: cgra_col = 2 : i32, cgra_count = 1 : i32, cgra_row = 2 : i32 +// PLACEMENT: task_name = "Task_4" +// PLACEMENT: cgra_col = 3 : i32, cgra_count = 1 : i32, cgra_row = 2 : i32 \ No newline at end of file diff --git a/test/multi-cgra/taskflow/parallel-nested/parallel-nested.mlir b/test/multi-cgra/taskflow/parallel-nested/parallel-nested.mlir index ee37c831..5917c9d4 100644 --- a/test/multi-cgra/taskflow/parallel-nested/parallel-nested.mlir +++ b/test/multi-cgra/taskflow/parallel-nested/parallel-nested.mlir @@ -13,6 +13,13 @@ // RUN: -o %t.canonicalized.mlir // RUN: FileCheck %s --input-file=%t.canonicalized.mlir --check-prefixes=CANONICALIZE +// RUN: mlir-neura-opt %s --convert-affine-to-taskflow \ +// RUN: --construct-hyperblock-from-task \ +// RUN: --canonicalize-task \ +// RUN: --place-mct-on-cgra \ +// RUN: -o %t.placement.mlir +// RUN: FileCheck %s --input-file=%t.placement.mlir --check-prefixes=PLACEMENT + module { // Example: Parallel nested loops scenario // Task 0: Single-level loop (vector scaling) @@ -133,4 +140,9 @@ module { // CANONICALIZE-NEXT: }) : (memref<8x8xf32>, memref<8x8xf32>, memref<8x8xf32>) -> memref<8x8xf32> // CANONICALIZE-NEXT: return // CANONICALIZE-NEXT: } -// CANONICALIZE-NEXT: } \ No newline at end of file +// CANONICALIZE-NEXT: } + +// PLACEMENT: task_name = "Task_0" +// PLACEMENT: cgra_col = 0 : i32, cgra_count = 1 : i32, cgra_row = 0 : i32 +// PLACEMENT: task_name = "Task_1" +// PLACEMENT: cgra_col = 1 : i32, cgra_count = 1 : i32, cgra_row = 1 : i32 \ No newline at end of file From 888f3c8ded585bc3ebe219913376a1f33ca8f965 Mon Sep 17 00:00:00 2001 From: Shiran Guo Date: Fri, 30 Jan 2026 10:20:26 +0800 Subject: [PATCH 2/3] fix: address Copilot review comments 1. Added explicit C++ standard library headers (, , , , ) to avoid transitive include dependencies. 2. Added error handling for grid over-subscription case in findBestPlacement(): when no available CGRA position is found, emits a warning and falls back to position (0,0). 
--- .../Transforms/PlaceMCTOnCGRAPass.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lib/TaskflowDialect/Transforms/PlaceMCTOnCGRAPass.cpp b/lib/TaskflowDialect/Transforms/PlaceMCTOnCGRAPass.cpp index ef9ceb68..06279b13 100644 --- a/lib/TaskflowDialect/Transforms/PlaceMCTOnCGRAPass.cpp +++ b/lib/TaskflowDialect/Transforms/PlaceMCTOnCGRAPass.cpp @@ -18,6 +18,12 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include + using namespace mlir; using namespace mlir::taskflow; @@ -326,6 +332,15 @@ class CGRAPlacer { } } + // Error handling: No available position found (grid over-subscribed). + if (best_placement.cgra_positions.empty()) { + llvm::errs() << "Warning: No available CGRA position for task " + << task_idx << ". Grid is over-subscribed (" << grid_rows_ + << "x" << grid_cols_ << " grid with all cells occupied).\n"; + // Fallback: Assign to position (0,0) with a warning. + best_placement.cgra_positions.push_back({0, 0}); + } + return best_placement; } From d9c48d1279c8f16a5763966b5516ab32814ec8e4 Mon Sep 17 00:00:00 2001 From: Shiran Guo Date: Fri, 30 Jan 2026 10:27:09 +0800 Subject: [PATCH 3/3] fix: add dedicated test for AnalyzeMCTDependencyPass 1. Changed output from llvm::outs() to llvm::errs() to avoid stdout/IR conflicts. 2. Simplified Value printing to avoid IR ownership issues during output. 3. Added dependency-analysis.mlir test to verify SSA dependency detection. Addresses Copilot review comment about missing tests for analyze-mct-dependency pass. --- .../Transforms/AnalyzeMCTDependencyPass.cpp | 87 ++++++------------- .../dependency-analysis.mlir | 42 +++++++++ 2 files changed, 70 insertions(+), 59 deletions(-) create mode 100644 test/multi-cgra/taskflow/dependency-analysis/dependency-analysis.mlir diff --git a/lib/TaskflowDialect/Transforms/AnalyzeMCTDependencyPass.cpp b/lib/TaskflowDialect/Transforms/AnalyzeMCTDependencyPass.cpp index 798c3909..232b6315 100644 --- a/lib/TaskflowDialect/Transforms/AnalyzeMCTDependencyPass.cpp +++ b/lib/TaskflowDialect/Transforms/AnalyzeMCTDependencyPass.cpp @@ -151,8 +151,8 @@ class MCTDependencyAnalyzer { return; } - llvm::outs() << "=== MCT Dependency Analysis ===\n"; - llvm::outs() << "Found " << tasks.size() << " MCTs.\n\n"; + llvm::errs() << "=== MCT Dependency Analysis ===\n"; + llvm::errs() << "Found " << tasks.size() << " MCTs.\n\n"; // Analyzes each task. SmallVector mct_infos; @@ -171,32 +171,16 @@ class MCTDependencyAnalyzer { } // Prints task info. 
- llvm::outs() << "MCT " << idx << ": " << info.task_name << "\n"; - llvm::outs() << " Counter Chain: "; - info.counter_chain.print(llvm::outs()); - llvm::outs() << "\n"; - llvm::outs() << " Source Reads: "; - for (Value v : info.source_memref_reads) { - if (auto arg = dyn_cast(v)) { - llvm::outs() << "func_arg" << arg.getArgNumber() << " "; - } else { - llvm::outs() << v << " "; - } - } - llvm::outs() << "\n"; - llvm::outs() << " Source Writes: "; - for (Value v : info.source_memref_writes) { - if (auto arg = dyn_cast(v)) { - llvm::outs() << "func_arg" << arg.getArgNumber() << " "; - } else { - llvm::outs() << v << " "; - } - } - llvm::outs() << "\n\n"; + llvm::errs() << "MCT " << idx << ": " << info.task_name << "\n"; + llvm::errs() << " Counter Chain: "; + info.counter_chain.print(llvm::errs()); + llvm::errs() << "\n"; + llvm::errs() << " Source Reads: " << info.source_memref_reads.size() << " memrefs\n"; + llvm::errs() << " Source Writes: " << info.source_memref_writes.size() << " memrefs\n\n"; } // Detects dependencies. - llvm::outs() << "=== Dependencies ===\n"; + llvm::errs() << "=== Dependencies ===\n"; SmallVector deps; for (size_t i = 0; i < mct_infos.size(); ++i) { @@ -210,12 +194,12 @@ class MCTDependencyAnalyzer { bool same_header = mct_infos[producer_idx].counter_chain == mct_infos[i].counter_chain; deps.push_back({DepType::SSA, producer_idx, i, same_header, input}); - llvm::outs() << mct_infos[producer_idx].task_name << " → " + llvm::errs() << mct_infos[producer_idx].task_name << " → " << mct_infos[i].task_name << " : SSA"; if (same_header) { - llvm::outs() << " [SAME HEADER - FUSION CANDIDATE]"; + llvm::errs() << " [SAME HEADER - FUSION CANDIDATE]"; } - llvm::outs() << "\n"; + llvm::errs() << "\n"; } } @@ -226,17 +210,12 @@ class MCTDependencyAnalyzer { bool same_header = mct_infos[j].counter_chain == mct_infos[i].counter_chain; deps.push_back({DepType::RAW, j, i, same_header, w}); - llvm::outs() << mct_infos[j].task_name << " → " - << mct_infos[i].task_name << " : RAW on "; - if (auto arg = dyn_cast(w)) { - llvm::outs() << "func_arg" << arg.getArgNumber(); - } else { - llvm::outs() << w; - } + llvm::errs() << mct_infos[j].task_name << " → " + << mct_infos[i].task_name << " : RAW"; if (same_header) { - llvm::outs() << " [SAME HEADER - FUSION CANDIDATE]"; + llvm::errs() << " [SAME HEADER - FUSION CANDIDATE]"; } - llvm::outs() << "\n"; + llvm::errs() << "\n"; } } @@ -246,17 +225,12 @@ class MCTDependencyAnalyzer { bool same_header = mct_infos[j].counter_chain == mct_infos[i].counter_chain; deps.push_back({DepType::WAR, j, i, same_header, r}); - llvm::outs() << mct_infos[j].task_name << " → " - << mct_infos[i].task_name << " : WAR on "; - if (auto arg = dyn_cast(r)) { - llvm::outs() << "func_arg" << arg.getArgNumber(); - } else { - llvm::outs() << r; - } + llvm::errs() << mct_infos[j].task_name << " → " + << mct_infos[i].task_name << " : WAR"; if (same_header) { - llvm::outs() << " [SAME HEADER]"; + llvm::errs() << " [SAME HEADER]"; } - llvm::outs() << "\n"; + llvm::errs() << "\n"; } } @@ -266,17 +240,12 @@ class MCTDependencyAnalyzer { bool same_header = mct_infos[j].counter_chain == mct_infos[i].counter_chain; deps.push_back({DepType::WAW, j, i, same_header, w}); - llvm::outs() << mct_infos[j].task_name << " → " - << mct_infos[i].task_name << " : WAW on "; - if (auto arg = dyn_cast(w)) { - llvm::outs() << "func_arg" << arg.getArgNumber(); - } else { - llvm::outs() << w; - } + llvm::errs() << mct_infos[j].task_name << " → " + << mct_infos[i].task_name << " : WAW"; if 
(same_header) { - llvm::outs() << " [SAME HEADER]"; + llvm::errs() << " [SAME HEADER]"; } - llvm::outs() << "\n"; + llvm::errs() << "\n"; } } } @@ -299,11 +268,11 @@ class MCTDependencyAnalyzer { fusion_candidates++; } } - llvm::outs() << "\n=== Summary ===\n"; - llvm::outs() << "Total dependencies: " << deps.size() << "\n"; - llvm::outs() << " SSA: " << ssa_count << ", RAW: " << raw_count + llvm::errs() << "\n=== Summary ===\n"; + llvm::errs() << "Total dependencies: " << deps.size() << "\n"; + llvm::errs() << " SSA: " << ssa_count << ", RAW: " << raw_count << ", WAR: " << war_count << ", WAW: " << waw_count << "\n"; - llvm::outs() << "Fusion candidates (same-header SSA/RAW): " << fusion_candidates + llvm::errs() << "Fusion candidates (same-header SSA/RAW): " << fusion_candidates << "\n"; } }; diff --git a/test/multi-cgra/taskflow/dependency-analysis/dependency-analysis.mlir b/test/multi-cgra/taskflow/dependency-analysis/dependency-analysis.mlir new file mode 100644 index 00000000..529fd036 --- /dev/null +++ b/test/multi-cgra/taskflow/dependency-analysis/dependency-analysis.mlir @@ -0,0 +1,42 @@ +// RUN: mlir-neura-opt %s --convert-affine-to-taskflow \ +// RUN: --construct-hyperblock-from-task \ +// RUN: --canonicalize-task \ +// RUN: --analyze-mct-dependency 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=DEPENDENCY + +// Test for MCT dependency analysis pass. +// This test verifies that the pass correctly identifies: +// 1. SSA dependencies between tasks +// 2. Same-header fusion candidates + +module { + func.func @dependency_test(%arg0: memref<4x8xi32>, %arg1: memref<4x8xi32>) attributes {llvm.linkage = #llvm.linkage} { + %c0_i32 = arith.constant 0 : i32 + + // First loop: writes to arg0 + affine.for %i = 0 to 4 { + affine.for %j = 0 to 8 { + %idx = arith.index_cast %i : index to i32 + affine.store %idx, %arg0[%i, %j] : memref<4x8xi32> + } + } + + // Second loop: reads from arg0, writes to arg1 (same header as third loop) + affine.for %i = 0 to 4 { + affine.for %j = 0 to 8 { + %loaded = affine.load %arg0[%i, %j] : memref<4x8xi32> + affine.store %loaded, %arg1[%i, %j] : memref<4x8xi32> + } + } + + return + } +} + +// DEPENDENCY: === MCT Dependency Analysis === +// DEPENDENCY: Found 2 MCTs +// DEPENDENCY: MCT 0: Task_0 +// DEPENDENCY: MCT 1: Task_1 +// DEPENDENCY: === Dependencies === +// DEPENDENCY: Task_0 → Task_1 : SSA +// DEPENDENCY: === Summary ===