Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions include/PTO/IR/PTOOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1384,6 +1384,7 @@ def InitializeL2G2LPipeOp : PTO_Op<"initialize_l2g2l_pipe", [
I32Attr:$slot_num,
OptionalAttr<I32Attr>:$local_slot_num,
OptionalAttr<I32Attr>:$flag_base,
OptionalAttr<BoolAttr>:$nosplit,
AnyType:$gm_addr,
AnyType:$local_addr,
Optional<AnyType>:$peer_local_addr
Expand All @@ -1398,6 +1399,7 @@ def InitializeL2G2LPipeOp : PTO_Op<"initialize_l2g2l_pipe", [
`slot_num` `=` $slot_num
(`,` `local_slot_num` `=` $local_slot_num^)?
(`,` `flag_base` `=` $flag_base^)?
(`,` `nosplit` `=` $nosplit^)?
`}`
`(` $gm_addr `:` type($gm_addr) `,` $local_addr `:` type($local_addr)
(`,` $peer_local_addr^ `:` type($peer_local_addr))? `)`
Expand All @@ -1415,6 +1417,7 @@ def InitializeL2LPipeOp : PTO_Op<"initialize_l2l_pipe", [
I32Attr:$slot_size,
I32Attr:$slot_num,
OptionalAttr<I32Attr>:$flag_base,
OptionalAttr<BoolAttr>:$nosplit,
AnyType:$local_addr,
Optional<AnyType>:$peer_local_addr
);
Expand All @@ -1427,6 +1430,7 @@ def InitializeL2LPipeOp : PTO_Op<"initialize_l2l_pipe", [
`slot_size` `=` $slot_size `,`
`slot_num` `=` $slot_num
(`,` `flag_base` `=` $flag_base^)?
(`,` `nosplit` `=` $nosplit^)?
`}`
`(` $local_addr `:` type($local_addr)
(`,` $peer_local_addr^ `:` type($peer_local_addr))? `)`
Expand Down
3 changes: 3 additions & 0 deletions include/PTO/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ def PTOResolveReservedBuffers : Pass<"pto-resolve-reserved-buffers", "ModuleOp">
Runs after `pto-plan-memory`. Assumes `pto.reserve_buffer` base addresses
have already been planned, then:
- aligns missing `flag_base` attrs for peer internal pipe init ops
- infers implicit `nosplit = true` for internal pipe init ops when any
downstream `pto.tpush` / `pto.tpop` / `pto.tfree` user on the same
logical pipe has `split = 0`
- rejects internal pipe init ops without explicit `flag_base` when their
`local_addr` cannot be traced back to `pto.reserve_buffer` /
`pto.import_reserved_buffer`
Expand Down
7 changes: 5 additions & 2 deletions lib/PTO/Transforms/PTOLowerFrontendPipeOpsPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ static FailureOr<FrontendPipeHandles> lowerFrontendInitOp(InitOpT initOp,
if (arch == PTOArch::A5) {
auto pipe = rewriter.create<InitializeL2LPipeOp>(
loc, pipeTy, dirAttr, slotSizeAttr, slotNumAttr, IntegerAttr{},
BoolAttr{},
localAddr, /*peer_local_addr=*/Value{});
return pipe.getPipe();
}
Expand All @@ -67,7 +68,7 @@ static FailureOr<FrontendPipeHandles> lowerFrontendInitOp(InitOpT initOp,
auto localSlotNumAttr = rewriter.getI32IntegerAttr(slotNum);
auto pipe = rewriter.create<InitializeL2G2LPipeOp>(
loc, pipeTy, dirAttr, slotSizeAttr, slotNumAttr, localSlotNumAttr,
IntegerAttr{}, initOp.getGmSlotBuffer(), localAddr,
IntegerAttr{}, BoolAttr{}, initOp.getGmSlotBuffer(), localAddr,
/*peer_local_addr=*/Value{});
return pipe.getPipe();
};
Expand Down Expand Up @@ -101,6 +102,7 @@ static FailureOr<FrontendPipeHandles> lowerFrontendInitOp(InitOpT initOp,
if (arch == PTOArch::A5) {
auto pipe = rewriter.create<InitializeL2LPipeOp>(
loc, pipeTy, dirAttr, slotSizeAttr, slotNumAttr, IntegerAttr{},
BoolAttr{},
c2vAddr, v2cAddr);
handles.c2vPipe = pipe.getPipe();
handles.v2cPipe = pipe.getPipe();
Expand All @@ -113,7 +115,8 @@ static FailureOr<FrontendPipeHandles> lowerFrontendInitOp(InitOpT initOp,
auto localSlotNumAttr = rewriter.getI32IntegerAttr(4);
auto pipe = rewriter.create<InitializeL2G2LPipeOp>(
loc, pipeTy, dirAttr, slotSizeAttr, slotNumAttr, localSlotNumAttr,
IntegerAttr{}, initOp.getGmSlotBuffer(), c2vAddr, v2cAddr);
IntegerAttr{}, BoolAttr{}, initOp.getGmSlotBuffer(), c2vAddr,
v2cAddr);
handles.c2vPipe = pipe.getPipe();
handles.v2cPipe = pipe.getPipe();
handles.anchorOp = pipe.getOperation();
Expand Down
58 changes: 58 additions & 0 deletions lib/PTO/Transforms/PTOResolveReservedBuffersPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ struct PipeInitInfo {
Operation *op = nullptr;
func::FuncOp funcOp;
int8_t dirMask = 0;
bool inferredNoSplit = false;
};

template <typename InitOpT> static Value getLocalAddrOperand(InitOpT op) {
Expand All @@ -72,6 +73,35 @@ static void setFlagBaseAttr(InitOpT op, IntegerAttr attr) {
op->setAttr("flag_base", attr);
}

/// Stores `attr` on `op` under the attribute name "nosplit".
template <typename InitOpT>
static void setNoSplitAttr(InitOpT op, BoolAttr attr) {
  // Address the underlying operation explicitly rather than via operator->.
  op.getOperation()->setAttr("nosplit", attr);
}

/// Returns the value obtained from `op.getPipe()`, so callers can handle the
/// different init op types uniformly.
template <typename InitOpT>
static Value getPipeResult(InitOpT op) {
  Value pipeValue = op.getPipe();
  return pipeValue;
}

// Reports whether any direct user of `pipe` is a TPushOp, TPopOp, or TFreeOp
// whose getSplit() equals 0. Users of any other kind are ignored.
// Returns false when no such user is found (including when `pipe` has no
// users at all).
static bool inferNoSplitFromPipeUsers(Value pipe) {
for (Operation *user : pipe.getUsers()) {
if (auto pushOp = dyn_cast<TPushOp>(user)) {
// A push with split == 0 settles the answer; otherwise skip the remaining
// casts for this user and move on to the next one.
if (pushOp.getSplit() == 0)
return true;
continue;
}
if (auto popOp = dyn_cast<TPopOp>(user)) {
// Same rule for pops.
if (popOp.getSplit() == 0)
return true;
continue;
}
if (auto freeOp = dyn_cast<TFreeOp>(user)) {
// Same rule for frees; this is the last kind inspected, so no `continue`.
if (freeOp.getSplit() == 0)
return true;
}
}
// No push/pop/free user with split == 0 was seen.
return false;
Comment on lines +86 to +102
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The series of `if` statements with `continue` can be refactored into a more concise `if` / `else if` chain. This improves readability without changing behavior: once one `dyn_cast` matches, the `else if` branches skip the remaining casts for that user, which is exactly what the `continue` statements do today, so the `continue` statements are no longer needed.

  for (Operation *user : pipe.getUsers()) {
    if (auto pushOp = dyn_cast<TPushOp>(user)) {
      if (pushOp.getSplit() == 0)
        return true;
    } else if (auto popOp = dyn_cast<TPopOp>(user)) {
      if (popOp.getSplit() == 0)
        return true;
    } else if (auto freeOp = dyn_cast<TFreeOp>(user)) {
      if (freeOp.getSplit() == 0)
        return true;
    }
  }
  return false;

}

static ReserveBufferOp findReserveBufferByName(func::FuncOp funcOp,
StringRef name) {
// Reserve-buffer lookup is name-based because import_reserved_buffer only
Expand Down Expand Up @@ -140,6 +170,7 @@ struct PTOResolveReservedBuffersPass
info.op = initOp.getOperation();
info.funcOp = initOp->template getParentOfType<func::FuncOp>();
info.dirMask = initOp.getDirMask();
info.inferredNoSplit = inferNoSplitFromPipeUsers(getPipeResult(initOp));

// Record one address into the keyed maps. Returns true when the
// address comes from reserve_buffer / import_reserved_buffer.
Expand Down Expand Up @@ -187,6 +218,7 @@ struct PTOResolveReservedBuffersPass
}

OpBuilder builder(moduleOp.getContext());
std::set<Operation *> groupedNoSplitResolved;
for (const auto &it : keyedInits) {
const auto &inits = it.second;
// flag_base is always 0: single-direction pipes use flag pair 0/1;
Expand Down Expand Up @@ -221,18 +253,44 @@ struct PTOResolveReservedBuffersPass
chosenBase = desiredBase;

auto flagBaseAttr = builder.getI32IntegerAttr(*chosenBase);
bool groupNoSplit = false;
for (const PipeInitInfo &info : inits) {
if (info.inferredNoSplit) {
groupNoSplit = true;
break;
}
}
for (const PipeInitInfo &info : inits) {
if (auto initOp = dyn_cast<InitializeL2LPipeOp>(info.op)) {
if (!getFlagBaseAttr(initOp))
setFlagBaseAttr(initOp, flagBaseAttr);
if (groupNoSplit)
setNoSplitAttr(initOp, builder.getBoolAttr(true));
groupedNoSplitResolved.insert(info.op);
continue;
}
auto initOp = cast<InitializeL2G2LPipeOp>(info.op);
if (!getFlagBaseAttr(initOp))
setFlagBaseAttr(initOp, flagBaseAttr);
if (groupNoSplit)
setNoSplitAttr(initOp, builder.getBoolAttr(true));
groupedNoSplitResolved.insert(info.op);
}
}

moduleOp.walk([&](InitializeL2LPipeOp initOp) {
if (groupedNoSplitResolved.count(initOp.getOperation()))
return;
if (inferNoSplitFromPipeUsers(initOp.getPipe()))
setNoSplitAttr(initOp, builder.getBoolAttr(true));
});
moduleOp.walk([&](InitializeL2G2LPipeOp initOp) {
if (groupedNoSplitResolved.count(initOp.getOperation()))
return;
if (inferNoSplitFromPipeUsers(initOp.getPipe()))
setNoSplitAttr(initOp, builder.getBoolAttr(true));
});
Comment on lines +281 to +292
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The logic inside the two moduleOp.walk calls for InitializeL2LPipeOp and InitializeL2G2LPipeOp is identical. This code duplication can be avoided by extracting the logic into a templated helper function, which would improve maintainability.

For example, you could define a helper function:

template <typename InitOpT>
void resolveStandaloneNoSplit(InitOpT initOp,
                              const std::set<Operation *> &resolvedOps,
                              OpBuilder &builder) {
  if (resolvedOps.count(initOp.getOperation())) {
    return;
  }
  if (inferNoSplitFromPipeUsers(getPipeResult(initOp))) {
    setNoSplitAttr(initOp, builder.getBoolAttr(true));
  }
}

And then call it from the walks:

moduleOp.walk([&](InitializeL2LPipeOp initOp) {
  resolveStandaloneNoSplit(initOp, groupedNoSplitResolved, builder);
});
moduleOp.walk([&](InitializeL2G2LPipeOp initOp) {
  resolveStandaloneNoSplit(initOp, groupedNoSplitResolved, builder);
});


return success();
}

Expand Down
14 changes: 9 additions & 5 deletions lib/PTO/Transforms/PTOToEmitC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -372,12 +372,14 @@ getTPipeDirectionToken(bool isL2G2L, int8_t dirMask, PTOArch targetArch) {

static std::string buildTPipeToken(int32_t flagBase, llvm::StringRef dirTok,
int32_t slotSize, int32_t slotNum,
std::optional<int32_t> localSlotNum) {
std::optional<int32_t> localSlotNum,
bool nosplit) {
std::string token = "TPipe<" + std::to_string(flagBase) + ", " + dirTok.str() +
", " + std::to_string(slotSize) + ", " +
std::to_string(slotNum);
if (localSlotNum)
token += ", " + std::to_string(*localSlotNum);
token += nosplit ? ", true" : ", false";
token += ">";
return token;
}
Expand All @@ -395,8 +397,9 @@ static FailureOr<std::string> buildTPipeTokenFromInitOp(Operation *op,
? initOp.getLocalSlotNumAttr().getInt()
: initOp.getSlotNum();
return buildTPipeToken(initOp.getFlagBaseAttr().getInt(), *dirTok,
initOp.getSlotSize(),
initOp.getSlotNum(), localSlotNum);
initOp.getSlotSize(), initOp.getSlotNum(),
localSlotNum, initOp.getNosplitAttr() &&
initOp.getNosplitAttr().getValue());
}

if (auto initOp = dyn_cast<pto::InitializeL2LPipeOp>(op)) {
Expand All @@ -407,8 +410,9 @@ static FailureOr<std::string> buildTPipeTokenFromInitOp(Operation *op,
if (failed(dirTok))
return failure();
return buildTPipeToken(initOp.getFlagBaseAttr().getInt(), *dirTok,
initOp.getSlotSize(),
initOp.getSlotNum(), std::nullopt);
initOp.getSlotSize(), initOp.getSlotNum(),
std::nullopt, initOp.getNosplitAttr() &&
initOp.getNosplitAttr().getValue());
}

return failure();
Expand Down
10 changes: 5 additions & 5 deletions test/basic/tpush_tpop_emitc.pto
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,17 @@ module {
// A3: const int32_t {{v[0-9]+}} = 16;
// A3: const int64_t {{v[0-9]+}} = 0;
// A3: #if defined(__DAV_CUBE__)
// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8>(
// A3: TPUSH<TPipe<0, Direction::DIR_C2V, 1024, 8, 8>, Tile<TileType::Acc, float, 16, 16, BLayout::ColMajor, 16, 16, SLayout::RowMajor, 1024, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8, true>(
// A3: TPUSH<TPipe<0, Direction::DIR_C2V, 1024, 8, 8, true>, Tile<TileType::Acc, float, 16, 16, BLayout::ColMajor, 16, 16, SLayout::RowMajor, 1024, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// A3: #endif // __DAV_CUBE__

// A3-LABEL: AICORE void vector_pop_gm(
// A3: const int32_t {{v[0-9]+}} = 0;
// A3: #if defined(__DAV_VEC__)
// A3: set_mask_norm();
// A3: set_vector_mask(-1, -1);
// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8>(
// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8, false>(
// A3: Tile<TileType::Vec, float, 8, 16, BLayout::RowMajor, 8, 16, SLayout::NoneBox, 512, PadValue::Null> {{v[0-9]+}};
// A3: TPOP<TPipe<0, Direction::DIR_C2V, 1024, 8, 8>, Tile<TileType::Vec, float, 8, 16, BLayout::RowMajor, 8, 16, SLayout::NoneBox, 512, PadValue::Null>, TileSplitAxis::TILE_UP_DOWN>(
// A3: TFREE<TPipe<0, Direction::DIR_C2V, 1024, 8, 8>, TileSplitAxis::TILE_LEFT_RIGHT>(
// A3: TPOP<TPipe<0, Direction::DIR_C2V, 1024, 8, 8, false>, Tile<TileType::Vec, float, 8, 16, BLayout::RowMajor, 8, 16, SLayout::NoneBox, 512, PadValue::Null>, TileSplitAxis::TILE_UP_DOWN>(
// A3: TFREE<TPipe<0, Direction::DIR_C2V, 1024, 8, 8, false>, TileSplitAxis::TILE_LEFT_RIGHT>(
// A3: #endif // __DAV_VEC__
20 changes: 10 additions & 10 deletions test/basic/tpush_tpop_frontend_lowering_a3.pto
Original file line number Diff line number Diff line change
Expand Up @@ -61,32 +61,32 @@ module {
}

// A3-LABEL: AICORE void cube_kernel(__gm__ float*
// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 4>(
// A3: TPUSH<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4>
// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>(
// A3: TPUSH<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>
// A3: Tile<TileType::Mat, float, 16, 16, BLayout::ColMajor, 16, 16, SLayout::RowMajor, 512, PadValue::Null> {{v[0-9]+}};
// A3: TPOP<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4>, Tile<TileType::Mat, float, 16, 16, BLayout::ColMajor, 16, 16, SLayout::RowMajor, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// A3: TPOP<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>, Tile<TileType::Mat, float, 16, 16, BLayout::ColMajor, 16, 16, SLayout::RowMajor, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// A3: Tile<TileType::Left, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::RowMajor, 512, PadValue::Null> {{v[0-9]+}};
// A3: TMOV(
// A3: TFREE<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4>, TileSplitAxis::TILE_NO_SPLIT>(
// A3: TFREE<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>, TileSplitAxis::TILE_NO_SPLIT>(

// A3-LABEL: AICORE void vector_kernel(__gm__ float*
// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 4>(
// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>(
// A3: Tile<TileType::Vec, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::NoneBox, 512, PadValue::Null> {{v[0-9]+}};
// A3: TPUSH<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4>, Tile<TileType::Vec, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::NoneBox, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// A3: TPOP<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4>, Tile<TileType::Vec, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::NoneBox, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// A3: TPUSH<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>, Tile<TileType::Vec, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::NoneBox, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// A3: TPOP<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>, Tile<TileType::Vec, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::NoneBox, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// A3: Tile<TileType::Vec, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::NoneBox, 512, PadValue::Null> {{v[0-9]+}};
// A3: TNEG(
// A3: TFREE<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4>, TileSplitAxis::TILE_NO_SPLIT>(
// A3: TFREE<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>, TileSplitAxis::TILE_NO_SPLIT>(

// SYNC-A3-LABEL: AICORE void cube_kernel(__gm__ float*
// SYNC-A3: TPOP<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4>, Tile<TileType::Mat, float, 16, 16, BLayout::ColMajor, 16, 16, SLayout::RowMajor, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// SYNC-A3: TPOP<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>, Tile<TileType::Mat, float, 16, 16, BLayout::ColMajor, 16, 16, SLayout::RowMajor, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// SYNC-A3: set_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID0);
// SYNC-A3: Tile<TileType::Left, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::RowMajor, 512, PadValue::Null>
// SYNC-A3: wait_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID0);
// SYNC-A3: TMOV(

// SYNC-A3-LABEL: AICORE void vector_kernel(__gm__ float*
// SYNC-A3: TPOP<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4>, Tile<TileType::Vec, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::NoneBox, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// SYNC-A3: TPOP<TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>, Tile<TileType::Vec, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::NoneBox, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// SYNC-A3: set_flag(PIPE_MTE2, PIPE_V, EVENT_ID0);
// SYNC-A3: Tile<TileType::Vec, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::NoneBox, 512, PadValue::Null>
// SYNC-A3: wait_flag(PIPE_MTE2, PIPE_V, EVENT_ID0);
Expand Down
16 changes: 8 additions & 8 deletions test/basic/tpush_tpop_frontend_lowering_a5.pto
Original file line number Diff line number Diff line change
Expand Up @@ -57,21 +57,21 @@ module {
}

// A5-LABEL: AICORE void cube_kernel(
// A5: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4>(
// A5: TPUSH<TPipe<0, Direction::DIR_BOTH, 1024, 4>
// A5: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, true>(
// A5: TPUSH<TPipe<0, Direction::DIR_BOTH, 1024, 4, true>
// A5: Tile<TileType::Mat, float, 16, 16, BLayout::ColMajor, 16, 16, SLayout::RowMajor, 512, PadValue::Null> {{v[0-9]+}};
// A5: TPOP<TPipe<0, Direction::DIR_BOTH, 1024, 4>, Tile<TileType::Mat, float, 16, 16, BLayout::ColMajor, 16, 16, SLayout::RowMajor, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// A5: TPOP<TPipe<0, Direction::DIR_BOTH, 1024, 4, true>, Tile<TileType::Mat, float, 16, 16, BLayout::ColMajor, 16, 16, SLayout::RowMajor, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// A5: Tile<TileType::Left, float, 16, 16, BLayout::ColMajor, 16, 16, SLayout::RowMajor, 512, PadValue::Null> {{v[0-9]+}};
// A5: TMOV(
// A5: TFREE<TPipe<0, Direction::DIR_BOTH, 1024, 4>, TileSplitAxis::TILE_NO_SPLIT>(
// A5: TFREE<TPipe<0, Direction::DIR_BOTH, 1024, 4, true>, TileSplitAxis::TILE_NO_SPLIT>(

// A5-LABEL: AICORE void vector_kernel(
// A5: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4>(
// A5: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, true>(
// A5: Tile<TileType::Vec, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::NoneBox, 512, PadValue::Null> {{v[0-9]+}};
// A5: Tile<TileType::Vec, float, 16, 16, BLayout::ColMajor, 16, 16, SLayout::RowMajor, 512, PadValue::Null> {{v[0-9]+}};
// A5: TMOV(
// A5: TPUSH<TPipe<0, Direction::DIR_BOTH, 1024, 4>, Tile<TileType::Vec, float, 16, 16, BLayout::ColMajor, 16, 16, SLayout::RowMajor, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// A5: TPOP<TPipe<0, Direction::DIR_BOTH, 1024, 4>, Tile<TileType::Vec, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::NoneBox, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// A5: TPUSH<TPipe<0, Direction::DIR_BOTH, 1024, 4, true>, Tile<TileType::Vec, float, 16, 16, BLayout::ColMajor, 16, 16, SLayout::RowMajor, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// A5: TPOP<TPipe<0, Direction::DIR_BOTH, 1024, 4, true>, Tile<TileType::Vec, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::NoneBox, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
// A5: Tile<TileType::Vec, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::NoneBox, 512, PadValue::Null> {{v[0-9]+}};
// A5: TNEG(
// A5: TFREE<TPipe<0, Direction::DIR_BOTH, 1024, 4>, TileSplitAxis::TILE_NO_SPLIT>(
// A5: TFREE<TPipe<0, Direction::DIR_BOTH, 1024, 4, true>, TileSplitAxis::TILE_NO_SPLIT>(
52 changes: 52 additions & 0 deletions test/basic/tpush_tpop_frontend_nosplit_a5.pto
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// RUN: ptoas --pto-arch=a5 %s 2>&1 | FileCheck %s --check-prefix=A5

// Mixed-split scenario for a bidirectional (dir_mask = 3) frontend pipe:
// the cube kernel pushes with split = 1, while the vector kernel pops with
// split = 0 and frees with split = 1. The checks after the module expect the
// trailing TPipe template argument to be `true` in both kernels.
// NOTE(review): presumably the single split = 0 pop is what makes the
// resolve-reserved-buffers pass infer nosplit for both peer init ops --
// confirm against the pass implementation.
module {
// Cube side: owns the "v2c_fifo" buffer and imports "c2v_fifo" from
// @vector_kernel.
func.func @cube_kernel() attributes {pto.kernel_kind = #pto.kernel_kind<cube>} {
%v2c_local = pto.reserve_buffer {
name = "v2c_fifo",
size = 4096,
location = #pto.address_space<mat>,
auto = true
} -> i32
%c2v_import = pto.import_reserved_buffer {
name = "c2v_fifo",
peer_func = @vector_kernel
} -> i32
pto.aic_initialize_pipe {dir_mask = 3, slot_size = 1024}
(c2v_consumer_buf = %c2v_import : i32,
v2c_consumer_buf = %v2c_local : i32)

%acc_tile = pto.alloc_tile : !pto.tile_buf<loc=acc, dtype=f32, rows=16, cols=16, v_row=16, v_col=16, blayout=col_major, slayout=row_major, fractal=1024, pad=0>
// The only pipe user in this kernel; note that it uses split = 1, not 0.
pto.tpush_to_aiv(%acc_tile : !pto.tile_buf<loc=acc, dtype=f32, rows=16, cols=16, v_row=16, v_col=16, blayout=col_major, slayout=row_major, fractal=1024, pad=0>) {split = 1}
return
}

// Vector side: owns the "c2v_fifo" buffer and imports "v2c_fifo" from
// @cube_kernel.
func.func @vector_kernel() attributes {pto.kernel_kind = #pto.kernel_kind<vector>} {
%c2v_local = pto.reserve_buffer {
name = "c2v_fifo",
size = 4096,
location = #pto.address_space<vec>,
auto = true
} -> i32
%v2c_import = pto.import_reserved_buffer {
name = "v2c_fifo",
peer_func = @cube_kernel
} -> i32
pto.aiv_initialize_pipe {dir_mask = 3, slot_size = 1024}
(c2v_consumer_buf = %c2v_local : i32,
v2c_consumer_buf = %v2c_import : i32)

// The pop is the only user in this test with split = 0; the free that
// follows uses split = 1.
%recv_tile = pto.tpop_from_aic {split = 0}
-> !pto.tile_buf<loc=vec, dtype=f32, rows=16, cols=16, v_row=16, v_col=16, blayout=row_major, slayout=none_box, fractal=512, pad=0>
pto.tfree_from_aic {split = 1}
return
}
}

// A5-LABEL: AICORE void cube_kernel(
// A5: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, true>(
// A5: TPUSH<TPipe<0, Direction::DIR_BOTH, 1024, 4, true>

// A5-LABEL: AICORE void vector_kernel(
// A5: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, true>(
// A5: TPOP<TPipe<0, Direction::DIR_BOTH, 1024, 4, true>, Tile<TileType::Vec, float, 16, 16, BLayout::RowMajor, 16, 16, SLayout::NoneBox, 512, PadValue::Null>, TileSplitAxis::TILE_NO_SPLIT>(
Loading