Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ set(PYPTO_SOURCES
src/ir/op/tile_ops/transform.cpp
src/ir/op/tile_ops/unary.cpp
src/ir/op/tile_ops/cross_core.cpp
src/ir/op/tile_ops/utility.cpp
src/ir/op/sync_ops/sync.cpp
src/ir/op/sync_ops/cross_core.cpp
src/ir/op/tensor_ops/broadcast.cpp
Expand All @@ -114,6 +115,7 @@ set(PYPTO_SOURCES
src/ir/op/tensor_ops/reduction.cpp
src/ir/op/tensor_ops/transform.cpp
src/ir/op/tensor_ops/unary.cpp
src/ir/op/tensor_ops/utility.cpp
src/ir/op/testing.cpp
src/ir/op/type_inference.cpp

Expand Down
15 changes: 15 additions & 0 deletions python/pypto/backend/pto_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,21 @@ def _generate_kernel_wrapper(func: _ir_core.Function, ptoas_code: str) -> str:
3. ``kernel_entry`` wrapper with arg unpacking and forward call
"""
header = _KERNEL_HEADER.format(func_name=func.name)
# TPRINT is guarded by #ifdef _DEBUG in pto-isa headers. Defining
# _DEBUG globally is too broad (it enables cce::printf calls that don't
# compile on simulation). Instead, provide a no-op fallback so the
# generated code compiles in all environments.
if "TPRINT" in ptoas_code:
header = header.replace(
"using namespace pto;",
"using namespace pto;\n\n"
"#ifndef _DEBUG\n"
"namespace pto {\n"
"template <typename T>\n"
"PTO_INST void TPRINT(T& /*src*/) {}\n"
"} // namespace pto\n"
"#endif",
)
ptoas_body = _preprocess_ptoas_output(ptoas_code)
unpacking_code, var_names = _generate_arg_unpacking(func)
call_args = ", ".join(var_names)
Expand Down
16 changes: 16 additions & 0 deletions python/pypto/ir/op/tensor_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -940,3 +940,19 @@ def scatter_update(
op_args: list[Expr] = [input, index, src]
kwargs: dict[str, Any] = {"dim": dim_val}
return _ir_core.create_op_call("tensor.scatter_update", op_args, kwargs, actual_span)


def runtime_print(tensor: Expr, span: Span | None = None) -> Call:
    """Build a ``tensor.runtime_print`` call that prints a tensor at runtime.

    The op is a debugging aid; it is lowered to a pto.tprint instruction in
    the compiled output.

    Args:
        tensor: Tensor expression to print (TensorType).
        span: Source span to attach to the call. When omitted, the span is
            resolved through ``_get_span_or_capture``.

    Returns:
        The Call expression for ``tensor.runtime_print``. Its type is the
        pass-through TensorType of the input.
    """
    call_span = _get_span_or_capture(span)
    positional: list[Expr] = [tensor]
    return _ir_core.create_op_call("tensor.runtime_print", positional, {}, call_span)
16 changes: 16 additions & 0 deletions python/pypto/ir/op/tile_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1950,3 +1950,19 @@ def tpop_from_aiv(
op = _ir_core.get_op("tile.tpop_from_aiv")
return _ir_core.Call(op, [], {"split": split}, resolved_type, actual_span)
return _ir_core.create_op_call("tile.tpop_from_aiv", [], {"split": split}, actual_span)


def runtime_print(tile: Expr, span: Span | None = None) -> Call:
    """Build a ``tile.runtime_print`` call that prints a tile at runtime.

    The op is a debugging aid; it is lowered to a pto.tprint instruction in
    the compiled output.

    Args:
        tile: Tile expression to print (TileType).
        span: Source span to attach to the call. When omitted, the span is
            resolved through ``_get_span_or_capture``.

    Returns:
        The Call expression for ``tile.runtime_print``. Its type is the
        pass-through TileType of the input.
    """
    call_span = _get_span_or_capture(span)
    positional: list[Expr] = [tile]
    return _ir_core.create_op_call("tile.runtime_print", positional, {}, call_span)
2 changes: 2 additions & 0 deletions python/pypto/language/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ def scalar_func(x: pl.Scalar[pl.FP32]) -> pl.Scalar[pl.FP32]:
row_min,
row_sum,
rsqrt,
runtime_print,
slice,
sqrt,
sub,
Expand Down Expand Up @@ -264,6 +265,7 @@ def scalar_func(x: pl.Scalar[pl.FP32]) -> pl.Scalar[pl.FP32]:
"recip",
"read",
"write",
"runtime_print",
# Promoted tile-only
"create_tile",
"fillpad",
Expand Down
13 changes: 13 additions & 0 deletions python/pypto/language/op/tensor_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
"reshape",
"transpose",
"scatter_update",
"runtime_print",
]

from pypto.ir.op import tensor_ops as _ir_ops
Expand Down Expand Up @@ -779,3 +780,15 @@ def scatter_update(
"""
call_expr = _ir_ops.scatter_update(input.unwrap(), dim, index.unwrap(), src.unwrap())
return Tensor(expr=call_expr)


def runtime_print(tensor: Tensor) -> None:
    """Emit a runtime debug print of ``tensor``.

    Lowered to a pto.tprint instruction in the compiled output. This is a
    statement-only operation: the IR call is built for its side effect and
    nothing is returned to the caller.

    Args:
        tensor: Tensor whose contents should be printed.
    """
    tensor_expr = tensor.unwrap()
    _ir_ops.runtime_print(tensor_expr)
13 changes: 13 additions & 0 deletions python/pypto/language/op/tile_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
"tpush_to_aic",
"tpop_from_aic",
"tpop_from_aiv",
"runtime_print",
]

from pypto.ir.op import tile_ops as _ir_ops
Expand Down Expand Up @@ -1547,3 +1548,15 @@ def sels(lhs: Tile, rhs: Tile, select_mode: int | float | Expr | Scalar) -> Tile
select_mode_expr = select_mode.unwrap() if isinstance(select_mode, Scalar) else select_mode
call_expr = _ir_ops.sels(lhs.unwrap(), rhs.unwrap(), select_mode_expr)
return Tile(expr=call_expr)


def runtime_print(tile: Tile) -> None:
    """Emit a runtime debug print of ``tile``.

    Lowered to a pto.tprint instruction in the compiled output. This is a
    statement-only operation: the IR call is built for its side effect and
    nothing is returned to the caller.

    Args:
        tile: Tile whose contents should be printed.
    """
    tile_expr = tile.unwrap()
    _ir_ops.runtime_print(tile_expr)
17 changes: 17 additions & 0 deletions python/pypto/language/op/unified_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"create_tile",
"read",
"write",
"runtime_print",
]

from pypto.ir.utils import resolve_cast_mode
Expand Down Expand Up @@ -552,3 +553,19 @@ def write(dst: Tensor | Tile, offset: IntLike | Sequence[IntLike], value: Scalar
if isinstance(dst, Tile):
return _tile.write(dst, offset, value)
raise TypeError(f"write: expected Tensor or Tile, got {type(dst).__name__}")


def runtime_print(src: Tensor | Tile) -> None:
    """Emit a runtime debug print of a tensor or a tile.

    Dispatches on the wrapper type of ``src`` to the tensor-level or
    tile-level ``runtime_print``. Lowered to a pto.tprint instruction in the
    compiled output. This is a statement-only operation: no value is returned.

    Args:
        src: Tensor or tile to print.

    Raises:
        TypeError: If ``src`` is neither a Tensor nor a Tile.
    """
    # Tensor is tested first, exactly as in the two-branch form.
    if isinstance(src, Tensor):
        printer = _tensor.runtime_print
    elif isinstance(src, Tile):
        printer = _tile.runtime_print
    else:
        raise TypeError(f"runtime_print: expected Tensor or Tile, got {type(src).__name__}")
    return printer(src)
14 changes: 11 additions & 3 deletions src/backend/common/pto_ops_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,13 @@ static std::string MakePrintCodegenPTO(const std::string& pto_op_name, const Cal
CHECK(op->args_.size() == 1) << "Operation:" << pto_op_name << "] requires 1 argument, but got "
<< op->args_.size();
std::string src = codegen.GetExprAsCode(op->args_[0]);
codegen.Emit(pto_op_name + " ins(" + src + " | !pto.partition_tensor_view<MxNxdtype>)");
std::string src_type = codegen.GetExprTypeAnnotation(op->args_[0]);
std::string line = pto_op_name + " ins(" + src;
if (!src_type.empty()) {
line += " : " + src_type;
}
line += ")";
codegen.Emit(line);
return "";
}

Expand Down Expand Up @@ -1234,9 +1240,11 @@ void RegisterPTOOps(Backend& backend, const std::unordered_set<std::string>& exc
reg("tile.mrgsort", [](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
return MakeMrgSortCodegenPTO("pto.tmrgsort", op, codegen);
});
reg("tile.print", [](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
auto make_tprint = [](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
return MakePrintCodegenPTO("pto.tprint", op, codegen);
});
};
reg("tile.runtime_print", make_tprint);
reg("tensor.runtime_print", make_tprint);

// In-place accumulation ops (matmul_acc, gemv_acc): ptoas expects the
// accumulator in ins() to be the same SSA value as outs(). InitMemRef
Expand Down
54 changes: 54 additions & 0 deletions src/ir/op/tensor_ops/utility.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright (c) PyPTO Contributors.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
* -----------------------------------------------------------------------------------------------------------
*/

/**
* @file utility.cpp
* @brief Utility tensor operations (print)
*
* This file implements utility/debugging operations for tensor-level programming.
*/

#include <any>
#include <string>
#include <utility>
#include <vector>

#include "pypto/core/logging.h"
#include "pypto/ir/kind_traits.h"
#include "pypto/ir/op_registry.h"
#include "pypto/ir/type.h"

namespace pypto {
namespace ir {

/**
 * @brief Type deduction for the tensor-level runtime print operator.
 *
 * Verifies that exactly one argument was supplied and that it is a
 * TensorType, then hands that type back unchanged. The operator exists only
 * for its printing side effect, so the result type mirrors the input.
 *
 * @param args Call arguments; must hold exactly one TensorType expression.
 * @param kwargs Keyword arguments (not inspected).
 * @param op_name Operator name used in diagnostic messages.
 * @return The TensorType of the single argument.
 */
TypePtr DeduceTensorPrintType(const std::vector<ExprPtr>& args,
                              const std::vector<std::pair<std::string, std::any>>& kwargs,
                              const std::string& op_name) {
  CHECK(args.size() == 1) << "The operator " << op_name << " requires 1 argument (tensor), but got "
                          << args.size();

  const auto& input = args[0];
  auto tensor_type = As<TensorType>(input->GetType());
  CHECK(tensor_type) << "The operator " << op_name << " requires argument to be a TensorType, but got "
                     << input->GetType()->TypeName();

  // Pass-through result: the deduced type is the input tensor's own type.
  return tensor_type;
}

// Registers the "tensor.runtime_print" operator in the "TensorOp" category.
// It takes a single argument named "tensor", and its type deduction delegates
// to DeduceTensorPrintType, which returns the argument's TensorType unchanged.
REGISTER_OP("tensor.runtime_print")
.set_op_category("TensorOp")
.set_description("Print tensor contents for debugging (generates pto.tprint)")
.add_argument("tensor", "Input tensor to print (TensorType)")
.f_deduce_type([](const std::vector<ExprPtr>& args,
const std::vector<std::pair<std::string, std::any>>& kwargs) {
// The op name is passed along so diagnostics mention "tensor.runtime_print".
return DeduceTensorPrintType(args, kwargs, "tensor.runtime_print");
});

} // namespace ir
} // namespace pypto
55 changes: 55 additions & 0 deletions src/ir/op/tile_ops/utility.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright (c) PyPTO Contributors.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
* -----------------------------------------------------------------------------------------------------------
*/

/**
* @file utility.cpp
* @brief Utility tile operations (print)
*
* This file implements utility/debugging operations for tile-level programming.
*/

#include <any>
#include <string>
#include <utility>
#include <vector>

#include "pypto/core/logging.h"
#include "pypto/ir/kind_traits.h"
#include "pypto/ir/op_registry.h"
#include "pypto/ir/type.h"

namespace pypto {
namespace ir {

/**
 * @brief Type deduction for the tile-level runtime print operator.
 *
 * Verifies that exactly one argument was supplied and that it is a TileType,
 * then hands that type back unchanged. The operator exists only for its
 * printing side effect, so the result type mirrors the input.
 *
 * @param args Call arguments; must hold exactly one TileType expression.
 * @param kwargs Keyword arguments (not inspected).
 * @param op_name Operator name used in diagnostic messages.
 * @return The TileType of the single argument.
 */
TypePtr DeduceTilePrintType(const std::vector<ExprPtr>& args,
                            const std::vector<std::pair<std::string, std::any>>& kwargs,
                            const std::string& op_name) {
  CHECK(args.size() == 1) << "The operator " << op_name << " requires 1 argument (tile), but got "
                          << args.size();

  const auto& input = args[0];
  auto tile_type = As<TileType>(input->GetType());
  CHECK(tile_type) << "The operator " << op_name << " requires argument to be a TileType, but got "
                   << input->GetType()->TypeName();

  // Pass-through result: the deduced type is the input tile's own type.
  return tile_type;
}

// Registers the "tile.runtime_print" operator in the "TileOp" category.
// It takes a single argument named "tile", and its type deduction delegates
// to DeduceTilePrintType, which returns the argument's TileType unchanged.
// NOTE(review): no_memory_spec() presumably declares that this op imposes no
// memory-space requirement on its operands/result — confirm against the
// op registry's documentation.
REGISTER_OP("tile.runtime_print")
.set_op_category("TileOp")
.set_description("Print tile contents for debugging (generates pto.tprint)")
.add_argument("tile", "Input tile to print (TileType)")
.no_memory_spec()
.f_deduce_type([](const std::vector<ExprPtr>& args,
const std::vector<std::pair<std::string, std::any>>& kwargs) {
// The op name is passed along so diagnostics mention "tile.runtime_print".
return DeduceTilePrintType(args, kwargs, "tile.runtime_print");
});

} // namespace ir
} // namespace pypto
36 changes: 36 additions & 0 deletions src/ir/transforms/op_conversion_registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,42 @@ OpConversionRegistry::OpConversionRegistry() {
// Memory creation ops
RegisterSimple("tensor.full", "tile.full");

// Utility ops — runtime_print needs a custom converter because the
// argument may still be a TensorType (e.g. printing a function parameter
// before any explicit tile.load). In that case we insert a tile.load
// prologue to materialise the tile, matching the tensor.fillpad pattern.
//
// Conversion outcomes:
//   * TileType input   -> a single tile.runtime_print call, no prologue.
//   * TensorType input -> prologue `runtime_print_src = tile.load(...)`
//                         followed by tile.runtime_print(runtime_print_src).
//   * any other type   -> CHECK failure.
RegisterCustom(
"tensor.runtime_print",
[](const std::vector<ExprPtr>& args, const std::vector<std::pair<std::string, std::any>>& kwargs,
const Span& span) -> ConversionResult {
// kwargs is not consulted by this converter.
CHECK(args.size() == 1) << "tensor.runtime_print conversion expects 1 arg (input)";
auto& op_reg = OpRegistry::GetInstance();
const auto& input = args[0];

// Already a tile — pass through.
if (As<TileType>(input->GetType())) {
return ConversionResult{op_reg.Create("tile.runtime_print", {input}, span)};
}

auto tensor_type = As<TensorType>(input->GetType());
CHECK(tensor_type) << "tensor.runtime_print conversion: input must be TensorType or TileType, got "
<< input->GetType()->TypeName();

// Build the tile.load operands from the tensor's own shape: one offset entry
// per dimension (presumably all zero, per the helper's name) and the full
// shape, which is passed for both shape operands of tile.load.
// NOTE(review): this loads the entire tensor into a single tile — confirm
// that is acceptable for large tensors.
auto offsets = MakeZeroOffsetsTuple(tensor_type->shape_.size(), span);
auto shapes = MakeShapesTuple(tensor_type->shape_, span);

// The load targets MemorySpace::Vec without transposition.
std::vector<std::pair<std::string, std::any>> load_kwargs = {{"target_memory", MemorySpace::Vec},
{"transpose", false}};
auto load_call = op_reg.Create("tile.load", {input, offsets, shapes, shapes}, load_kwargs, span);
// NOTE(review): the temporary always carries the fixed name
// "runtime_print_src"; confirm that several prints in one function cannot
// clash on this name.
auto load_var = std::make_shared<Var>("runtime_print_src", load_call->GetType(), span);

// The prologue holds the assignment that materialises the loaded tile.
std::vector<StmtPtr> prologue;
prologue.push_back(std::make_shared<AssignStmt>(load_var, load_call, span));

// The print itself consumes the freshly loaded tile variable.
auto print_call = op_reg.Create("tile.runtime_print", {load_var}, span);
return ConversionResult{std::move(prologue), print_call};
});

// ────────────────────────────────────────────────────────────────────────
// Broadcast-aware elementwise binary ops
//
Expand Down
Loading
Loading