Changes from all commits (21 commits)
fc788bc
Expose tensor pointer in rocpycv
zacharyvincze Apr 28, 2026
134eb7c
Expose stream pointer in rocpycv
zacharyvincze Apr 28, 2026
228f422
Update stubs
zacharyvincze Apr 29, 2026
12c85cf
Add migraphx classification example
zacharyvincze Apr 29, 2026
8fb6c4f
Remove 4S16 from pyenums
zacharyvincze Apr 29, 2026
e024eef
Move classification sample location
zacharyvincze Apr 29, 2026
25e2b9d
Move pytorch classification sample location
zacharyvincze Apr 29, 2026
c8da0a2
Cleanup migraphx classification sample
zacharyvincze Apr 29, 2026
666226f
Minor cleanup
zacharyvincze Apr 29, 2026
d0ee899
Minor variable name changes
zacharyvincze Apr 29, 2026
8ad82ba
Allow use of numpy types to specify rocpycv.Tensor types
zacharyvincze Apr 29, 2026
6a0e0fa
Swap dtype <-> layout position in tensor construction
zacharyvincze Apr 29, 2026
ea1caff
Add numpy/string layout options for convert_to and reformat python op…
zacharyvincze Apr 29, 2026
b4c39d5
Improve initial rocpycv module documentation
zacharyvincze Apr 29, 2026
edc8dac
Rename test_op_remap -> test_op_bndbox
zacharyvincze Apr 30, 2026
e52f8dc
Remove support for 4S16 in DLTypeToRoccvType
zacharyvincze Apr 30, 2026
ac74517
Add test for PyStream handle()
zacharyvincze Apr 30, 2026
c150f79
Add basic PyTensor test
zacharyvincze Apr 30, 2026
07126e7
Update year
zacharyvincze Apr 30, 2026
f8c60eb
Add dtype/layout tests for PyTensor
zacharyvincze Apr 30, 2026
2499513
Merge branch 'develop' into zv/feature/migraphx-classification
zacharyvincze May 6, 2026
21 changes: 20 additions & 1 deletion python/include/py_helpers.hpp
@@ -86,4 +86,23 @@ extern double2 GetDouble2FromTuple(py::tuple src);
* @param src A python tuple of size 2.
* @return int2
*/
extern int2 GetInt2FromTuple(py::tuple src);

/**
* @brief Resolves a Python object to an eTensorLayout. Accepts either an rocpycv.eTensorLayout enum
* value, or a layout string such as "NHWC". Throws std::runtime_error for unsupported inputs.
*
* @param obj A Python object describing the tensor layout.
* @return eTensorLayout
*/
extern eTensorLayout LayoutFromPyObject(py::object obj);

/**
* @brief Resolves a Python object to an eDataType. Accepts either an rocpycv.eDataType enum value,
* or anything coercible to a NumPy dtype (e.g. ``np.float32``, ``np.dtype("uint8")``,
* ``"float32"``). Throws std::runtime_error for unsupported inputs.
*
* @param obj A Python object describing the tensor data type.
* @return eDataType
*/
extern eDataType DataTypeFromPyObject(py::object obj);
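
Taken together, these helpers make the following Python spellings interchangeable. A minimal
sketch, assuming the enum names shown in the module docstring (``rocpycv.U8``, ``rocpycv.NHWC``):

.. code-block:: python

    import numpy as np
    import rocpycv

    shape = (1, 224, 224, 3)

    # Three equivalent tensors: dtype as a rocpycv enum, a NumPy scalar
    # type, or a dtype string; layout as a rocpycv enum or a plain string.
    a = rocpycv.Tensor(shape, rocpycv.U8, rocpycv.NHWC)
    b = rocpycv.Tensor(shape, np.uint8, "NHWC")
    c = rocpycv.Tensor(shape, np.dtype("uint8"), "NHWC")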
12 changes: 12 additions & 0 deletions python/include/py_stream.hpp
@@ -55,6 +55,18 @@ class PyStream {
*/
void synchronize();

/**
* @brief Returns the wrapped HIP stream handle as an unsigned integer.
*
* Intended for zero-copy interop with frameworks that accept a raw HIP
* stream handle (e.g. ``migraphx.run_async`` with stream type
* ``"ihipStream_t"``). The handle is non-owning -- keep this PyStream alive
* for as long as the handle is in use.
*
* @return uintptr_t
*/
uintptr_t getHandle();

/**
* @brief Exports the PyStream object to the specified python module.
*
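A usage sketch for the new handle (the model path and the empty parameter map are placeholders,
and the exact ``run_async`` signature is an assumption to check against your MIGraphX version):

.. code-block:: python

    import migraphx
    import rocpycv

    stream = rocpycv.Stream()

    # Hypothetical model; parse and compile once up front.
    prog = migraphx.parse_onnx("resnet50.onnx")
    prog.compile(migraphx.get_target("gpu"))
    params = {}  # input bindings elided for brevity

    # handle() exposes the raw hipStream_t as an integer; MIGraphX is told
    # to interpret it as an "ihipStream_t" and enqueues inference on it.
    results = prog.run_async(params, stream.handle(), "ihipStream_t")
    stream.synchronize()  # keep `stream` alive while the handle is in use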
20 changes: 18 additions & 2 deletions python/include/py_tensor.hpp
@@ -38,11 +38,11 @@ class PyTensor : public std::enable_shared_from_this<PyTensor> {
* wrap.
*
* @param shape The shape of the tensor.
-   * @param layout The layout of the tensor.
* @param dtype The data type of the tensor.
+   * @param layout The layout of the tensor.
* @param device The device of the tensor.
*/
-   PyTensor(std::vector<int64_t> shape, eTensorLayout layout, eDataType dtype, eDeviceType device);
+   PyTensor(std::vector<int64_t> shape, eDataType dtype, eTensorLayout layout, eDeviceType device);

/**
* @brief Wraps an existing roccv::Tensor inside of a newly constructed PyTensor.
@@ -138,6 +138,22 @@ class PyTensor : public std::enable_shared_from_this<PyTensor> {
*/
eDeviceType getDevice();

/**
* @brief Returns the address of the tensor's underlying data buffer as an
* unsigned integer. For GPU tensors this is a HIP device address; for CPU
* tensors it is a host address. Use ``device()`` to disambiguate.
*
* The pointer is non-owning. The caller is responsible for ensuring this
* PyTensor remains alive for as long as the pointer is used; otherwise the
* underlying buffer may be freed and the pointer left dangling.
*
* Intended for zero-copy interop with frameworks that accept a raw
* pointer + shape + dtype (e.g. ``migraphx.argument_from_pointer``).
*
* @return uintptr_t
*/
uintptr_t getDataPtr();

/**
* @brief Gets the underlying roccv::Tensor that this tensor container wraps.
*
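A companion sketch for the tensor side, handing a GPU buffer to MIGraphX without a copy
(``migraphx.shape``/``argument_from_pointer`` usage is an assumption to verify against your
MIGraphX version; a packed layout is assumed):

.. code-block:: python

    import migraphx
    import rocpycv

    # GPU-resident float32 NCHW tensor (placeholder contents).
    chw = rocpycv.Tensor((1, 3, 224, 224), rocpycv.F32, "NCHW")

    # Wrap the raw device pointer as a MIGraphX argument; no data is copied.
    shape = migraphx.shape(type="float_type", lens=[1, 3, 224, 224])
    arg = migraphx.argument_from_pointer(shape, chw.data_ptr())
    # `chw` owns the buffer: keep it alive for as long as `arg` is in use.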
57 changes: 54 additions & 3 deletions python/src/main.cpp
@@ -52,9 +52,60 @@ THE SOFTWARE.

PYBIND11_MODULE(rocpycv, m) {
m.doc() = R"pbdoc(
-   Python API reference
-   -----------------------
-   This is the Python API reference for rocCV.
rocpycv — AMD GPU-accelerated image pre/post-processing
=======================================================

rocpycv is the Python binding for rocCV, a HIP/ROCm image processing
library. It exposes a NumPy-friendly :class:`Tensor` and a suite of
operators (resize, normalize, color conversion, geometric warps, ...)
that run on either GPU (default) or CPU.

Quick start
-----------
.. code-block:: python

import numpy as np
import rocpycv

# Wrap a NumPy array as a CPU Tensor (zero-copy via DLPack), then
# copy it to the GPU (explicit H2D transfer).
host = np.zeros((1, 480, 640, 3), np.uint8)
src = rocpycv.from_dlpack(host, "NHWC").copy_to(rocpycv.GPU)

# Functional form: operators allocate and return a new Tensor.
resized = rocpycv.resize(src, (1, 224, 224, 3), rocpycv.LINEAR)
chw = rocpycv.reformat(resized, "NCHW")

# ``*_into`` form: write into a caller-allocated output, optionally
# on a stream — useful in hot preprocessing loops.
stream = rocpycv.Stream()
out = rocpycv.Tensor((1, 224, 224, 3), np.uint8, "NHWC")
rocpycv.resize_into(out, src, rocpycv.LINEAR, stream)
stream.synchronize()

Tensors
-------
:class:`Tensor` arguments accept either rocpycv enums or familiar
Python types:

* ``dtype`` — ``rocpycv.F32`` or any NumPy dtype/scalar (``np.float32``).
* ``layout`` — ``rocpycv.NHWC`` or a layout string (``"NHWC"``).

For zero-copy interop, tensors implement the DLPack protocol — pass any
``__dlpack__``-supporting object (NumPy array, PyTorch tensor, ...) to
:func:`from_dlpack`, and use :meth:`Tensor.data_ptr` to hand a raw GPU
pointer to inference frameworks such as MIGraphX.

Operators
---------
Most operators come in two forms:

* ``op(src, ...)`` — allocates and returns a new :class:`Tensor`.
* ``op_into(dst, src, ...)`` — writes into a pre-allocated output,
avoiding per-call allocation in tight loops.

All operators accept an optional ``stream`` (a :class:`Stream` wrapping
a ``hipStream_t``) and a ``device`` argument (defaults to GPU).
)pbdoc";
PyException::Export(m);
PyEnums::Export(m);
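Condensed end-to-end, the docstring's pieces compose into the kind of preprocessing loop the new
MIGraphX classification sample performs. A sketch with a placeholder input image:

.. code-block:: python

    import numpy as np
    import rocpycv

    stream = rocpycv.Stream()

    # Placeholder for a decoded HWC uint8 image batch of shape (1, H, W, C).
    host = np.zeros((1, 480, 640, 3), np.uint8)
    src = rocpycv.from_dlpack(host, "NHWC").copy_to(rocpycv.GPU)

    # Resize -> scale to float32 [0, 1] -> NCHW, all enqueued on one stream.
    resized = rocpycv.Tensor((1, 224, 224, 3), np.uint8, "NHWC")
    rocpycv.resize_into(resized, src, rocpycv.LINEAR, stream)
    floats = rocpycv.convert_to(resized, np.float32, alpha=1.0 / 255.0, stream=stream)
    chw = rocpycv.reformat(floats, "NCHW", stream=stream)
    stream.synchronize()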
18 changes: 13 additions & 5 deletions python/src/operators/py_op_convert_to.cpp
@@ -24,6 +24,8 @@ THE SOFTWARE.

#include <op_convert_to.hpp>

#include "py_helpers.hpp"

PyTensor PyOpConvertTo::Execute(PyTensor& input, eDataType dtype, double alpha, double beta,
std::optional<std::reference_wrapper<PyStream>> stream, eDeviceType device) {
hipStream_t hipStream = stream.has_value() ? stream.value().get().getStream() : nullptr;
@@ -44,17 +46,23 @@ void PyOpConvertTo::ExecuteInto(PyTensor& output, PyTensor& input, double alpha,

void PyOpConvertTo::Export(py::module& m) {
using namespace py::literals;
m.def("convert_to", &PyOpConvertTo::Execute, "src"_a, "dtype"_a, "alpha"_a = 1.0, "beta"_a = 0.0,
"stream"_a = nullptr, "device"_a = eDeviceType::GPU, R"pbdoc(

m.def("convert_to",
[](PyTensor& input, py::object dtype, double alpha, double beta,
std::optional<std::reference_wrapper<PyStream>> stream, eDeviceType device) {
return PyOpConvertTo::Execute(input, DataTypeFromPyObject(dtype), alpha, beta, stream, device);
},
"src"_a, "dtype"_a, "alpha"_a = 1.0, "beta"_a = 0.0, "stream"_a = nullptr,
"device"_a = eDeviceType::GPU, R"pbdoc(

Executes the Convert To operation on the given HIP stream.

See also:
Refer to the rocCV C++ API reference for more information on this operation.

Args:
src (rocpycv.Tensor): Input tensor containing one or more images.
-   dtype (eDataType): Datatype of the output tensor.
dtype: Datatype of the output tensor. Either an ``rocpycv.eDataType``
(e.g. ``rocpycv.F32``) or a NumPy dtype/scalar type (e.g. ``np.float32``).
alpha (double, optional): Scalar for output data. Defaults to 1.0.
beta (double, optional): Offset for the data. Defaults to 0.0.
stream (rocpycv.Stream, optional): HIP stream to run this operation on.
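A usage sketch of the two accepted dtype spellings (``src`` is a placeholder input tensor):

.. code-block:: python

    import numpy as np
    import rocpycv

    src = rocpycv.Tensor((1, 224, 224, 3), np.uint8, "NHWC")  # placeholder

    # out = src * alpha + beta, cast to the requested dtype; the NumPy and
    # enum spellings below resolve to the same operation.
    dst = rocpycv.convert_to(src, np.float32, alpha=1.0 / 255.0, beta=0.0)
    dst2 = rocpycv.convert_to(src, rocpycv.F32, alpha=1.0 / 255.0)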
13 changes: 10 additions & 3 deletions python/src/operators/py_op_reformat.cpp
@@ -22,6 +22,8 @@ THE SOFTWARE.

#include "operators/py_op_reformat.hpp"

#include "py_helpers.hpp"

void PyOpReformat::ExecuteInto(PyTensor& output, PyTensor& input,
std::optional<std::reference_wrapper<PyStream>> stream, eDeviceType device) {
hipStream_t hipStream = stream.has_value() ? stream.value().get().getStream() : nullptr;
@@ -46,16 +48,21 @@ PyTensor PyOpReformat::Execute(PyTensor& input, eTensorLayout outLayout,
void PyOpReformat::Export(py::module& m) {
using namespace py::literals;

m.def("reformat", &PyOpReformat::Execute, "input"_a, "out_layout"_a, "stream"_a = nullptr,
"device"_a = eDeviceType::GPU, R"pbdoc(
m.def("reformat",
[](PyTensor& input, py::object outLayout,
std::optional<std::reference_wrapper<PyStream>> stream, eDeviceType device) {
return PyOpReformat::Execute(input, LayoutFromPyObject(outLayout), stream, device);
},
"input"_a, "out_layout"_a, "stream"_a = nullptr, "device"_a = eDeviceType::GPU, R"pbdoc(
Executes the Reformat operation and returns the result as a new tensor.

See also:
Refer to the rocCV C++ API reference for more information on this operation.

Args:
input (rocpycv.Tensor): Input tensor to reformat.
-   out_layout (rocpycv.eTensorLayout): The layout to reformat the input tensor to.
out_layout: The layout to reformat the input tensor to. Either an
``rocpycv.eTensorLayout`` (e.g. ``rocpycv.NCHW``) or a layout string (``"NCHW"``).
stream (rocpycv.Stream, optional): HIP stream to run this operation on.
device (rocpycv.Device, optional): The device to run this operation on. Defaults to GPU.

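Likewise for reformat, a sketch showing that the string and enum layout spellings are
interchangeable:

.. code-block:: python

    import numpy as np
    import rocpycv

    nhwc = rocpycv.Tensor((1, 224, 224, 3), np.float32, "NHWC")

    # Interleaved -> planar reorder; both target-layout spellings work.
    nchw = rocpycv.reformat(nhwc, "NCHW")
    nchw_enum = rocpycv.reformat(nhwc, rocpycv.NCHW)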
1 change: 0 additions & 1 deletion python/src/py_enums.cpp
@@ -48,7 +48,6 @@ void PyEnums::Export(py::module& m) {
.value("S32", DATA_TYPE_S32)
.value("F32", DATA_TYPE_F32)
.value("F64", DATA_TYPE_F64)
.value("4S16", DATA_TYPE_4S16)
.export_values();

py::enum_<eDeviceType>(m, "eDeviceType")
66 changes: 60 additions & 6 deletions python/src/py_helpers.cpp
@@ -22,19 +22,23 @@ THE SOFTWARE.

#include "py_helpers.hpp"

#include <pybind11/numpy.h>

#include <core/tensor_layout.hpp>
#include <stdexcept>
#include <string>

eDataType DLTypeToRoccvType(DLDataType dtype) {
if (dtype.lanes != 1) {
throw std::runtime_error("Datatype is not supported.");
}

if (dtype.bits == 8) {
if (dtype.code == kDLUInt) return eDataType::DATA_TYPE_U8;
if (dtype.code == kDLInt) return eDataType::DATA_TYPE_S8;
} else if (dtype.bits == 16) {
-   if (dtype.lanes == 4) {
-       return eDataType::DATA_TYPE_4S16;
-   } else if (dtype.lanes == 1) {
-       if (dtype.code == kDLUInt) return eDataType::DATA_TYPE_U16;
-       if (dtype.code == kDLInt) return eDataType::DATA_TYPE_S16;
-   }
+   if (dtype.code == kDLUInt) return eDataType::DATA_TYPE_U16;
+   if (dtype.code == kDLInt) return eDataType::DATA_TYPE_S16;
} else if (dtype.bits == 32) {
if (dtype.code == kDLFloat) return eDataType::DATA_TYPE_F32;
if (dtype.code == kDLUInt) return eDataType::DATA_TYPE_U32;
Expand Down Expand Up @@ -147,4 +151,54 @@ int2 GetInt2FromTuple(py::tuple src) {
std::runtime_error("Cannot convert py::tuple to int2. py::tuple.size() != 2.");
}
return make_int2(src[0].cast<int>(), src[1].cast<int>());
}

eTensorLayout LayoutFromPyObject(py::object obj) {
if (py::isinstance<eTensorLayout>(obj)) {
return obj.cast<eTensorLayout>();
}

if (py::isinstance<py::str>(obj)) {
std::string s = obj.cast<std::string>();
for (const auto& [layout, name] : roccv::TensorLayout::layoutStringTable) {
if (name == s) return layout;
}
throw std::runtime_error("Unknown tensor layout string: '" + s + "'.");
}

throw std::runtime_error("layout must be an rocpycv.eTensorLayout or a layout string (e.g. 'NHWC').");
}

eDataType DataTypeFromPyObject(py::object obj) {
if (py::isinstance<eDataType>(obj)) {
return obj.cast<eDataType>();
}

// np.dtype() accepts numpy scalar types (np.float32), dtype instances, and dtype strings,
// so we delegate the parsing to NumPy itself rather than enumerating cases here.
py::dtype dt;
try {
static const py::object np_dtype = py::module_::import("numpy").attr("dtype");
dt = np_dtype(obj).cast<py::dtype>();
} catch (const std::exception&) {
throw std::runtime_error("dtype must be an rocpycv.eDataType or a NumPy dtype/scalar type (e.g. np.float32).");
}

DLDataTypeCode code;
switch (dt.kind()) {
case 'u':
code = kDLUInt;
break;
case 'i':
code = kDLInt;
break;
case 'f':
code = kDLFloat;
break;
default:
throw std::runtime_error("Unsupported NumPy dtype for rocpycv.Tensor (kind '" + std::string(1, dt.kind()) +
"').");
}
DLDataType dl{static_cast<uint8_t>(code), static_cast<uint8_t>(dt.itemsize() * 8), 1};
return DLTypeToRoccvType(dl);
}
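
For intuition, the same resolution logic rendered in Python (illustrative only -- the
authoritative mapping is the C++ above; no ``resolve_dtype`` helper is actually exported):

.. code-block:: python

    import numpy as np

    def resolve_dtype(obj):
        # Delegate parsing to NumPy, exactly as the C++ helper does.
        dt = np.dtype(obj)  # accepts np.float32, np.dtype("uint8"), "float32", ...
        kind_to_dl = {"u": "kDLUInt", "i": "kDLInt", "f": "kDLFloat"}
        if dt.kind not in kind_to_dl:
            raise RuntimeError(f"Unsupported NumPy dtype kind {dt.kind!r}")
        return kind_to_dl[dt.kind], dt.itemsize * 8  # (DLPack type code, bits)

    assert resolve_dtype(np.float32) == ("kDLFloat", 32)
    assert resolve_dtype("uint8") == ("kDLUInt", 8)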
9 changes: 8 additions & 1 deletion python/src/py_stream.cpp
@@ -38,8 +38,15 @@ PyStream::~PyStream() {

void PyStream::synchronize() { HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(m_stream)); }

uintptr_t PyStream::getHandle() { return reinterpret_cast<uintptr_t>(m_stream); }

void PyStream::Export(py::module& m) {
py::class_<PyStream>(m, "Stream", "Python wrapper for HIP streams.")
.def(py::init<>(), "Creates a HIP stream.")
.def("synchronize", &PyStream::synchronize, "Blocks until all worked queued on this stream is finished.");
.def("synchronize", &PyStream::synchronize, "Blocks until all worked queued on this stream is finished.")
.def("handle", &PyStream::getHandle,
"Returns the underlying HIP stream handle (hipStream_t) as an integer. "
"Intended for zero-copy interop with frameworks that accept a raw stream handle, "
"e.g. migraphx.run_async(..., stream_handle, \"ihipStream_t\"). "
"The handle is non-owning -- keep the Stream alive while the handle is in use.");
}