diff --git a/python/include/py_helpers.hpp b/python/include/py_helpers.hpp index 369cac10..2e3dc117 100644 --- a/python/include/py_helpers.hpp +++ b/python/include/py_helpers.hpp @@ -86,4 +86,23 @@ extern double2 GetDouble2FromTuple(py::tuple src); * @param src A python tuple of size 2. * @return int2 */ -extern int2 GetInt2FromTuple(py::tuple src); \ No newline at end of file +extern int2 GetInt2FromTuple(py::tuple src); + +/** + * @brief Resolves a Python object to an eTensorLayout. Accepts either an rocpycv.eTensorLayout enum + * value, or a layout string such as "NHWC". Throws std::runtime_error for unsupported inputs. + * + * @param obj A Python object describing the tensor layout. + * @return eTensorLayout + */ +extern eTensorLayout LayoutFromPyObject(py::object obj); + +/** + * @brief Resolves a Python object to an eDataType. Accepts either an rocpycv.eDataType enum value, + * or anything coercible to a NumPy dtype (e.g. ``np.float32``, ``np.dtype("uint8")``, + * ``"float32"``). Throws std::runtime_error for unsupported inputs. + * + * @param obj A Python object describing the tensor data type. + * @return eDataType + */ +extern eDataType DataTypeFromPyObject(py::object obj); \ No newline at end of file diff --git a/python/include/py_stream.hpp b/python/include/py_stream.hpp index 202420c9..a5923359 100644 --- a/python/include/py_stream.hpp +++ b/python/include/py_stream.hpp @@ -55,6 +55,18 @@ class PyStream { */ void synchronize(); + /** + * @brief Returns the wrapped HIP stream handle as an unsigned integer. + * + * Intended for zero-copy interop with frameworks that accept a raw HIP + * stream handle (e.g. ``migraphx.run_async`` with stream type + * ``"ihipStream_t"``). The handle is non-owning -- keep this PyStream alive + * for as long as the handle is in use. + * + * @return uintptr_t + */ + uintptr_t getHandle(); + /** * @brief Exports the PyStream object to the specified python module. * diff --git a/python/include/py_tensor.hpp b/python/include/py_tensor.hpp index 523a5f27..180006d2 100644 --- a/python/include/py_tensor.hpp +++ b/python/include/py_tensor.hpp @@ -38,11 +38,11 @@ class PyTensor : public std::enable_shared_from_this<PyTensor> { * wrap. * * @param shape The shape of the tensor. - * @param layout The layout of the tensor. * @param dtype The data type of the tensor. + * @param layout The layout of the tensor. * @param device The device of the tensor. */ - PyTensor(std::vector<int64_t> shape, eTensorLayout layout, eDataType dtype, eDeviceType device); + PyTensor(std::vector<int64_t> shape, eDataType dtype, eTensorLayout layout, eDeviceType device); /** * @brief Wraps an existing roccv::Tensor inside of a newly constructed PyTensor. @@ -138,6 +138,22 @@ class PyTensor : public std::enable_shared_from_this<PyTensor> { */ eDeviceType getDevice(); + /** + * @brief Returns the address of the tensor's underlying data buffer as an + * unsigned integer. For GPU tensors this is a HIP device address; for CPU + * tensors it is a host address. Use ``device()`` to disambiguate. + * + * The pointer is non-owning. The caller is responsible for ensuring this + * PyTensor remains alive for as long as the pointer is used; otherwise the + * underlying buffer may be freed and the pointer left dangling. + * + * Intended for zero-copy interop with frameworks that accept a raw + * pointer + shape + dtype (e.g. ``migraphx.argument_from_pointer``). + * + * @return uintptr_t + */ + uintptr_t getDataPtr(); + /** * @brief Gets the underlying roccv::Tensor that this tensor container wraps. 
* diff --git a/python/src/main.cpp b/python/src/main.cpp index ddbec687..e34c6fba 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -52,9 +52,60 @@ PYBIND11_MODULE(rocpycv, m) { m.doc() = R"pbdoc( - Python API reference - ----------------------- - This is the Python API reference for rocCV. + rocpycv — AMD GPU-accelerated image pre/post-processing + ======================================================= + + rocpycv is the Python binding for rocCV, a HIP/ROCm image processing + library. It exposes a NumPy-friendly :class:`Tensor` and a suite of + operators (resize, normalize, color conversion, geometric warps, ...) + that run on either GPU (default) or CPU. + + Quick start + ----------- + .. code-block:: python + + import numpy as np + import rocpycv + + # Wrap a NumPy array as a CPU Tensor (zero-copy via DLPack), then + # copy it to the GPU (explicit H2D transfer). + host = np.zeros((1, 480, 640, 3), np.uint8) + src = rocpycv.from_dlpack(host, "NHWC").copy_to(rocpycv.GPU) + + # Functional form: operators allocate and return a new Tensor. + resized = rocpycv.resize(src, (1, 224, 224, 3), rocpycv.LINEAR) + chw = rocpycv.reformat(resized, "NCHW") + + # ``*_into`` form: write into a caller-allocated output, optionally + # on a stream — useful in hot preprocessing loops. + stream = rocpycv.Stream() + out = rocpycv.Tensor((1, 224, 224, 3), np.uint8, "NHWC") + rocpycv.resize_into(out, src, rocpycv.LINEAR, stream) + stream.synchronize() + + Tensors + ------- + :class:`Tensor` arguments accept either rocpycv enums or familiar + Python types: + + * ``dtype`` — ``rocpycv.F32`` or any NumPy dtype/scalar (``np.float32``). + * ``layout`` — ``rocpycv.NHWC`` or a layout string (``"NHWC"``). + + For zero-copy interop, tensors implement the DLPack protocol — pass any + ``__dlpack__``-supporting object (NumPy array, PyTorch tensor, ...) to + :func:`from_dlpack`, and use :meth:`Tensor.data_ptr` to hand a raw GPU + pointer to inference frameworks such as MIGraphX. + + Operators + --------- + Most operators come in two forms: + + * ``op(src, ...)`` — allocates and returns a new :class:`Tensor`. + * ``op_into(dst, src, ...)`` — writes into a pre-allocated output, + avoiding per-call allocation in tight loops. + + All operators accept an optional ``stream`` (a :class:`Stream` wrapping + a ``hipStream_t``) and a ``device`` argument (defaults to GPU). )pbdoc"; PyException::Export(m); PyEnums::Export(m); diff --git a/python/src/operators/py_op_convert_to.cpp b/python/src/operators/py_op_convert_to.cpp index 9100ce3f..ad3a32a5 100644 --- a/python/src/operators/py_op_convert_to.cpp +++ b/python/src/operators/py_op_convert_to.cpp @@ -24,6 +24,8 @@ THE SOFTWARE. #include +#include "py_helpers.hpp" + PyTensor PyOpConvertTo::Execute(PyTensor& input, eDataType dtype, double alpha, double beta, std::optional<std::reference_wrapper<PyStream>> stream, eDeviceType device) { hipStream_t hipStream = stream.has_value() ? 
stream.value().get().getStream() : nullptr; @@ -44,17 +46,23 @@ void PyOpConvertTo::ExecuteInto(PyTensor& output, PyTensor& input, double alpha, void PyOpConvertTo::Export(py::module& m) { using namespace py::literals; - m.def("convert_to", &PyOpConvertTo::Execute, "src"_a, "dtype"_a, "alpha"_a = 1.0, "beta"_a = 0.0, - "stream"_a = nullptr, "device"_a = eDeviceType::GPU, R"pbdoc( - + m.def("convert_to", + [](PyTensor& input, py::object dtype, double alpha, double beta, + std::optional<std::reference_wrapper<PyStream>> stream, eDeviceType device) { + return PyOpConvertTo::Execute(input, DataTypeFromPyObject(dtype), alpha, beta, stream, device); + }, + "src"_a, "dtype"_a, "alpha"_a = 1.0, "beta"_a = 0.0, "stream"_a = nullptr, + "device"_a = eDeviceType::GPU, R"pbdoc( + Executes the Convert To operation on the given HIP stream. See also: Refer to the rocCV C++ API reference for more information on this operation. - + Args: src (rocpycv.Tensor): Input tensor containing one or more images. - dtype (eDataType): Datatype of the output tensor. + dtype: Datatype of the output tensor. Either an ``rocpycv.eDataType`` + (e.g. ``rocpycv.F32``) or a NumPy dtype/scalar type (e.g. ``np.float32``). alpha (double, optional): Scalar for output data. Defaults to 1.0. beta (double, optional): Offset for the data. Defaults to 0.0. stream (rocpycv.Stream, optional): HIP stream to run this operation on. diff --git a/python/src/operators/py_op_reformat.cpp b/python/src/operators/py_op_reformat.cpp index 6cfa4105..1351ca53 100644 --- a/python/src/operators/py_op_reformat.cpp +++ b/python/src/operators/py_op_reformat.cpp @@ -22,6 +22,8 @@ THE SOFTWARE. #include "operators/py_op_reformat.hpp" +#include "py_helpers.hpp" + void PyOpReformat::ExecuteInto(PyTensor& output, PyTensor& input, std::optional<std::reference_wrapper<PyStream>> stream, eDeviceType device) { hipStream_t hipStream = stream.has_value() ? stream.value().get().getStream() : nullptr; @@ -46,8 +48,12 @@ PyTensor PyOpReformat::Execute(PyTensor& input, eTensorLayout outLayout, void PyOpReformat::Export(py::module& m) { using namespace py::literals; - m.def("reformat", &PyOpReformat::Execute, "input"_a, "out_layout"_a, "stream"_a = nullptr, - "device"_a = eDeviceType::GPU, R"pbdoc( + m.def("reformat", + [](PyTensor& input, py::object outLayout, + std::optional<std::reference_wrapper<PyStream>> stream, eDeviceType device) { + return PyOpReformat::Execute(input, LayoutFromPyObject(outLayout), stream, device); + }, + "input"_a, "out_layout"_a, "stream"_a = nullptr, "device"_a = eDeviceType::GPU, R"pbdoc( Executes the Reformat operation and returns the result as a new tensor. See also: @@ -55,7 +61,8 @@ void PyOpReformat::Export(py::module& m) { Args: input (rocpycv.Tensor): Input tensor to reformat. - out_layout (rocpycv.eTensorLayout): The layout to reformat the input tensor to. + out_layout: The layout to reformat the input tensor to. Either an + ``rocpycv.eTensorLayout`` (e.g. ``rocpycv.NCHW``) or a layout string (``"NCHW"``). stream (rocpycv.Stream, optional): HIP stream to run this operation on. device (rocpycv.Device, optional): The device to run this operation on. Defaults to GPU. 
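Taken together, the two bindings above let Python callers skip the rocpycv enums entirely. A minimal usage sketch of the API as bound here (assuming a built `rocpycv` module and a visible GPU; not part of the diff):

```python
import numpy as np
import rocpycv

# dtype accepts a NumPy scalar type; out_layout accepts a plain string.
src = rocpycv.Tensor((1, 224, 224, 3), np.uint8, "NHWC")
f32 = rocpycv.convert_to(src, np.float32, alpha=1.0, beta=0.0)
chw = rocpycv.reformat(f32, "NCHW")
```
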
diff --git a/python/src/py_enums.cpp b/python/src/py_enums.cpp index 0ce39d08..6059c280 100644 --- a/python/src/py_enums.cpp +++ b/python/src/py_enums.cpp @@ -48,7 +48,6 @@ void PyEnums::Export(py::module& m) { .value("S32", DATA_TYPE_S32) .value("F32", DATA_TYPE_F32) .value("F64", DATA_TYPE_F64) - .value("4S16", DATA_TYPE_4S16) .export_values(); py::enum_<eDeviceType>(m, "eDeviceType") diff --git a/python/src/py_helpers.cpp b/python/src/py_helpers.cpp index 97902915..15ea92fc 100644 --- a/python/src/py_helpers.cpp +++ b/python/src/py_helpers.cpp @@ -22,19 +22,23 @@ THE SOFTWARE. #include "py_helpers.hpp" +#include + +#include #include +#include eDataType DLTypeToRoccvType(DLDataType dtype) { + if (dtype.lanes != 1) { + throw std::runtime_error("Datatype is not supported."); + } + if (dtype.bits == 8) { if (dtype.code == kDLUInt) return eDataType::DATA_TYPE_U8; if (dtype.code == kDLInt) return eDataType::DATA_TYPE_S8; } else if (dtype.bits == 16) { - if (dtype.lanes == 4) { - return eDataType::DATA_TYPE_4S16; - } else if (dtype.lanes == 1) { - if (dtype.code == kDLUInt) return eDataType::DATA_TYPE_U16; - if (dtype.code == kDLInt) return eDataType::DATA_TYPE_S16; - } + if (dtype.code == kDLUInt) return eDataType::DATA_TYPE_U16; + if (dtype.code == kDLInt) return eDataType::DATA_TYPE_S16; } else if (dtype.bits == 32) { if (dtype.code == kDLFloat) return eDataType::DATA_TYPE_F32; if (dtype.code == kDLUInt) return eDataType::DATA_TYPE_U32; @@ -147,4 +151,54 @@ int2 GetInt2FromTuple(py::tuple src) { std::runtime_error("Cannot convert py::tuple to int2. py::tuple.size() != 2."); } return make_int2(src[0].cast<int>(), src[1].cast<int>()); +} + +eTensorLayout LayoutFromPyObject(py::object obj) { + if (py::isinstance<eTensorLayout>(obj)) { + return obj.cast<eTensorLayout>(); + } + + if (py::isinstance<py::str>(obj)) { + std::string s = obj.cast<std::string>(); + for (const auto& [layout, name] : roccv::TensorLayout::layoutStringTable) { + if (name == s) return layout; + } + throw std::runtime_error("Unknown tensor layout string: '" + s + "'."); + } + + throw std::runtime_error("layout must be an rocpycv.eTensorLayout or a layout string (e.g. 'NHWC')."); +} + +eDataType DataTypeFromPyObject(py::object obj) { + if (py::isinstance<eDataType>(obj)) { + return obj.cast<eDataType>(); + } + + // np.dtype() accepts numpy scalar types (np.float32), dtype instances, and dtype strings, + // so we delegate the parsing to NumPy itself rather than enumerating cases here. + py::dtype dt; + try { + static const py::object np_dtype = py::module_::import("numpy").attr("dtype"); + dt = np_dtype(obj).cast<py::dtype>(); + } catch (const std::exception&) { + throw std::runtime_error("dtype must be an rocpycv.eDataType or a NumPy dtype/scalar type (e.g. 
np.float32)."); + } + + DLDataTypeCode code; + switch (dt.kind()) { + case 'u': + code = kDLUInt; + break; + case 'i': + code = kDLInt; + break; + case 'f': + code = kDLFloat; + break; + default: + throw std::runtime_error("Unsupported NumPy dtype for rocpycv.Tensor (kind '" + std::string(1, dt.kind()) + + "')."); + } + DLDataType dl{static_cast<uint8_t>(code), static_cast<uint8_t>(dt.itemsize() * 8), 1}; + return DLTypeToRoccvType(dl); } \ No newline at end of file diff --git a/python/src/py_stream.cpp b/python/src/py_stream.cpp index ac6ae73f..c830bc8e 100644 --- a/python/src/py_stream.cpp +++ b/python/src/py_stream.cpp @@ -38,8 +38,15 @@ PyStream::~PyStream() { void PyStream::synchronize() { HIP_VALIDATE_NO_ERRORS(hipStreamSynchronize(m_stream)); } +uintptr_t PyStream::getHandle() { return reinterpret_cast<uintptr_t>(m_stream); } + void PyStream::Export(py::module& m) { py::class_<PyStream>(m, "Stream", "Python wrapper for HIP streams.") .def(py::init<>(), "Creates a HIP stream.") - .def("synchronize", &PyStream::synchronize, "Blocks until all worked queued on this stream is finished."); + .def("synchronize", &PyStream::synchronize, "Blocks until all work queued on this stream has finished.") + .def("handle", &PyStream::getHandle, + "Returns the underlying HIP stream handle (hipStream_t) as an integer. " + "Intended for zero-copy interop with frameworks that accept a raw stream handle, " + "e.g. migraphx.run_async(..., stream_handle, \"ihipStream_t\"). " + "The handle is non-owning -- keep the Stream alive while the handle is in use."); } \ No newline at end of file diff --git a/python/src/py_tensor.cpp b/python/src/py_tensor.cpp index 913a25a4..5c757c02 100644 --- a/python/src/py_tensor.cpp +++ b/python/src/py_tensor.cpp @@ -59,7 +59,7 @@ DLManagedTensor* createDLManagedTensor(std::shared_ptr<PyTensor> tensor, st return dlTensor; } -PyTensor::PyTensor(std::vector<int64_t> shape, eTensorLayout layout, eDataType dtype, eDeviceType device) { +PyTensor::PyTensor(std::vector<int64_t> shape, eDataType dtype, eTensorLayout layout, eDeviceType device) { roccv::TensorShape tShape(roccv::TensorShape(roccv::TensorLayout(layout), shape)); m_tensor = std::make_shared<roccv::Tensor>(tShape, roccv::DataType(dtype), device); } /** * @brief Wraps an existing roccv::Tensor inside of a newly constructed PyTensor. @@ -178,6 +178,11 @@ eTensorLayout PyTensor::getLayout() { return m_tensor->layout().elayout(); } eDeviceType PyTensor::getDevice() { return m_tensor->device(); } +uintptr_t PyTensor::getDataPtr() { + auto tensorData = m_tensor->exportData(); + return reinterpret_cast<uintptr_t>(tensorData.basePtr()); +} + std::shared_ptr<roccv::Tensor> PyTensor::getTensor() { return m_tensor; } py::tuple PyTensor::getDLDevice() { @@ -197,8 +202,15 @@ void PyTensor::Export(pybind11::module& m) { pybind11::class_<PyTensor, std::shared_ptr<PyTensor>> tensor(m, "Tensor"); tensor - .def(pybind11::init<std::vector<int64_t>, eTensorLayout, eDataType, eDeviceType>(), "shape"_a, "layout"_a, - "dtype"_a, "device"_a = eDeviceType::GPU, "Constructs a tensor object.") + .def(pybind11::init([](std::vector<int64_t> shape, py::object dtype, py::object layout, eDeviceType device) { + return std::make_shared<PyTensor>(shape, DataTypeFromPyObject(dtype), LayoutFromPyObject(layout), + device); + }), + "shape"_a, "dtype"_a, "layout"_a, "device"_a = eDeviceType::GPU, + "Constructs a tensor object. ``dtype`` may be an ``rocpycv.eDataType`` (e.g. " + "``rocpycv.F32``) or a NumPy dtype/scalar type (e.g. ``np.float32``). ``layout`` " + "may be an ``rocpycv.eTensorLayout`` (e.g. 
``rocpycv.NHWC``) or a layout string " + "(``\"NHWC\"``).") .def("copy_to", &PyTensor::copyTo, "device"_a, "Returns a deep copy of the tensor with data copied to a specified device type.") .def("__dlpack__", &PyTensor::toDLPack, "stream"_a = py::none(), @@ -207,12 +219,27 @@ void PyTensor::Export(pybind11::module& m) { .def("shape", &PyTensor::getShape, "Returns a list representing the tensor shape.") .def("layout", &PyTensor::getLayout, "Returns the layout for this tensor.") .def("device", &PyTensor::getDevice, "Returns the device this tensor is on.") + .def("data_ptr", &PyTensor::getDataPtr, + "Returns the address of the tensor's underlying buffer as an integer. " + "For GPU tensors this is a HIP device address; for CPU tensors a host address. " + "The pointer is non-owning -- keep the tensor alive for as long as the pointer is used. " + "Intended for zero-copy interop with frameworks like MIGraphX.") .def("ndim", &PyTensor::getRank, "Returns the number of dimensions of the tensor.") .def("dtype", &PyTensor::getDataType, "Returns the data type of the tensor.") .def("__dlpack_device__", &PyTensor::getDLDevice, "Returns a tuple containing the DLPack device and device id for the tensor.") - .def("reshape", &PyTensor::reshape, "new_shape"_a, "layout"_a, - "Creates a new tensor with the specified shape."); - m.def("from_dlpack", &PyTensor::fromDLPack, "buffer"_a, "layout"_a, - "Wraps a DLPack supported tensor in a rocpycv tensor."); + .def( + "reshape", + [](PyTensor& self, std::vector<int64_t> newShape, py::object layout) { + return self.reshape(newShape, LayoutFromPyObject(layout)); + }, + "new_shape"_a, "layout"_a, + "Creates a new tensor with the specified shape. ``layout`` may be an " + "``rocpycv.eTensorLayout`` or a layout string (e.g. ``\"NHWC\"``)."); + m.def( + "from_dlpack", + [](pybind11::object src, py::object layout) { return PyTensor::fromDLPack(src, LayoutFromPyObject(layout)); }, + "buffer"_a, "layout"_a, + "Wraps a DLPack supported tensor in a rocpycv tensor. ``layout`` may be an " + "``rocpycv.eTensorLayout`` or a layout string (e.g. ``\"NHWC\"``)."); } \ No newline at end of file diff --git a/python/src/rocpycv.pyi b/python/src/rocpycv.pyi index 855a8d5a..270fb848 100644 --- a/python/src/rocpycv.pyi +++ b/python/src/rocpycv.pyi @@ -1,8 +1,59 @@ """ - Python API reference - ----------------------- - This is the Python API reference for rocCV. + rocpycv — AMD GPU-accelerated image pre/post-processing + ======================================================= + + rocpycv is the Python binding for rocCV, a HIP/ROCm image processing + library. It exposes a NumPy-friendly :class:`Tensor` and a suite of + operators (resize, normalize, color conversion, geometric warps, ...) + that run on either GPU (default) or CPU. + + Quick start + ----------- + .. code-block:: python + + import numpy as np + import rocpycv + + # Wrap a NumPy array as a CPU Tensor (zero-copy via DLPack), then + # copy it to the GPU (explicit H2D transfer). + host = np.zeros((1, 480, 640, 3), np.uint8) + src = rocpycv.from_dlpack(host, "NHWC").copy_to(rocpycv.GPU) + + # Functional form: operators allocate and return a new Tensor. + resized = rocpycv.resize(src, (1, 224, 224, 3), rocpycv.LINEAR) + chw = rocpycv.reformat(resized, "NCHW") + + # ``*_into`` form: write into a caller-allocated output, optionally + # on a stream — useful in hot preprocessing loops. 
+ stream = rocpycv.Stream() + out = rocpycv.Tensor((1, 224, 224, 3), np.uint8, "NHWC") + rocpycv.resize_into(out, src, rocpycv.LINEAR, stream) + stream.synchronize() + + Tensors + ------- + :class:`Tensor` arguments accept either rocpycv enums or familiar + Python types: + + * ``dtype`` — ``rocpycv.F32`` or any NumPy dtype/scalar (``np.float32``). + * ``layout`` — ``rocpycv.NHWC`` or a layout string (``"NHWC"``). + + For zero-copy interop, tensors implement the DLPack protocol — pass any + ``__dlpack__``-supporting object (NumPy array, PyTorch tensor, ...) to + :func:`from_dlpack`, and use :meth:`Tensor.data_ptr` to hand a raw GPU + pointer to inference frameworks such as MIGraphX. + + Operators + --------- + Most operators come in two forms: + + * ``op(src, ...)`` — allocates and returns a new :class:`Tensor`. + * ``op_into(dst, src, ...)`` — writes into a pre-allocated output, + avoiding per-call allocation in tight loops. + + All operators accept an optional ``stream`` (a :class:`Stream` wrapping + a ``hipStream_t``) and a ``device`` argument (defaults to GPU). """ from __future__ import annotations @@ -153,6 +204,10 @@ class Stream: """ Creates a HIP stream. """ + def handle(self) -> int: + """ + Returns the underlying HIP stream handle (hipStream_t) as an integer. Intended for zero-copy interop with frameworks that accept a raw stream handle, e.g. migraphx.run_async(..., stream_handle, "ihipStream_t"). The handle is non-owning -- keep the Stream alive while the handle is in use. + """ def synchronize(self) -> None: """ Blocks until all worked queued on this stream is finished. @@ -166,14 +221,18 @@ class Tensor: """ Returns a tuple containing the DLPack device and device id for the tensor. """ - def __init__(self, shape: collections.abc.Sequence[typing.SupportsInt | typing.SupportsIndex], layout: eTensorLayout, dtype: eDataType, device: eDeviceType = ...) -> None: + def __init__(self, shape: collections.abc.Sequence[typing.SupportsInt | typing.SupportsIndex], dtype: typing.Any, layout: typing.Any, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ - Constructs a tensor object. + Constructs a tensor object. ``dtype`` may be an ``rocpycv.eDataType`` (e.g. ``rocpycv.F32``) or a NumPy dtype/scalar type (e.g. ``np.float32``). ``layout`` may be an ``rocpycv.eTensorLayout`` (e.g. ``rocpycv.NHWC``) or a layout string (``"NHWC"``). """ def copy_to(self, device: eDeviceType) -> Tensor: """ Returns a deep copy of the tensor with data copied to a specified device type. """ + def data_ptr(self) -> int: + """ + Returns the address of the tensor's underlying buffer as an integer. For GPU tensors this is a HIP device address; for CPU tensors a host address. The pointer is non-owning -- keep the tensor alive for as long as the pointer is used. Intended for zero-copy interop with frameworks like MIGraphX. + """ def device(self) -> eDeviceType: """ Returns the device this tensor is on. @@ -190,9 +249,9 @@ class Tensor: """ Returns the number of dimensions of the tensor. """ - def reshape(self, new_shape: collections.abc.Sequence[typing.SupportsInt | typing.SupportsIndex], layout: eTensorLayout) -> Tensor: + def reshape(self, new_shape: collections.abc.Sequence[typing.SupportsInt | typing.SupportsIndex], layout: typing.Any) -> Tensor: """ - Creates a new tensor with the specified shape. + Creates a new tensor with the specified shape. ``layout`` may be an ``rocpycv.eTensorLayout`` or a layout string (e.g. ``"NHWC"``). 
""" def shape(self) -> list[int]: """ @@ -472,8 +531,6 @@ class eDataType: F32 F64 - - 4S16 """ F32: typing.ClassVar[eDataType] # value = F64: typing.ClassVar[eDataType] # value = @@ -483,7 +540,7 @@ class eDataType: U16: typing.ClassVar[eDataType] # value = U32: typing.ClassVar[eDataType] # value = U8: typing.ClassVar[eDataType] # value = - __members__: typing.ClassVar[dict[str, eDataType]] # value = {'U8': , 'S8': , 'U16': , 'S16': , 'U32': , 'S32': , 'F32': , 'F64': , '4S16': } + __members__: typing.ClassVar[dict[str, eDataType]] # value = {'U8': , 'S8': , 'U16': , 'S16': , 'U32': , 'S32': , 'F32': , 'F64': } def __eq__(self, other: typing.Any) -> bool: ... def __getstate__(self) -> int: @@ -728,7 +785,7 @@ class eThresholdType: @property def value(self) -> int: ... -def advcvtcolor(src: Tensor, conversion_code: eColorConversionCode, color_spec: eColorSpec, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def advcvtcolor(src: Tensor, conversion_code: eColorConversionCode, color_spec: eColorSpec, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Advanced Color Convert operation on the given HIP stream. @@ -745,7 +802,7 @@ def advcvtcolor(src: Tensor, conversion_code: eColorConversionCode, color_spec: Returns: rocpycv.Tensor: The output tensor. """ -def advcvtcolor_into(dst: Tensor, src: Tensor, conversion_code: eColorConversionCode, color_spec: eColorSpec, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def advcvtcolor_into(dst: Tensor, src: Tensor, conversion_code: eColorConversionCode, color_spec: eColorSpec, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Advanced Color Convert operation on the given HIP stream. @@ -763,7 +820,7 @@ def advcvtcolor_into(dst: Tensor, src: Tensor, conversion_code: eColorConversion Returns: None """ -def bilateral_filter(src: Tensor, diameter: typing.SupportsInt | typing.SupportsIndex, sigmaColor: typing.SupportsFloat | typing.SupportsIndex, sigmaSpace: typing.SupportsFloat | typing.SupportsIndex, borderMode: eBorderType, borderValue: list, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def bilateral_filter(src: Tensor, diameter: typing.SupportsInt | typing.SupportsIndex, sigmaColor: typing.SupportsFloat | typing.SupportsIndex, sigmaSpace: typing.SupportsFloat | typing.SupportsIndex, borderMode: eBorderType, borderValue: list, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Bilateral Filter operation on the given HIP stream. @@ -783,7 +840,7 @@ def bilateral_filter(src: Tensor, diameter: typing.SupportsInt | typing.Supports Returns: rocpycv.Tensor: The output tensor. """ -def bilateral_filter_into(dst: Tensor, src: Tensor, diameter: typing.SupportsInt | typing.SupportsIndex, sigmaColor: typing.SupportsFloat | typing.SupportsIndex, sigmaSpace: typing.SupportsFloat | typing.SupportsIndex, borderMode: eBorderType, borderValue: list, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) 
-> None: +def bilateral_filter_into(dst: Tensor, src: Tensor, diameter: typing.SupportsInt | typing.SupportsIndex, sigmaColor: typing.SupportsFloat | typing.SupportsIndex, sigmaSpace: typing.SupportsFloat | typing.SupportsIndex, borderMode: eBorderType, borderValue: list, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Bilateral Filter operation on the given HIP stream. @@ -804,7 +861,7 @@ def bilateral_filter_into(dst: Tensor, src: Tensor, diameter: typing.SupportsInt Returns: None """ -def bndbox(src: Tensor, bnd_boxes: BndBoxes, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def bndbox(src: Tensor, bnd_boxes: BndBoxes, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the BndBox operation on the given HIP stream. @@ -820,7 +877,7 @@ def bndbox(src: Tensor, bnd_boxes: BndBoxes, stream: rocpycv.Stream | None = Non Returns: rocpycv.Tensor: The output tensor. """ -def bndbox_into(dst: Tensor, src: Tensor, bnd_boxes: BndBoxes, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def bndbox_into(dst: Tensor, src: Tensor, bnd_boxes: BndBoxes, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the BndBox operation on the given HIP stream. @@ -837,7 +894,7 @@ def bndbox_into(dst: Tensor, src: Tensor, bnd_boxes: BndBoxes, stream: rocpycv.S Returns: None """ -def center_crop(src: Tensor, crop_size: tuple, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def center_crop(src: Tensor, crop_size: tuple, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Center Crop operation on the given HIP stream. @@ -854,7 +911,7 @@ def center_crop(src: Tensor, crop_size: tuple, stream: rocpycv.Stream | None = N Returns: rocpycv.Tensor: The output tensor. """ -def center_crop_into(dst: Tensor, src: Tensor, crop_size: tuple, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def center_crop_into(dst: Tensor, src: Tensor, crop_size: tuple, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Center Crop operation on the given HIP stream. @@ -871,7 +928,7 @@ def center_crop_into(dst: Tensor, src: Tensor, crop_size: tuple, stream: rocpycv Returns: None """ -def composite(foreground: Tensor, background: Tensor, fgmask: Tensor, outchannels: typing.SupportsInt | typing.SupportsIndex, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def composite(foreground: Tensor, background: Tensor, fgmask: Tensor, outchannels: typing.SupportsInt | typing.SupportsIndex, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Composite operation on the given HIP stream. @@ -889,7 +946,7 @@ def composite(foreground: Tensor, background: Tensor, fgmask: Tensor, outchannel Returns: rocpycv.Tensor: The output tensor with number of channels. """ -def composite_into(dst: Tensor, foreground: Tensor, background: Tensor, fgmask: Tensor, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) 
-> None: +def composite_into(dst: Tensor, foreground: Tensor, background: Tensor, fgmask: Tensor, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Composite operation on the given HIP stream. @@ -907,16 +964,17 @@ def composite_into(dst: Tensor, foreground: Tensor, background: Tensor, fgmask: Returns: None """ -def convert_to(src: Tensor, dtype: eDataType, alpha: typing.SupportsFloat | typing.SupportsIndex = 1.0, beta: typing.SupportsFloat | typing.SupportsIndex = 0.0, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def convert_to(src: Tensor, dtype: typing.Any, alpha: typing.SupportsFloat | typing.SupportsIndex = 1.0, beta: typing.SupportsFloat | typing.SupportsIndex = 0.0, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Convert To operation on the given HIP stream. See also: Refer to the rocCV C++ API reference for more information on this operation. - + Args: src (rocpycv.Tensor): Input tensor containing one or more images. - dtype (eDataType): Datatype of the output tensor. + dtype: Datatype of the output tensor. Either an ``rocpycv.eDataType`` + (e.g. ``rocpycv.F32``) or a NumPy dtype/scalar type (e.g. ``np.float32``). alpha (double, optional): Scalar for output data. Defaults to 1.0. beta (double, optional): Offset for the data. Defaults to 0.0. stream (rocpycv.Stream, optional): HIP stream to run this operation on. @@ -925,7 +983,7 @@ def convert_to(src: Tensor, dtype: eDataType, alpha: typing.SupportsFloat | typi Returns: rocpycv.Tensor: The output tensor. """ -def convert_to_into(dst: Tensor, src: Tensor, alpha: typing.SupportsFloat | typing.SupportsIndex = 1.0, beta: typing.SupportsFloat | typing.SupportsIndex = 0.0, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def convert_to_into(dst: Tensor, src: Tensor, alpha: typing.SupportsFloat | typing.SupportsIndex = 1.0, beta: typing.SupportsFloat | typing.SupportsIndex = 0.0, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Convert To operation on the given HIP stream. @@ -943,7 +1001,7 @@ def convert_to_into(dst: Tensor, src: Tensor, alpha: typing.SupportsFloat | typi Returns: None """ -def copymakeborder(src: Tensor, border_mode: eBorderType = ..., border_value: list = [0.0, 0.0, 0.0, 0.0], top: typing.SupportsInt | typing.SupportsIndex, bottom: typing.SupportsInt | typing.SupportsIndex, left: typing.SupportsInt | typing.SupportsIndex, right: typing.SupportsInt | typing.SupportsIndex, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def copymakeborder(src: Tensor, border_mode: eBorderType = eBorderType.eBorderType.CONSTANT, border_value: list = [0.0, 0.0, 0.0, 0.0], top: typing.SupportsInt | typing.SupportsIndex, bottom: typing.SupportsInt | typing.SupportsIndex, left: typing.SupportsInt | typing.SupportsIndex, right: typing.SupportsInt | typing.SupportsIndex, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the CopyMakeBorder operation on the given HIP stream. @@ -964,7 +1022,7 @@ def copymakeborder(src: Tensor, border_mode: eBorderType = ..., border_value: li Returns: rocpycv.Tensor: The output tensor. 
""" -def copymakeborder_into(dst: Tensor, src: Tensor, border_mode: eBorderType = ..., border_value: list = [0.0, 0.0, 0.0, 0.0], top: typing.SupportsInt | typing.SupportsIndex, left: typing.SupportsInt | typing.SupportsIndex, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def copymakeborder_into(dst: Tensor, src: Tensor, border_mode: eBorderType = eBorderType.eBorderType.CONSTANT, border_value: list = [0.0, 0.0, 0.0, 0.0], top: typing.SupportsInt | typing.SupportsIndex, left: typing.SupportsInt | typing.SupportsIndex, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the CopyMakeBorder operation on the given HIP stream. @@ -984,7 +1042,7 @@ def copymakeborder_into(dst: Tensor, src: Tensor, border_mode: eBorderType = ... Returns: None """ -def custom_crop(src: Tensor, crop_rect: Box, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def custom_crop(src: Tensor, crop_rect: Box, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Custom Crop operation on the given HIP stream. @@ -1001,7 +1059,7 @@ def custom_crop(src: Tensor, crop_rect: Box, stream: rocpycv.Stream | None = Non Returns: None """ -def custom_crop_into(dst: Tensor, src: Tensor, crop_rect: Box, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def custom_crop_into(dst: Tensor, src: Tensor, crop_rect: Box, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Custom Crop operation on the given HIP stream. @@ -1017,7 +1075,7 @@ def custom_crop_into(dst: Tensor, src: Tensor, crop_rect: Box, stream: rocpycv.S Returns: rocpycv.Tensor: The output tensor. """ -def cvtcolor(src: Tensor, conversion_code: eColorConversionCode, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def cvtcolor(src: Tensor, conversion_code: eColorConversionCode, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Color Convert operation on the given HIP stream. @@ -1033,7 +1091,7 @@ def cvtcolor(src: Tensor, conversion_code: eColorConversionCode, stream: rocpycv Returns: rocpycv.Tensor: The output tensor. """ -def cvtcolor_into(dst: Tensor, src: Tensor, conversion_code: eColorConversionCode, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def cvtcolor_into(dst: Tensor, src: Tensor, conversion_code: eColorConversionCode, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Color Convert operation on the given HIP stream. @@ -1050,7 +1108,7 @@ def cvtcolor_into(dst: Tensor, src: Tensor, conversion_code: eColorConversionCod Returns: None """ -def flip(src: Tensor, flip_code: typing.SupportsInt | typing.SupportsIndex, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def flip(src: Tensor, flip_code: typing.SupportsInt | typing.SupportsIndex, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Flip operation on the given HIP stream. @@ -1066,7 +1124,7 @@ def flip(src: Tensor, flip_code: typing.SupportsInt | typing.SupportsIndex, stre Returns: rocpycv.Tensor: The output tensor. 
""" -def flip_into(dst: Tensor, src: Tensor, flip_code: typing.SupportsInt | typing.SupportsIndex, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def flip_into(dst: Tensor, src: Tensor, flip_code: typing.SupportsInt | typing.SupportsIndex, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Flip operation on the given HIP stream. @@ -1083,11 +1141,11 @@ def flip_into(dst: Tensor, src: Tensor, flip_code: typing.SupportsInt | typing.S Returns: None """ -def from_dlpack(buffer: typing.Any, layout: eTensorLayout) -> Tensor: +def from_dlpack(buffer: typing.Any, layout: typing.Any) -> Tensor: """ - Wraps a DLPack supported tensor in a rocpycv tensor. + Wraps a DLPack supported tensor in a rocpycv tensor. ``layout`` may be an ``rocpycv.eTensorLayout`` or a layout string (e.g. ``"NHWC"``). """ -def gamma_contrast(src: Tensor, gamma: typing.SupportsFloat | typing.SupportsIndex, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def gamma_contrast(src: Tensor, gamma: typing.SupportsFloat | typing.SupportsIndex, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Gamma Contrast operation on the given HIP stream. @@ -1103,7 +1161,7 @@ def gamma_contrast(src: Tensor, gamma: typing.SupportsFloat | typing.SupportsInd Returns: rocpycv.Tensor: The output tensor. """ -def gamma_contrast_into(dst: Tensor, src: Tensor, gamma: typing.SupportsFloat | typing.SupportsIndex, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def gamma_contrast_into(dst: Tensor, src: Tensor, gamma: typing.SupportsFloat | typing.SupportsIndex, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Gamma Contrast operation on the given HIP stream. @@ -1120,7 +1178,7 @@ def gamma_contrast_into(dst: Tensor, src: Tensor, gamma: typing.SupportsFloat | Returns: None """ -def histogram(src: Tensor, mask: rocpycv.Tensor | None, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def histogram(src: Tensor, mask: rocpycv.Tensor | None, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Histogram operation on the given HIP stream. @@ -1136,7 +1194,7 @@ def histogram(src: Tensor, mask: rocpycv.Tensor | None, stream: rocpycv.Stream | Returns: rocpycv.Tensor: Output tensor with width of 256 and a height equal to the batch size of input (1 if HWC input). """ -def histogram_into(dst: Tensor, src: Tensor, mask: rocpycv.Tensor | None, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def histogram_into(dst: Tensor, src: Tensor, mask: rocpycv.Tensor | None, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Histogram operation on the given HIP stream. @@ -1153,7 +1211,7 @@ def histogram_into(dst: Tensor, src: Tensor, mask: rocpycv.Tensor | None, stream Returns: None """ -def nms(src: Tensor, scores: Tensor, score_threshold: typing.SupportsFloat | typing.SupportsIndex = 1.1920928955078125e-07, iou_threshold: typing.SupportsFloat | typing.SupportsIndex = 1.0, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) 
-> Tensor: +def nms(src: Tensor, scores: Tensor, score_threshold: typing.SupportsFloat | typing.SupportsIndex = 1.1920928955078125e-07, iou_threshold: typing.SupportsFloat | typing.SupportsIndex = 1.0, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Non-maximum Suppression operation on the given HIP stream. @@ -1171,7 +1229,7 @@ def nms(src: Tensor, scores: Tensor, score_threshold: typing.SupportsFloat | typ Returns: rocpycv.Tensor: The output tensor of shape [i, j], containing 1 (kept) or 0 (suppressed) for each bounding box (j) per batch (i). Results will be written to this tensor. """ -def nms_into(dst: Tensor, src: Tensor, scores: Tensor, score_threshold: typing.SupportsFloat | typing.SupportsIndex = 1.1920928955078125e-07, iou_threshold: typing.SupportsFloat | typing.SupportsIndex = 1.0, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def nms_into(dst: Tensor, src: Tensor, scores: Tensor, score_threshold: typing.SupportsFloat | typing.SupportsIndex = 1.1920928955078125e-07, iou_threshold: typing.SupportsFloat | typing.SupportsIndex = 1.0, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Non-maximum Suppression operation on the given HIP stream. @@ -1190,7 +1248,7 @@ def nms_into(dst: Tensor, src: Tensor, scores: Tensor, score_threshold: typing.S Returns: None """ -def normalize(src: Tensor, base: Tensor, scale: Tensor, flags: typing.SupportsInt | typing.SupportsIndex | None = None, globalscale: typing.SupportsFloat | typing.SupportsIndex = 1.0, globalshift: typing.SupportsFloat | typing.SupportsIndex = 0.0, epsilon: typing.SupportsFloat | typing.SupportsIndex = 0.0, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def normalize(src: Tensor, base: Tensor, scale: Tensor, flags: typing.SupportsInt | typing.SupportsIndex | None = None, globalscale: typing.SupportsFloat | typing.SupportsIndex = 1.0, globalshift: typing.SupportsFloat | typing.SupportsIndex = 0.0, epsilon: typing.SupportsFloat | typing.SupportsIndex = 0.0, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Normalize operation on the given HIP stream. @@ -1211,7 +1269,7 @@ def normalize(src: Tensor, base: Tensor, scale: Tensor, flags: typing.SupportsIn Returns: rocpycv.Tensor: The output tensor. """ -def normalize_into(dst: Tensor, src: Tensor, base: Tensor, scale: Tensor, flags: typing.SupportsInt | typing.SupportsIndex | None = None, globalscale: typing.SupportsFloat | typing.SupportsIndex = 1.0, globalshift: typing.SupportsFloat | typing.SupportsIndex = 0.0, epsilon: typing.SupportsFloat | typing.SupportsIndex = 0.0, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def normalize_into(dst: Tensor, src: Tensor, base: Tensor, scale: Tensor, flags: typing.SupportsInt | typing.SupportsIndex | None = None, globalscale: typing.SupportsFloat | typing.SupportsIndex = 1.0, globalshift: typing.SupportsFloat | typing.SupportsIndex = 0.0, epsilon: typing.SupportsFloat | typing.SupportsIndex = 0.0, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Normalize operation on the given HIP stream. 
@@ -1233,7 +1291,7 @@ def normalize_into(dst: Tensor, src: Tensor, base: Tensor, scale: Tensor, flags: Returns: None """ -def reformat(input: Tensor, out_layout: eTensorLayout, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def reformat(input: Tensor, out_layout: typing.Any, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Reformat operation and returns the result as a new tensor. @@ -1242,14 +1300,15 @@ def reformat(input: Tensor, out_layout: eTensorLayout, stream: rocpycv.Stream | Args: input (rocpycv.Tensor): Input tensor to reformat. - out_layout (rocpycv.eTensorLayout): The layout to reformat the input tensor to. + out_layout: The layout to reformat the input tensor to. Either an + ``rocpycv.eTensorLayout`` (e.g. ``rocpycv.NCHW``) or a layout string (``"NCHW"``). stream (rocpycv.Stream, optional): HIP stream to run this operation on. device (rocpycv.Device, optional): The device to run this operation on. Defaults to GPU. Returns: rocpycv.Tensor: The reformatted tensor. """ -def reformat_into(output: Tensor, input: Tensor, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def reformat_into(output: Tensor, input: Tensor, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Reformat operation on the given HIP stream. @@ -1265,7 +1324,7 @@ def reformat_into(output: Tensor, input: Tensor, stream: rocpycv.Stream | None = Returns: None """ -def remap(src: Tensor, map: Tensor, in_interpolation: eInterpolationType, map_interpolation: eInterpolationType, map_value_type: eRemapType, align_corners: bool, border_type: eBorderType, border_value: list, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def remap(src: Tensor, map: Tensor, in_interpolation: eInterpolationType, map_interpolation: eInterpolationType, map_value_type: eRemapType, align_corners: bool, border_type: eBorderType, border_value: list, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Remap operation on the given HIP stream. @@ -1287,7 +1346,7 @@ def remap(src: Tensor, map: Tensor, in_interpolation: eInterpolationType, map_in Returns: rocpycv.Tensor: The output tensor. """ -def remap_into(dst: Tensor, src: Tensor, map: Tensor, in_interpolation: eInterpolationType, map_interpolation: eInterpolationType, map_value_type: eRemapType, align_corners: bool, border_type: eBorderType, border_value: list, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def remap_into(dst: Tensor, src: Tensor, map: Tensor, in_interpolation: eInterpolationType, map_interpolation: eInterpolationType, map_value_type: eRemapType, align_corners: bool, border_type: eBorderType, border_value: list, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Remap operation on the given HIP stream. @@ -1310,7 +1369,7 @@ def remap_into(dst: Tensor, src: Tensor, map: Tensor, in_interpolation: eInterpo Returns: None """ -def resize(src: Tensor, shape: tuple, interp: eInterpolationType, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def resize(src: Tensor, shape: tuple, interp: eInterpolationType, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Resize operation on the given HIP stream. 
@@ -1327,7 +1386,7 @@ def resize(src: Tensor, shape: tuple, interp: eInterpolationType, stream: rocpyc Returns: rocpycv.Tensor: The output tensor. """ -def resize_into(dst: Tensor, src: Tensor, interp: eInterpolationType, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def resize_into(dst: Tensor, src: Tensor, interp: eInterpolationType, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Resize operation on the given HIP stream. @@ -1344,7 +1403,7 @@ def resize_into(dst: Tensor, src: Tensor, interp: eInterpolationType, stream: ro Returns: None """ -def rotate(src: Tensor, angle_deg: typing.SupportsFloat | typing.SupportsIndex, shift: tuple, interpolation: eInterpolationType, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def rotate(src: Tensor, angle_deg: typing.SupportsFloat | typing.SupportsIndex, shift: tuple, interpolation: eInterpolationType, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Rotate operation on the given HIP stream. @@ -1362,7 +1421,7 @@ def rotate(src: Tensor, angle_deg: typing.SupportsFloat | typing.SupportsIndex, Returns: rocpycv.Tensor: The output tensor. """ -def rotate_into(dst: Tensor, src: Tensor, angle_deg: typing.SupportsFloat | typing.SupportsIndex, shift: tuple, interpolation: eInterpolationType, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def rotate_into(dst: Tensor, src: Tensor, angle_deg: typing.SupportsFloat | typing.SupportsIndex, shift: tuple, interpolation: eInterpolationType, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Rotate operation on the given HIP stream. @@ -1381,7 +1440,7 @@ def rotate_into(dst: Tensor, src: Tensor, angle_deg: typing.SupportsFloat | typi Returns: None """ -def threshold(src: Tensor, thresh: Tensor, maxVal: Tensor, maxBatchSize: typing.SupportsInt | typing.SupportsIndex, threshType: eThresholdType, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def threshold(src: Tensor, thresh: Tensor, maxVal: Tensor, maxBatchSize: typing.SupportsInt | typing.SupportsIndex, threshType: eThresholdType, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Thresholding operation on the given HIP stream. @@ -1397,7 +1456,7 @@ def threshold(src: Tensor, thresh: Tensor, maxVal: Tensor, maxBatchSize: typing. stream (rocpycv.Stream, optional): HIP stream to run this operation on. device (rocpycv.Device, optional): The device to run this operation on. Defaults to GPU. """ -def threshold_into(dst: Tensor, src: Tensor, thresh: Tensor, maxVal: Tensor, maxBatchSize: typing.SupportsInt | typing.SupportsIndex, threshType: eThresholdType, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def threshold_into(dst: Tensor, src: Tensor, thresh: Tensor, maxVal: Tensor, maxBatchSize: typing.SupportsInt | typing.SupportsIndex, threshType: eThresholdType, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Thresholding operation on the given HIP stream. @@ -1414,7 +1473,7 @@ def threshold_into(dst: Tensor, src: Tensor, thresh: Tensor, maxVal: Tensor, max stream (rocpycv.Stream, optional): HIP stream to run this operation on. device (rocpycv.Device, optional): The device to run this operation on. 
Defaults to GPU. """ -def warp_affine(src: Tensor, xform: list, inverted: bool, interp: eInterpolationType, border_mode: eBorderType, border_value: list, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def warp_affine(src: Tensor, xform: list, inverted: bool, interp: eInterpolationType, border_mode: eBorderType, border_value: list, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Warp Affine operation on the given HIP stream. @@ -1434,7 +1493,7 @@ def warp_affine(src: Tensor, xform: list, inverted: bool, interp: eInterpolation Returns: rocpycv.Tensor: The output tensor. """ -def warp_affine_into(dst: Tensor, src: Tensor, xform: list, inverted: bool, interp: eInterpolationType, border_mode: eBorderType, border_value: list, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def warp_affine_into(dst: Tensor, src: Tensor, xform: list, inverted: bool, interp: eInterpolationType, border_mode: eBorderType, border_value: list, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Warp Affine operation on the given HIP stream. @@ -1455,7 +1514,7 @@ def warp_affine_into(dst: Tensor, src: Tensor, xform: list, inverted: bool, inte Returns: None """ -def warp_perspective(src: Tensor, xform: list, inverted: bool, interp: eInterpolationType, border_mode: eBorderType, border_value: list, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> Tensor: +def warp_perspective(src: Tensor, xform: list, inverted: bool, interp: eInterpolationType, border_mode: eBorderType, border_value: list, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> Tensor: """ Executes the Warp Perspective operation on the given HIP stream. @@ -1475,7 +1534,7 @@ def warp_perspective(src: Tensor, xform: list, inverted: bool, interp: eInterpol Returns: rocpycv.Tensor: The output tensor. """ -def warp_perspective_into(dst: Tensor, src: Tensor, xform: list, inverted: bool, interp: eInterpolationType, border_mode: eBorderType, border_value: list, stream: rocpycv.Stream | None = None, device: eDeviceType = ...) -> None: +def warp_perspective_into(dst: Tensor, src: Tensor, xform: list, inverted: bool, interp: eInterpolationType, border_mode: eBorderType, border_value: list, stream: rocpycv.Stream | None = None, device: eDeviceType = eDeviceType.eDeviceType.GPU) -> None: """ Executes the Warp Perspective operation on the given HIP stream. diff --git a/samples/migraphx-classification/README.md b/samples/migraphx-classification/README.md new file mode 100644 index 00000000..83c7f4e2 --- /dev/null +++ b/samples/migraphx-classification/README.md @@ -0,0 +1,49 @@ +# rocCV MIGraphX Classification Sample + +This sample demonstrates how to use rocCV to preprocess an image on the GPU and run inference with a ResNet50 ONNX model through MIGraphX. The preprocessed tensor is handed off to MIGraphX via a raw GPU pointer for zero-copy interop, so no host round-trip is needed between preprocessing and inference. + +## Dependencies + +- A rocCV build with the Python bindings (`rocpycv`) on `PYTHONPATH`. +- [MIGraphX](https://github.com/ROCm/AMDMIGraphX) with its Python bindings. +- `opencv-python` and `numpy`. +- A ResNet50 ONNX model with input name `data` and shape `[N, 3, 224, 224]` (e.g. the ONNX Model Zoo `resnet50-v1-7.onnx`). +- Optional: a newline-separated ImageNet class labels file for human-readable output. 
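+ +As a quick sanity check that the interop pieces are in place before running the sample (an illustrative snippet, not part of the sample itself): + +```python +import migraphx # confirms the MIGraphX Python bindings load +import numpy as np +import rocpycv + +# Allocate a small GPU tensor and exercise the raw-pointer/stream interop API. +t = rocpycv.Tensor((1, 8, 8, 3), np.uint8, "NHWC") +stream = rocpycv.Stream() +print(hex(t.data_ptr()), hex(stream.handle())) +```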
+ +## Command line + +```shell +python3 migraphx_classification.py \ + --input path/to/image.jpg \ + --model path/to/resnet50.onnx \ + --labels path/to/imagenet_classes.txt \ + --top-k 5 +``` + +Arguments: +- `--input` (required): path to the input image. +- `--model` (required): path to the ResNet50 ONNX file. +- `--labels` (optional): path to an ImageNet class label file. If omitted, classes are reported by index. +- `--top-k` (optional, default 5): number of top predictions to print. + +On the first run, the script compiles the ONNX model for the GPU (with FP16 quantization enabled by default) and caches the result alongside the ONNX file as `<model>_b1_fp16.mxr` (`_b1_fp32.mxr` when FP16 is disabled). Subsequent runs load the cached `.mxr` directly and skip compilation. + +## Preprocessing Operators + +The preprocessing pipeline runs entirely on the GPU through `rocpycv`: + +1. **Resize**: Resizes to 224x224 using cubic interpolation. +2. **CvtColor**: Converts the OpenCV BGR image to RGB. +3. **Convert To**: Casts U8 pixels to float32 (no scaling — the `/255` step is folded into the normalize parameters). +4. **Normalize**: Applies ImageNet mean/std normalization. The mean and std constants are pre-multiplied by 255 so the operator can normalize directly from the [0, 255] float pixel range in a single pass. +5. **Reformat**: Converts the tensor from NHWC to NCHW, the layout MIGraphX/ONNX expects. + +## MIGraphX Interop + +The compiled MIGraphX program is built with `offload_copy=False`, so input and output buffers must already live on the GPU. The sample binds: +- The rocCV preprocessed tensor's GPU pointer (`preprocessed.data_ptr()`) as the `data` input via `migraphx.argument_from_pointer`. +- A `migraphx.allocate_gpu` buffer as the output. + +Inference is launched with `model.run_async` using the same HIP stream as the preprocessing pipeline (`stream.handle()`), so preprocessing and inference are serialized on a single stream with no extra synchronization until the final `stream.synchronize()`. + +The output logits are copied back to the host with `migraphx.from_gpu`, passed through softmax, and the top-K classes are printed. diff --git a/samples/migraphx-classification/migraphx_classification.py b/samples/migraphx-classification/migraphx_classification.py new file mode 100644 index 00000000..be7fc66e --- /dev/null +++ b/samples/migraphx-classification/migraphx_classification.py @@ -0,0 +1,179 @@ +# ############################################################################## +# Copyright (c) - 2026 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# ############################################################################## + +"""Classification with rocCV preprocessing and MIGraphX inference.""" + +from __future__ import annotations + +import argparse +import os + +import cv2 +import migraphx +import numpy as np +import rocpycv + + +# ImageNet normalization, scaled to the [0, 255] pixel range so we can apply +# them directly to U8-derived float pixels without a separate /255 step: +# (pixel/255 - mean) / std == (pixel - mean*255) / (std*255) +IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32) * 255.0 +IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32) * 255.0 + +INPUT_H, INPUT_W = 224, 224 +BATCH_SIZE = 1 + + +def read_image(image_path: str) -> np.ndarray: + """Read an image from disk as an NHWC uint8 BGR numpy array.""" + bgr = cv2.imread(image_path) + if bgr is None: + raise FileNotFoundError(f"Unable to load image: {image_path}") + return np.stack([bgr]) + + +def load_or_compile_model(onnx_path: str, use_fp16: bool = True) -> migraphx.program: + """Load a cached compiled model, or parse + compile + cache the ONNX file.""" + precision_tag = "fp16" if use_fp16 else "fp32" + cache_path = f"{os.path.splitext(onnx_path)[0]}_b{BATCH_SIZE}_{precision_tag}.mxr" + + if os.path.exists(cache_path): + print(f"Loading cached compiled model: {cache_path}") + return migraphx.load(cache_path, format="msgpack") + + print(f"Parsing ONNX: {onnx_path}") + model = migraphx.parse_onnx( + onnx_path, + map_input_dims={"data": [BATCH_SIZE, 3, INPUT_H, INPUT_W]}, + ) + + if use_fp16: + print("Quantizing to FP16...") + # Inserts internal float -> half conversions; model inputs/outputs stay + # float32, so the existing F32 buffer setup remains unchanged. + migraphx.quantize_fp16(model) + + print("Compiling for GPU...") + # offload_copy=False lets us bind GPU buffers directly, enabling + # zero-copy interop. 
+    model.compile(migraphx.get_target("gpu"), offload_copy=False)
+
+    print(f"Caching compiled model to: {cache_path}")
+    migraphx.save(model, cache_path, format="msgpack")
+    return model
+
+
+def load_labels(labels_path: str | None) -> list[str] | None:
+    if labels_path is None:
+        return None
+    with open(labels_path) as f:
+        return [line.strip() for line in f if line.strip()]
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Classification with rocCV preprocessing and MIGraphX inference"
+    )
+    parser.add_argument("--input", required=True, help="Path to input image")
+    parser.add_argument(
+        "--model", required=True, help="Path to a ResNet50 ONNX model"
+    )
+    parser.add_argument(
+        "--labels",
+        default=None,
+        help="Optional path to a newline-separated ImageNet class labels file",
+    )
+    parser.add_argument("--top-k", type=int, default=5)
+    return parser.parse_args()
+
+
+def softmax(x: np.ndarray, axis: int = -1) -> np.ndarray:
+    # Subtract the max before exponentiating for numerical stability.
+    x = x - np.max(x, axis=axis, keepdims=True)
+    e = np.exp(x)
+    return e / np.sum(e, axis=axis, keepdims=True)
+
+
+def main() -> None:
+    args = parse_args()
+
+    model = load_or_compile_model(args.model)
+
+    print(f"Reading image: {args.input}")
+    np_image = read_image(args.input)
+    print(f"Input image shape: {np_image.shape}")
+
+    # Load/allocate tensors on the GPU
+    input_tensor: rocpycv.Tensor = rocpycv.from_dlpack(np_image, "NHWC").copy_to(rocpycv.GPU)
+    resized: rocpycv.Tensor = rocpycv.Tensor((BATCH_SIZE, INPUT_H, INPUT_W, 3), np.uint8, "NHWC")
+    rgb: rocpycv.Tensor = rocpycv.Tensor((BATCH_SIZE, INPUT_H, INPUT_W, 3), np.uint8, "NHWC")
+    f32: rocpycv.Tensor = rocpycv.Tensor((BATCH_SIZE, INPUT_H, INPUT_W, 3), np.float32, "NHWC")
+    normalized: rocpycv.Tensor = rocpycv.Tensor((BATCH_SIZE, INPUT_H, INPUT_W, 3), np.float32, "NHWC")
+    preprocessed: rocpycv.Tensor = rocpycv.Tensor((BATCH_SIZE, 3, INPUT_H, INPUT_W), np.float32, "NCHW")
+
+    mean_t: rocpycv.Tensor = rocpycv.from_dlpack(IMAGENET_MEAN.reshape(1, 1, 1, 3), "NHWC").copy_to(rocpycv.GPU)
+    std_t: rocpycv.Tensor = rocpycv.from_dlpack(IMAGENET_STD.reshape(1, 1, 1, 3), "NHWC").copy_to(rocpycv.GPU)
+
+    # Set up MIGraphX shapes and arguments
+    in_shape: migraphx.shape = migraphx.shape(type="float_type", lens=preprocessed.shape())
+    out_shape: migraphx.shape = migraphx.shape(type="float_type", lens=[BATCH_SIZE, 1000])
+
+    in_arg: migraphx.argument = migraphx.argument_from_pointer(in_shape, preprocessed.data_ptr())
+    # allocate_gpu returns a migraphx.argument backed by GPU memory.
+    out_buf: migraphx.argument = migraphx.allocate_gpu(out_shape)
+
+    # Begin preprocessing
+    print("Preprocessing with rocCV...")
+    stream: rocpycv.Stream = rocpycv.Stream()
+
+    rocpycv.resize_into(resized, input_tensor, rocpycv.CUBIC, stream)
+    rocpycv.cvtcolor_into(rgb, resized, rocpycv.COLOR_BGR2RGB, stream)
+    rocpycv.convert_to_into(f32, rgb, 1.0, 0.0, stream)
+    rocpycv.normalize_into(normalized, f32, mean_t, std_t, rocpycv.NormalizeFlags.SCALE_IS_STDDEV, 1.0, 0.0, 0.0, stream)
+    rocpycv.reformat_into(preprocessed, normalized, stream)
+
+    print(f"Preprocessed tensor shape (NCHW): {preprocessed.shape()}")
+
+    print("Running MIGraphX inference...")
+
+    outputs = model.run_async(
+        {"data": in_arg, "main:#output_0": out_buf},
+        stream.handle(),
+        "ihipStream_t",
+    )
+    stream.synchronize()
+
+    # Postprocess the inference results
+    logits = np.array(migraphx.from_gpu(outputs[0]))
+    probs = softmax(logits, axis=1)
+
+    labels = load_labels(args.labels)
+
+    # Report top-K for the first image in the batch.
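+    # np.argsort sorts ascending, so reverse with [::-1] to get the
+    # highest-probability class indices first, then keep the top k.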
+ print(f"\nTop {args.top_k} predictions:") + top = np.argsort(probs[0])[::-1][: args.top_k] + for rank, idx in enumerate(top, start=1): + name = labels[idx] if labels is not None and idx < len(labels) else f"class {idx}" + print(f" {rank}. {name}: {probs[0][idx]:.6f}") + + +if __name__ == "__main__": + main() diff --git a/samples/classification/README.md b/samples/pytorch-classification/README.md similarity index 100% rename from samples/classification/README.md rename to samples/pytorch-classification/README.md diff --git a/samples/classification/pytorch_classification.py b/samples/pytorch-classification/pytorch_classification.py similarity index 100% rename from samples/classification/pytorch_classification.py rename to samples/pytorch-classification/pytorch_classification.py diff --git a/tests/roccv/python/test_op_adv_cvt_color.py b/tests/roccv/python/test_op_adv_cvt_color.py index 8a243f68..32b7f686 100644 --- a/tests/roccv/python/test_op_adv_cvt_color.py +++ b/tests/roccv/python/test_op_adv_cvt_color.py @@ -62,7 +62,7 @@ @pytest.mark.parametrize("samples,width,height", [[1, 64, 48], [2, 128, 72]]) def test_op_advcvtcolor_interleaved444(samples, height, width, code, spec, dtype, device): input_tensor = generate_tensor(samples, width, height, 3, dtype, device) - output_golden = rocpycv.Tensor([samples, height, width, 3], rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor([samples, height, width, 3], dtype, rocpycv.eTensorLayout.NHWC, device) stream = rocpycv.Stream() output = rocpycv.advcvtcolor(input_tensor, code, spec, stream, device) @@ -79,7 +79,7 @@ def test_op_advcvtcolor_interleaved444(samples, height, width, code, spec, dtype @pytest.mark.parametrize("samples,width,height", [[1, 64, 48], [2, 128, 72]]) def test_op_advcvtcolor_interleaved_to_semiplanar(samples, height, width, code, spec, dtype, device): input_tensor = generate_tensor(samples, width, height, 3, dtype, device) - output_golden = rocpycv.Tensor([samples, (height * 3) // 2, width, 1], rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor([samples, (height * 3) // 2, width, 1], dtype, rocpycv.eTensorLayout.NHWC, device) stream = rocpycv.Stream() output = rocpycv.advcvtcolor(input_tensor, code, spec, stream, device) @@ -96,7 +96,7 @@ def test_op_advcvtcolor_interleaved_to_semiplanar(samples, height, width, code, @pytest.mark.parametrize("samples,width,height", [[1, 64, 48], [2, 128, 72]]) def test_op_advcvtcolor_semiplanar_to_interleaved(samples, height, width, code, spec, dtype, device): input_tensor = generate_tensor(samples, width, (height * 3) // 2, 1, dtype, device) - output_golden = rocpycv.Tensor([samples, height, width, 3], rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor([samples, height, width, 3], dtype, rocpycv.eTensorLayout.NHWC, device) stream = rocpycv.Stream() output = rocpycv.advcvtcolor(input_tensor, code, spec, stream, device) diff --git a/tests/roccv/python/test_op_bilateral_filter.py b/tests/roccv/python/test_op_bilateral_filter.py index 6e0abfd3..72e6708d 100644 --- a/tests/roccv/python/test_op_bilateral_filter.py +++ b/tests/roccv/python/test_op_bilateral_filter.py @@ -44,7 +44,7 @@ ]) def test_op_bilateral_filter(samples, height, width, channels, border_mode, border_val, diameter, sigma_color, sigma_space, dtype, device): input = generate_tensor(samples, width, height, channels, dtype, device) - output_golden = rocpycv.Tensor([samples, height, width, channels], rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = 
rocpycv.Tensor([samples, height, width, channels], dtype, rocpycv.eTensorLayout.NHWC, device) stream = rocpycv.Stream() rocpycv.bilateral_filter_into(output_golden, input, diameter, sigma_color, diff --git a/tests/roccv/python/test_op_bnd_box.py b/tests/roccv/python/test_op_bnd_box.py index d6fd8045..1b203a5a 100644 --- a/tests/roccv/python/test_op_bnd_box.py +++ b/tests/roccv/python/test_op_bnd_box.py @@ -49,11 +49,10 @@ def generate_boxes(samples: int, height: int, width: int) -> rocpycv.BndBoxes: (3, 150, 50), (7, 15, 23) ]) -def test_op_remap(samples, height, width, channels, device): +def test_op_bndbox(samples, height, width, channels, device): input = generate_tensor(samples, width, height, channels, rocpycv.eDataType.U8, device) boxes = generate_boxes(samples, height, width) - output_golden = rocpycv.Tensor([samples, height, width, channels], - rocpycv.eTensorLayout.NHWC, rocpycv.eDataType.U8, device) + output_golden = rocpycv.Tensor([samples, height, width, channels], rocpycv.eDataType.U8, rocpycv.eTensorLayout.NHWC, device) stream = rocpycv.Stream() output = rocpycv.bndbox(input, boxes, stream, device) diff --git a/tests/roccv/python/test_op_center_crop.py b/tests/roccv/python/test_op_center_crop.py index a8054dcc..8daa507f 100644 --- a/tests/roccv/python/test_op_center_crop.py +++ b/tests/roccv/python/test_op_center_crop.py @@ -41,8 +41,7 @@ ]) def test_op_center_crop(samples, height, width, channels, dtype, box, device): input = generate_tensor(samples, width, height, channels, dtype, device) - output_golden = rocpycv.Tensor([samples, box[1], box[0], channels], - rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor([samples, box[1], box[0], channels], dtype, rocpycv.eTensorLayout.NHWC, device) if device == rocpycv.eDeviceType.GPU: stream = rocpycv.Stream() diff --git a/tests/roccv/python/test_op_composite.py b/tests/roccv/python/test_op_composite.py index e5d37538..12f43668 100644 --- a/tests/roccv/python/test_op_composite.py +++ b/tests/roccv/python/test_op_composite.py @@ -39,7 +39,7 @@ def test_op_composite(samples, height, width, out_channels, dtype, device): foreground = generate_tensor(samples, width, height, 3, dtype, device) background = generate_tensor(samples, width, height, 3, dtype, device) mask = generate_tensor(samples, width, height, 1, dtype, device) - output_golden = rocpycv.Tensor([samples, height, width, out_channels], rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor([samples, height, width, out_channels], dtype, rocpycv.eTensorLayout.NHWC, device) stream = rocpycv.Stream() output = rocpycv.composite(foreground, background, mask, out_channels, stream, device) diff --git a/tests/roccv/python/test_op_convert_to.py b/tests/roccv/python/test_op_convert_to.py index 9e9f389f..c7555531 100644 --- a/tests/roccv/python/test_op_convert_to.py +++ b/tests/roccv/python/test_op_convert_to.py @@ -40,7 +40,7 @@ ]) def test_op_convert_to(samples, height, width, channels, device, dtype, out_dtype, alpha, beta): input = generate_tensor(samples, width, height, channels, dtype, device) - output_golden = rocpycv.Tensor([samples, height, width, channels], rocpycv.eTensorLayout.NHWC, out_dtype, device) + output_golden = rocpycv.Tensor([samples, height, width, channels], out_dtype, rocpycv.eTensorLayout.NHWC, device) stream = rocpycv.Stream() rocpycv.convert_to_into(output_golden, input, alpha, beta, stream, device) diff --git a/tests/roccv/python/test_op_copy_make_border.py b/tests/roccv/python/test_op_copy_make_border.py index 
27994e00..fcafe36f 100644 --- a/tests/roccv/python/test_op_copy_make_border.py +++ b/tests/roccv/python/test_op_copy_make_border.py @@ -46,7 +46,7 @@ def test_op_copy_make_border(samples, height, width, channels, top, right, bottom, left, border_mode, border_value, dtype, device): input = generate_tensor(samples, width, height, channels, dtype, device) output_golden = rocpycv.Tensor([samples, height + top + bottom, width + right + left, - channels], rocpycv.eTensorLayout.NHWC, dtype, device) + channels], dtype, rocpycv.eTensorLayout.NHWC, device) stream = rocpycv.Stream() output = rocpycv.copymakeborder(input, border_mode, border_value, top, bottom, left, right, stream, device) diff --git a/tests/roccv/python/test_op_custom_crop.py b/tests/roccv/python/test_op_custom_crop.py index 026130c5..47c3f1f1 100644 --- a/tests/roccv/python/test_op_custom_crop.py +++ b/tests/roccv/python/test_op_custom_crop.py @@ -41,8 +41,7 @@ ]) def test_op_custom_crop(samples, height, width, channels, dtype, box, device): input = generate_tensor(samples, width, height, channels, dtype, device) - output_golden = rocpycv.Tensor([samples, box.height, box.width, channels], - rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor([samples, box.height, box.width, channels], dtype, rocpycv.eTensorLayout.NHWC, device) if device == rocpycv.eDeviceType.GPU: stream = rocpycv.Stream() rocpycv.custom_crop_into(output_golden, input, box, stream, device) diff --git a/tests/roccv/python/test_op_cvt_color.py b/tests/roccv/python/test_op_cvt_color.py index 448a598b..637fbbfe 100644 --- a/tests/roccv/python/test_op_cvt_color.py +++ b/tests/roccv/python/test_op_cvt_color.py @@ -51,7 +51,7 @@ def test_op_cvtcolor(samples, height, width, code, dtype, device): out_channels = 1 input = generate_tensor(samples, width, height, in_channels, dtype, device) - output_golden = rocpycv.Tensor([samples, height, width, out_channels], rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor([samples, height, width, out_channels], dtype, rocpycv.eTensorLayout.NHWC, device) stream = rocpycv.Stream() output = rocpycv.cvtcolor(input, code, stream, device) diff --git a/tests/roccv/python/test_op_flip.py b/tests/roccv/python/test_op_flip.py index 32ed6215..c4980cd6 100644 --- a/tests/roccv/python/test_op_flip.py +++ b/tests/roccv/python/test_op_flip.py @@ -40,7 +40,7 @@ def test_op_flip(samples, width, height, channels, dtype, flip_code, device): input_tensor = generate_tensor(samples, width, height, channels, dtype, device) stream = rocpycv.Stream() - output_tensor_golden = rocpycv.Tensor([samples, height, width, channels], rocpycv.eTensorLayout.NHWC, dtype, device) + output_tensor_golden = rocpycv.Tensor([samples, height, width, channels], dtype, rocpycv.eTensorLayout.NHWC, device) rocpycv.flip_into(output_tensor_golden, input_tensor, flip_code, stream, device) output_tensor = rocpycv.flip(input_tensor, flip_code, stream, device) stream.synchronize() diff --git a/tests/roccv/python/test_op_gamma_contrast.py b/tests/roccv/python/test_op_gamma_contrast.py index 68343ec9..38893594 100644 --- a/tests/roccv/python/test_op_gamma_contrast.py +++ b/tests/roccv/python/test_op_gamma_contrast.py @@ -38,7 +38,7 @@ ]) def test_op_gamma_contrast(samples, height, width, channels, gamma, dtype, device): input = generate_tensor(samples, width, height, channels, dtype, device) - output_golden = rocpycv.Tensor([samples, height, width, channels], rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor([samples, 
height, width, channels], dtype, rocpycv.eTensorLayout.NHWC, device) stream = rocpycv.Stream() output = rocpycv.gamma_contrast(input, gamma, stream, device) diff --git a/tests/roccv/python/test_op_histogram.py b/tests/roccv/python/test_op_histogram.py index e980a057..17c741d2 100644 --- a/tests/roccv/python/test_op_histogram.py +++ b/tests/roccv/python/test_op_histogram.py @@ -37,7 +37,7 @@ ]) def test_op_histogram(samples, height, width, in_dtype, out_dtype, device): input = generate_tensor(samples, width, height, 1, in_dtype, device) - output_golden = rocpycv.Tensor([samples, 256, 1], rocpycv.eTensorLayout.HWC, out_dtype, device) + output_golden = rocpycv.Tensor([samples, 256, 1], out_dtype, rocpycv.eTensorLayout.HWC, device) stream = rocpycv.Stream() output = rocpycv.histogram(input, None, stream, device) diff --git a/tests/roccv/python/test_op_non_max_suppression.py b/tests/roccv/python/test_op_non_max_suppression.py index 163a0a8f..e39c1e44 100644 --- a/tests/roccv/python/test_op_non_max_suppression.py +++ b/tests/roccv/python/test_op_non_max_suppression.py @@ -44,7 +44,7 @@ def generate_boxes(samples: int, num_boxes: int, device: rocpycv.eDeviceType) -> def test_op_non_max_suppression(samples, num_boxes, device): boxes = generate_boxes(samples, num_boxes, device) scores = generate_tensor_generic([samples, num_boxes], rocpycv.eTensorLayout.NW, rocpycv.eDataType.F32, device) - output_golden = rocpycv.Tensor([samples, num_boxes], rocpycv.eTensorLayout.NW, rocpycv.eDataType.U8, device) + output_golden = rocpycv.Tensor([samples, num_boxes], rocpycv.eDataType.U8, rocpycv.eTensorLayout.NW, device) stream = rocpycv.Stream() # Hardcoding the score and IoU threshold here. The only thing we care about is the resulting size of the diff --git a/tests/roccv/python/test_op_normalize.py b/tests/roccv/python/test_op_normalize.py index e25bc300..72036515 100644 --- a/tests/roccv/python/test_op_normalize.py +++ b/tests/roccv/python/test_op_normalize.py @@ -39,7 +39,7 @@ def test_op_normalize(samples, height, width, channels, device, dtype): input = generate_tensor(samples, width, height, channels, dtype, device) base = generate_tensor(1, 1, 1, channels, rocpycv.eDataType.F32, device) scale = generate_tensor(1, 1, 1, channels, rocpycv.eDataType.F32, device) - output_golden = rocpycv.Tensor([samples, height, width, channels], rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor([samples, height, width, channels], dtype, rocpycv.eTensorLayout.NHWC, device) stream = rocpycv.Stream() rocpycv.normalize_into(output_golden, input, base, scale, None, 1.0, 0.0, 0.0, stream, device) diff --git a/tests/roccv/python/test_op_reformat.py b/tests/roccv/python/test_op_reformat.py index 62715ff9..10d2b7a6 100644 --- a/tests/roccv/python/test_op_reformat.py +++ b/tests/roccv/python/test_op_reformat.py @@ -54,7 +54,7 @@ def test_op_reformat(samples, height, width, channels, inLayout, outLayout, devi input_shape = create_tensor_shape(inLayout, samples, channels, height, width) output_shape = create_tensor_shape(outLayout, samples, channels, height, width) input_tensor = generate_tensor_generic(input_shape, inLayout, dtype, device) - output_golden = rocpycv.Tensor(output_shape, outLayout, dtype, device) + output_golden = rocpycv.Tensor(output_shape, dtype, outLayout, device) stream = rocpycv.Stream() rocpycv.reformat_into(input_tensor, output_golden, stream, device) diff --git a/tests/roccv/python/test_op_remap.py b/tests/roccv/python/test_op_remap.py index 25321f9e..e9ccc394 100644 --- 
a/tests/roccv/python/test_op_remap.py +++ b/tests/roccv/python/test_op_remap.py @@ -44,7 +44,7 @@ def test_op_remap(samples, width, height, channels, dtype, map_interp, interp, map_type, align_corners, border_mode, border_val, device): input_tensor = generate_tensor(samples, width, height, channels, dtype, device) - output_golden = rocpycv.Tensor([samples, height, width, channels], rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor([samples, height, width, channels], dtype, rocpycv.eTensorLayout.NHWC, device) if (map_type == rocpycv.REMAP_ABSOLUTE): diff --git a/tests/roccv/python/test_op_resize.py b/tests/roccv/python/test_op_resize.py index 721c1d7f..70e0c897 100644 --- a/tests/roccv/python/test_op_resize.py +++ b/tests/roccv/python/test_op_resize.py @@ -38,7 +38,7 @@ def test_op_resize(out_shape, in_shape, samples, channels, interp, dtype, device # Input/Output shapes are passed in as format [width, height] input = generate_tensor(samples, in_shape[0], in_shape[1], channels, dtype, device) output_shape = (samples, out_shape[1], out_shape[0], channels) - output_golden = rocpycv.Tensor(output_shape, rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor(output_shape, dtype, rocpycv.eTensorLayout.NHWC, device) stream = rocpycv.Stream() rocpycv.resize_into(output_golden, input, interp, stream, device) diff --git a/tests/roccv/python/test_op_rotate.py b/tests/roccv/python/test_op_rotate.py index 13c2fceb..1fa1e8e8 100644 --- a/tests/roccv/python/test_op_rotate.py +++ b/tests/roccv/python/test_op_rotate.py @@ -46,7 +46,7 @@ def calc_center_shift(center_x, center_y, angle) -> tuple[float, float]: ]) def test_op_rotate(samples, width, height, channels, angle, dtype, interp, device): input = generate_tensor(samples, width, height, channels, dtype, device) - output_golden = rocpycv.Tensor([samples, height, width, channels], rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor([samples, height, width, channels], dtype, rocpycv.eTensorLayout.NHWC, device) center_x = (width - 1) / 2 center_y = (height - 1) / 2 diff --git a/tests/roccv/python/test_op_thresholding.py b/tests/roccv/python/test_op_thresholding.py index 1eae8d97..8613a266 100644 --- a/tests/roccv/python/test_op_thresholding.py +++ b/tests/roccv/python/test_op_thresholding.py @@ -41,7 +41,7 @@ def test_op_thresholding(samples, height, width, channels, dtype, thresh, mvdata, threshType, device): input_tensor = generate_tensor(samples, width, height, channels, dtype, device) - output_golden = rocpycv.Tensor([samples, height, width, channels], rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor([samples, height, width, channels], dtype, rocpycv.eTensorLayout.NHWC, device) thresh_array = np.full(samples, thresh, np.float64) maxval_array = np.full(samples, mvdata, np.float64) diff --git a/tests/roccv/python/test_op_warp_affine.py b/tests/roccv/python/test_op_warp_affine.py index b4fd0dfb..9b9656b0 100644 --- a/tests/roccv/python/test_op_warp_affine.py +++ b/tests/roccv/python/test_op_warp_affine.py @@ -41,7 +41,7 @@ ]) def test_op_warp_affine(samples, width, height, channels, dtype, mat, inverted, interp, border_mode, border_val, device): input = generate_tensor(samples, width, height, channels, dtype, device) - output_golden = rocpycv.Tensor([samples, height, width, channels], rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor([samples, height, width, channels], dtype, rocpycv.eTensorLayout.NHWC, device) stream = 
rocpycv.Stream() output = rocpycv.warp_affine(input, mat, inverted, interp, border_mode, border_val, stream, device) diff --git a/tests/roccv/python/test_op_warp_perspective.py b/tests/roccv/python/test_op_warp_perspective.py index c72754a6..3164a951 100644 --- a/tests/roccv/python/test_op_warp_perspective.py +++ b/tests/roccv/python/test_op_warp_perspective.py @@ -41,7 +41,7 @@ ]) def test_op_warp_perspective(samples, width, height, channels, dtype, mat, inverted, interp, border_mode, border_val, device): input = generate_tensor(samples, width, height, channels, dtype, device) - output_golden = rocpycv.Tensor([samples, height, width, channels], rocpycv.eTensorLayout.NHWC, dtype, device) + output_golden = rocpycv.Tensor([samples, height, width, channels], dtype, rocpycv.eTensorLayout.NHWC, device) print(output_golden.shape()) stream = rocpycv.Stream() diff --git a/tests/roccv/python/test_py_stream.py b/tests/roccv/python/test_py_stream.py new file mode 100644 index 00000000..1560e4f5 --- /dev/null +++ b/tests/roccv/python/test_py_stream.py @@ -0,0 +1,32 @@ +# ############################################################################## +# Copyright (c) - 2026 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# ############################################################################## + +import rocpycv + + +def test_stream_handle_returns_valid_pointer(): + stream = rocpycv.Stream() + handle = stream.handle() + + assert isinstance(handle, int) + assert handle != 0 diff --git a/tests/roccv/python/test_py_tensor.py b/tests/roccv/python/test_py_tensor.py new file mode 100644 index 00000000..d394de8b --- /dev/null +++ b/tests/roccv/python/test_py_tensor.py @@ -0,0 +1,85 @@ +# ############################################################################## +# Copyright (c) - 2026 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# ############################################################################## + +import numpy as np +import pytest +import rocpycv + + +@pytest.mark.parametrize("device", [rocpycv.eDeviceType.GPU, rocpycv.eDeviceType.CPU]) +@pytest.mark.parametrize("dtype", [rocpycv.eDataType.U8, rocpycv.eDataType.F32, rocpycv.eDataType.S32]) +@pytest.mark.parametrize( + "shape, layout", + [ + ([2, 32, 64, 3], rocpycv.eTensorLayout.NHWC), + ([1, 3, 16, 16], rocpycv.eTensorLayout.NCHW), + ([8, 8, 4], rocpycv.eTensorLayout.HWC), + ], +) +def test_tensor_basic_properties(shape, layout, dtype, device): + tensor = rocpycv.Tensor(shape, dtype, layout, device) + + assert tensor.shape() == shape + assert tensor.ndim() == len(shape) + assert tensor.layout() == layout + assert tensor.device() == device + assert tensor.dtype() == dtype + assert tensor.data_ptr() != 0 + + +@pytest.mark.parametrize( + "dtype_in, expected_dtype", + [ + (rocpycv.eDataType.U8, rocpycv.eDataType.U8), + (rocpycv.eDataType.F32, rocpycv.eDataType.F32), + (np.uint8, rocpycv.eDataType.U8), + (np.float32, rocpycv.eDataType.F32), + (np.int32, rocpycv.eDataType.S32), + (np.dtype("uint16"), rocpycv.eDataType.U16), + ], +) +@pytest.mark.parametrize( + "layout_in, expected_layout, shape", + [ + (rocpycv.eTensorLayout.NHWC, rocpycv.eTensorLayout.NHWC, [2, 32, 64, 3]), + ("NHWC", rocpycv.eTensorLayout.NHWC, [2, 32, 64, 3]), + ("NCHW", rocpycv.eTensorLayout.NCHW, [1, 3, 16, 16]), + ("HWC", rocpycv.eTensorLayout.HWC, [8, 8, 4]), + ], +) +def test_tensor_construction_from_numpy_and_strings(dtype_in, expected_dtype, layout_in, expected_layout, shape): + tensor = rocpycv.Tensor(shape, dtype_in, layout_in, rocpycv.eDeviceType.GPU) + + assert tensor.shape() == shape + assert tensor.dtype() == expected_dtype + assert tensor.layout() == expected_layout + + +def test_tensor_construction_invalid_dtype_raises(): + with pytest.raises(Exception): + rocpycv.Tensor([1, 8, 8, 3], "not_a_dtype", rocpycv.eTensorLayout.NHWC, rocpycv.eDeviceType.GPU) + + +def test_tensor_construction_invalid_layout_raises(): + with pytest.raises(Exception): + rocpycv.Tensor([1, 8, 8, 3], rocpycv.eDataType.U8, "ZYXW", rocpycv.eDeviceType.GPU)
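+
+
+def test_tensor_from_dlpack_wrap():
+    # Illustrative extra check, assuming the documented zero-copy DLPack wrap:
+    # from_dlpack on a NumPy array should yield a CPU tensor that shares the
+    # host buffer and preserves shape, dtype, and layout.
+    host = np.arange(24, dtype=np.uint8).reshape(1, 2, 4, 3)
+    tensor = rocpycv.from_dlpack(host, "NHWC")
+
+    assert tensor.shape() == [1, 2, 4, 3]
+    assert tensor.dtype() == rocpycv.eDataType.U8
+    assert tensor.layout() == rocpycv.eTensorLayout.NHWC
+    assert tensor.device() == rocpycv.eDeviceType.CPU
+    # Zero-copy: the tensor should point at the NumPy array's own buffer.
+    assert tensor.data_ptr() == host.ctypes.data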