From 4918b53c0b822c485629cd1180a11519ddbc086c Mon Sep 17 00:00:00 2001 From: Essam Aly Date: Tue, 31 Mar 2026 10:15:38 -0700 Subject: [PATCH 1/4] 1st round of warnning cause removal --- CMakeLists.txt | 64 ++++++- src/common/roc_pybuffer.cpp | 208 ++++++++++++++--------- src/common/roc_pybuffer.h | 14 +- src/common/roc_pydlpack.cpp | 125 +++++++++----- src/rocdecode/roc_pydecode.cpp | 11 +- src/rocdecode/roc_pydecode.h | 12 +- src/rocdecode/roc_pyvideodecode.cpp | 78 +++++---- src/rocdecode/roc_pyvideodecode.h | 16 +- src/rocdecode/roc_pyvideodecodecpu.cpp | 4 +- src/rocdecode/roc_pyvideodecodecpu.h | 6 +- src/rocjpeg/roc_pyjpeg.cpp | 7 + src/rocjpeg/roc_pyjpeg.h | 4 +- src/rocjpeg/roc_pyjpeg_codestream.cpp | 73 ++++++-- src/rocjpeg/roc_pyjpeg_codestream.h | 11 +- src/rocjpeg/roc_pyjpeg_decode_source.cpp | 2 + src/rocjpeg/roc_pyjpeg_decode_source.h | 1 - src/rocjpeg/roc_pyjpeg_decoder.cpp | 45 ++--- src/rocjpeg/roc_pyjpeg_decoder.h | 7 +- src/rocjpeg/roc_pyjpeg_images.cpp | 57 ++++--- src/rocjpeg/roc_pyjpeg_images.h | 10 +- src/rocjpeg/roc_pyjpeg_utils.h | 168 ++++++++++-------- tests/CMakeLists.txt | 112 +++++------- 22 files changed, 622 insertions(+), 413 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a717c66..2597d2bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -114,6 +114,33 @@ else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG -fPIC") endif() +option(ROCPYDECODE_ENABLE_ALL_WARNINGS "Enable broad compiler warnings for C++ targets" ON) +set(ROCPYDECODE_WARNING_FLAGS "") +if(ROCPYDECODE_ENABLE_ALL_WARNINGS) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + list(APPEND ROCPYDECODE_WARNING_FLAGS + -Weverything + -Wno-c++98-compat + -Wno-c++98-compat-pedantic + -Wno-pre-c++14-compat + -Wno-pre-c++17-compat + -Wno-pre-c++20-compat) + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + list(APPEND ROCPYDECODE_WARNING_FLAGS + -Wall + -Wextra + -Wpedantic) + elseif(MSVC) + list(APPEND ROCPYDECODE_WARNING_FLAGS + /W4 + /permissive-) + endif() + 
+ if(ROCPYDECODE_WARNING_FLAGS) + add_compile_options(${ROCPYDECODE_WARNING_FLAGS}) + endif() +endif() + # Set supported GPU Targets if(NOT GPU_TARGETS AND NOT AMDGPU_TARGETS) set(DEFAULT_GPU_TARGETS "gfx908;gfx90a;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102;gfx950;gfx1200;gfx1201") @@ -160,12 +187,19 @@ set (HIP_PLATFORM amd CACHE STRING "HIP platform") # Find dependencies find_package(HIP REQUIRED) -find_package(rocdecode 1.0.0 QUIET) -find_package(rocjpeg 1.0.0 QUIET) +# ROCm package versions are currently 0.x even though the APIs used here are stable. +find_package(rocdecode QUIET) +find_package(rocjpeg QUIET) find_package(pybind11 REQUIRED) find_package(DLPACK REQUIRED) find_package(FFmpeg QUIET) -set(ROCPYDECODE_USE_FFMPEG ${FFMPEG_FOUND}) +set(ROCPYDECODE_USE_FFMPEG FALSE) +set(ROCPYDECODE_FFMPEG_UTILS_DIR "${ROCM_PATH}/share/rocdecode/utils/ffmpegvideodecode") +if(FFMPEG_FOUND AND EXISTS "${ROCPYDECODE_FFMPEG_UTILS_DIR}/ffmpeg_video_dec.h") + set(ROCPYDECODE_USE_FFMPEG TRUE) +elseif(FFMPEG_FOUND) + message(STATUS "FFmpeg found, but rocdecode FFmpeg utility sources are unavailable; building without demux/CPU backend.") +endif() set(ROCPYDECODE_DEPENDENCY_READY FALSE) set(ROCPYJPEG_DEPENDENCY_READY FALSE) @@ -180,8 +214,17 @@ if(rocdecode_FOUND) include_directories(src) file(GLOB pyfiles pyRocVideoDecode/*.py pyRocVideoDecode/*.pyi) + # TBD (essam): + # # The always-built rocdecode utility sources use libavutil's MD5 helpers. 
+ # if(AVUTIL_LIBRARY) + # list(APPEND LINK_LIBRARY_LIST ${AVUTIL_LIBRARY}) + # else() + # message(FATAL_ERROR "libavutil is required to build rocPyDecode because ROCm's roc_video_dec utilities use av_md5_* symbols.") + # endif() + # Always build the GPU path - include_directories(${rocdecode_INCLUDE_DIR} + include_directories(SYSTEM + ${rocdecode_INCLUDE_DIR} ${ROCM_PATH}/include/rocdecode ${ROCM_PATH}/share/rocdecode/utils ${ROCM_PATH}/share/rocdecode/utils/rocvideodecode) @@ -205,17 +248,25 @@ if(rocdecode_FOUND) # Core GPU sources file(GLOB include_base src/rocdecode/*.h src/common/*.h ${ROCM_PATH}/share/rocdecode/utils/rocvideodecode/*.h) file(GLOB sources_base src/rocdecode/roc_pydecode.cpp src/rocdecode/roc_pyvideodecode.cpp src/common/*.cpp ${ROCM_PATH}/share/rocdecode/utils/*.cpp ${ROCM_PATH}/share/rocdecode/utils/rocvideodecode/*.cpp) + file(GLOB sources_external_rocdecode ${ROCM_PATH}/share/rocdecode/utils/*.cpp ${ROCM_PATH}/share/rocdecode/utils/rocvideodecode/*.cpp) set(include ${include_base}) set(sources ${sources_base}) + if(sources_external_rocdecode) + set_source_files_properties(${sources_external_rocdecode} PROPERTIES COMPILE_OPTIONS "-w") + endif() if(ROCPYDECODE_USE_FFMPEG) # Demux + CPU backend (FFmpeg-dependent) file(GLOB sources_ffmpeg src/rocdecode/roc_pyvideodemuxer.cpp src/rocdecode/roc_pyvideodecodecpu.cpp src/rocdecode/roc_pydecode_test.cpp ${ROCM_PATH}/share/rocdecode/utils/ffmpegvideodecode/*.cpp) file(GLOB include_ffmpeg ${ROCM_PATH}/share/rocdecode/utils/ffmpegvideodecode/*.h) + file(GLOB sources_external_ffmpeg ${ROCM_PATH}/share/rocdecode/utils/ffmpegvideodecode/*.cpp) list(APPEND include ${include_ffmpeg}) list(APPEND sources ${sources_ffmpeg}) - include_directories(${AVUTIL_INCLUDE_DIR} ${AVCODEC_INCLUDE_DIR} ${AVFORMAT_INCLUDE_DIR}) + include_directories(SYSTEM ${AVUTIL_INCLUDE_DIR} ${AVCODEC_INCLUDE_DIR} ${AVFORMAT_INCLUDE_DIR}) set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${FFMPEG_LIBRARIES}) + if(sources_external_ffmpeg) + 
set_source_files_properties(${sources_external_ffmpeg} PROPERTIES COMPILE_OPTIONS "-w") + endif() else() message(STATUS "FFmpeg not found; building rocPyDecode without demux/CPU backend (GPU decode only).") set(ROCPYDECODE_HOST_LIBRARY_FOUND FALSE) @@ -225,7 +276,7 @@ else() endif() # rocJPEG if(rocjpeg_FOUND) - include_directories(${rocjpeg_INCLUDE_DIR} ${ROCM_PATH}/share/rocjpeg/samples) + include_directories(SYSTEM ${ROCM_PATH}/include ${rocjpeg_INCLUDE_DIR} ${ROCM_PATH}/share/rocjpeg/samples) set(LINK_LIBRARY_LIST_JPEG ${LINK_LIBRARY_LIST_JPEG} rocjpeg::rocjpeg) file(GLOB pyfiles_jpeg pyRocJpegDecode/*.py pyRocJpegDecode/*.pyi) @@ -247,6 +298,7 @@ else() message("-- ${Yellow}rocPyJPEG skipped -- missing rocjpeg dependency${ColourReset}") endif() message("-- ${White}rocPyDecode/rocPyJPEG -- CMAKE_CXX_FLAGS:${CMAKE_CXX_FLAGS}${ColourReset}") +message("-- ${White}rocPyDecode/rocPyJPEG -- Warning Flags:${ROCPYDECODE_WARNING_FLAGS}${ColourReset}") # set license information set(CPACK_RPM_PACKAGE_LICENSE "MIT") diff --git a/src/common/roc_pybuffer.cpp b/src/common/roc_pybuffer.cpp index 7d55ca3d..62df3964 100644 --- a/src/common/roc_pybuffer.cpp +++ b/src/common/roc_pybuffer.cpp @@ -20,18 +20,39 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include "roc_pybuffer.h" -#include - -#include -#include - -using namespace std; -using namespace py::literals; - -static void CheckValidBuffer(const void *ptr) { - if (ptr == nullptr) { - throw std::runtime_error("NULL buffer not accepted"); +#include "roc_pybuffer.h" +#include +#include +#include +#include + +#include +#include + +using namespace std; +using namespace py::literals; + +namespace { +template +Target CheckedNumericCast(Source value, const char *context) { + using Limit = std::numeric_limits; + if (value > static_cast(Limit::max())) { + throw std::runtime_error(std::string(context) + " is too large"); + } + return static_cast(value); +} + +void ResetTensorMetadata(DLTensor &tensor) { + delete[] tensor.shape; + tensor.shape = nullptr; + delete[] tensor.strides; + tensor.strides = nullptr; +} +} // namespace + +static void CheckValidBuffer(const void *ptr) { + if (ptr == nullptr) { + throw std::runtime_error("NULL buffer not accepted"); } } @@ -42,22 +63,35 @@ BufferInterface::BufferInterface(DLPackPyTensor &&dlTensor) { m_dlTensor = std::move(dlTensor); } -py::tuple BufferInterface::shape() const { - py::tuple shape(m_dlTensor->ndim); - for (size_t i = 0; i < shape.size(); ++i) { - shape[i] = m_dlTensor->shape[i]; - } - return shape; -} - -py::tuple BufferInterface::strides() const { - py::tuple strides(m_dlTensor->ndim); - - for (size_t i = 0; i < strides.size(); ++i) { - strides[i] = m_dlTensor->strides[i]; - } - return strides; -} +py::tuple BufferInterface::shape() const { + const auto ndim = static_cast(m_dlTensor->ndim); + py::tuple shape(ndim); + if (m_dlTensor->shape == nullptr) { + return shape; + } + + std::vector values(ndim); + std::copy_n(m_dlTensor->shape, ndim, values.begin()); + for (size_t i = 0; i < ndim; ++i) { + shape[i] = values[i]; + } + return shape; +} + +py::tuple BufferInterface::strides() const { + const auto ndim = static_cast(m_dlTensor->ndim); + py::tuple strides(ndim); + if (m_dlTensor->strides == nullptr) { + 
return strides; + } + + std::vector values(ndim); + std::copy_n(m_dlTensor->strides, ndim, values.begin()); + for (size_t i = 0; i < ndim; ++i) { + strides[i] = values[i]; + } + return strides; +} std::string BufferInterface::dtype() const { if (m_dlTensor->dtype.bits == 8) @@ -71,21 +105,22 @@ void *BufferInterface::data() const { return m_dlTensor->data; } -py::capsule BufferInterface::dlpack(py::object stream) const { - - struct ManagerCtx { - DLManagedTensor tensor; - std::shared_ptr extBuffer; +py::capsule BufferInterface::dlpack(py::object stream) const { + static_cast(stream); + + struct ManagerCtx { + DLManagedTensor tensor; + std::shared_ptr extBuffer; }; auto ctx = std::make_unique(); - // Set up tensor deleter to delete the ManagerCtx - ctx->tensor.manager_ctx = ctx.get(); - ctx->tensor.deleter = [](DLManagedTensor *tensor) { - auto *ctx = static_cast(tensor->manager_ctx); - delete ctx; - }; + // Set up tensor deleter to delete the ManagerCtx + ctx->tensor.manager_ctx = ctx.get(); + ctx->tensor.deleter = [](DLManagedTensor *tensor) { + auto *manager_ctx = static_cast(tensor->manager_ctx); + delete manager_ctx; + }; // Copy tensor data ctx->tensor.dl_tensor = *m_dlTensor; @@ -131,54 +166,59 @@ void BufferInterface::ExportToPython(py::module &m) { .def("__dlpack_device__", &BufferInterface::dlpackDevice, "Get the device associated with the buffer"); } -int BufferInterface::LoadDLPack(std::vector& _shape, std::vector& _stride, uint32_t bit_depth, std::string& _type_str, void* _data, int device_id_) { - m_dlTensor->byte_offset = 0; - m_dlTensor->device.device_type = kDLROCM; // TODO: infer the device type from the memory buffer - m_dlTensor->device.device_id = device_id_; +int BufferInterface::LoadDLPack(const std::vector& _shape, const std::vector& _stride, uint32_t bit_depth, const std::string& _type_str, void* _data, int device_id_) { + if (_shape.size() != _stride.size()) { + throw std::runtime_error("Shape and stride rank must match"); + } + + 
m_dlTensor->byte_offset = 0; + m_dlTensor->device.device_type = kDLROCM; // TODO: infer the device type from the memory buffer + m_dlTensor->device.device_id = device_id_; // Convert data void* ptr = _data; CheckValidBuffer(ptr); m_dlTensor->data = ptr; - // Convert DataType - if (_type_str != "|u1" && _type_str != "|u2") { // TODO: can also be other letters - throw std::runtime_error("Could not create DL Pack tensor! Invalid typstr: " + _type_str); - return -1; - } - - int itemSizeDT; - - m_dlTensor->dtype.code = kDLUInt; - - if (bit_depth == 8) { - m_dlTensor->dtype.bits = 8; - itemSizeDT = sizeof(uint8_t); - } else if (bit_depth == 10) { - m_dlTensor->dtype.bits = 16; - itemSizeDT = sizeof(uint16_t); - } - m_dlTensor->dtype.lanes = 1; - - // Convert ndim - m_dlTensor->ndim = _shape.size(); - - // Convert shape - m_dlTensor->shape = new int64_t[m_dlTensor->ndim]; - for (int i = 0; i < m_dlTensor->ndim; ++i) { - m_dlTensor->shape[i] = _shape[i]; - } - - // Convert strides - int strides_dim = _stride.size(); - m_dlTensor->strides = new int64_t[strides_dim]; - for (int i = 0; i < strides_dim; ++i) { - m_dlTensor->strides[i] = _stride[i]; - if (m_dlTensor->strides[i] % itemSizeDT != 0) { - throw std::runtime_error("Stride must be a multiple of the element size in bytes"); - return -1; - } - m_dlTensor->strides[i] /= itemSizeDT; - } - return 0; -} + // Convert DataType + if (_type_str != "|u1" && _type_str != "|u2") { // TODO: can also be other letters + throw std::runtime_error("Could not create DL Pack tensor! 
Invalid typstr: " + _type_str); + } + + m_dlTensor->dtype.code = kDLUInt; + int item_size_dt = 0; + if (bit_depth == 8U) { + m_dlTensor->dtype.bits = 8U; + item_size_dt = static_cast(sizeof(uint8_t)); + } else if (bit_depth == 10U) { + m_dlTensor->dtype.bits = 16U; + item_size_dt = static_cast(sizeof(uint16_t)); + } else { + throw std::runtime_error("Unsupported bit depth for DLPack export"); + } + m_dlTensor->dtype.lanes = 1; + + // Convert ndim + ResetTensorMetadata(*m_dlTensor); + const auto ndim = CheckedNumericCast(_shape.size(), "tensor rank"); + m_dlTensor->ndim = ndim; + + // Convert shape + auto shape = std::make_unique(static_cast(ndim)); + for (size_t i = 0; i < _shape.size(); ++i) { + shape[i] = CheckedNumericCast(_shape[i], "shape dimension"); + } + m_dlTensor->shape = shape.release(); + + // Convert strides + auto strides = std::make_unique(static_cast(ndim)); + for (size_t i = 0; i < _stride.size(); ++i) { + const auto stride_bytes = CheckedNumericCast(_stride[i], "stride"); + if (stride_bytes % item_size_dt != 0) { + throw std::runtime_error("Stride must be a multiple of the element size in bytes"); + } + strides[i] = stride_bytes / item_size_dt; + } + m_dlTensor->strides = strides.release(); + return 0; +} diff --git a/src/common/roc_pybuffer.h b/src/common/roc_pybuffer.h index 42395517..be26c1c2 100644 --- a/src/common/roc_pybuffer.h +++ b/src/common/roc_pybuffer.h @@ -44,9 +44,9 @@ class BufferInterface final : public std::enable_shared_from_this& _shape, std::vector& _stride, uint32_t bit_depth, std::string& _type_str, void* _data, int device_id_); + BufferInterface() = default; + py::capsule dlpack(py::object stream) const; + int LoadDLPack(const std::vector& _shape, const std::vector& _stride, uint32_t bit_depth, const std::string& _type_str, void* _data, int device_id_); // to allow testing py::tuple dlpackDevice() const; @@ -54,7 +54,7 @@ class BufferInterface final : public std::enable_shared_from_this; DLPackPyTensor m_dlTensor; -}; - - 
-#endif // EXT_BUFFER_HEADER \ No newline at end of file +}; + + +#endif // EXT_BUFFER_HEADER diff --git a/src/common/roc_pydlpack.cpp b/src/common/roc_pydlpack.cpp index ec71ea1e..a24a71ec 100644 --- a/src/common/roc_pydlpack.cpp +++ b/src/common/roc_pydlpack.cpp @@ -20,58 +20,89 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include -#include -#include - -namespace py = pybind11; -#include "roc_pydlpack.h" -#include -#include - -DLPackPyTensor::DLPackPyTensor() noexcept : m_tensor{} { -} +#include +#include +#include +#include + +namespace py = pybind11; +#include "roc_pydlpack.h" +#include +#include +#include +#include + +namespace { +void ReleaseTensorMetadata(DLManagedTensor *self) { + delete[] self->dl_tensor.shape; + self->dl_tensor.shape = nullptr; + delete[] self->dl_tensor.strides; + self->dl_tensor.strides = nullptr; +} + +template +Target CheckedNumericCast(Source value, const char *context) { + if (value < 0 || value > static_cast(std::numeric_limits::max())) { + throw std::runtime_error(std::string(context) + " is out of range"); + } + return static_cast(value); +} + +DLManagedTensor MakeManagedTensor(const DLTensor &tensor) { + DLManagedTensor managed_tensor{}; + managed_tensor.dl_tensor = tensor; + return managed_tensor; +} +} // namespace + +DLPackPyTensor::DLPackPyTensor() noexcept : m_tensor{} { + m_tensor.deleter = ReleaseTensorMetadata; +} DLPackPyTensor::DLPackPyTensor(DLManagedTensor &&managedTensor) : m_tensor{std::move(managedTensor)} { managedTensor = {}; } -DLPackPyTensor::DLPackPyTensor(const DLTensor &tensor) : DLPackPyTensor(DLManagedTensor{tensor}) { -} - -DLPackPyTensor::DLPackPyTensor(const py::buffer_info &info, const DLDevice &dev) : m_tensor{} { - DLTensor &dlTensor = m_tensor.dl_tensor; - dlTensor.data = info.ptr; - //TBD dtype - dlTensor.dtype.code = kDLInt; - dlTensor.dtype.bits = 8; - dlTensor.dtype.lanes = 1; - dlTensor.ndim = info.ndim; - dlTensor.device = dev; - 
dlTensor.byte_offset = 0; - - m_tensor.deleter = [](DLManagedTensor *self) { - delete[] self->dl_tensor.shape; - self->dl_tensor.shape = nullptr; - delete[] self->dl_tensor.strides; - self->dl_tensor.strides = nullptr; - }; - - try { - dlTensor.shape = new int64_t[info.ndim]; - std::copy_n(info.shape.begin(), info.shape.size(), dlTensor.shape); - - dlTensor.strides = new int64_t[info.ndim]; - for (int i = 0; i < info.ndim; ++i) { - if (info.strides[i] % info.itemsize != 0) { - throw std::runtime_error("Stride must be a multiple of the element size in bytes"); - } - - dlTensor.strides[i] = info.strides[i] / info.itemsize; - } - } catch (...) { - m_tensor.deleter(&m_tensor); - throw; +DLPackPyTensor::DLPackPyTensor(const DLTensor &tensor) : DLPackPyTensor(MakeManagedTensor(tensor)) { +} + +DLPackPyTensor::DLPackPyTensor(const py::buffer_info &info, const DLDevice &dev) : m_tensor{} { + DLTensor &dlTensor = m_tensor.dl_tensor; + const auto rank = CheckedNumericCast(info.ndim, "tensor rank"); + dlTensor.data = info.ptr; + //TBD dtype + dlTensor.dtype.code = kDLInt; + dlTensor.dtype.bits = 8; + dlTensor.dtype.lanes = 1; + dlTensor.ndim = CheckedNumericCast(info.ndim, "tensor rank"); + dlTensor.device = dev; + dlTensor.byte_offset = 0; + + m_tensor.deleter = ReleaseTensorMetadata; + + try { + std::vector shape_values(rank); + std::transform(info.shape.begin(), info.shape.end(), shape_values.begin(), [](ssize_t dimension) { + return static_cast(dimension); + }); + auto shape = std::make_unique(rank); + std::copy(shape_values.begin(), shape_values.end(), shape.get()); + dlTensor.shape = shape.release(); + + std::vector stride_values(rank); + for (size_t i = 0; i < rank; ++i) { + const auto stride = info.strides[i]; + if (stride % info.itemsize != 0) { + throw std::runtime_error("Stride must be a multiple of the element size in bytes"); + } + stride_values[i] = static_cast(stride / info.itemsize); + } + auto strides = std::make_unique(rank); + 
std::copy(stride_values.begin(), stride_values.end(), strides.get()); + dlTensor.strides = strides.release(); + } catch (...) { + m_tensor.deleter(&m_tensor); + throw; } } diff --git a/src/rocdecode/roc_pydecode.cpp b/src/rocdecode/roc_pydecode.cpp index 928ab71b..18280379 100644 --- a/src/rocdecode/roc_pydecode.cpp +++ b/src/rocdecode/roc_pydecode.cpp @@ -37,6 +37,10 @@ void Test_PyReconfigureFlushCallback(); void Test_CalculateRgbImageSize(); #endif +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" +#endif PYBIND11_MODULE(rocpydecode, m) { m.doc() = "Python bindings for the C++ portions of rocDecode .."; @@ -222,8 +226,8 @@ PYBIND11_MODULE(rocpydecode, m) { }, "Get the data type of the buffer") .def("__dlpack__", [](std::shared_ptr& self, py::object stream) { return self->ext_buf[0]->dlpack(stream); - }, py::arg("stream") = NULL, "Export the buffer as a DLPack tensor") - .def("__dlpack_device__", [](std::shared_ptr& self) { + }, py::arg("stream") = py::none(), "Export the buffer as a DLPack tensor") + .def("__dlpack_device__", [](std::shared_ptr& /*self*/) { return py::make_tuple(py::int_(static_cast(DLDeviceType::kDLROCM)), py::int_(static_cast(0))); }, "Get the device associated with the buffer"); @@ -241,3 +245,6 @@ PYBIND11_MODULE(rocpydecode, m) { .def(py::init<>()) .def_static("test_all", &DLPackPyTensor::test_all); } +#if defined(__clang__) +#pragma clang diagnostic pop +#endif diff --git a/src/rocdecode/roc_pydecode.h b/src/rocdecode/roc_pydecode.h index 85126605..5a077c04 100644 --- a/src/rocdecode/roc_pydecode.h +++ b/src/rocdecode/roc_pydecode.h @@ -48,8 +48,6 @@ extern "C" { namespace py = pybind11; struct PyPacketData { - bool end_of_stream; - int pkt_flags; int64_t frame_pts; int64_t frame_size; int64_t bitstream_size; @@ -58,6 +56,9 @@ struct PyPacketData { uintptr_t frame_adrs_rgb; // rgb frame address uintptr_t frame_adrs_resized; // new resized yuv frame std::vector> ext_buf; + int 
pkt_flags = 0; + bool end_of_stream = false; + unsigned char padding_[3]{}; PyPacketData(){ ext_buf.push_back(std::make_shared()); //index[0]: always Y Tensor ext_buf.push_back(std::make_shared()); //index[1]: UV tensor in case of NV12, otherwise only U tensor when YUV444/P016 is supported @@ -68,9 +69,10 @@ struct PyPacketData { struct ConfigInfo { std::string device_name; std::string gcn_arch_name; - int pci_bus_id; - int pci_domain_id; - int pci_device_id; + int pci_bus_id = 0; + int pci_domain_id = 0; + int pci_device_id = 0; + int padding_ = 0; }; // defined in roc_pyvideodemuxer.cpp (FFmpeg dependent) diff --git a/src/rocdecode/roc_pyvideodecode.cpp b/src/rocdecode/roc_pyvideodecode.cpp index b82c6b02..9f4a2cad 100644 --- a/src/rocdecode/roc_pyvideodecode.cpp +++ b/src/rocdecode/roc_pyvideodecode.cpp @@ -167,20 +167,20 @@ py::object PyRocVideoDecoder::PyGetFrameYuv(PyPacketData& packet, bool SeparateY uint32_t bit_depth = GetBitDepth(); std::string type_str; std::vector stride; - if (bit_depth == 8) { - type_str = static_cast("|u1"); + if (bit_depth == 8U) { + type_str = "|u1"; stride.push_back(static_cast(surf_stride)); stride.push_back(sizeof(uint8_t)); - } else if (bit_depth <= 16) { - type_str = static_cast("|u2"); + } else if (bit_depth <= 16U) { + type_str = "|u2"; stride.push_back(static_cast(surf_stride)); stride.push_back(sizeof(uint16_t)); } // for NV12 format (also YUV444 & P016 when supported), Y always in ext_buf vector index [0] // The tensor shape->height will be all the Yuv planes if user specify 'FALSE' in 'SeparateYuvPlanes' argument - float plane_height_multiplier = SeparateYuvPlanes ? 1.0 : 1.5; // 1.5 for YUV NV12 - std::vector shape{ static_cast(height * plane_height_multiplier), static_cast(width)}; - packet.ext_buf[0]->LoadDLPack(shape, stride, bit_depth, type_str, (void *)packet.frame_adrs, device_id_); + const double plane_height_multiplier = SeparateYuvPlanes ? 
1.0 : 1.5; // 1.5 for YUV NV12 + std::vector shape{ static_cast(static_cast(height) * plane_height_multiplier), static_cast(width)}; + packet.ext_buf[0]->LoadDLPack(shape, stride, bit_depth, type_str, reinterpret_cast(packet.frame_adrs), device_id_); if (SeparateYuvPlanes) { // get surface format OutputSurfaceInfo* p_surf_info; @@ -188,9 +188,9 @@ py::object PyRocVideoDecoder::PyGetFrameYuv(PyPacketData& packet, bool SeparateY if (ret) { // for NV12 only the UV interleaved in one tensor: ext_buf vector index [1] if (p_surf_info->surface_format == rocDecVideoSurfaceFormat_NV12 || p_surf_info->surface_format == rocDecVideoSurfaceFormat_P016) { - std::vector shape{ static_cast(height >> 1), static_cast(width)}; - uintptr_t uv_offset = p_surf_info->output_pitch * p_surf_info->output_vstride; // count for possible padding - packet.ext_buf[1]->LoadDLPack(shape, stride, bit_depth, type_str, (void *)(packet.frame_adrs + uv_offset), device_id_); + std::vector uv_shape{ static_cast(height >> 1U), static_cast(width)}; + const uintptr_t uv_offset = static_cast(p_surf_info->output_pitch) * p_surf_info->output_vstride; // count for possible padding + packet.ext_buf[1]->LoadDLPack(uv_shape, stride, bit_depth, type_str, reinterpret_cast(packet.frame_adrs + uv_offset), device_id_); } else { cout << "surf fmt: " << p_surf_info->surface_format << " [not supported]" << "\n"; } @@ -202,13 +202,12 @@ py::object PyRocVideoDecoder::PyGetFrameYuv(PyPacketData& packet, bool SeparateY size_t PyRocVideoDecoder::CalculateRgbImageSize(OutputFormatEnum& e_output_format, OutputSurfaceInfo * p_surf_info) { size_t rgb_image_size = 0; - int rgb_width = 0; - if (p_surf_info->bit_depth == 8) { - rgb_width = (p_surf_info->output_width + 1) & ~1; // has to be a multiple of 2 for hip colorconvert kernels - rgb_image_size = ((e_output_format == bgr) || (e_output_format == rgb)) ? 
rgb_width * p_surf_info->output_height * 3 : rgb_width * p_surf_info->output_height * 4; + const size_t rgb_width = static_cast((p_surf_info->output_width + 1U) & ~1U); // has to be a multiple of 2 for hip colorconvert kernels + const size_t output_height = static_cast(p_surf_info->output_height); + if (p_surf_info->bit_depth == 8U) { + rgb_image_size = ((e_output_format == bgr) || (e_output_format == rgb)) ? rgb_width * output_height * 3U : rgb_width * output_height * 4U; } else { - rgb_width = (p_surf_info->output_width + 1) & ~1; - rgb_image_size = ((e_output_format == bgr) || (e_output_format == rgb)) ? rgb_width * p_surf_info->output_height * 3 : ((e_output_format == bgr48) || (e_output_format == rgb48)) ? rgb_width * p_surf_info->output_height * 6 : rgb_width * p_surf_info->output_height * 8; + rgb_image_size = ((e_output_format == bgr) || (e_output_format == rgb)) ? rgb_width * output_height * 3U : ((e_output_format == bgr48) || (e_output_format == rgb48)) ? rgb_width * output_height * 6U : rgb_width * output_height * 8U; } return rgb_image_size; } @@ -233,7 +232,7 @@ py::object PyRocVideoDecoder::PyGetFrameRgb(PyPacketData& packet, int rgb_format return py::cast(-1); // ret failure // allocate 'new' RGB image device-memory if wasn't if(frame_ptr_rgb == nullptr) { - HIP_API_CALL(hipMalloc((void **)&frame_ptr_rgb, rgb_image_size)); + HIP_API_CALL(hipMalloc(reinterpret_cast(&frame_ptr_rgb), rgb_image_size)); if(frame_ptr_rgb == nullptr) return py::cast(-1); // ret failure } @@ -244,7 +243,7 @@ py::object PyRocVideoDecoder::PyGetFrameRgb(PyPacketData& packet, int rgb_format // use post process instance VideoPostProcess * post_proc = post_process_class; // Get Stream, and convert YUV 2 RGB - post_proc->ColorConvertYUV2RGB(reinterpret_cast(packet.frame_adrs), surf_info, frame_ptr_rgb, e_output_format, 0); + post_proc->ColorConvertYUV2RGB(reinterpret_cast(packet.frame_adrs), surf_info, frame_ptr_rgb, e_output_format, nullptr); // save the rgb ptr 
packet.frame_adrs_rgb = reinterpret_cast(frame_ptr_rgb); // Load DLPack Tensor @@ -253,10 +252,10 @@ py::object PyRocVideoDecoder::PyGetFrameRgb(PyPacketData& packet, int rgb_format uint32_t height = GetHeight(); uint32_t surf_stride = post_proc->GetRgbStride(e_output_format, surf_info); uint32_t bit_depth = GetBitDepth(); - std::string type_str(static_cast("|u1")); - std::vector shape{ static_cast(height), static_cast(width), 3}; // 3 rgb channels - std::vector stride{ static_cast(surf_stride), 1, 0}; // python assumes same dim for both shape & strides - packet.ext_buf[0]->LoadDLPack(shape, stride, bit_depth, type_str, (void *)frame_ptr_rgb, device_id_); + std::string type_str("|u1"); + std::vector shape{ static_cast(height), static_cast(width), size_t{3} }; // 3 rgb channels + std::vector stride{ static_cast(surf_stride), size_t{3}, size_t{1} }; + packet.ext_buf[0]->LoadDLPack(shape, stride, bit_depth, type_str, static_cast(frame_ptr_rgb), device_id_); } } return py::cast(packet.frame_pts); @@ -275,11 +274,13 @@ uintptr_t PyRocVideoDecoder::PyResizeFrame(PyPacketData& packet, Dim *resized_di if((reinterpret_cast(packet.frame_adrs) == nullptr) || resized_dim->w == 0 || resized_dim->h == 0) return 0; OutputSurfaceInfo *surf_info = reinterpret_cast(in_surf_info); + const uint32_t resized_width = static_cast(resized_dim->w); + const uint32_t resized_height = static_cast(resized_dim->h); // validate request - if ((surf_info->output_width == resized_dim->w) && (surf_info->output_height == resized_dim->h)) + if ((surf_info->output_width == resized_width) && (surf_info->output_height == resized_height)) return 0; uint8_t *in_yuv_frame = reinterpret_cast(packet.frame_adrs); - size_t requested_size_in_bytes = resized_dim->w * (resized_dim->h + (resized_dim->h >> 1)) * surf_info->bytes_per_pixel; + const size_t requested_size_in_bytes = static_cast(resized_width) * static_cast(resized_height + (resized_height >> 1U)) * surf_info->bytes_per_pixel; // alloc or refill 
surf-info one time, and refill if size changed if (resized_image_size_in_bytes != requested_size_in_bytes) { resized_image_size_in_bytes = requested_size_in_bytes; @@ -291,11 +292,11 @@ uintptr_t PyRocVideoDecoder::PyResizeFrame(PyPacketData& packet, Dim *resized_di } } memcpy(resized_surf_info, surf_info, sizeof(OutputSurfaceInfo)); - resized_surf_info->output_width = resized_dim->w; - resized_surf_info->output_height = resized_dim->h; - resized_surf_info->output_pitch = resized_dim->w * surf_info->bytes_per_pixel; - resized_surf_info->output_vstride = resized_dim->h; - resized_surf_info->output_surface_size_in_bytes = resized_surf_info->output_pitch * (resized_dim->h + (resized_dim->h >> 1)); + resized_surf_info->output_width = resized_width; + resized_surf_info->output_height = resized_height; + resized_surf_info->output_pitch = resized_width * surf_info->bytes_per_pixel; + resized_surf_info->output_vstride = resized_height; + resized_surf_info->output_surface_size_in_bytes = resized_surf_info->output_pitch * (resized_height + (resized_height >> 1U)); // new size means new MEM, dealloc old one if exist if (frame_ptr_resized != nullptr) { @@ -308,17 +309,24 @@ uintptr_t PyRocVideoDecoder::PyResizeFrame(PyPacketData& packet, Dim *resized_di } // new MEM if not allocated if (frame_ptr_resized == nullptr) { - hipError_t hip_status = hipMalloc((void **)&frame_ptr_resized, resized_image_size_in_bytes); + hipError_t hip_status = hipMalloc(reinterpret_cast(&frame_ptr_resized), resized_image_size_in_bytes); if (hip_status != hipSuccess) { std::cerr << "ERROR: hipMalloc failed to allocate the device memory for the output!" << hip_status << std::endl; return 0; } } // call resize kernel, TODO: below code assumes NV12/P016 for decoded surface. 
Modify to take other surface formats in future + const int resized_width_int = static_cast(resized_width); + const int resized_height_int = static_cast(resized_height); + const int output_pitch = static_cast(surf_info->output_pitch); + const int output_width = static_cast(surf_info->output_width); + const int output_height = static_cast(surf_info->output_height); + const uintptr_t chroma_offset = static_cast(surf_info->output_vstride) * surf_info->output_pitch; + uint8_t *const chroma_plane = reinterpret_cast(packet.frame_adrs + chroma_offset); if (surf_info->bytes_per_pixel == 2) { - ResizeP016(frame_ptr_resized, resized_dim->w * 2, resized_dim->w, resized_dim->h, in_yuv_frame, surf_info->output_pitch, surf_info->output_width, surf_info->output_height, (in_yuv_frame + surf_info->output_vstride * surf_info->output_pitch), nullptr, 0); + ResizeP016(frame_ptr_resized, resized_width_int * 2, resized_width_int, resized_height_int, in_yuv_frame, output_pitch, output_width, output_height, chroma_plane, nullptr, nullptr); } else { - ResizeNv12(frame_ptr_resized, resized_dim->w, resized_dim->w, resized_dim->h, in_yuv_frame, surf_info->output_pitch, surf_info->output_width, surf_info->output_height, (in_yuv_frame + surf_info->output_vstride * surf_info->output_pitch), nullptr, 0); + ResizeNv12(frame_ptr_resized, resized_width_int, resized_width_int, resized_height_int, in_yuv_frame, output_pitch, output_width, output_height, chroma_plane, nullptr, nullptr); } // save new resized frame address packet.frame_adrs_resized = reinterpret_cast(frame_ptr_resized); @@ -351,7 +359,7 @@ py::object PyRocVideoDecoder::PySaveFrameToFile(std::string& output_file_name_in if (e_output_format != OutputFormatEnum::native) { // native == YUV frame image_size = CalculateRgbImageSize(e_output_format, p_surf_info); } - SaveFrameToFile(output_file_name, (void *)surf_mem, p_surf_info, image_size); + SaveFrameToFile(output_file_name, reinterpret_cast(surf_mem), p_surf_info, image_size); } return 
py::cast(Py_None); } @@ -404,13 +412,13 @@ uint32_t PyRocVideoDecoder::PyGetBitDepth() { #if ROCDECODE_CHECK_VERSION(0,6,0) // for python binding, Session overhead refers to decoder initialization and deinitialization time -py::object PyRocVideoDecoder::PyAddDecoderSessionOverHead(int session_id, double duration) { +py::object PyRocVideoDecoder::PyAddDecoderSessionOverHead(std::uintptr_t session_id, double duration) { AddDecoderSessionOverHead(static_cast(session_id), duration); return py::cast(Py_None); } // for python binding, Session overhead refers to decoder initialization and deinitialization time -py::object PyRocVideoDecoder::PyGetDecoderSessionOverHead(int session_id) { +py::object PyRocVideoDecoder::PyGetDecoderSessionOverHead(std::uintptr_t session_id) { return py::cast(GetDecoderSessionOverHead(static_cast(session_id))); } diff --git a/src/rocdecode/roc_pyvideodecode.h b/src/rocdecode/roc_pyvideodecode.h index f35ac460..c2ee2db3 100644 --- a/src/rocdecode/roc_pyvideodecode.h +++ b/src/rocdecode/roc_pyvideodecode.h @@ -34,7 +34,8 @@ typedef enum ReconfigFlushMode_enum { // this struct is used by videodecode and videodecodeMultiFiles to dump last frames to file typedef struct ReconfigDumpFileStruct_t { - bool b_dump_frames_to_file; + bool b_dump_frames_to_file = false; + unsigned char padding_[7]{}; std::string output_file_name; } ReconfigDumpFileStruct; @@ -49,7 +50,7 @@ class PyRocVideoDecoder : public RocVideoDecoder { PyRocVideoDecoder(int device_id, int mem_type, rocDecVideoCodec codec, bool force_zero_latency = false, const Rect *p_crop_rect = nullptr, int max_width = 0, int max_height = 0, uint32_t clk_rate = 0) : RocVideoDecoder(device_id, static_cast(mem_type), codec, force_zero_latency, - p_crop_rect, false, max_width, max_height, clk_rate) { + p_crop_rect, false, 0U, max_width, max_height, clk_rate) { InitConfigStructure(); device_id_ = device_id; } ~PyRocVideoDecoder(); @@ -108,11 +109,10 @@ class PyRocVideoDecoder : public RocVideoDecoder { 
#if ROCDECODE_CHECK_VERSION(0,6,0) // Session overhead refers to decoder initialization and deinitialization time - py::object PyAddDecoderSessionOverHead(int session_id, double duration); - py::object PyGetDecoderSessionOverHead(int session_id); + py::object PyAddDecoderSessionOverHead(std::uintptr_t session_id, double duration); + py::object PyGetDecoderSessionOverHead(std::uintptr_t session_id); #endif private: - int device_id_; std::shared_ptr configInfo; void InitConfigStructure(); @@ -128,4 +128,8 @@ class PyRocVideoDecoder : public RocVideoDecoder { uint8_t *frame_ptr_resized = nullptr; size_t resized_image_size_in_bytes = 0; OutputSurfaceInfo *resized_surf_info = nullptr; -}; \ No newline at end of file + + private: + int device_id_ = 0; + [[maybe_unused]] int padding_ = 0; +}; diff --git a/src/rocdecode/roc_pyvideodecodecpu.cpp b/src/rocdecode/roc_pyvideodecodecpu.cpp index 7c8f7d35..fe5a54ea 100644 --- a/src/rocdecode/roc_pyvideodecodecpu.cpp +++ b/src/rocdecode/roc_pyvideodecodecpu.cpp @@ -351,13 +351,13 @@ uint32_t PyRocVideoDecoderCpu::PyGetBitDepth() { #if ROCDECODE_CHECK_VERSION(0,6,0) // for python binding, Session overhead refers to decoder initialization and deinitialization time -py::object PyRocVideoDecoderCpu::PyAddDecoderSessionOverHead(int session_id, double duration) { +py::object PyRocVideoDecoderCpu::PyAddDecoderSessionOverHead(std::uintptr_t session_id, double duration) { AddDecoderSessionOverHead(static_cast(session_id), duration); return py::cast(Py_None); } // for python binding, Session overhead refers to decoder initialization and deinitialization time -py::object PyRocVideoDecoderCpu::PyGetDecoderSessionOverHead(int session_id) { +py::object PyRocVideoDecoderCpu::PyGetDecoderSessionOverHead(std::uintptr_t session_id) { return py::cast(GetDecoderSessionOverHead(static_cast(session_id))); } diff --git a/src/rocdecode/roc_pyvideodecodecpu.h b/src/rocdecode/roc_pyvideodecodecpu.h index 45426d90..51ced086 100644 --- 
a/src/rocdecode/roc_pyvideodecodecpu.h +++ b/src/rocdecode/roc_pyvideodecodecpu.h @@ -90,8 +90,8 @@ class PyRocVideoDecoderCpu : public FFMpegVideoDecoder { #if ROCDECODE_CHECK_VERSION(0,6,0) // Session overhead refers to decoder initialization and deinitialization time - py::object PyAddDecoderSessionOverHead(int session_id, double duration); - py::object PyGetDecoderSessionOverHead(int session_id); + py::object PyAddDecoderSessionOverHead(std::uintptr_t session_id, double duration); + py::object PyGetDecoderSessionOverHead(std::uintptr_t session_id); #endif private: std::shared_ptr configInfo; @@ -105,4 +105,4 @@ class PyRocVideoDecoderCpu : public FFMpegVideoDecoder { uint8_t *frame_ptr_resized = nullptr; size_t resized_image_size_in_bytes = 0; OutputSurfaceInfo *resized_surf_info = nullptr; -}; \ No newline at end of file +}; diff --git a/src/rocjpeg/roc_pyjpeg.cpp b/src/rocjpeg/roc_pyjpeg.cpp index 347d9155..3f2abfe1 100644 --- a/src/rocjpeg/roc_pyjpeg.cpp +++ b/src/rocjpeg/roc_pyjpeg.cpp @@ -32,6 +32,10 @@ using namespace std; namespace py = pybind11; using namespace py::literals; +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" +#endif PYBIND11_MODULE(rocpyjpegdecode, m) { m.doc() = "Python bindings for the C++ portions of rocJPEG .."; @@ -108,3 +112,6 @@ PYBIND11_MODULE(rocpyjpegdecode, m) { .def("init_hip_device",&PyRocJpegUtils::InitHipDevice); } +#if defined(__clang__) +#pragma clang diagnostic pop +#endif diff --git a/src/rocjpeg/roc_pyjpeg.h b/src/rocjpeg/roc_pyjpeg.h index d61a8f87..803f6414 100644 --- a/src/rocjpeg/roc_pyjpeg.h +++ b/src/rocjpeg/roc_pyjpeg.h @@ -36,7 +36,5 @@ THE SOFTWARE. 
#include namespace py = pybind11; -using namespace py::literals; - -#endif // PY_ROC_JPEG_PYBIND11_HEADER \ No newline at end of file +#endif // PY_ROC_JPEG_PYBIND11_HEADER diff --git a/src/rocjpeg/roc_pyjpeg_codestream.cpp b/src/rocjpeg/roc_pyjpeg_codestream.cpp index 5ec2b6b9..518bd33c 100644 --- a/src/rocjpeg/roc_pyjpeg_codestream.cpp +++ b/src/rocjpeg/roc_pyjpeg_codestream.cpp @@ -28,6 +28,7 @@ THE SOFTWARE. #include using namespace std; +using namespace py::literals; void CodeStream::ExportToPython(py::module& m) { py::class_(m, "CodeStream", @@ -69,7 +70,7 @@ void CodeStream::ExportToPython(py::module& m) { )pbdoc"); } -int CodeStream::ReadFromFile(const std::filesystem::path& filename, std::shared_ptr>& file_data, int& file_size) { +int CodeStream::ReadFromFile(const std::filesystem::path& filename, std::shared_ptr>& file_buffer, size_t& file_size) { // Open image file in binary mode and go to the end to get file size std::ifstream input(filename, std::ios::in | std::ios::binary | std::ios::ate); if (!input.is_open()) { @@ -77,14 +78,19 @@ int CodeStream::ReadFromFile(const std::filesystem::path& filename, std::shared_ return EXIT_FAILURE; } // Get the size - file_size = static_cast(input.tellg()); + const auto raw_file_size = input.tellg(); + if (raw_file_size <= 0) { + std::cerr << "ERROR: Invalid image size: " << filename << std::endl; + return EXIT_FAILURE; + } + file_size = static_cast(raw_file_size); input.seekg(0, std::ios::beg); // Allocate shared buffer if not already allocated or too small - if (!file_data || file_data->size() < static_cast(file_size)) { - file_data = std::make_shared>(file_size); + if (!file_buffer || file_buffer->size() < file_size) { + file_buffer = std::make_shared>(file_size); } // Read the file into the buffer - if (!input.read(file_data->data(), file_size)) { + if (!input.read(file_buffer->data(), static_cast(file_size))) { std::cerr << "ERROR: Cannot read from file: " << filename << std::endl; return EXIT_FAILURE; } @@ 
-92,7 +98,7 @@ int CodeStream::ReadFromFile(const std::filesystem::path& filename, std::shared_ } // Use the dat and its size if valid, otherwise use the file to load the data -int CodeStream::InitializeSingleImage(const std::filesystem::path& filename, const unsigned char* data, int data_size) { +int CodeStream::InitializeSingleImage(const std::filesystem::path& filename, const unsigned char* data, size_t data_size) { // File sanity check if(!filename.empty()) { if(!std::filesystem::exists(filename)) { @@ -102,13 +108,23 @@ int CodeStream::InitializeSingleImage(const std::filesystem::path& filename, con } // Read file data, if no data sent if (data != nullptr && data_size > 0) { - file_data = std::make_shared>(reinterpret_cast(data), reinterpret_cast(data) + data_size); + auto buffer = std::make_shared>(data_size); + std::copy_n(reinterpret_cast(data), data_size, buffer->begin()); + file_data = std::move(buffer); } else if(data == nullptr) { - int ret = EXIT_SUCCESS; - if((ret = ReadFromFile(filename, file_data, data_size)) != EXIT_SUCCESS) { - return ret; + if (ReadFromFile(filename, file_data, data_size) != EXIT_SUCCESS) { + return EXIT_FAILURE; } } + return InitializeStreamFromCurrentData(); +} + +int CodeStream::InitializeStreamFromCurrentData() { + if (!file_data || file_data->empty()) { + std::cerr << "ERROR: Empty JPEG stream" << std::endl; + return EXIT_FAILURE; + } + RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_NOT_INITIALIZED; rocjpeg_status = rocJpegStreamCreate(&stream_handle); if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { @@ -116,9 +132,9 @@ int CodeStream::InitializeSingleImage(const std::filesystem::path& filename, con return EXIT_FAILURE; } // Stream Parse - rocjpeg_status = rocJpegStreamParse(reinterpret_cast(file_data->data()), data_size, stream_handle); + rocjpeg_status = rocJpegStreamParse(reinterpret_cast(file_data->data()), file_data->size(), stream_handle); if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { - std::cerr << "ERROR: Failed to 
parse the input jpeg stream with " << rocJpegGetErrorName(rocjpeg_status) << ": Input File : " << (!filename.empty() ? filename : "") << std::endl; + std::cerr << "ERROR: Failed to parse the input jpeg stream with " << rocJpegGetErrorName(rocjpeg_status) << std::endl; Release(); return EXIT_FAILURE; } @@ -127,11 +143,36 @@ int CodeStream::InitializeSingleImage(const std::filesystem::path& filename, con void CodeStream::Release() { if(stream_handle) { - RocJpegStatus rocjpeg_status = rocJpegStreamDestroy(stream_handle); + rocJpegStreamDestroy(stream_handle); stream_handle = nullptr; } } +CodeStream::CodeStream(const CodeStream& other) + : file_data(other.file_data), + data_ref_bytes_(other.data_ref_bytes_), + data_ref_arr_(other.data_ref_arr_) { + if (other.stream_handle != nullptr) { + InitializeStreamFromCurrentData(); + } +} + +CodeStream& CodeStream::operator=(const CodeStream& other) { + if (this == &other) { + return *this; + } + + Release(); + file_data = other.file_data; + data_ref_bytes_ = other.data_ref_bytes_; + data_ref_arr_ = other.data_ref_arr_; + + if (other.stream_handle != nullptr) { + InitializeStreamFromCurrentData(); + } + return *this; +} + CodeStream::CodeStream(const std::filesystem::path& filename) { py::gil_scoped_release release; InitializeSingleImage(filename, nullptr, 0); @@ -139,7 +180,7 @@ CodeStream::CodeStream(const std::filesystem::path& filename) { CodeStream::CodeStream(const unsigned char* data, size_t length) { py::gil_scoped_release release; - InitializeSingleImage(static_cast(""), data, length); + InitializeSingleImage({}, data, length); } CodeStream::CodeStream(py::bytes data) { @@ -147,14 +188,14 @@ CodeStream::CodeStream(py::bytes data) { std::string data_str = static_cast(data_ref_bytes_); // Convert py::bytes to std::string std::string_view data_view(data_str); py::gil_scoped_release release; - InitializeSingleImage(static_cast(""), reinterpret_cast(data_view.data()), data_view.size()); + InitializeSingleImage({}, 
reinterpret_cast(data_view.data()), data_view.size()); } CodeStream::CodeStream(py::array_t arr) { data_ref_arr_ = arr; auto data = data_ref_arr_.unchecked<1>(); py::gil_scoped_release release; - InitializeSingleImage(static_cast(""), data.data(0), data.size()); + InitializeSingleImage({}, data.data(0), static_cast(data.size())); } CodeStream::CodeStream() { diff --git a/src/rocjpeg/roc_pyjpeg_codestream.h b/src/rocjpeg/roc_pyjpeg_codestream.h index b9b7e787..41d04346 100644 --- a/src/rocjpeg/roc_pyjpeg_codestream.h +++ b/src/rocjpeg/roc_pyjpeg_codestream.h @@ -36,6 +36,10 @@ class CodeStream { CodeStream(const unsigned char*, size_t); CodeStream(py::bytes); CodeStream(py::array_t); + CodeStream(const CodeStream&); + CodeStream& operator=(const CodeStream&); + CodeStream(CodeStream&&) noexcept = default; + CodeStream& operator=(CodeStream&&) noexcept = default; ~CodeStream(); CodeStream(); @@ -49,8 +53,9 @@ class CodeStream { py::bytes data_ref_bytes_; py::array_t data_ref_arr_; void Release(); - int ReadFromFile(const std::filesystem::path& filename, std::shared_ptr>& file_data, int& file_size); - int InitializeSingleImage(const std::filesystem::path& filename, const unsigned char* data, int data_size); + int ReadFromFile(const std::filesystem::path& filename, std::shared_ptr>& file_buffer, size_t& file_size); + int InitializeSingleImage(const std::filesystem::path& filename, const unsigned char* data, size_t data_size); + int InitializeStreamFromCurrentData(); }; -#endif // PY_ROC_JPEG_CODE_STREAM_HEADER \ No newline at end of file +#endif // PY_ROC_JPEG_CODE_STREAM_HEADER diff --git a/src/rocjpeg/roc_pyjpeg_decode_source.cpp b/src/rocjpeg/roc_pyjpeg_decode_source.cpp index e6b37baf..0c9f6b8a 100644 --- a/src/rocjpeg/roc_pyjpeg_decode_source.cpp +++ b/src/rocjpeg/roc_pyjpeg_decode_source.cpp @@ -23,6 +23,8 @@ THE SOFTWARE. 
#include #include "roc_pyjpeg_decode_source.h" +using namespace py::literals; + DecodeSource::DecodeSource(const CodeStream* code_stream_ptr) : code_stream_(std::make_unique(*code_stream_ptr)) // make a copy , code_stream_ptr_(code_stream_.get()) { diff --git a/src/rocjpeg/roc_pyjpeg_decode_source.h b/src/rocjpeg/roc_pyjpeg_decode_source.h index 8911f224..b1292853 100644 --- a/src/rocjpeg/roc_pyjpeg_decode_source.h +++ b/src/rocjpeg/roc_pyjpeg_decode_source.h @@ -25,7 +25,6 @@ THE SOFTWARE. #include "roc_pyjpeg_codestream.h" namespace py = pybind11; -using namespace py::literals; class DecodeSource { public: diff --git a/src/rocjpeg/roc_pyjpeg_decoder.cpp b/src/rocjpeg/roc_pyjpeg_decoder.cpp index ef08f9bc..39f46843 100644 --- a/src/rocjpeg/roc_pyjpeg_decoder.cpp +++ b/src/rocjpeg/roc_pyjpeg_decoder.cpp @@ -24,8 +24,10 @@ THE SOFTWARE. #include "roc_pyjpeg_utils.h" #include "roc_pyjpeg_codestream.h" #include "roc_pyjpeg_images.h" +#include using namespace std; +using namespace py::literals; void Decoder::ExportToPython(py::module& m) { // Decoder Class @@ -135,8 +137,8 @@ std::pair> Decoder::decode(std::vector stream_handles; std::vector decode_params_list; std::vector destinations; @@ -144,7 +146,7 @@ std::pair> Decoder::decode(std::vector images_; - if(batch_size <= 0) + if(batch_size == 0U) return {elapsed_ms, images_}; // loop the whole list length - Process as one BATCH @@ -175,7 +177,7 @@ std::pair> Decoder::decode(std::vector(count_of_valid_instances), // less or equal to the batch_size decode_params_list.data(), destinations.data() ); @@ -188,7 +190,7 @@ std::pair> Decoder::decode(std::vector(widths[0]); + img.m_height = static_cast(heights[0]); // Get Channel Pitch And Sizes PyRocJpegUtils rocjpeg_utils; if (rocjpeg_utils.GetChannelPitchAndSizes(img.decode_params, img.subsampling, widths, heights, img.num_channels, img.output_image, channel_sizes)) { @@ -248,16 +250,21 @@ int Decoder::GetImageInfo(RocJpegStreamHandle stream_handle, PyJpegImages& img) } // 
allocate memory for each channel hipError_t hip_status = hipSuccess; - for (int i = 0; i < img.num_channels; i++) { - if (img.output_image.channel[i] != nullptr) { - hip_status = hipFree((void *)img.output_image.channel[i]); - if (hip_status != hipSuccess) + std::array channels{}; + std::copy(std::begin(img.output_image.channel), std::end(img.output_image.channel), channels.begin()); + for (uint32_t i = 0; i < img.num_channels; ++i) { + if (channels[i] != nullptr) { + hip_status = hipFree(static_cast(channels[i])); + if (hip_status != hipSuccess) { return EXIT_FAILURE; - img.output_image.channel[i] = nullptr; + } + channels[i] = nullptr; } - hip_status = hipMalloc(&img.output_image.channel[i], channel_sizes[i]); - if (hip_status != hipSuccess) + hip_status = hipMalloc(&channels[i], static_cast(channel_sizes[i])); + if (hip_status != hipSuccess) { return EXIT_FAILURE; + } } + std::copy(channels.begin(), channels.end(), std::begin(img.output_image.channel)); return EXIT_SUCCESS; -} \ No newline at end of file +} diff --git a/src/rocjpeg/roc_pyjpeg_decoder.h b/src/rocjpeg/roc_pyjpeg_decoder.h index 1bf53bce..4559456f 100644 --- a/src/rocjpeg/roc_pyjpeg_decoder.h +++ b/src/rocjpeg/roc_pyjpeg_decoder.h @@ -42,15 +42,16 @@ class Decoder { // set output image format void SetOutputFormat(RocJpegOutputFormat output_format); - RocJpegOutputFormat GetFormat() {return user_output_format;}; - void SetFormat(RocJpegOutputFormat fmt) { user_output_format = fmt;}; + RocJpegOutputFormat GetFormat() const { return user_output_format; } + void SetFormat(RocJpegOutputFormat fmt) { user_output_format = fmt; } private: int m_device_id; RocJpegBackend m_backend; RocJpegHandle rocjpeg_handle; // main session RocJpegOutputFormat user_output_format; // user can adjust + [[maybe_unused]] uint32_t padding_ = 0; int GetImageInfo(RocJpegStreamHandle stream_handle, PyJpegImages& img); // finalize the parsing job }; -#endif // PY_ROC_JPEG_HEADER \ No newline at end of file +#endif // 
PY_ROC_JPEG_HEADER diff --git a/src/rocjpeg/roc_pyjpeg_images.cpp b/src/rocjpeg/roc_pyjpeg_images.cpp index 1696e102..e86c39ea 100644 --- a/src/rocjpeg/roc_pyjpeg_images.cpp +++ b/src/rocjpeg/roc_pyjpeg_images.cpp @@ -20,6 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include #include #include "rocjpeg/rocjpeg.h" #include "common/roc_pybuffer.h" @@ -63,8 +64,8 @@ void PyJpegImages::ExportToPython(py::module& m) { }, "Get the data type of the buffer") .def("__dlpack__", [](std::shared_ptr& self, py::object stream) { return self->ext_buf[0]->dlpack(stream); - }, py::arg("stream") = NULL, "Export the buffer as a DLPack tensor") - .def("__dlpack_device__", [](std::shared_ptr& self) { + }, py::arg("stream") = py::none(), "Export the buffer as a DLPack tensor") + .def("__dlpack_device__", [](std::shared_ptr& /*self*/) { return py::make_tuple(py::int_(static_cast(DLDeviceType::kDLROCM)), py::int_(static_cast(0))); }, "Get the device associated with the buffer") .def_readwrite("height", &PyJpegImages::m_height, @@ -81,7 +82,7 @@ py::array_t PyJpegImages::to_numpy(int index) { py::array_t ret; if (index < 0 || index >= static_cast(ext_buf.size())) throw std::out_of_range("Invalid channel index"); - auto& buf = ext_buf[index]; + auto& buf = ext_buf[static_cast(index)]; uint8_t* data_ptr = static_cast(buf->data()); py::tuple py_shape = buf->shape(); if (py_shape.size() == 3) { @@ -106,10 +107,10 @@ py::array_t PyJpegImages::to_numpy(int index) { bool PyJpegImages::GetOutputDims(std::vector& widths, std::vector& heights, uint32_t img_width, uint32_t img_height, RocJpegOutputFormat output_format, - RocJpegChromaSubsampling subsampling) { + RocJpegChromaSubsampling image_subsampling) { switch (output_format) { case ROCJPEG_OUTPUT_NATIVE: - switch (subsampling) { + switch (image_subsampling) { case ROCJPEG_CSS_444: widths[2] = widths[1] = widths[0] = img_width; heights[2] = heights[1] = heights[0] = img_height; @@ -120,7 
+121,7 @@ bool PyJpegImages::GetOutputDims(std::vector& widths, std::vector> 1; break; case ROCJPEG_CSS_422: - widths[0] = img_width * 2; + widths[0] = img_width * 2U; heights[0] = img_height; break; case ROCJPEG_CSS_420: @@ -132,13 +133,14 @@ bool PyJpegImages::GetOutputDims(std::vector& widths, std::vector& widths, std::vector& widths, std::vector& widths, std::vector(m_width); + uint32_t img_height = static_cast(m_height); std::vector widths; std::vector heights; widths.resize(ROCJPEG_MAX_COMPONENT); heights.resize(ROCJPEG_MAX_COMPONENT); if(GetOutputDims(widths, heights, img_width, img_height, output_format, subsampling) == false) return false; - uint32_t bit_depth = 8; - std::string type_str(static_cast("|u1")); + const uint32_t bit_depth = 8U; + const std::string type_str("|u1"); + std::array channels{}; + std::copy(std::begin(output_image.channel), std::end(output_image.channel), channels.begin()); switch(output_format) { + case ROCJPEG_OUTPUT_NATIVE: + case ROCJPEG_OUTPUT_YUV_PLANAR: + case ROCJPEG_OUTPUT_Y: + case ROCJPEG_OUTPUT_FORMAT_MAX: + return false; case ROCJPEG_OUTPUT_RGB_PLANAR: { // each color plane in a channel separately R[0], G[1], and B[2] - uint32_t surf_stride[3] = {widths[0], widths[1], widths[2]}; // ROCJPEG_OUTPUT_RGB_PLANAR all same width = img_width - for(int i = 0; i < 3; i++) { + const std::array surf_stride{widths[0], widths[1], widths[2]}; // ROCJPEG_OUTPUT_RGB_PLANAR all same width = img_width + for(size_t i = 0; i < 3U; ++i) { std::vector shape{ static_cast(heights[i]), static_cast(widths[i])}; // depend on get_output_dims() - std::vector stride{ static_cast(surf_stride[i]), 1, 0}; + std::vector stride{ static_cast(surf_stride[i]), size_t{1} }; // RGB PLANAR using VCN JPEG decoder @ first, second, and third channel of RocJpegImage - ext_buf[i]->LoadDLPack(shape, stride, bit_depth, type_str, (void *)output_image.channel[i], device_id); // device_id was set/saved at the constructor + ext_buf[i]->LoadDLPack(shape, stride, bit_depth, 
type_str, static_cast(channels[i]), device_id); // device_id was set/saved at the constructor } } break; - default: case ROCJPEG_OUTPUT_RGB: { // all the RGB interleaved in one channel [0] - uint32_t surf_stride = widths[0]; // ROCJPEG_OUTPUT_RGB width is * 3 for RGB interleaved - std::vector shape{ static_cast(heights[0]), static_cast(widths[0]/3), 3}; // widths[0]/3 for ROCJPEG_OUTPUT_RGB - std::vector stride{ static_cast(surf_stride), 1, 0}; // python assumes same dim for both shape & strides + const uint32_t surf_stride = widths[0]; // ROCJPEG_OUTPUT_RGB width is * 3 for RGB interleaved + std::vector shape{ static_cast(heights[0]), static_cast(widths[0] / 3U), size_t{3} }; // widths[0]/3 for ROCJPEG_OUTPUT_RGB + std::vector stride{ static_cast(surf_stride), size_t{3}, size_t{1} }; // interleaved RGB using VCN JPEG decoder written to first channel of RocJpegImage - ext_buf[0]->LoadDLPack(shape, stride, bit_depth, type_str, (void *)output_image.channel[0], device_id); // device_id was set/saved at the constructor + ext_buf[0]->LoadDLPack(shape, stride, bit_depth, type_str, static_cast(channels[0]), device_id); // device_id was set/saved at the constructor } break; } diff --git a/src/rocjpeg/roc_pyjpeg_images.h b/src/rocjpeg/roc_pyjpeg_images.h index b993aa10..455df4fb 100644 --- a/src/rocjpeg/roc_pyjpeg_images.h +++ b/src/rocjpeg/roc_pyjpeg_images.h @@ -24,6 +24,7 @@ THE SOFTWARE. 
#define PY_ROC_JPEG_IMAGES_HEADER #pragma once +#include #include #include "rocjpeg/rocjpeg.h" #include "common/roc_pybuffer.h" @@ -45,16 +46,14 @@ class PyJpegImages { num_channels = 0; subsampling = ROCJPEG_CSS_UNKNOWN; } - ~PyJpegImages() {}; - static void ExportToPython(py::module& m); // The image in the GPU MEM represented with dlpack via this ext_buf (for external buffer) std::vector> ext_buf; // external buffer, a view on the GPU MEM of the decoded image // public to be accessed by python pybind - int m_width; - int m_height; + int m_width = 0; + int m_height = 0; py::array_t to_numpy(int index = 0); RocJpegChromaSubsampling subsampling; @@ -66,6 +65,7 @@ class PyJpegImages { private: bool GetOutputDims(std::vector& widths, std::vector& heights, uint32_t img_width, uint32_t img_height, RocJpegOutputFormat output_format, RocJpegChromaSubsampling subsampling); + [[maybe_unused]] uint32_t padding_ = 0; }; -#endif // PY_ROC_JPEG_IMAGES_HEADER \ No newline at end of file +#endif // PY_ROC_JPEG_IMAGES_HEADER diff --git a/src/rocjpeg/roc_pyjpeg_utils.h b/src/rocjpeg/roc_pyjpeg_utils.h index eb364a97..895099aa 100644 --- a/src/rocjpeg/roc_pyjpeg_utils.h +++ b/src/rocjpeg/roc_pyjpeg_utils.h @@ -27,11 +27,15 @@ THE SOFTWARE. 
#include #include #include +#include #include #include #include #include #include +#include +#include +#include #include #include #include @@ -41,21 +45,23 @@ namespace fs = std::experimental::filesystem; #include #include "rocjpeg/rocjpeg.h" -#define PY_CHECK_ROCJPEG(call) { \ - RocJpegStatus rocjpeg_status = (call); \ - if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { \ - std::cerr << #call << " returned " << rocJpegGetErrorName(rocjpeg_status) << " at " << __FILE__ << ":" << __LINE__ << std::endl;\ - exit(1); \ - } \ -} - -#define PY_CHECK_HIP(call) { \ - hipError_t hip_status = (call); \ - if (hip_status != hipSuccess) { \ - std::cout << "rocJPEG failure: '#" << hip_status << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ - exit(1); \ - } \ -} +#define PY_CHECK_ROCJPEG(call) \ + do { \ + const RocJpegStatus rocjpeg_status = (call); \ + if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { \ + std::cerr << #call << " returned " << rocJpegGetErrorName(rocjpeg_status) << " at " << __FILE__ << ":" << __LINE__ << std::endl; \ + std::exit(EXIT_FAILURE); \ + } \ + } while (false) + +#define PY_CHECK_HIP(call) \ + do { \ + const hipError_t hip_status = (call); \ + if (hip_status != hipSuccess) { \ + std::cout << "rocJPEG failure: '#" << hip_status << "' at " << __FILE__ << ":" << __LINE__ << std::endl; \ + std::exit(EXIT_FAILURE); \ + } \ + } while (false) /** * @class PyRocJpegUtils @@ -66,6 +72,8 @@ namespace fs = std::experimental::filesystem; */ class PyRocJpegUtils { public: + using ChannelArray = std::array; + /** * @brief Initializes the HIP device. * @@ -130,9 +138,6 @@ class PyRocJpegUtils { case ROCJPEG_CSS_UNKNOWN: chroma_sub_sampling = "UNKNOWN"; break; - default: - chroma_sub_sampling = ""; - break; } } @@ -151,103 +156,122 @@ class PyRocJpegUtils { * @param channel_sizes The array to store the channel sizes. * @return The channel pitch. 
*/ - int GetChannelPitchAndSizes(RocJpegDecodeParams decode_params, RocJpegChromaSubsampling subsampling, uint32_t *widths, uint32_t *heights, - uint32_t &num_channels, RocJpegImage &output_image, uint32_t *channel_sizes) { - - bool is_roi_valid = false; - uint32_t roi_width; - uint32_t roi_height; - roi_width = decode_params.crop_rectangle.right - decode_params.crop_rectangle.left; - roi_height = decode_params.crop_rectangle.bottom - decode_params.crop_rectangle.top; - if (roi_width > 0 && roi_height > 0 && roi_width <= widths[0] && roi_height <= heights[0]) { - is_roi_valid = true; - } + int GetChannelPitchAndSizes(RocJpegDecodeParams decode_params, RocJpegChromaSubsampling subsampling, + const ChannelArray &widths, const ChannelArray &heights, + uint32_t &num_channels, RocJpegImage &output_image, + ChannelArray &channel_sizes) { + const int roi_width_raw = decode_params.crop_rectangle.right - decode_params.crop_rectangle.left; + const int roi_height_raw = decode_params.crop_rectangle.bottom - decode_params.crop_rectangle.top; + const uint32_t roi_width = static_cast(roi_width_raw); + const uint32_t roi_height = static_cast(roi_height_raw); + const bool is_roi_valid = roi_width_raw > 0 && roi_height_raw > 0 && roi_width <= widths[0] && roi_height <= heights[0]; + const uint32_t full_width = is_roi_valid ? roi_width : widths[0]; + const uint32_t full_height = is_roi_valid ? roi_height : heights[0]; + ChannelArray pitches{}; + channel_sizes.fill(0U); + + const auto set_channel = [&](std::size_t index, uint32_t pitch, uint32_t height) { + pitches[index] = pitch; + channel_sizes[index] = AlignSize(pitch, height, mem_alignment); + }; + switch (decode_params.output_format) { case ROCJPEG_OUTPUT_NATIVE: switch (subsampling) { case ROCJPEG_CSS_444: - num_channels = 3; - output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? 
roi_width : widths[0]; - channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + num_channels = 3U; + set_channel(0U, full_width, full_height); + set_channel(1U, full_width, full_height); + set_channel(2U, full_width, full_height); break; case ROCJPEG_CSS_440: - num_channels = 3; - output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); - channel_sizes[2] = channel_sizes[1] = align(output_image.pitch[0] * ((is_roi_valid ? roi_height : heights[0]) >> 1), mem_alignment); + num_channels = 3U; + set_channel(0U, full_width, full_height); + set_channel(1U, full_width, full_height >> 1U); + set_channel(2U, full_width, full_height >> 1U); break; case ROCJPEG_CSS_422: - num_channels = 1; - output_image.pitch[0] = (is_roi_valid ? roi_width : widths[0]) * 2; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + num_channels = 1U; + set_channel(0U, full_width * 2U, full_height); break; case ROCJPEG_CSS_420: - num_channels = 2; - output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); - channel_sizes[1] = align(output_image.pitch[1] * ((is_roi_valid ? roi_height : heights[0]) >> 1), mem_alignment); + num_channels = 2U; + set_channel(0U, full_width, full_height); + set_channel(1U, full_width, full_height >> 1U); break; case ROCJPEG_CSS_400: - num_channels = 1; - output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? 
roi_height : heights[0]), mem_alignment); + num_channels = 1U; + set_channel(0U, full_width, full_height); break; - default: + case ROCJPEG_CSS_411: + case ROCJPEG_CSS_UNKNOWN: std::cout << "Unknown chroma subsampling!" << std::endl; return EXIT_FAILURE; } break; case ROCJPEG_OUTPUT_YUV_PLANAR: if (subsampling == ROCJPEG_CSS_400) { - num_channels = 1; - output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + num_channels = 1U; + set_channel(0U, full_width, full_height); } else { - num_channels = 3; - output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - output_image.pitch[1] = is_roi_valid ? roi_width : widths[1]; - output_image.pitch[2] = is_roi_valid ? roi_width : widths[2]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); - channel_sizes[1] = align(output_image.pitch[1] * (is_roi_valid ? roi_height : heights[1]), mem_alignment); - channel_sizes[2] = align(output_image.pitch[2] * (is_roi_valid ? roi_height : heights[2]), mem_alignment); + switch (subsampling) { + case ROCJPEG_CSS_444: + case ROCJPEG_CSS_440: + case ROCJPEG_CSS_422: + case ROCJPEG_CSS_420: + num_channels = 3U; + set_channel(0U, full_width, full_height); + set_channel(1U, is_roi_valid ? roi_width : widths[1], is_roi_valid ? roi_height : heights[1]); + set_channel(2U, is_roi_valid ? roi_width : widths[2], is_roi_valid ? roi_height : heights[2]); + break; + case ROCJPEG_CSS_400: + break; + case ROCJPEG_CSS_411: + case ROCJPEG_CSS_UNKNOWN: + std::cout << "Unknown chroma subsampling!" << std::endl; + return EXIT_FAILURE; + } } break; case ROCJPEG_OUTPUT_Y: - num_channels = 1; - output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? 
roi_height : heights[0]), mem_alignment); + num_channels = 1U; + set_channel(0U, full_width, full_height); break; case ROCJPEG_OUTPUT_RGB: - num_channels = 1; - output_image.pitch[0] = (is_roi_valid ? roi_width : widths[0]) * 3; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + num_channels = 1U; + set_channel(0U, full_width * 3U, full_height); break; case ROCJPEG_OUTPUT_RGB_PLANAR: - num_channels = 3; - output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + num_channels = 3U; + set_channel(0U, full_width, full_height); + set_channel(1U, full_width, full_height); + set_channel(2U, full_width, full_height); break; - default: + case ROCJPEG_OUTPUT_FORMAT_MAX: std::cout << "Unknown output format!" << std::endl; return EXIT_FAILURE; } + std::copy(pitches.begin(), pitches.end(), std::begin(output_image.pitch)); return EXIT_SUCCESS; } private: - static const int mem_alignment = 4 * 1024 * 1024; + static constexpr uint32_t mem_alignment = 4U * 1024U * 1024U; /** * @brief Aligns a value to a specified alignment. * * This function takes a value and aligns it to the specified alignment. It returns the aligned value. * - * @param value The value to be aligned. + * @param pitch The pitch of the channel in bytes. + * @param height The channel height in rows. * @param alignment The alignment value. * @return The aligned value. 
*/ - static inline int align(int value, int alignment) { - return (value + alignment - 1) & ~(alignment - 1); + static inline uint32_t AlignSize(uint32_t pitch, uint32_t height, uint32_t alignment) { + const auto size = static_cast(pitch) * static_cast(height); + const auto aligned = (size + alignment - 1U) & ~(static_cast(alignment) - 1U); + return static_cast(aligned); } }; -#endif //ROC_PY_JPEG_UTILS \ No newline at end of file +#endif //ROC_PY_JPEG_UTILS diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9befe73c..26c5b6c3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -80,46 +80,36 @@ set(ROCPYJPEG_BUILD_PY_DIR "${CMAKE_BINARY_DIR}/rocpyjpegdecode_${PY_VERSION_SUF # check installed or build-tree pybind bindings set(VIDEO_DECODE_BINDINGS_INSTALLED OFF) -if(EXISTS "${ROCPYDECODE_PY_DIR}") +file(GLOB ROCPYDECODE_PYMODULES "${ROCPYDECODE_BUILD_LIB_DIR}/rocpydecode*.so" "${ROCPYDECODE_BUILD_LIB_DIR}/rocpydecode*.pyd") +if(ROCPYDECODE_PYMODULES AND EXISTS "${ROCPYDECODE_BUILD_PY_DIR}") + set(VIDEO_DECODE_BINDINGS_INSTALLED ON) + set(ROCPYDECODE_LIB_DIR "${ROCPYDECODE_BUILD_LIB_DIR}") + set(ROCPYDECODE_PY_DIR "${ROCPYDECODE_BUILD_PY_DIR}") + message("-- ${White}${PROJECT_NAME}: using in-tree rocPyDecode build outputs at ${ROCPYDECODE_LIB_DIR}${ColourReset}") +elseif(EXISTS "${ROCPYDECODE_PY_DIR}") set(VIDEO_DECODE_BINDINGS_INSTALLED ON) message("-- ${White}${PROJECT_NAME}: rocPyDecode found at ${ROCPYDECODE_PY_DIR}${ColourReset}") -endif() -if(VIDEO_DECODE_BINDINGS_INSTALLED) file(GLOB ROCPYDECODE_PYMODULES "${ROCPYDECODE_LIB_DIR}/rocpydecode*.so" "${ROCPYDECODE_LIB_DIR}/rocpydecode*.pyd") if(NOT ROCPYDECODE_PYMODULES) set(VIDEO_DECODE_BINDINGS_INSTALLED OFF) - message("-- ${Yellow}${PROJECT_NAME}: rocPyDecode bindings directory present but shared object not found in ${ROCPYDECODE_LIB_DIR}; will try build-tree outputs.${ColourReset}") - endif() -endif() -if(NOT VIDEO_DECODE_BINDINGS_INSTALLED) - file(GLOB ROCPYDECODE_PYMODULES 
"${ROCPYDECODE_BUILD_LIB_DIR}/rocpydecode*.so" "${ROCPYDECODE_BUILD_LIB_DIR}/rocpydecode*.pyd") - if(ROCPYDECODE_PYMODULES AND EXISTS "${ROCPYDECODE_BUILD_PY_DIR}") - set(VIDEO_DECODE_BINDINGS_INSTALLED ON) - set(ROCPYDECODE_LIB_DIR "${ROCPYDECODE_BUILD_LIB_DIR}") - set(ROCPYDECODE_PY_DIR "${ROCPYDECODE_BUILD_PY_DIR}") - message("-- ${White}${PROJECT_NAME}: using in-tree rocPyDecode build outputs at ${ROCPYDECODE_LIB_DIR}${ColourReset}") + message("-- ${Yellow}${PROJECT_NAME}: rocPyDecode bindings directory present but shared object not found in ${ROCPYDECODE_LIB_DIR}.${ColourReset}") endif() endif() set(JPEG_DECODE_PYBIND_SCRIPTS OFF) -if(EXISTS "${ROCPYJPEG_PY_DIR}") +file(GLOB ROCPYJPEG_PYMODULES "${ROCPYJPEG_BUILD_LIB_DIR}/rocpyjpegdecode*.so" "${ROCPYJPEG_BUILD_LIB_DIR}/rocpyjpegdecode*.pyd") +if(ROCPYJPEG_PYMODULES AND EXISTS "${ROCPYJPEG_BUILD_PY_DIR}") + set(JPEG_DECODE_PYBIND_SCRIPTS ON) + set(ROCPYJPEG_LIB_DIR "${ROCPYJPEG_BUILD_LIB_DIR}") + set(ROCPYJPEG_PY_DIR "${ROCPYJPEG_BUILD_PY_DIR}") + message("-- ${White}${PROJECT_NAME}: using in-tree rocPyJPEG build outputs at ${ROCPYJPEG_LIB_DIR}${ColourReset}") +elseif(EXISTS "${ROCPYJPEG_PY_DIR}") set(JPEG_DECODE_PYBIND_SCRIPTS ON) message("-- ${White}${PROJECT_NAME}: rocPyJPEG found at ${ROCPYJPEG_PY_DIR}${ColourReset}") -endif() -if(JPEG_DECODE_PYBIND_SCRIPTS) file(GLOB ROCPYJPEG_PYMODULES "${ROCPYJPEG_LIB_DIR}/rocpyjpegdecode*.so" "${ROCPYJPEG_LIB_DIR}/rocpyjpegdecode*.pyd") if(NOT ROCPYJPEG_PYMODULES) set(JPEG_DECODE_PYBIND_SCRIPTS OFF) - message("-- ${Yellow}${PROJECT_NAME}: rocPyJPEG bindings directory present but shared object not found in ${ROCPYJPEG_LIB_DIR}; will try build-tree outputs.${ColourReset}") - endif() -endif() -if(NOT JPEG_DECODE_PYBIND_SCRIPTS) - file(GLOB ROCPYJPEG_PYMODULES "${ROCPYJPEG_BUILD_LIB_DIR}/rocpyjpegdecode*.so" "${ROCPYJPEG_BUILD_LIB_DIR}/rocpyjpegdecode*.pyd") - if(ROCPYJPEG_PYMODULES AND EXISTS "${ROCPYJPEG_BUILD_PY_DIR}") - set(JPEG_DECODE_PYBIND_SCRIPTS ON) - 
set(ROCPYJPEG_LIB_DIR "${ROCPYJPEG_BUILD_LIB_DIR}") - set(ROCPYJPEG_PY_DIR "${ROCPYJPEG_BUILD_PY_DIR}") - message("-- ${White}${PROJECT_NAME}: using in-tree rocPyJPEG build outputs at ${ROCPYJPEG_LIB_DIR}${ColourReset}") + message("-- ${Yellow}${PROJECT_NAME}: rocPyJPEG bindings directory present but shared object not found in ${ROCPYJPEG_LIB_DIR}.${ColourReset}") endif() endif() @@ -149,8 +139,8 @@ else() endif() # find requirements (optional to allow skipping tests) -find_package(rocdecode 1.0.0 QUIET) -find_package(rocjpeg 1.0.0 QUIET) +find_package(rocdecode QUIET) +find_package(rocjpeg QUIET) find_library(rocdecode_HOST_LIBRARY NAMES rocdecodehost rocdecode-host PATHS ${ROCM_PATH}/lib) set(RUN_ROCPYDECODE_TESTS OFF) set(RUN_ROCPYJPEG_TESTS OFF) @@ -194,6 +184,12 @@ if(EXISTS "${ROCM_PATH}/lib/rocm_sysdeps/lib") set(USING_THE_ROCK ON) endif() +function(set_rocpy_test_env test_name pythonpath) + set_property(TEST ${test_name} PROPERTY ENVIRONMENT + "PYTHONPATH=${pythonpath}" + "LD_LIBRARY_PATH=${ROCM_PATH}/lib:$ENV{LD_LIBRARY_PATH}") +endfunction() + if(RUN_ROCPYDECODE_TESTS OR RUN_ROCPYJPEG_TESTS) # Tests execute_process(COMMAND ${Python3_EXECUTABLE} -c "import torch" RESULT_VARIABLE TORCH_PYTHON_RESULT OUTPUT_QUIET ERROR_QUIET) @@ -212,7 +208,7 @@ if(RUN_ROCPYDECODE_TESTS) -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST rocpydecode_test_decoders PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(rocpydecode_test_decoders "${ROCPYDECODE_PYTHONPATH}") # 2 - decodercpu test if(RUN_ROCPYDECODE_HOST_TESTS) add_test(NAME rocpydecode_test_decodercpu @@ -220,7 +216,7 @@ if(RUN_ROCPYDECODE_TESTS) -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST rocpydecode_test_decodercpu PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + 
set_rocpy_test_env(rocpydecode_test_decodercpu "${ROCPYDECODE_PYTHONPATH}") endif() # 3 - demuxer test add_test(NAME rocpydecode_test_demuxer @@ -228,7 +224,7 @@ if(RUN_ROCPYDECODE_TESTS) -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST rocpydecode_test_demuxer PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(rocpydecode_test_demuxer "${ROCPYDECODE_PYTHONPATH}") else() message("-- ${Yellow}${PROJECT_NAME}: FFmpeg not available; skipping decoder/demuxer/CPU sample tests.${ColourReset}") endif() @@ -237,7 +233,7 @@ if(RUN_ROCPYDECODE_TESTS) COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/types_test.py WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST rocpydecode_test_types PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(rocpydecode_test_types "${ROCPYDECODE_PYTHONPATH}") if(ROCPYDECODE_USE_FFMPEG) # 5 - video_decode_python_H265 test add_test(NAME video_decode_python_H265 @@ -245,20 +241,14 @@ if(RUN_ROCPYDECODE_TESTS) -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H265.mp4 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_python_H265 PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_python_H265 "${ROCPYDECODE_PYTHONPATH}") # 6 - video_decode_perf_python_H265 test add_test(NAME video_decode_perf_python_H265 COMMAND ${Python3_EXECUTABLE} ${ROCPYDECODE_SAMPLE_DIR}/videodecodeperf.py -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H265.mp4 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - if(USING_THE_ROCK) - set_property(TEST video_decode_perf_python_H265 PROPERTY ENVIRONMENT - "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$ENV{PYTHONPATH}" - "LD_LIBRARY_PATH=${ROCM_PATH}/lib:$ENV{LD_LIBRARY_PATH}") - else() - set_property(TEST video_decode_perf_python_H265 
PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") - endif() + set_rocpy_test_env(video_decode_perf_python_H265 "${ROCPYDECODE_PYTHONPATH}") # 7 - video_decode_rgb_python_H265 test add_test(NAME video_decode_rgb_python_H265 COMMAND ${Python3_EXECUTABLE} ${ROCPYDECODE_SAMPLE_DIR}/videodecodergb.py @@ -266,41 +256,35 @@ if(RUN_ROCPYDECODE_TESTS) -of 3 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_rgb_python_H265 PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_rgb_python_H265 "${ROCPYDECODE_PYTHONPATH}") # 8 - video_decode_python_H264 test add_test(NAME video_decode_python_H264 COMMAND ${Python3_EXECUTABLE} ${ROCPYDECODE_SAMPLE_DIR}/videodecode.py -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 -resize 640 360 -p yes WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_python_H264 PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_python_H264 "${ROCPYDECODE_PYTHONPATH}") # 9 - video_decode_perf_python_H264 test add_test(NAME video_decode_perf_python_H264 COMMAND ${Python3_EXECUTABLE} ${ROCPYDECODE_SAMPLE_DIR}/videodecodeperf.py -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - if(USING_THE_ROCK) - set_property(TEST video_decode_perf_python_H264 PROPERTY ENVIRONMENT - "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$ENV{PYTHONPATH}" - "LD_LIBRARY_PATH=${ROCM_PATH}/lib:$ENV{LD_LIBRARY_PATH}") - else() - set_property(TEST video_decode_perf_python_H264 PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") - endif() + set_rocpy_test_env(video_decode_perf_python_H264 "${ROCPYDECODE_PYTHONPATH}") # 10 - video_decode_python_AV1 test add_test(NAME video_decode_python_AV1 COMMAND ${Python3_EXECUTABLE} ${ROCPYDECODE_SAMPLE_DIR}/videodecode.py -i 
${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-AV1.mp4 -resize 640 360 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_python_AV1 PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_python_AV1 "${ROCPYDECODE_PYTHONPATH}") # 11 - video_decode_python_AV9 test add_test(NAME video_decode_python_AV9 COMMAND ${Python3_EXECUTABLE} ${ROCPYDECODE_SAMPLE_DIR}/videodecode.py -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-VP9.ivf -resize 640 360 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_python_AV9 PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_python_AV9 "${ROCPYDECODE_PYTHONPATH}") if(TORCH_PYTHON_RESULT EQUAL 0 AND RUN_ROCPYDECODE_HOST_TESTS) # 12 - video_decode_python_ffmpeg(torch) test add_test(NAME video_decode_python_ffmpeg_torch @@ -308,7 +292,7 @@ if(RUN_ROCPYDECODE_TESTS) -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 -m 2 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_python_ffmpeg_torch PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_python_ffmpeg_torch "${ROCPYDECODE_PYTHONPATH}") endif() # 13 - video_decode_python_ffmpeg test if(RUN_ROCPYDECODE_HOST_TESTS) @@ -317,7 +301,7 @@ if(RUN_ROCPYDECODE_TESTS) -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 -m 2 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_python_ffmpeg PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_python_ffmpeg "${ROCPYDECODE_PYTHONPATH}") endif() endif() # 14 - video_decode_raw_python (Annex-B raw bitstream) @@ -326,7 +310,7 @@ if(RUN_ROCPYDECODE_TESTS) -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.264 WORKING_DIRECTORY 
${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_raw_python PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_raw_python "${ROCPYDECODE_PYTHONPATH}") # 15 - video_decode_raw_python_h265 (Annex-B raw HEVC bitstream) add_test(NAME video_decode_raw_python_h265 COMMAND ${Python3_EXECUTABLE} ${ROCPYDECODE_SAMPLE_DIR}/videodecoderaw.py @@ -334,14 +318,14 @@ if(RUN_ROCPYDECODE_TESTS) --codec h265 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_raw_python_h265 PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_raw_python_h265 "${ROCPYDECODE_PYTHONPATH}") # 16 - all_other_decoder_apis test add_test(NAME all_other_decoder_apis COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/decoder_api_test.py -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST all_other_decoder_apis PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(all_other_decoder_apis "${ROCPYDECODE_PYTHONPATH}") endif() if(RUN_ROCPYJPEG_TESTS) @@ -351,24 +335,12 @@ if(RUN_ROCPYJPEG_TESTS) COMMAND ${Python3_EXECUTABLE} ${ROCPYJPEG_SAMPLE_DIR}/jpegdecode.py -i ${ROCM_PATH}/share/rocjpeg/images/mug_420.jpg WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - if(USING_THE_ROCK) - set_property(TEST jpeg_decode_python PROPERTY ENVIRONMENT - "PYTHONPATH=${ROCPYJPEG_PYTHONPATH}:$ENV{PYTHONPATH}" - "LD_LIBRARY_PATH=${ROCM_PATH}/lib:$ENV{LD_LIBRARY_PATH}") - else() - set_property(TEST jpeg_decode_python PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYJPEG_PYTHONPATH}:$PYTHONPATH") - endif() + set_rocpy_test_env(jpeg_decode_python "${ROCPYJPEG_PYTHONPATH}") endif(TORCH_PYTHON_RESULT EQUAL 0) # 18 - jpegdecodebatched_test add_test(NAME jpeg_decode_batched_python COMMAND ${Python3_EXECUTABLE} ${ROCPYJPEG_SAMPLE_DIR}/jpegdecodebatched.py -i 
${ROCM_PATH}/share/rocjpeg/images/ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - if(USING_THE_ROCK) - set_property(TEST jpeg_decode_batched_python PROPERTY ENVIRONMENT - "PYTHONPATH=${ROCPYJPEG_PYTHONPATH}:$ENV{PYTHONPATH}" - "LD_LIBRARY_PATH=${ROCM_PATH}/lib:$ENV{LD_LIBRARY_PATH}") - else() - set_property(TEST jpeg_decode_batched_python PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYJPEG_PYTHONPATH}:$PYTHONPATH") - endif() + set_rocpy_test_env(jpeg_decode_batched_python "${ROCPYJPEG_PYTHONPATH}") endif() From 5e6c078e882b015dc69c0c70abeeb954d4b7ef7c Mon Sep 17 00:00:00 2001 From: Essam Aly Date: Mon, 6 Apr 2026 11:18:42 -0700 Subject: [PATCH 2/4] 2nd round testing on TheRock, fixing warnings cause --- src/common/roc_pybuffer.cpp | 20 ++++++++++++++------ src/rocdecode/roc_pyvideodecode.cpp | 4 +++- src/rocdecode/roc_pyvideodecode.h | 19 +++++++++++-------- src/rocdecode/roc_pyvideodecodecpu.cpp | 4 +++- src/rocdecode/roc_pyvideodecodecpu.h | 10 +++++++++- src/rocjpeg/roc_pyjpeg_images.cpp | 20 ++++++++++++++++++++ src/rocjpeg/roc_pyjpeg_images.h | 7 ++----- src/rocjpeg/roc_pyjpeg_utils.h | 19 +++++++++++++++++++ 8 files changed, 81 insertions(+), 22 deletions(-) diff --git a/src/common/roc_pybuffer.cpp b/src/common/roc_pybuffer.cpp index 62df3964..a81be525 100644 --- a/src/common/roc_pybuffer.cpp +++ b/src/common/roc_pybuffer.cpp @@ -48,6 +48,15 @@ void ResetTensorMetadata(DLTensor &tensor) { delete[] tensor.strides; tensor.strides = nullptr; } + +std::unique_ptr MakeTensorMetadataArray(const std::vector &values, const char *context) { + std::vector converted(values.size()); + std::transform(values.begin(), values.end(), converted.begin(), + [context](size_t value) { return CheckedNumericCast(value, context); }); + auto data = std::make_unique(values.size()); + std::copy(converted.begin(), converted.end(), data.get()); + return data; +} } // namespace static void CheckValidBuffer(const void *ptr) { @@ -204,21 +213,20 @@ int BufferInterface::LoadDLPack(const 
std::vector& _shape, const std::ve m_dlTensor->ndim = ndim; // Convert shape - auto shape = std::make_unique(static_cast(ndim)); - for (size_t i = 0; i < _shape.size(); ++i) { - shape[i] = CheckedNumericCast(_shape[i], "shape dimension"); - } + auto shape = MakeTensorMetadataArray(_shape, "shape dimension"); m_dlTensor->shape = shape.release(); // Convert strides - auto strides = std::make_unique(static_cast(ndim)); + std::vector stride_values; + stride_values.reserve(_stride.size()); for (size_t i = 0; i < _stride.size(); ++i) { const auto stride_bytes = CheckedNumericCast(_stride[i], "stride"); if (stride_bytes % item_size_dt != 0) { throw std::runtime_error("Stride must be a multiple of the element size in bytes"); } - strides[i] = stride_bytes / item_size_dt; + stride_values.push_back(static_cast(stride_bytes / item_size_dt)); } + auto strides = MakeTensorMetadataArray(stride_values, "stride element"); m_dlTensor->strides = strides.release(); return 0; } diff --git a/src/rocdecode/roc_pyvideodecode.cpp b/src/rocdecode/roc_pyvideodecode.cpp index 9f4a2cad..f7941072 100644 --- a/src/rocdecode/roc_pyvideodecode.cpp +++ b/src/rocdecode/roc_pyvideodecode.cpp @@ -146,6 +146,8 @@ PyRocVideoDecoder::~PyRocVideoDecoder() { } } +void PyRocVideoDecoder::VTableAnchor() {} + int PyRocVideoDecoder::PyDecodeFrame(PyPacketData& packet) { if(packet.bitstream_size == 0) packet.pkt_flags |= ROCDEC_PKT_ENDOFSTREAM; @@ -291,7 +293,7 @@ uintptr_t PyRocVideoDecoder::PyResizeFrame(PyPacketData& packet, Dim *resized_di return 0; } } - memcpy(resized_surf_info, surf_info, sizeof(OutputSurfaceInfo)); + *resized_surf_info = *surf_info; resized_surf_info->output_width = resized_width; resized_surf_info->output_height = resized_height; resized_surf_info->output_pitch = resized_width * surf_info->bytes_per_pixel; diff --git a/src/rocdecode/roc_pyvideodecode.h b/src/rocdecode/roc_pyvideodecode.h index c2ee2db3..a232e844 100644 --- a/src/rocdecode/roc_pyvideodecode.h +++ 
b/src/rocdecode/roc_pyvideodecode.h @@ -44,16 +44,19 @@ int PyReconfigureFlushCallback(void *p_viddec_obj, uint32_t flush_mode, void * p // // AMD Video Decoder Python Interface class // +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wweak-vtables" +#endif class PyRocVideoDecoder : public RocVideoDecoder { public: PyRocVideoDecoder(int device_id, int mem_type, rocDecVideoCodec codec, bool force_zero_latency = false, const Rect *p_crop_rect = nullptr, int max_width = 0, int max_height = 0, uint32_t clk_rate = 0) : RocVideoDecoder(device_id, static_cast(mem_type), codec, force_zero_latency, - p_crop_rect, false, 0U, max_width, max_height, clk_rate) { - InitConfigStructure(); - device_id_ = device_id; } - ~PyRocVideoDecoder(); + p_crop_rect, false, 0U, max_width, max_height, clk_rate) { + InitConfigStructure(); } + ~PyRocVideoDecoder() override; // for python binding int PyDecodeFrame(PyPacketData& packet); @@ -113,6 +116,7 @@ class PyRocVideoDecoder : public RocVideoDecoder { py::object PyGetDecoderSessionOverHead(std::uintptr_t session_id); #endif private: + virtual void VTableAnchor(); std::shared_ptr configInfo; void InitConfigStructure(); @@ -128,8 +132,7 @@ class PyRocVideoDecoder : public RocVideoDecoder { uint8_t *frame_ptr_resized = nullptr; size_t resized_image_size_in_bytes = 0; OutputSurfaceInfo *resized_surf_info = nullptr; - - private: - int device_id_ = 0; - [[maybe_unused]] int padding_ = 0; }; +#if defined(__clang__) +#pragma clang diagnostic pop +#endif diff --git a/src/rocdecode/roc_pyvideodecodecpu.cpp b/src/rocdecode/roc_pyvideodecodecpu.cpp index fe5a54ea..db99d025 100644 --- a/src/rocdecode/roc_pyvideodecodecpu.cpp +++ b/src/rocdecode/roc_pyvideodecodecpu.cpp @@ -92,6 +92,8 @@ PyRocVideoDecoderCpu::~PyRocVideoDecoderCpu() { } } +void PyRocVideoDecoderCpu::VTableAnchor() {} + int PyRocVideoDecoderCpu::PyDecodeFrame(PyPacketData& packet) { if(packet.bitstream_size == 0) packet.pkt_flags |= 
ROCDEC_PKT_ENDOFSTREAM; @@ -237,7 +239,7 @@ uintptr_t PyRocVideoDecoderCpu::PyResizeFrame(PyPacketData& packet, Dim *resized return 0; } } - memcpy(resized_surf_info, surf_info, sizeof(OutputSurfaceInfo)); + *resized_surf_info = *surf_info; resized_surf_info->output_width = resized_dim->w; resized_surf_info->output_height = resized_dim->h; resized_surf_info->output_pitch = resized_dim->w * surf_info->bytes_per_pixel; diff --git a/src/rocdecode/roc_pyvideodecodecpu.h b/src/rocdecode/roc_pyvideodecodecpu.h index 51ced086..fcb0f37e 100644 --- a/src/rocdecode/roc_pyvideodecodecpu.h +++ b/src/rocdecode/roc_pyvideodecodecpu.h @@ -31,13 +31,17 @@ THE SOFTWARE. // // AMD Video Decoder Python Interface class // +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wweak-vtables" +#endif class PyRocVideoDecoderCpu : public FFMpegVideoDecoder { public: PyRocVideoDecoderCpu(int device_id, int mem_type = OUT_SURFACE_MEM_HOST_COPIED, rocDecVideoCodec codec = rocDecVideoCodec_HEVC, bool force_zero_latency = false, const Rect *p_crop_rect = nullptr, int max_width = 0, int max_height = 0, uint32_t clk_rate = 1000) : FFMpegVideoDecoder(device_id, static_cast(mem_type), codec, force_zero_latency, p_crop_rect, false, 0, max_width, max_height, clk_rate) { InitConfigStructure(); } - ~PyRocVideoDecoderCpu(); + ~PyRocVideoDecoderCpu() override; // for python binding int PyDecodeFrame(PyPacketData& packet); @@ -94,6 +98,7 @@ class PyRocVideoDecoderCpu : public FFMpegVideoDecoder { py::object PyGetDecoderSessionOverHead(std::uintptr_t session_id); #endif private: + virtual void VTableAnchor(); std::shared_ptr configInfo; void InitConfigStructure(); @@ -106,3 +111,6 @@ class PyRocVideoDecoderCpu : public FFMpegVideoDecoder { size_t resized_image_size_in_bytes = 0; OutputSurfaceInfo *resized_surf_info = nullptr; }; +#if defined(__clang__) +#pragma clang diagnostic pop +#endif diff --git a/src/rocjpeg/roc_pyjpeg_images.cpp 
b/src/rocjpeg/roc_pyjpeg_images.cpp index e86c39ea..c7799181 100644 --- a/src/rocjpeg/roc_pyjpeg_images.cpp +++ b/src/rocjpeg/roc_pyjpeg_images.cpp @@ -34,6 +34,11 @@ using namespace std; #include +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcovered-switch-default" +#endif + void PyJpegImages::ExportToPython(py::module& m) { // PyJpegImages py::class_>(m, "PyJpegImages", py::module_local()) @@ -137,6 +142,9 @@ bool PyJpegImages::GetOutputDims(std::vector& widths, std::vector& widths, std::vector& widths, std::vectorLoadDLPack(shape, stride, bit_depth, type_str, static_cast(channels[0]), device_id); // device_id was set/saved at the constructor } break; + default: + return false; } return true; } + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif diff --git a/src/rocjpeg/roc_pyjpeg_images.h b/src/rocjpeg/roc_pyjpeg_images.h index 455df4fb..bab454e3 100644 --- a/src/rocjpeg/roc_pyjpeg_images.h +++ b/src/rocjpeg/roc_pyjpeg_images.h @@ -40,9 +40,6 @@ class PyJpegImages { ext_buf.push_back(std::make_shared()); ext_buf.push_back(std::make_shared()); ext_buf.push_back(std::make_shared()); - // default, reset - memset(&decode_params, 0, sizeof(RocJpegDecodeParams)); - memset(&output_image, 0, sizeof(RocJpegImage)); num_channels = 0; subsampling = ROCJPEG_CSS_UNKNOWN; } @@ -59,8 +56,8 @@ class PyJpegImages { // not exposed to outside uint32_t num_channels = 0; - RocJpegImage output_image; - RocJpegDecodeParams decode_params; + RocJpegImage output_image{}; + RocJpegDecodeParams decode_params{}; bool ToDlpackTensor(RocJpegOutputFormat output_format, int device_id); private: diff --git a/src/rocjpeg/roc_pyjpeg_utils.h b/src/rocjpeg/roc_pyjpeg_utils.h index 895099aa..a61fd327 100644 --- a/src/rocjpeg/roc_pyjpeg_utils.h +++ b/src/rocjpeg/roc_pyjpeg_utils.h @@ -70,6 +70,10 @@ namespace fs = std::experimental::filesystem; * This class provides utility functions such as getting file paths, initializing HIP device, * 
getting chroma subsampling string, getting channel pitch and sizes, getting output file extension, and saving images. */ +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcovered-switch-default" +#endif class PyRocJpegUtils { public: using ChannelArray = std::array; @@ -138,6 +142,9 @@ class PyRocJpegUtils { case ROCJPEG_CSS_UNKNOWN: chroma_sub_sampling = "UNKNOWN"; break; + default: + chroma_sub_sampling = "UNKNOWN"; + break; } } @@ -207,6 +214,9 @@ class PyRocJpegUtils { case ROCJPEG_CSS_UNKNOWN: std::cout << "Unknown chroma subsampling!" << std::endl; return EXIT_FAILURE; + default: + std::cout << "Unknown chroma subsampling!" << std::endl; + return EXIT_FAILURE; } break; case ROCJPEG_OUTPUT_YUV_PLANAR: @@ -230,6 +240,9 @@ class PyRocJpegUtils { case ROCJPEG_CSS_UNKNOWN: std::cout << "Unknown chroma subsampling!" << std::endl; return EXIT_FAILURE; + default: + std::cout << "Unknown chroma subsampling!" << std::endl; + return EXIT_FAILURE; } } break; @@ -250,6 +263,9 @@ class PyRocJpegUtils { case ROCJPEG_OUTPUT_FORMAT_MAX: std::cout << "Unknown output format!" << std::endl; return EXIT_FAILURE; + default: + std::cout << "Unknown output format!" 
<< std::endl; + return EXIT_FAILURE; } std::copy(pitches.begin(), pitches.end(), std::begin(output_image.pitch)); return EXIT_SUCCESS; @@ -273,5 +289,8 @@ class PyRocJpegUtils { return static_cast(aligned); } }; +#if defined(__clang__) +#pragma clang diagnostic pop +#endif #endif //ROC_PY_JPEG_UTILS From 8a9d4724b0e91e1a91305c606a614af9305c0751 Mon Sep 17 00:00:00 2001 From: Essam Aly Date: Mon, 6 Apr 2026 13:28:21 -0700 Subject: [PATCH 3/4] Fix bug for CI build --- src/common/roc_pybuffer.cpp | 22 +++++++++++++++++++--- src/common/roc_pydlpack.cpp | 19 ++++++++++++++++++- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/common/roc_pybuffer.cpp b/src/common/roc_pybuffer.cpp index a81be525..e8e36f2e 100644 --- a/src/common/roc_pybuffer.cpp +++ b/src/common/roc_pybuffer.cpp @@ -25,6 +25,7 @@ THE SOFTWARE. #include #include #include +#include #include #include @@ -35,9 +36,24 @@ using namespace py::literals; namespace { template Target CheckedNumericCast(Source value, const char *context) { - using Limit = std::numeric_limits; - if (value > static_cast(Limit::max())) { - throw std::runtime_error(std::string(context) + " is too large"); + if constexpr (std::is_signed_v && std::is_signed_v) { + if (value < static_cast(std::numeric_limits::min()) || + value > static_cast(std::numeric_limits::max())) { + throw std::runtime_error(std::string(context) + " is out of range"); + } + } else if constexpr (std::is_signed_v && !std::is_signed_v) { + using UnsignedSource = std::make_unsigned_t; + if (value < 0 || + static_cast(value) > std::numeric_limits::max()) { + throw std::runtime_error(std::string(context) + " is out of range"); + } + } else if constexpr (!std::is_signed_v && std::is_signed_v) { + using UnsignedTarget = std::make_unsigned_t; + if (value > static_cast(std::numeric_limits::max())) { + throw std::runtime_error(std::string(context) + " is out of range"); + } + } else if (value > std::numeric_limits::max()) { + throw 
std::runtime_error(std::string(context) + " is out of range"); } return static_cast(value); } diff --git a/src/common/roc_pydlpack.cpp b/src/common/roc_pydlpack.cpp index a24a71ec..be7aaef2 100644 --- a/src/common/roc_pydlpack.cpp +++ b/src/common/roc_pydlpack.cpp @@ -30,6 +30,7 @@ namespace py = pybind11; #include #include #include +#include #include namespace { @@ -42,7 +43,23 @@ void ReleaseTensorMetadata(DLManagedTensor *self) { template Target CheckedNumericCast(Source value, const char *context) { - if (value < 0 || value > static_cast(std::numeric_limits::max())) { + if constexpr (std::is_signed_v && std::is_signed_v) { + if (value < static_cast(std::numeric_limits::min()) || + value > static_cast(std::numeric_limits::max())) { + throw std::runtime_error(std::string(context) + " is out of range"); + } + } else if constexpr (std::is_signed_v && !std::is_signed_v) { + using UnsignedSource = std::make_unsigned_t; + if (value < 0 || + static_cast(value) > std::numeric_limits::max()) { + throw std::runtime_error(std::string(context) + " is out of range"); + } + } else if constexpr (!std::is_signed_v && std::is_signed_v) { + using UnsignedTarget = std::make_unsigned_t; + if (value > static_cast(std::numeric_limits::max())) { + throw std::runtime_error(std::string(context) + " is out of range"); + } + } else if (value > std::numeric_limits::max()) { throw std::runtime_error(std::string(context) + " is out of range"); } return static_cast(value); From 261a61d9527eba39467a7f77395ce0c6121cf51b Mon Sep 17 00:00:00 2001 From: Essam Aly Date: Mon, 6 Apr 2026 19:55:06 -0700 Subject: [PATCH 4/4] small test/runtime bug fixes --- samples/rocjpeg/jpegdecode.py | 6 ++---- tests/CMakeLists.txt | 12 +++++------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/samples/rocjpeg/jpegdecode.py b/samples/rocjpeg/jpegdecode.py index 178ef92c..4a25aada 100644 --- a/samples/rocjpeg/jpegdecode.py +++ b/samples/rocjpeg/jpegdecode.py @@ -18,7 +18,6 @@ # OUT OF OR IN 
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. -import torch import pyRocJpegDecode.decoder as jdec import rocpyjpegdecode.jpegTypes as jpegt import argparse @@ -57,8 +56,7 @@ def jpeg_decode( # example how to save the decoded image as a file if (output_file_path is not None): filename = output_file_path.strip() + ".png" - img1 = torch.from_numpy(img_tensor.to_numpy()) - arr = img1.cpu().numpy() + arr = img_tensor.to_numpy() img = Image.fromarray(arr.astype(np.uint8)) img.save(filename) print(f"Image saved as: {filename}") @@ -121,4 +119,4 @@ def jpeg_decode( print("ERROR: input passed with -i must be an existing file.") exit() - jpeg_decode(input_file_path, output_format, device_id, backend, output_file_path) \ No newline at end of file + jpeg_decode(input_file_path, output_format, device_id, backend, output_file_path) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 26c5b6c3..af029e1c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -330,13 +330,11 @@ endif() if(RUN_ROCPYJPEG_TESTS) # 17 - jpeg_decode_single_file_test - if(TORCH_PYTHON_RESULT EQUAL 0) - add_test(NAME jpeg_decode_python - COMMAND ${Python3_EXECUTABLE} ${ROCPYJPEG_SAMPLE_DIR}/jpegdecode.py - -i ${ROCM_PATH}/share/rocjpeg/images/mug_420.jpg - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - set_rocpy_test_env(jpeg_decode_python "${ROCPYJPEG_PYTHONPATH}") - endif(TORCH_PYTHON_RESULT EQUAL 0) + add_test(NAME jpeg_decode_python + COMMAND ${Python3_EXECUTABLE} ${ROCPYJPEG_SAMPLE_DIR}/jpegdecode.py + -i ${ROCM_PATH}/share/rocjpeg/images/mug_420.jpg + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + set_rocpy_test_env(jpeg_decode_python "${ROCPYJPEG_PYTHONPATH}") # 18 - jpegdecodebatched_test add_test(NAME jpeg_decode_batched_python COMMAND ${Python3_EXECUTABLE} ${ROCPYJPEG_SAMPLE_DIR}/jpegdecodebatched.py