From 4918b53c0b822c485629cd1180a11519ddbc086c Mon Sep 17 00:00:00 2001 From: Essam Aly Date: Tue, 31 Mar 2026 10:15:38 -0700 Subject: [PATCH 1/4] 1st round of warnning cause removal --- CMakeLists.txt | 64 ++++++- src/common/roc_pybuffer.cpp | 208 ++++++++++++++--------- src/common/roc_pybuffer.h | 14 +- src/common/roc_pydlpack.cpp | 125 +++++++++----- src/rocdecode/roc_pydecode.cpp | 11 +- src/rocdecode/roc_pydecode.h | 12 +- src/rocdecode/roc_pyvideodecode.cpp | 78 +++++---- src/rocdecode/roc_pyvideodecode.h | 16 +- src/rocdecode/roc_pyvideodecodecpu.cpp | 4 +- src/rocdecode/roc_pyvideodecodecpu.h | 6 +- src/rocjpeg/roc_pyjpeg.cpp | 7 + src/rocjpeg/roc_pyjpeg.h | 4 +- src/rocjpeg/roc_pyjpeg_codestream.cpp | 73 ++++++-- src/rocjpeg/roc_pyjpeg_codestream.h | 11 +- src/rocjpeg/roc_pyjpeg_decode_source.cpp | 2 + src/rocjpeg/roc_pyjpeg_decode_source.h | 1 - src/rocjpeg/roc_pyjpeg_decoder.cpp | 45 ++--- src/rocjpeg/roc_pyjpeg_decoder.h | 7 +- src/rocjpeg/roc_pyjpeg_images.cpp | 57 ++++--- src/rocjpeg/roc_pyjpeg_images.h | 10 +- src/rocjpeg/roc_pyjpeg_utils.h | 168 ++++++++++-------- tests/CMakeLists.txt | 112 +++++------- 22 files changed, 622 insertions(+), 413 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a717c66..2597d2bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -114,6 +114,33 @@ else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG -fPIC") endif() +option(ROCPYDECODE_ENABLE_ALL_WARNINGS "Enable broad compiler warnings for C++ targets" ON) +set(ROCPYDECODE_WARNING_FLAGS "") +if(ROCPYDECODE_ENABLE_ALL_WARNINGS) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + list(APPEND ROCPYDECODE_WARNING_FLAGS + -Weverything + -Wno-c++98-compat + -Wno-c++98-compat-pedantic + -Wno-pre-c++14-compat + -Wno-pre-c++17-compat + -Wno-pre-c++20-compat) + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + list(APPEND ROCPYDECODE_WARNING_FLAGS + -Wall + -Wextra + -Wpedantic) + elseif(MSVC) + list(APPEND ROCPYDECODE_WARNING_FLAGS + /W4 + /permissive-) + endif() + 
+ if(ROCPYDECODE_WARNING_FLAGS) + add_compile_options(${ROCPYDECODE_WARNING_FLAGS}) + endif() +endif() + # Set supported GPU Targets if(NOT GPU_TARGETS AND NOT AMDGPU_TARGETS) set(DEFAULT_GPU_TARGETS "gfx908;gfx90a;gfx942;gfx1030;gfx1031;gfx1032;gfx1100;gfx1101;gfx1102;gfx950;gfx1200;gfx1201") @@ -160,12 +187,19 @@ set (HIP_PLATFORM amd CACHE STRING "HIP platform") # Find dependencies find_package(HIP REQUIRED) -find_package(rocdecode 1.0.0 QUIET) -find_package(rocjpeg 1.0.0 QUIET) +# ROCm package versions are currently 0.x even though the APIs used here are stable. +find_package(rocdecode QUIET) +find_package(rocjpeg QUIET) find_package(pybind11 REQUIRED) find_package(DLPACK REQUIRED) find_package(FFmpeg QUIET) -set(ROCPYDECODE_USE_FFMPEG ${FFMPEG_FOUND}) +set(ROCPYDECODE_USE_FFMPEG FALSE) +set(ROCPYDECODE_FFMPEG_UTILS_DIR "${ROCM_PATH}/share/rocdecode/utils/ffmpegvideodecode") +if(FFMPEG_FOUND AND EXISTS "${ROCPYDECODE_FFMPEG_UTILS_DIR}/ffmpeg_video_dec.h") + set(ROCPYDECODE_USE_FFMPEG TRUE) +elseif(FFMPEG_FOUND) + message(STATUS "FFmpeg found, but rocdecode FFmpeg utility sources are unavailable; building without demux/CPU backend.") +endif() set(ROCPYDECODE_DEPENDENCY_READY FALSE) set(ROCPYJPEG_DEPENDENCY_READY FALSE) @@ -180,8 +214,17 @@ if(rocdecode_FOUND) include_directories(src) file(GLOB pyfiles pyRocVideoDecode/*.py pyRocVideoDecode/*.pyi) + # TBD (essam): + # # The always-built rocdecode utility sources use libavutil's MD5 helpers. 
+ # if(AVUTIL_LIBRARY) + # list(APPEND LINK_LIBRARY_LIST ${AVUTIL_LIBRARY}) + # else() + # message(FATAL_ERROR "libavutil is required to build rocPyDecode because ROCm's roc_video_dec utilities use av_md5_* symbols.") + # endif() + # Always build the GPU path - include_directories(${rocdecode_INCLUDE_DIR} + include_directories(SYSTEM + ${rocdecode_INCLUDE_DIR} ${ROCM_PATH}/include/rocdecode ${ROCM_PATH}/share/rocdecode/utils ${ROCM_PATH}/share/rocdecode/utils/rocvideodecode) @@ -205,17 +248,25 @@ if(rocdecode_FOUND) # Core GPU sources file(GLOB include_base src/rocdecode/*.h src/common/*.h ${ROCM_PATH}/share/rocdecode/utils/rocvideodecode/*.h) file(GLOB sources_base src/rocdecode/roc_pydecode.cpp src/rocdecode/roc_pyvideodecode.cpp src/common/*.cpp ${ROCM_PATH}/share/rocdecode/utils/*.cpp ${ROCM_PATH}/share/rocdecode/utils/rocvideodecode/*.cpp) + file(GLOB sources_external_rocdecode ${ROCM_PATH}/share/rocdecode/utils/*.cpp ${ROCM_PATH}/share/rocdecode/utils/rocvideodecode/*.cpp) set(include ${include_base}) set(sources ${sources_base}) + if(sources_external_rocdecode) + set_source_files_properties(${sources_external_rocdecode} PROPERTIES COMPILE_OPTIONS "-w") + endif() if(ROCPYDECODE_USE_FFMPEG) # Demux + CPU backend (FFmpeg-dependent) file(GLOB sources_ffmpeg src/rocdecode/roc_pyvideodemuxer.cpp src/rocdecode/roc_pyvideodecodecpu.cpp src/rocdecode/roc_pydecode_test.cpp ${ROCM_PATH}/share/rocdecode/utils/ffmpegvideodecode/*.cpp) file(GLOB include_ffmpeg ${ROCM_PATH}/share/rocdecode/utils/ffmpegvideodecode/*.h) + file(GLOB sources_external_ffmpeg ${ROCM_PATH}/share/rocdecode/utils/ffmpegvideodecode/*.cpp) list(APPEND include ${include_ffmpeg}) list(APPEND sources ${sources_ffmpeg}) - include_directories(${AVUTIL_INCLUDE_DIR} ${AVCODEC_INCLUDE_DIR} ${AVFORMAT_INCLUDE_DIR}) + include_directories(SYSTEM ${AVUTIL_INCLUDE_DIR} ${AVCODEC_INCLUDE_DIR} ${AVFORMAT_INCLUDE_DIR}) set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${FFMPEG_LIBRARIES}) + if(sources_external_ffmpeg) + 
set_source_files_properties(${sources_external_ffmpeg} PROPERTIES COMPILE_OPTIONS "-w") + endif() else() message(STATUS "FFmpeg not found; building rocPyDecode without demux/CPU backend (GPU decode only).") set(ROCPYDECODE_HOST_LIBRARY_FOUND FALSE) @@ -225,7 +276,7 @@ else() endif() # rocJPEG if(rocjpeg_FOUND) - include_directories(${rocjpeg_INCLUDE_DIR} ${ROCM_PATH}/share/rocjpeg/samples) + include_directories(SYSTEM ${ROCM_PATH}/include ${rocjpeg_INCLUDE_DIR} ${ROCM_PATH}/share/rocjpeg/samples) set(LINK_LIBRARY_LIST_JPEG ${LINK_LIBRARY_LIST_JPEG} rocjpeg::rocjpeg) file(GLOB pyfiles_jpeg pyRocJpegDecode/*.py pyRocJpegDecode/*.pyi) @@ -247,6 +298,7 @@ else() message("-- ${Yellow}rocPyJPEG skipped -- missing rocjpeg dependency${ColourReset}") endif() message("-- ${White}rocPyDecode/rocPyJPEG -- CMAKE_CXX_FLAGS:${CMAKE_CXX_FLAGS}${ColourReset}") +message("-- ${White}rocPyDecode/rocPyJPEG -- Warning Flags:${ROCPYDECODE_WARNING_FLAGS}${ColourReset}") # set license information set(CPACK_RPM_PACKAGE_LICENSE "MIT") diff --git a/src/common/roc_pybuffer.cpp b/src/common/roc_pybuffer.cpp index 7d55ca3d..62df3964 100644 --- a/src/common/roc_pybuffer.cpp +++ b/src/common/roc_pybuffer.cpp @@ -20,18 +20,39 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include "roc_pybuffer.h" -#include - -#include -#include - -using namespace std; -using namespace py::literals; - -static void CheckValidBuffer(const void *ptr) { - if (ptr == nullptr) { - throw std::runtime_error("NULL buffer not accepted"); +#include "roc_pybuffer.h" +#include +#include +#include +#include + +#include +#include + +using namespace std; +using namespace py::literals; + +namespace { +template +Target CheckedNumericCast(Source value, const char *context) { + using Limit = std::numeric_limits; + if (value > static_cast(Limit::max())) { + throw std::runtime_error(std::string(context) + " is too large"); + } + return static_cast(value); +} + +void ResetTensorMetadata(DLTensor &tensor) { + delete[] tensor.shape; + tensor.shape = nullptr; + delete[] tensor.strides; + tensor.strides = nullptr; +} +} // namespace + +static void CheckValidBuffer(const void *ptr) { + if (ptr == nullptr) { + throw std::runtime_error("NULL buffer not accepted"); } } @@ -42,22 +63,35 @@ BufferInterface::BufferInterface(DLPackPyTensor &&dlTensor) { m_dlTensor = std::move(dlTensor); } -py::tuple BufferInterface::shape() const { - py::tuple shape(m_dlTensor->ndim); - for (size_t i = 0; i < shape.size(); ++i) { - shape[i] = m_dlTensor->shape[i]; - } - return shape; -} - -py::tuple BufferInterface::strides() const { - py::tuple strides(m_dlTensor->ndim); - - for (size_t i = 0; i < strides.size(); ++i) { - strides[i] = m_dlTensor->strides[i]; - } - return strides; -} +py::tuple BufferInterface::shape() const { + const auto ndim = static_cast(m_dlTensor->ndim); + py::tuple shape(ndim); + if (m_dlTensor->shape == nullptr) { + return shape; + } + + std::vector values(ndim); + std::copy_n(m_dlTensor->shape, ndim, values.begin()); + for (size_t i = 0; i < ndim; ++i) { + shape[i] = values[i]; + } + return shape; +} + +py::tuple BufferInterface::strides() const { + const auto ndim = static_cast(m_dlTensor->ndim); + py::tuple strides(ndim); + if (m_dlTensor->strides == nullptr) { + 
return strides; + } + + std::vector values(ndim); + std::copy_n(m_dlTensor->strides, ndim, values.begin()); + for (size_t i = 0; i < ndim; ++i) { + strides[i] = values[i]; + } + return strides; +} std::string BufferInterface::dtype() const { if (m_dlTensor->dtype.bits == 8) @@ -71,21 +105,22 @@ void *BufferInterface::data() const { return m_dlTensor->data; } -py::capsule BufferInterface::dlpack(py::object stream) const { - - struct ManagerCtx { - DLManagedTensor tensor; - std::shared_ptr extBuffer; +py::capsule BufferInterface::dlpack(py::object stream) const { + static_cast(stream); + + struct ManagerCtx { + DLManagedTensor tensor; + std::shared_ptr extBuffer; }; auto ctx = std::make_unique(); - // Set up tensor deleter to delete the ManagerCtx - ctx->tensor.manager_ctx = ctx.get(); - ctx->tensor.deleter = [](DLManagedTensor *tensor) { - auto *ctx = static_cast(tensor->manager_ctx); - delete ctx; - }; + // Set up tensor deleter to delete the ManagerCtx + ctx->tensor.manager_ctx = ctx.get(); + ctx->tensor.deleter = [](DLManagedTensor *tensor) { + auto *manager_ctx = static_cast(tensor->manager_ctx); + delete manager_ctx; + }; // Copy tensor data ctx->tensor.dl_tensor = *m_dlTensor; @@ -131,54 +166,59 @@ void BufferInterface::ExportToPython(py::module &m) { .def("__dlpack_device__", &BufferInterface::dlpackDevice, "Get the device associated with the buffer"); } -int BufferInterface::LoadDLPack(std::vector& _shape, std::vector& _stride, uint32_t bit_depth, std::string& _type_str, void* _data, int device_id_) { - m_dlTensor->byte_offset = 0; - m_dlTensor->device.device_type = kDLROCM; // TODO: infer the device type from the memory buffer - m_dlTensor->device.device_id = device_id_; +int BufferInterface::LoadDLPack(const std::vector& _shape, const std::vector& _stride, uint32_t bit_depth, const std::string& _type_str, void* _data, int device_id_) { + if (_shape.size() != _stride.size()) { + throw std::runtime_error("Shape and stride rank must match"); + } + + 
m_dlTensor->byte_offset = 0; + m_dlTensor->device.device_type = kDLROCM; // TODO: infer the device type from the memory buffer + m_dlTensor->device.device_id = device_id_; // Convert data void* ptr = _data; CheckValidBuffer(ptr); m_dlTensor->data = ptr; - // Convert DataType - if (_type_str != "|u1" && _type_str != "|u2") { // TODO: can also be other letters - throw std::runtime_error("Could not create DL Pack tensor! Invalid typstr: " + _type_str); - return -1; - } - - int itemSizeDT; - - m_dlTensor->dtype.code = kDLUInt; - - if (bit_depth == 8) { - m_dlTensor->dtype.bits = 8; - itemSizeDT = sizeof(uint8_t); - } else if (bit_depth == 10) { - m_dlTensor->dtype.bits = 16; - itemSizeDT = sizeof(uint16_t); - } - m_dlTensor->dtype.lanes = 1; - - // Convert ndim - m_dlTensor->ndim = _shape.size(); - - // Convert shape - m_dlTensor->shape = new int64_t[m_dlTensor->ndim]; - for (int i = 0; i < m_dlTensor->ndim; ++i) { - m_dlTensor->shape[i] = _shape[i]; - } - - // Convert strides - int strides_dim = _stride.size(); - m_dlTensor->strides = new int64_t[strides_dim]; - for (int i = 0; i < strides_dim; ++i) { - m_dlTensor->strides[i] = _stride[i]; - if (m_dlTensor->strides[i] % itemSizeDT != 0) { - throw std::runtime_error("Stride must be a multiple of the element size in bytes"); - return -1; - } - m_dlTensor->strides[i] /= itemSizeDT; - } - return 0; -} + // Convert DataType + if (_type_str != "|u1" && _type_str != "|u2") { // TODO: can also be other letters + throw std::runtime_error("Could not create DL Pack tensor! 
Invalid typstr: " + _type_str); + } + + m_dlTensor->dtype.code = kDLUInt; + int item_size_dt = 0; + if (bit_depth == 8U) { + m_dlTensor->dtype.bits = 8U; + item_size_dt = static_cast(sizeof(uint8_t)); + } else if (bit_depth == 10U) { + m_dlTensor->dtype.bits = 16U; + item_size_dt = static_cast(sizeof(uint16_t)); + } else { + throw std::runtime_error("Unsupported bit depth for DLPack export"); + } + m_dlTensor->dtype.lanes = 1; + + // Convert ndim + ResetTensorMetadata(*m_dlTensor); + const auto ndim = CheckedNumericCast(_shape.size(), "tensor rank"); + m_dlTensor->ndim = ndim; + + // Convert shape + auto shape = std::make_unique(static_cast(ndim)); + for (size_t i = 0; i < _shape.size(); ++i) { + shape[i] = CheckedNumericCast(_shape[i], "shape dimension"); + } + m_dlTensor->shape = shape.release(); + + // Convert strides + auto strides = std::make_unique(static_cast(ndim)); + for (size_t i = 0; i < _stride.size(); ++i) { + const auto stride_bytes = CheckedNumericCast(_stride[i], "stride"); + if (stride_bytes % item_size_dt != 0) { + throw std::runtime_error("Stride must be a multiple of the element size in bytes"); + } + strides[i] = stride_bytes / item_size_dt; + } + m_dlTensor->strides = strides.release(); + return 0; +} diff --git a/src/common/roc_pybuffer.h b/src/common/roc_pybuffer.h index 42395517..be26c1c2 100644 --- a/src/common/roc_pybuffer.h +++ b/src/common/roc_pybuffer.h @@ -44,9 +44,9 @@ class BufferInterface final : public std::enable_shared_from_this& _shape, std::vector& _stride, uint32_t bit_depth, std::string& _type_str, void* _data, int device_id_); + BufferInterface() = default; + py::capsule dlpack(py::object stream) const; + int LoadDLPack(const std::vector& _shape, const std::vector& _stride, uint32_t bit_depth, const std::string& _type_str, void* _data, int device_id_); // to allow testing py::tuple dlpackDevice() const; @@ -54,7 +54,7 @@ class BufferInterface final : public std::enable_shared_from_this; DLPackPyTensor m_dlTensor; -}; - - 
-#endif // EXT_BUFFER_HEADER \ No newline at end of file +}; + + +#endif // EXT_BUFFER_HEADER diff --git a/src/common/roc_pydlpack.cpp b/src/common/roc_pydlpack.cpp index ec71ea1e..a24a71ec 100644 --- a/src/common/roc_pydlpack.cpp +++ b/src/common/roc_pydlpack.cpp @@ -20,58 +20,89 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include -#include -#include - -namespace py = pybind11; -#include "roc_pydlpack.h" -#include -#include - -DLPackPyTensor::DLPackPyTensor() noexcept : m_tensor{} { -} +#include +#include +#include +#include + +namespace py = pybind11; +#include "roc_pydlpack.h" +#include +#include +#include +#include + +namespace { +void ReleaseTensorMetadata(DLManagedTensor *self) { + delete[] self->dl_tensor.shape; + self->dl_tensor.shape = nullptr; + delete[] self->dl_tensor.strides; + self->dl_tensor.strides = nullptr; +} + +template +Target CheckedNumericCast(Source value, const char *context) { + if (value < 0 || value > static_cast(std::numeric_limits::max())) { + throw std::runtime_error(std::string(context) + " is out of range"); + } + return static_cast(value); +} + +DLManagedTensor MakeManagedTensor(const DLTensor &tensor) { + DLManagedTensor managed_tensor{}; + managed_tensor.dl_tensor = tensor; + return managed_tensor; +} +} // namespace + +DLPackPyTensor::DLPackPyTensor() noexcept : m_tensor{} { + m_tensor.deleter = ReleaseTensorMetadata; +} DLPackPyTensor::DLPackPyTensor(DLManagedTensor &&managedTensor) : m_tensor{std::move(managedTensor)} { managedTensor = {}; } -DLPackPyTensor::DLPackPyTensor(const DLTensor &tensor) : DLPackPyTensor(DLManagedTensor{tensor}) { -} - -DLPackPyTensor::DLPackPyTensor(const py::buffer_info &info, const DLDevice &dev) : m_tensor{} { - DLTensor &dlTensor = m_tensor.dl_tensor; - dlTensor.data = info.ptr; - //TBD dtype - dlTensor.dtype.code = kDLInt; - dlTensor.dtype.bits = 8; - dlTensor.dtype.lanes = 1; - dlTensor.ndim = info.ndim; - dlTensor.device = dev; - 
dlTensor.byte_offset = 0; - - m_tensor.deleter = [](DLManagedTensor *self) { - delete[] self->dl_tensor.shape; - self->dl_tensor.shape = nullptr; - delete[] self->dl_tensor.strides; - self->dl_tensor.strides = nullptr; - }; - - try { - dlTensor.shape = new int64_t[info.ndim]; - std::copy_n(info.shape.begin(), info.shape.size(), dlTensor.shape); - - dlTensor.strides = new int64_t[info.ndim]; - for (int i = 0; i < info.ndim; ++i) { - if (info.strides[i] % info.itemsize != 0) { - throw std::runtime_error("Stride must be a multiple of the element size in bytes"); - } - - dlTensor.strides[i] = info.strides[i] / info.itemsize; - } - } catch (...) { - m_tensor.deleter(&m_tensor); - throw; +DLPackPyTensor::DLPackPyTensor(const DLTensor &tensor) : DLPackPyTensor(MakeManagedTensor(tensor)) { +} + +DLPackPyTensor::DLPackPyTensor(const py::buffer_info &info, const DLDevice &dev) : m_tensor{} { + DLTensor &dlTensor = m_tensor.dl_tensor; + const auto rank = CheckedNumericCast(info.ndim, "tensor rank"); + dlTensor.data = info.ptr; + //TBD dtype + dlTensor.dtype.code = kDLInt; + dlTensor.dtype.bits = 8; + dlTensor.dtype.lanes = 1; + dlTensor.ndim = CheckedNumericCast(info.ndim, "tensor rank"); + dlTensor.device = dev; + dlTensor.byte_offset = 0; + + m_tensor.deleter = ReleaseTensorMetadata; + + try { + std::vector shape_values(rank); + std::transform(info.shape.begin(), info.shape.end(), shape_values.begin(), [](ssize_t dimension) { + return static_cast(dimension); + }); + auto shape = std::make_unique(rank); + std::copy(shape_values.begin(), shape_values.end(), shape.get()); + dlTensor.shape = shape.release(); + + std::vector stride_values(rank); + for (size_t i = 0; i < rank; ++i) { + const auto stride = info.strides[i]; + if (stride % info.itemsize != 0) { + throw std::runtime_error("Stride must be a multiple of the element size in bytes"); + } + stride_values[i] = static_cast(stride / info.itemsize); + } + auto strides = std::make_unique(rank); + 
std::copy(stride_values.begin(), stride_values.end(), strides.get()); + dlTensor.strides = strides.release(); + } catch (...) { + m_tensor.deleter(&m_tensor); + throw; } } diff --git a/src/rocdecode/roc_pydecode.cpp b/src/rocdecode/roc_pydecode.cpp index 928ab71b..18280379 100644 --- a/src/rocdecode/roc_pydecode.cpp +++ b/src/rocdecode/roc_pydecode.cpp @@ -37,6 +37,10 @@ void Test_PyReconfigureFlushCallback(); void Test_CalculateRgbImageSize(); #endif +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" +#endif PYBIND11_MODULE(rocpydecode, m) { m.doc() = "Python bindings for the C++ portions of rocDecode .."; @@ -222,8 +226,8 @@ PYBIND11_MODULE(rocpydecode, m) { }, "Get the data type of the buffer") .def("__dlpack__", [](std::shared_ptr& self, py::object stream) { return self->ext_buf[0]->dlpack(stream); - }, py::arg("stream") = NULL, "Export the buffer as a DLPack tensor") - .def("__dlpack_device__", [](std::shared_ptr& self) { + }, py::arg("stream") = py::none(), "Export the buffer as a DLPack tensor") + .def("__dlpack_device__", [](std::shared_ptr& /*self*/) { return py::make_tuple(py::int_(static_cast(DLDeviceType::kDLROCM)), py::int_(static_cast(0))); }, "Get the device associated with the buffer"); @@ -241,3 +245,6 @@ PYBIND11_MODULE(rocpydecode, m) { .def(py::init<>()) .def_static("test_all", &DLPackPyTensor::test_all); } +#if defined(__clang__) +#pragma clang diagnostic pop +#endif diff --git a/src/rocdecode/roc_pydecode.h b/src/rocdecode/roc_pydecode.h index 85126605..5a077c04 100644 --- a/src/rocdecode/roc_pydecode.h +++ b/src/rocdecode/roc_pydecode.h @@ -48,8 +48,6 @@ extern "C" { namespace py = pybind11; struct PyPacketData { - bool end_of_stream; - int pkt_flags; int64_t frame_pts; int64_t frame_size; int64_t bitstream_size; @@ -58,6 +56,9 @@ struct PyPacketData { uintptr_t frame_adrs_rgb; // rgb frame address uintptr_t frame_adrs_resized; // new resized yuv frame std::vector> ext_buf; + int 
pkt_flags = 0; + bool end_of_stream = false; + unsigned char padding_[3]{}; PyPacketData(){ ext_buf.push_back(std::make_shared()); //index[0]: always Y Tensor ext_buf.push_back(std::make_shared()); //index[1]: UV tensor in case of NV12, otherwise only U tensor when YUV444/P016 is supported @@ -68,9 +69,10 @@ struct PyPacketData { struct ConfigInfo { std::string device_name; std::string gcn_arch_name; - int pci_bus_id; - int pci_domain_id; - int pci_device_id; + int pci_bus_id = 0; + int pci_domain_id = 0; + int pci_device_id = 0; + int padding_ = 0; }; // defined in roc_pyvideodemuxer.cpp (FFmpeg dependent) diff --git a/src/rocdecode/roc_pyvideodecode.cpp b/src/rocdecode/roc_pyvideodecode.cpp index b82c6b02..9f4a2cad 100644 --- a/src/rocdecode/roc_pyvideodecode.cpp +++ b/src/rocdecode/roc_pyvideodecode.cpp @@ -167,20 +167,20 @@ py::object PyRocVideoDecoder::PyGetFrameYuv(PyPacketData& packet, bool SeparateY uint32_t bit_depth = GetBitDepth(); std::string type_str; std::vector stride; - if (bit_depth == 8) { - type_str = static_cast("|u1"); + if (bit_depth == 8U) { + type_str = "|u1"; stride.push_back(static_cast(surf_stride)); stride.push_back(sizeof(uint8_t)); - } else if (bit_depth <= 16) { - type_str = static_cast("|u2"); + } else if (bit_depth <= 16U) { + type_str = "|u2"; stride.push_back(static_cast(surf_stride)); stride.push_back(sizeof(uint16_t)); } // for NV12 format (also YUV444 & P016 when supported), Y always in ext_buf vector index [0] // The tensor shape->height will be all the Yuv planes if user specify 'FALSE' in 'SeparateYuvPlanes' argument - float plane_height_multiplier = SeparateYuvPlanes ? 1.0 : 1.5; // 1.5 for YUV NV12 - std::vector shape{ static_cast(height * plane_height_multiplier), static_cast(width)}; - packet.ext_buf[0]->LoadDLPack(shape, stride, bit_depth, type_str, (void *)packet.frame_adrs, device_id_); + const double plane_height_multiplier = SeparateYuvPlanes ? 
1.0 : 1.5; // 1.5 for YUV NV12 + std::vector shape{ static_cast(static_cast(height) * plane_height_multiplier), static_cast(width)}; + packet.ext_buf[0]->LoadDLPack(shape, stride, bit_depth, type_str, reinterpret_cast(packet.frame_adrs), device_id_); if (SeparateYuvPlanes) { // get surface format OutputSurfaceInfo* p_surf_info; @@ -188,9 +188,9 @@ py::object PyRocVideoDecoder::PyGetFrameYuv(PyPacketData& packet, bool SeparateY if (ret) { // for NV12 only the UV interleaved in one tensor: ext_buf vector index [1] if (p_surf_info->surface_format == rocDecVideoSurfaceFormat_NV12 || p_surf_info->surface_format == rocDecVideoSurfaceFormat_P016) { - std::vector shape{ static_cast(height >> 1), static_cast(width)}; - uintptr_t uv_offset = p_surf_info->output_pitch * p_surf_info->output_vstride; // count for possible padding - packet.ext_buf[1]->LoadDLPack(shape, stride, bit_depth, type_str, (void *)(packet.frame_adrs + uv_offset), device_id_); + std::vector uv_shape{ static_cast(height >> 1U), static_cast(width)}; + const uintptr_t uv_offset = static_cast(p_surf_info->output_pitch) * p_surf_info->output_vstride; // count for possible padding + packet.ext_buf[1]->LoadDLPack(uv_shape, stride, bit_depth, type_str, reinterpret_cast(packet.frame_adrs + uv_offset), device_id_); } else { cout << "surf fmt: " << p_surf_info->surface_format << " [not supported]" << "\n"; } @@ -202,13 +202,12 @@ py::object PyRocVideoDecoder::PyGetFrameYuv(PyPacketData& packet, bool SeparateY size_t PyRocVideoDecoder::CalculateRgbImageSize(OutputFormatEnum& e_output_format, OutputSurfaceInfo * p_surf_info) { size_t rgb_image_size = 0; - int rgb_width = 0; - if (p_surf_info->bit_depth == 8) { - rgb_width = (p_surf_info->output_width + 1) & ~1; // has to be a multiple of 2 for hip colorconvert kernels - rgb_image_size = ((e_output_format == bgr) || (e_output_format == rgb)) ? 
rgb_width * p_surf_info->output_height * 3 : rgb_width * p_surf_info->output_height * 4; + const size_t rgb_width = static_cast((p_surf_info->output_width + 1U) & ~1U); // has to be a multiple of 2 for hip colorconvert kernels + const size_t output_height = static_cast(p_surf_info->output_height); + if (p_surf_info->bit_depth == 8U) { + rgb_image_size = ((e_output_format == bgr) || (e_output_format == rgb)) ? rgb_width * output_height * 3U : rgb_width * output_height * 4U; } else { - rgb_width = (p_surf_info->output_width + 1) & ~1; - rgb_image_size = ((e_output_format == bgr) || (e_output_format == rgb)) ? rgb_width * p_surf_info->output_height * 3 : ((e_output_format == bgr48) || (e_output_format == rgb48)) ? rgb_width * p_surf_info->output_height * 6 : rgb_width * p_surf_info->output_height * 8; + rgb_image_size = ((e_output_format == bgr) || (e_output_format == rgb)) ? rgb_width * output_height * 3U : ((e_output_format == bgr48) || (e_output_format == rgb48)) ? rgb_width * output_height * 6U : rgb_width * output_height * 8U; } return rgb_image_size; } @@ -233,7 +232,7 @@ py::object PyRocVideoDecoder::PyGetFrameRgb(PyPacketData& packet, int rgb_format return py::cast(-1); // ret failure // allocate 'new' RGB image device-memory if wasn't if(frame_ptr_rgb == nullptr) { - HIP_API_CALL(hipMalloc((void **)&frame_ptr_rgb, rgb_image_size)); + HIP_API_CALL(hipMalloc(reinterpret_cast(&frame_ptr_rgb), rgb_image_size)); if(frame_ptr_rgb == nullptr) return py::cast(-1); // ret failure } @@ -244,7 +243,7 @@ py::object PyRocVideoDecoder::PyGetFrameRgb(PyPacketData& packet, int rgb_format // use post process instance VideoPostProcess * post_proc = post_process_class; // Get Stream, and convert YUV 2 RGB - post_proc->ColorConvertYUV2RGB(reinterpret_cast(packet.frame_adrs), surf_info, frame_ptr_rgb, e_output_format, 0); + post_proc->ColorConvertYUV2RGB(reinterpret_cast(packet.frame_adrs), surf_info, frame_ptr_rgb, e_output_format, nullptr); // save the rgb ptr 
packet.frame_adrs_rgb = reinterpret_cast(frame_ptr_rgb); // Load DLPack Tensor @@ -253,10 +252,10 @@ py::object PyRocVideoDecoder::PyGetFrameRgb(PyPacketData& packet, int rgb_format uint32_t height = GetHeight(); uint32_t surf_stride = post_proc->GetRgbStride(e_output_format, surf_info); uint32_t bit_depth = GetBitDepth(); - std::string type_str(static_cast("|u1")); - std::vector shape{ static_cast(height), static_cast(width), 3}; // 3 rgb channels - std::vector stride{ static_cast(surf_stride), 1, 0}; // python assumes same dim for both shape & strides - packet.ext_buf[0]->LoadDLPack(shape, stride, bit_depth, type_str, (void *)frame_ptr_rgb, device_id_); + std::string type_str("|u1"); + std::vector shape{ static_cast(height), static_cast(width), size_t{3} }; // 3 rgb channels + std::vector stride{ static_cast(surf_stride), size_t{3}, size_t{1} }; + packet.ext_buf[0]->LoadDLPack(shape, stride, bit_depth, type_str, static_cast(frame_ptr_rgb), device_id_); } } return py::cast(packet.frame_pts); @@ -275,11 +274,13 @@ uintptr_t PyRocVideoDecoder::PyResizeFrame(PyPacketData& packet, Dim *resized_di if((reinterpret_cast(packet.frame_adrs) == nullptr) || resized_dim->w == 0 || resized_dim->h == 0) return 0; OutputSurfaceInfo *surf_info = reinterpret_cast(in_surf_info); + const uint32_t resized_width = static_cast(resized_dim->w); + const uint32_t resized_height = static_cast(resized_dim->h); // validate request - if ((surf_info->output_width == resized_dim->w) && (surf_info->output_height == resized_dim->h)) + if ((surf_info->output_width == resized_width) && (surf_info->output_height == resized_height)) return 0; uint8_t *in_yuv_frame = reinterpret_cast(packet.frame_adrs); - size_t requested_size_in_bytes = resized_dim->w * (resized_dim->h + (resized_dim->h >> 1)) * surf_info->bytes_per_pixel; + const size_t requested_size_in_bytes = static_cast(resized_width) * static_cast(resized_height + (resized_height >> 1U)) * surf_info->bytes_per_pixel; // alloc or refill 
surf-info one time, and refill if size changed if (resized_image_size_in_bytes != requested_size_in_bytes) { resized_image_size_in_bytes = requested_size_in_bytes; @@ -291,11 +292,11 @@ uintptr_t PyRocVideoDecoder::PyResizeFrame(PyPacketData& packet, Dim *resized_di } } memcpy(resized_surf_info, surf_info, sizeof(OutputSurfaceInfo)); - resized_surf_info->output_width = resized_dim->w; - resized_surf_info->output_height = resized_dim->h; - resized_surf_info->output_pitch = resized_dim->w * surf_info->bytes_per_pixel; - resized_surf_info->output_vstride = resized_dim->h; - resized_surf_info->output_surface_size_in_bytes = resized_surf_info->output_pitch * (resized_dim->h + (resized_dim->h >> 1)); + resized_surf_info->output_width = resized_width; + resized_surf_info->output_height = resized_height; + resized_surf_info->output_pitch = resized_width * surf_info->bytes_per_pixel; + resized_surf_info->output_vstride = resized_height; + resized_surf_info->output_surface_size_in_bytes = resized_surf_info->output_pitch * (resized_height + (resized_height >> 1U)); // new size means new MEM, dealloc old one if exist if (frame_ptr_resized != nullptr) { @@ -308,17 +309,24 @@ uintptr_t PyRocVideoDecoder::PyResizeFrame(PyPacketData& packet, Dim *resized_di } // new MEM if not allocated if (frame_ptr_resized == nullptr) { - hipError_t hip_status = hipMalloc((void **)&frame_ptr_resized, resized_image_size_in_bytes); + hipError_t hip_status = hipMalloc(reinterpret_cast(&frame_ptr_resized), resized_image_size_in_bytes); if (hip_status != hipSuccess) { std::cerr << "ERROR: hipMalloc failed to allocate the device memory for the output!" << hip_status << std::endl; return 0; } } // call resize kernel, TODO: below code assumes NV12/P016 for decoded surface. 
Modify to take other surface formats in future + const int resized_width_int = static_cast(resized_width); + const int resized_height_int = static_cast(resized_height); + const int output_pitch = static_cast(surf_info->output_pitch); + const int output_width = static_cast(surf_info->output_width); + const int output_height = static_cast(surf_info->output_height); + const uintptr_t chroma_offset = static_cast(surf_info->output_vstride) * surf_info->output_pitch; + uint8_t *const chroma_plane = reinterpret_cast(packet.frame_adrs + chroma_offset); if (surf_info->bytes_per_pixel == 2) { - ResizeP016(frame_ptr_resized, resized_dim->w * 2, resized_dim->w, resized_dim->h, in_yuv_frame, surf_info->output_pitch, surf_info->output_width, surf_info->output_height, (in_yuv_frame + surf_info->output_vstride * surf_info->output_pitch), nullptr, 0); + ResizeP016(frame_ptr_resized, resized_width_int * 2, resized_width_int, resized_height_int, in_yuv_frame, output_pitch, output_width, output_height, chroma_plane, nullptr, nullptr); } else { - ResizeNv12(frame_ptr_resized, resized_dim->w, resized_dim->w, resized_dim->h, in_yuv_frame, surf_info->output_pitch, surf_info->output_width, surf_info->output_height, (in_yuv_frame + surf_info->output_vstride * surf_info->output_pitch), nullptr, 0); + ResizeNv12(frame_ptr_resized, resized_width_int, resized_width_int, resized_height_int, in_yuv_frame, output_pitch, output_width, output_height, chroma_plane, nullptr, nullptr); } // save new resized frame address packet.frame_adrs_resized = reinterpret_cast(frame_ptr_resized); @@ -351,7 +359,7 @@ py::object PyRocVideoDecoder::PySaveFrameToFile(std::string& output_file_name_in if (e_output_format != OutputFormatEnum::native) { // native == YUV frame image_size = CalculateRgbImageSize(e_output_format, p_surf_info); } - SaveFrameToFile(output_file_name, (void *)surf_mem, p_surf_info, image_size); + SaveFrameToFile(output_file_name, reinterpret_cast(surf_mem), p_surf_info, image_size); } return 
py::cast(Py_None); } @@ -404,13 +412,13 @@ uint32_t PyRocVideoDecoder::PyGetBitDepth() { #if ROCDECODE_CHECK_VERSION(0,6,0) // for python binding, Session overhead refers to decoder initialization and deinitialization time -py::object PyRocVideoDecoder::PyAddDecoderSessionOverHead(int session_id, double duration) { +py::object PyRocVideoDecoder::PyAddDecoderSessionOverHead(std::uintptr_t session_id, double duration) { AddDecoderSessionOverHead(static_cast(session_id), duration); return py::cast(Py_None); } // for python binding, Session overhead refers to decoder initialization and deinitialization time -py::object PyRocVideoDecoder::PyGetDecoderSessionOverHead(int session_id) { +py::object PyRocVideoDecoder::PyGetDecoderSessionOverHead(std::uintptr_t session_id) { return py::cast(GetDecoderSessionOverHead(static_cast(session_id))); } diff --git a/src/rocdecode/roc_pyvideodecode.h b/src/rocdecode/roc_pyvideodecode.h index f35ac460..c2ee2db3 100644 --- a/src/rocdecode/roc_pyvideodecode.h +++ b/src/rocdecode/roc_pyvideodecode.h @@ -34,7 +34,8 @@ typedef enum ReconfigFlushMode_enum { // this struct is used by videodecode and videodecodeMultiFiles to dump last frames to file typedef struct ReconfigDumpFileStruct_t { - bool b_dump_frames_to_file; + bool b_dump_frames_to_file = false; + unsigned char padding_[7]{}; std::string output_file_name; } ReconfigDumpFileStruct; @@ -49,7 +50,7 @@ class PyRocVideoDecoder : public RocVideoDecoder { PyRocVideoDecoder(int device_id, int mem_type, rocDecVideoCodec codec, bool force_zero_latency = false, const Rect *p_crop_rect = nullptr, int max_width = 0, int max_height = 0, uint32_t clk_rate = 0) : RocVideoDecoder(device_id, static_cast(mem_type), codec, force_zero_latency, - p_crop_rect, false, max_width, max_height, clk_rate) { + p_crop_rect, false, 0U, max_width, max_height, clk_rate) { InitConfigStructure(); device_id_ = device_id; } ~PyRocVideoDecoder(); @@ -108,11 +109,10 @@ class PyRocVideoDecoder : public RocVideoDecoder { 
#if ROCDECODE_CHECK_VERSION(0,6,0) // Session overhead refers to decoder initialization and deinitialization time - py::object PyAddDecoderSessionOverHead(int session_id, double duration); - py::object PyGetDecoderSessionOverHead(int session_id); + py::object PyAddDecoderSessionOverHead(std::uintptr_t session_id, double duration); + py::object PyGetDecoderSessionOverHead(std::uintptr_t session_id); #endif private: - int device_id_; std::shared_ptr configInfo; void InitConfigStructure(); @@ -128,4 +128,8 @@ class PyRocVideoDecoder : public RocVideoDecoder { uint8_t *frame_ptr_resized = nullptr; size_t resized_image_size_in_bytes = 0; OutputSurfaceInfo *resized_surf_info = nullptr; -}; \ No newline at end of file + + private: + int device_id_ = 0; + [[maybe_unused]] int padding_ = 0; +}; diff --git a/src/rocdecode/roc_pyvideodecodecpu.cpp b/src/rocdecode/roc_pyvideodecodecpu.cpp index 7c8f7d35..fe5a54ea 100644 --- a/src/rocdecode/roc_pyvideodecodecpu.cpp +++ b/src/rocdecode/roc_pyvideodecodecpu.cpp @@ -351,13 +351,13 @@ uint32_t PyRocVideoDecoderCpu::PyGetBitDepth() { #if ROCDECODE_CHECK_VERSION(0,6,0) // for python binding, Session overhead refers to decoder initialization and deinitialization time -py::object PyRocVideoDecoderCpu::PyAddDecoderSessionOverHead(int session_id, double duration) { +py::object PyRocVideoDecoderCpu::PyAddDecoderSessionOverHead(std::uintptr_t session_id, double duration) { AddDecoderSessionOverHead(static_cast(session_id), duration); return py::cast(Py_None); } // for python binding, Session overhead refers to decoder initialization and deinitialization time -py::object PyRocVideoDecoderCpu::PyGetDecoderSessionOverHead(int session_id) { +py::object PyRocVideoDecoderCpu::PyGetDecoderSessionOverHead(std::uintptr_t session_id) { return py::cast(GetDecoderSessionOverHead(static_cast(session_id))); } diff --git a/src/rocdecode/roc_pyvideodecodecpu.h b/src/rocdecode/roc_pyvideodecodecpu.h index 45426d90..51ced086 100644 --- 
a/src/rocdecode/roc_pyvideodecodecpu.h +++ b/src/rocdecode/roc_pyvideodecodecpu.h @@ -90,8 +90,8 @@ class PyRocVideoDecoderCpu : public FFMpegVideoDecoder { #if ROCDECODE_CHECK_VERSION(0,6,0) // Session overhead refers to decoder initialization and deinitialization time - py::object PyAddDecoderSessionOverHead(int session_id, double duration); - py::object PyGetDecoderSessionOverHead(int session_id); + py::object PyAddDecoderSessionOverHead(std::uintptr_t session_id, double duration); + py::object PyGetDecoderSessionOverHead(std::uintptr_t session_id); #endif private: std::shared_ptr configInfo; @@ -105,4 +105,4 @@ class PyRocVideoDecoderCpu : public FFMpegVideoDecoder { uint8_t *frame_ptr_resized = nullptr; size_t resized_image_size_in_bytes = 0; OutputSurfaceInfo *resized_surf_info = nullptr; -}; \ No newline at end of file +}; diff --git a/src/rocjpeg/roc_pyjpeg.cpp b/src/rocjpeg/roc_pyjpeg.cpp index 347d9155..3f2abfe1 100644 --- a/src/rocjpeg/roc_pyjpeg.cpp +++ b/src/rocjpeg/roc_pyjpeg.cpp @@ -32,6 +32,10 @@ using namespace std; namespace py = pybind11; using namespace py::literals; +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" +#endif PYBIND11_MODULE(rocpyjpegdecode, m) { m.doc() = "Python bindings for the C++ portions of rocJPEG .."; @@ -108,3 +112,6 @@ PYBIND11_MODULE(rocpyjpegdecode, m) { .def("init_hip_device",&PyRocJpegUtils::InitHipDevice); } +#if defined(__clang__) +#pragma clang diagnostic pop +#endif diff --git a/src/rocjpeg/roc_pyjpeg.h b/src/rocjpeg/roc_pyjpeg.h index d61a8f87..803f6414 100644 --- a/src/rocjpeg/roc_pyjpeg.h +++ b/src/rocjpeg/roc_pyjpeg.h @@ -36,7 +36,5 @@ THE SOFTWARE. 
#include namespace py = pybind11; -using namespace py::literals; - -#endif // PY_ROC_JPEG_PYBIND11_HEADER \ No newline at end of file +#endif // PY_ROC_JPEG_PYBIND11_HEADER diff --git a/src/rocjpeg/roc_pyjpeg_codestream.cpp b/src/rocjpeg/roc_pyjpeg_codestream.cpp index 5ec2b6b9..518bd33c 100644 --- a/src/rocjpeg/roc_pyjpeg_codestream.cpp +++ b/src/rocjpeg/roc_pyjpeg_codestream.cpp @@ -28,6 +28,7 @@ THE SOFTWARE. #include using namespace std; +using namespace py::literals; void CodeStream::ExportToPython(py::module& m) { py::class_(m, "CodeStream", @@ -69,7 +70,7 @@ void CodeStream::ExportToPython(py::module& m) { )pbdoc"); } -int CodeStream::ReadFromFile(const std::filesystem::path& filename, std::shared_ptr>& file_data, int& file_size) { +int CodeStream::ReadFromFile(const std::filesystem::path& filename, std::shared_ptr>& file_buffer, size_t& file_size) { // Open image file in binary mode and go to the end to get file size std::ifstream input(filename, std::ios::in | std::ios::binary | std::ios::ate); if (!input.is_open()) { @@ -77,14 +78,19 @@ int CodeStream::ReadFromFile(const std::filesystem::path& filename, std::shared_ return EXIT_FAILURE; } // Get the size - file_size = static_cast(input.tellg()); + const auto raw_file_size = input.tellg(); + if (raw_file_size <= 0) { + std::cerr << "ERROR: Invalid image size: " << filename << std::endl; + return EXIT_FAILURE; + } + file_size = static_cast(raw_file_size); input.seekg(0, std::ios::beg); // Allocate shared buffer if not already allocated or too small - if (!file_data || file_data->size() < static_cast(file_size)) { - file_data = std::make_shared>(file_size); + if (!file_buffer || file_buffer->size() < file_size) { + file_buffer = std::make_shared>(file_size); } // Read the file into the buffer - if (!input.read(file_data->data(), file_size)) { + if (!input.read(file_buffer->data(), static_cast(file_size))) { std::cerr << "ERROR: Cannot read from file: " << filename << std::endl; return EXIT_FAILURE; } @@ 
-92,7 +98,7 @@ int CodeStream::ReadFromFile(const std::filesystem::path& filename, std::shared_ } // Use the dat and its size if valid, otherwise use the file to load the data -int CodeStream::InitializeSingleImage(const std::filesystem::path& filename, const unsigned char* data, int data_size) { +int CodeStream::InitializeSingleImage(const std::filesystem::path& filename, const unsigned char* data, size_t data_size) { // File sanity check if(!filename.empty()) { if(!std::filesystem::exists(filename)) { @@ -102,13 +108,23 @@ int CodeStream::InitializeSingleImage(const std::filesystem::path& filename, con } // Read file data, if no data sent if (data != nullptr && data_size > 0) { - file_data = std::make_shared>(reinterpret_cast(data), reinterpret_cast(data) + data_size); + auto buffer = std::make_shared>(data_size); + std::copy_n(reinterpret_cast(data), data_size, buffer->begin()); + file_data = std::move(buffer); } else if(data == nullptr) { - int ret = EXIT_SUCCESS; - if((ret = ReadFromFile(filename, file_data, data_size)) != EXIT_SUCCESS) { - return ret; + if (ReadFromFile(filename, file_data, data_size) != EXIT_SUCCESS) { + return EXIT_FAILURE; } } + return InitializeStreamFromCurrentData(); +} + +int CodeStream::InitializeStreamFromCurrentData() { + if (!file_data || file_data->empty()) { + std::cerr << "ERROR: Empty JPEG stream" << std::endl; + return EXIT_FAILURE; + } + RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_NOT_INITIALIZED; rocjpeg_status = rocJpegStreamCreate(&stream_handle); if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { @@ -116,9 +132,9 @@ int CodeStream::InitializeSingleImage(const std::filesystem::path& filename, con return EXIT_FAILURE; } // Stream Parse - rocjpeg_status = rocJpegStreamParse(reinterpret_cast(file_data->data()), data_size, stream_handle); + rocjpeg_status = rocJpegStreamParse(reinterpret_cast(file_data->data()), file_data->size(), stream_handle); if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { - std::cerr << "ERROR: Failed to 
parse the input jpeg stream with " << rocJpegGetErrorName(rocjpeg_status) << ": Input File : " << (!filename.empty() ? filename : "") << std::endl; + std::cerr << "ERROR: Failed to parse the input jpeg stream with " << rocJpegGetErrorName(rocjpeg_status) << std::endl; Release(); return EXIT_FAILURE; } @@ -127,11 +143,36 @@ int CodeStream::InitializeSingleImage(const std::filesystem::path& filename, con void CodeStream::Release() { if(stream_handle) { - RocJpegStatus rocjpeg_status = rocJpegStreamDestroy(stream_handle); + rocJpegStreamDestroy(stream_handle); stream_handle = nullptr; } } +CodeStream::CodeStream(const CodeStream& other) + : file_data(other.file_data), + data_ref_bytes_(other.data_ref_bytes_), + data_ref_arr_(other.data_ref_arr_) { + if (other.stream_handle != nullptr) { + InitializeStreamFromCurrentData(); + } +} + +CodeStream& CodeStream::operator=(const CodeStream& other) { + if (this == &other) { + return *this; + } + + Release(); + file_data = other.file_data; + data_ref_bytes_ = other.data_ref_bytes_; + data_ref_arr_ = other.data_ref_arr_; + + if (other.stream_handle != nullptr) { + InitializeStreamFromCurrentData(); + } + return *this; +} + CodeStream::CodeStream(const std::filesystem::path& filename) { py::gil_scoped_release release; InitializeSingleImage(filename, nullptr, 0); @@ -139,7 +180,7 @@ CodeStream::CodeStream(const std::filesystem::path& filename) { CodeStream::CodeStream(const unsigned char* data, size_t length) { py::gil_scoped_release release; - InitializeSingleImage(static_cast(""), data, length); + InitializeSingleImage({}, data, length); } CodeStream::CodeStream(py::bytes data) { @@ -147,14 +188,14 @@ CodeStream::CodeStream(py::bytes data) { std::string data_str = static_cast(data_ref_bytes_); // Convert py::bytes to std::string std::string_view data_view(data_str); py::gil_scoped_release release; - InitializeSingleImage(static_cast(""), reinterpret_cast(data_view.data()), data_view.size()); + InitializeSingleImage({}, 
reinterpret_cast(data_view.data()), data_view.size()); } CodeStream::CodeStream(py::array_t arr) { data_ref_arr_ = arr; auto data = data_ref_arr_.unchecked<1>(); py::gil_scoped_release release; - InitializeSingleImage(static_cast(""), data.data(0), data.size()); + InitializeSingleImage({}, data.data(0), static_cast(data.size())); } CodeStream::CodeStream() { diff --git a/src/rocjpeg/roc_pyjpeg_codestream.h b/src/rocjpeg/roc_pyjpeg_codestream.h index b9b7e787..41d04346 100644 --- a/src/rocjpeg/roc_pyjpeg_codestream.h +++ b/src/rocjpeg/roc_pyjpeg_codestream.h @@ -36,6 +36,10 @@ class CodeStream { CodeStream(const unsigned char*, size_t); CodeStream(py::bytes); CodeStream(py::array_t); + CodeStream(const CodeStream&); + CodeStream& operator=(const CodeStream&); + CodeStream(CodeStream&&) noexcept = default; + CodeStream& operator=(CodeStream&&) noexcept = default; ~CodeStream(); CodeStream(); @@ -49,8 +53,9 @@ class CodeStream { py::bytes data_ref_bytes_; py::array_t data_ref_arr_; void Release(); - int ReadFromFile(const std::filesystem::path& filename, std::shared_ptr>& file_data, int& file_size); - int InitializeSingleImage(const std::filesystem::path& filename, const unsigned char* data, int data_size); + int ReadFromFile(const std::filesystem::path& filename, std::shared_ptr>& file_buffer, size_t& file_size); + int InitializeSingleImage(const std::filesystem::path& filename, const unsigned char* data, size_t data_size); + int InitializeStreamFromCurrentData(); }; -#endif // PY_ROC_JPEG_CODE_STREAM_HEADER \ No newline at end of file +#endif // PY_ROC_JPEG_CODE_STREAM_HEADER diff --git a/src/rocjpeg/roc_pyjpeg_decode_source.cpp b/src/rocjpeg/roc_pyjpeg_decode_source.cpp index e6b37baf..0c9f6b8a 100644 --- a/src/rocjpeg/roc_pyjpeg_decode_source.cpp +++ b/src/rocjpeg/roc_pyjpeg_decode_source.cpp @@ -23,6 +23,8 @@ THE SOFTWARE. 
#include #include "roc_pyjpeg_decode_source.h" +using namespace py::literals; + DecodeSource::DecodeSource(const CodeStream* code_stream_ptr) : code_stream_(std::make_unique(*code_stream_ptr)) // make a copy , code_stream_ptr_(code_stream_.get()) { diff --git a/src/rocjpeg/roc_pyjpeg_decode_source.h b/src/rocjpeg/roc_pyjpeg_decode_source.h index 8911f224..b1292853 100644 --- a/src/rocjpeg/roc_pyjpeg_decode_source.h +++ b/src/rocjpeg/roc_pyjpeg_decode_source.h @@ -25,7 +25,6 @@ THE SOFTWARE. #include "roc_pyjpeg_codestream.h" namespace py = pybind11; -using namespace py::literals; class DecodeSource { public: diff --git a/src/rocjpeg/roc_pyjpeg_decoder.cpp b/src/rocjpeg/roc_pyjpeg_decoder.cpp index ef08f9bc..39f46843 100644 --- a/src/rocjpeg/roc_pyjpeg_decoder.cpp +++ b/src/rocjpeg/roc_pyjpeg_decoder.cpp @@ -24,8 +24,10 @@ THE SOFTWARE. #include "roc_pyjpeg_utils.h" #include "roc_pyjpeg_codestream.h" #include "roc_pyjpeg_images.h" +#include using namespace std; +using namespace py::literals; void Decoder::ExportToPython(py::module& m) { // Decoder Class @@ -135,8 +137,8 @@ std::pair> Decoder::decode(std::vector stream_handles; std::vector decode_params_list; std::vector destinations; @@ -144,7 +146,7 @@ std::pair> Decoder::decode(std::vector images_; - if(batch_size <= 0) + if(batch_size == 0U) return {elapsed_ms, images_}; // loop the whole list length - Process as one BATCH @@ -175,7 +177,7 @@ std::pair> Decoder::decode(std::vector(count_of_valid_instances), // less or equal to the batch_size decode_params_list.data(), destinations.data() ); @@ -188,7 +190,7 @@ std::pair> Decoder::decode(std::vector(widths[0]); + img.m_height = static_cast(heights[0]); // Get Channel Pitch And Sizes PyRocJpegUtils rocjpeg_utils; if (rocjpeg_utils.GetChannelPitchAndSizes(img.decode_params, img.subsampling, widths, heights, img.num_channels, img.output_image, channel_sizes)) { @@ -248,16 +250,21 @@ int Decoder::GetImageInfo(RocJpegStreamHandle stream_handle, PyJpegImages& img) } // 
allocate memory for each channel hipError_t hip_status = hipSuccess; - for (int i = 0; i < img.num_channels; i++) { - if (img.output_image.channel[i] != nullptr) { - hip_status = hipFree((void *)img.output_image.channel[i]); - if (hip_status != hipSuccess) + std::array channels{}; + std::copy(std::begin(img.output_image.channel), std::end(img.output_image.channel), channels.begin()); + for (uint32_t i = 0; i < img.num_channels; ++i) { + if (channels[i] != nullptr) { + hip_status = hipFree(static_cast(channels[i])); + if (hip_status != hipSuccess) { return EXIT_FAILURE; - img.output_image.channel[i] = nullptr; + } + channels[i] = nullptr; } - hip_status = hipMalloc(&img.output_image.channel[i], channel_sizes[i]); - if (hip_status != hipSuccess) + hip_status = hipMalloc(&channels[i], static_cast(channel_sizes[i])); + if (hip_status != hipSuccess) { return EXIT_FAILURE; + } } + std::copy(channels.begin(), channels.end(), std::begin(img.output_image.channel)); return EXIT_SUCCESS; -} \ No newline at end of file +} diff --git a/src/rocjpeg/roc_pyjpeg_decoder.h b/src/rocjpeg/roc_pyjpeg_decoder.h index 1bf53bce..4559456f 100644 --- a/src/rocjpeg/roc_pyjpeg_decoder.h +++ b/src/rocjpeg/roc_pyjpeg_decoder.h @@ -42,15 +42,16 @@ class Decoder { // set output image format void SetOutputFormat(RocJpegOutputFormat output_format); - RocJpegOutputFormat GetFormat() {return user_output_format;}; - void SetFormat(RocJpegOutputFormat fmt) { user_output_format = fmt;}; + RocJpegOutputFormat GetFormat() const { return user_output_format; } + void SetFormat(RocJpegOutputFormat fmt) { user_output_format = fmt; } private: int m_device_id; RocJpegBackend m_backend; RocJpegHandle rocjpeg_handle; // main session RocJpegOutputFormat user_output_format; // user can adjust + [[maybe_unused]] uint32_t padding_ = 0; int GetImageInfo(RocJpegStreamHandle stream_handle, PyJpegImages& img); // finalize the parsing job }; -#endif // PY_ROC_JPEG_HEADER \ No newline at end of file +#endif // 
PY_ROC_JPEG_HEADER diff --git a/src/rocjpeg/roc_pyjpeg_images.cpp b/src/rocjpeg/roc_pyjpeg_images.cpp index 1696e102..e86c39ea 100644 --- a/src/rocjpeg/roc_pyjpeg_images.cpp +++ b/src/rocjpeg/roc_pyjpeg_images.cpp @@ -20,6 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include #include #include "rocjpeg/rocjpeg.h" #include "common/roc_pybuffer.h" @@ -63,8 +64,8 @@ void PyJpegImages::ExportToPython(py::module& m) { }, "Get the data type of the buffer") .def("__dlpack__", [](std::shared_ptr& self, py::object stream) { return self->ext_buf[0]->dlpack(stream); - }, py::arg("stream") = NULL, "Export the buffer as a DLPack tensor") - .def("__dlpack_device__", [](std::shared_ptr& self) { + }, py::arg("stream") = py::none(), "Export the buffer as a DLPack tensor") + .def("__dlpack_device__", [](std::shared_ptr& /*self*/) { return py::make_tuple(py::int_(static_cast(DLDeviceType::kDLROCM)), py::int_(static_cast(0))); }, "Get the device associated with the buffer") .def_readwrite("height", &PyJpegImages::m_height, @@ -81,7 +82,7 @@ py::array_t PyJpegImages::to_numpy(int index) { py::array_t ret; if (index < 0 || index >= static_cast(ext_buf.size())) throw std::out_of_range("Invalid channel index"); - auto& buf = ext_buf[index]; + auto& buf = ext_buf[static_cast(index)]; uint8_t* data_ptr = static_cast(buf->data()); py::tuple py_shape = buf->shape(); if (py_shape.size() == 3) { @@ -106,10 +107,10 @@ py::array_t PyJpegImages::to_numpy(int index) { bool PyJpegImages::GetOutputDims(std::vector& widths, std::vector& heights, uint32_t img_width, uint32_t img_height, RocJpegOutputFormat output_format, - RocJpegChromaSubsampling subsampling) { + RocJpegChromaSubsampling image_subsampling) { switch (output_format) { case ROCJPEG_OUTPUT_NATIVE: - switch (subsampling) { + switch (image_subsampling) { case ROCJPEG_CSS_444: widths[2] = widths[1] = widths[0] = img_width; heights[2] = heights[1] = heights[0] = img_height; @@ -120,7 
+121,7 @@ bool PyJpegImages::GetOutputDims(std::vector& widths, std::vector> 1; break; case ROCJPEG_CSS_422: - widths[0] = img_width * 2; + widths[0] = img_width * 2U; heights[0] = img_height; break; case ROCJPEG_CSS_420: @@ -132,13 +133,14 @@ bool PyJpegImages::GetOutputDims(std::vector& widths, std::vector& widths, std::vector& widths, std::vector& widths, std::vector(m_width); + uint32_t img_height = static_cast(m_height); std::vector widths; std::vector heights; widths.resize(ROCJPEG_MAX_COMPONENT); heights.resize(ROCJPEG_MAX_COMPONENT); if(GetOutputDims(widths, heights, img_width, img_height, output_format, subsampling) == false) return false; - uint32_t bit_depth = 8; - std::string type_str(static_cast("|u1")); + const uint32_t bit_depth = 8U; + const std::string type_str("|u1"); + std::array channels{}; + std::copy(std::begin(output_image.channel), std::end(output_image.channel), channels.begin()); switch(output_format) { + case ROCJPEG_OUTPUT_NATIVE: + case ROCJPEG_OUTPUT_YUV_PLANAR: + case ROCJPEG_OUTPUT_Y: + case ROCJPEG_OUTPUT_FORMAT_MAX: + return false; case ROCJPEG_OUTPUT_RGB_PLANAR: { // each color plane in a channel separately R[0], G[1], and B[2] - uint32_t surf_stride[3] = {widths[0], widths[1], widths[2]}; // ROCJPEG_OUTPUT_RGB_PLANAR all same width = img_width - for(int i = 0; i < 3; i++) { + const std::array surf_stride{widths[0], widths[1], widths[2]}; // ROCJPEG_OUTPUT_RGB_PLANAR all same width = img_width + for(size_t i = 0; i < 3U; ++i) { std::vector shape{ static_cast(heights[i]), static_cast(widths[i])}; // depend on get_output_dims() - std::vector stride{ static_cast(surf_stride[i]), 1, 0}; + std::vector stride{ static_cast(surf_stride[i]), size_t{1} }; // RGB PLANAR using VCN JPEG decoder @ first, second, and third channel of RocJpegImage - ext_buf[i]->LoadDLPack(shape, stride, bit_depth, type_str, (void *)output_image.channel[i], device_id); // device_id was set/saved at the constructor + ext_buf[i]->LoadDLPack(shape, stride, bit_depth, 
type_str, static_cast(channels[i]), device_id); // device_id was set/saved at the constructor } } break; - default: case ROCJPEG_OUTPUT_RGB: { // all the RGB interleaved in one channel [0] - uint32_t surf_stride = widths[0]; // ROCJPEG_OUTPUT_RGB width is * 3 for RGB interleaved - std::vector shape{ static_cast(heights[0]), static_cast(widths[0]/3), 3}; // widths[0]/3 for ROCJPEG_OUTPUT_RGB - std::vector stride{ static_cast(surf_stride), 1, 0}; // python assumes same dim for both shape & strides + const uint32_t surf_stride = widths[0]; // ROCJPEG_OUTPUT_RGB width is * 3 for RGB interleaved + std::vector shape{ static_cast(heights[0]), static_cast(widths[0] / 3U), size_t{3} }; // widths[0]/3 for ROCJPEG_OUTPUT_RGB + std::vector stride{ static_cast(surf_stride), size_t{3}, size_t{1} }; // interleaved RGB using VCN JPEG decoder written to first channel of RocJpegImage - ext_buf[0]->LoadDLPack(shape, stride, bit_depth, type_str, (void *)output_image.channel[0], device_id); // device_id was set/saved at the constructor + ext_buf[0]->LoadDLPack(shape, stride, bit_depth, type_str, static_cast(channels[0]), device_id); // device_id was set/saved at the constructor } break; } diff --git a/src/rocjpeg/roc_pyjpeg_images.h b/src/rocjpeg/roc_pyjpeg_images.h index b993aa10..455df4fb 100644 --- a/src/rocjpeg/roc_pyjpeg_images.h +++ b/src/rocjpeg/roc_pyjpeg_images.h @@ -24,6 +24,7 @@ THE SOFTWARE. 
#define PY_ROC_JPEG_IMAGES_HEADER #pragma once +#include #include #include "rocjpeg/rocjpeg.h" #include "common/roc_pybuffer.h" @@ -45,16 +46,14 @@ class PyJpegImages { num_channels = 0; subsampling = ROCJPEG_CSS_UNKNOWN; } - ~PyJpegImages() {}; - static void ExportToPython(py::module& m); // The image in the GPU MEM represented with dlpack via this ext_buf (for external buffer) std::vector> ext_buf; // external buffer, a view on the GPU MEM of the decoded image // public to be accessed by python pybind - int m_width; - int m_height; + int m_width = 0; + int m_height = 0; py::array_t to_numpy(int index = 0); RocJpegChromaSubsampling subsampling; @@ -66,6 +65,7 @@ class PyJpegImages { private: bool GetOutputDims(std::vector& widths, std::vector& heights, uint32_t img_width, uint32_t img_height, RocJpegOutputFormat output_format, RocJpegChromaSubsampling subsampling); + [[maybe_unused]] uint32_t padding_ = 0; }; -#endif // PY_ROC_JPEG_IMAGES_HEADER \ No newline at end of file +#endif // PY_ROC_JPEG_IMAGES_HEADER diff --git a/src/rocjpeg/roc_pyjpeg_utils.h b/src/rocjpeg/roc_pyjpeg_utils.h index eb364a97..895099aa 100644 --- a/src/rocjpeg/roc_pyjpeg_utils.h +++ b/src/rocjpeg/roc_pyjpeg_utils.h @@ -27,11 +27,15 @@ THE SOFTWARE. 
#include #include #include +#include #include #include #include #include #include +#include +#include +#include #include #include #include @@ -41,21 +45,23 @@ namespace fs = std::experimental::filesystem; #include #include "rocjpeg/rocjpeg.h" -#define PY_CHECK_ROCJPEG(call) { \ - RocJpegStatus rocjpeg_status = (call); \ - if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { \ - std::cerr << #call << " returned " << rocJpegGetErrorName(rocjpeg_status) << " at " << __FILE__ << ":" << __LINE__ << std::endl;\ - exit(1); \ - } \ -} - -#define PY_CHECK_HIP(call) { \ - hipError_t hip_status = (call); \ - if (hip_status != hipSuccess) { \ - std::cout << "rocJPEG failure: '#" << hip_status << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ - exit(1); \ - } \ -} +#define PY_CHECK_ROCJPEG(call) \ + do { \ + const RocJpegStatus rocjpeg_status = (call); \ + if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { \ + std::cerr << #call << " returned " << rocJpegGetErrorName(rocjpeg_status) << " at " << __FILE__ << ":" << __LINE__ << std::endl; \ + std::exit(EXIT_FAILURE); \ + } \ + } while (false) + +#define PY_CHECK_HIP(call) \ + do { \ + const hipError_t hip_status = (call); \ + if (hip_status != hipSuccess) { \ + std::cout << "rocJPEG failure: '#" << hip_status << "' at " << __FILE__ << ":" << __LINE__ << std::endl; \ + std::exit(EXIT_FAILURE); \ + } \ + } while (false) /** * @class PyRocJpegUtils @@ -66,6 +72,8 @@ namespace fs = std::experimental::filesystem; */ class PyRocJpegUtils { public: + using ChannelArray = std::array; + /** * @brief Initializes the HIP device. * @@ -130,9 +138,6 @@ class PyRocJpegUtils { case ROCJPEG_CSS_UNKNOWN: chroma_sub_sampling = "UNKNOWN"; break; - default: - chroma_sub_sampling = ""; - break; } } @@ -151,103 +156,122 @@ class PyRocJpegUtils { * @param channel_sizes The array to store the channel sizes. * @return The channel pitch. 
*/ - int GetChannelPitchAndSizes(RocJpegDecodeParams decode_params, RocJpegChromaSubsampling subsampling, uint32_t *widths, uint32_t *heights, - uint32_t &num_channels, RocJpegImage &output_image, uint32_t *channel_sizes) { - - bool is_roi_valid = false; - uint32_t roi_width; - uint32_t roi_height; - roi_width = decode_params.crop_rectangle.right - decode_params.crop_rectangle.left; - roi_height = decode_params.crop_rectangle.bottom - decode_params.crop_rectangle.top; - if (roi_width > 0 && roi_height > 0 && roi_width <= widths[0] && roi_height <= heights[0]) { - is_roi_valid = true; - } + int GetChannelPitchAndSizes(RocJpegDecodeParams decode_params, RocJpegChromaSubsampling subsampling, + const ChannelArray &widths, const ChannelArray &heights, + uint32_t &num_channels, RocJpegImage &output_image, + ChannelArray &channel_sizes) { + const int roi_width_raw = decode_params.crop_rectangle.right - decode_params.crop_rectangle.left; + const int roi_height_raw = decode_params.crop_rectangle.bottom - decode_params.crop_rectangle.top; + const uint32_t roi_width = static_cast(roi_width_raw); + const uint32_t roi_height = static_cast(roi_height_raw); + const bool is_roi_valid = roi_width_raw > 0 && roi_height_raw > 0 && roi_width <= widths[0] && roi_height <= heights[0]; + const uint32_t full_width = is_roi_valid ? roi_width : widths[0]; + const uint32_t full_height = is_roi_valid ? roi_height : heights[0]; + ChannelArray pitches{}; + channel_sizes.fill(0U); + + const auto set_channel = [&](std::size_t index, uint32_t pitch, uint32_t height) { + pitches[index] = pitch; + channel_sizes[index] = AlignSize(pitch, height, mem_alignment); + }; + switch (decode_params.output_format) { case ROCJPEG_OUTPUT_NATIVE: switch (subsampling) { case ROCJPEG_CSS_444: - num_channels = 3; - output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? 
roi_width : widths[0]; - channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + num_channels = 3U; + set_channel(0U, full_width, full_height); + set_channel(1U, full_width, full_height); + set_channel(2U, full_width, full_height); break; case ROCJPEG_CSS_440: - num_channels = 3; - output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); - channel_sizes[2] = channel_sizes[1] = align(output_image.pitch[0] * ((is_roi_valid ? roi_height : heights[0]) >> 1), mem_alignment); + num_channels = 3U; + set_channel(0U, full_width, full_height); + set_channel(1U, full_width, full_height >> 1U); + set_channel(2U, full_width, full_height >> 1U); break; case ROCJPEG_CSS_422: - num_channels = 1; - output_image.pitch[0] = (is_roi_valid ? roi_width : widths[0]) * 2; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + num_channels = 1U; + set_channel(0U, full_width * 2U, full_height); break; case ROCJPEG_CSS_420: - num_channels = 2; - output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); - channel_sizes[1] = align(output_image.pitch[1] * ((is_roi_valid ? roi_height : heights[0]) >> 1), mem_alignment); + num_channels = 2U; + set_channel(0U, full_width, full_height); + set_channel(1U, full_width, full_height >> 1U); break; case ROCJPEG_CSS_400: - num_channels = 1; - output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? 
roi_height : heights[0]), mem_alignment); + num_channels = 1U; + set_channel(0U, full_width, full_height); break; - default: + case ROCJPEG_CSS_411: + case ROCJPEG_CSS_UNKNOWN: std::cout << "Unknown chroma subsampling!" << std::endl; return EXIT_FAILURE; } break; case ROCJPEG_OUTPUT_YUV_PLANAR: if (subsampling == ROCJPEG_CSS_400) { - num_channels = 1; - output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + num_channels = 1U; + set_channel(0U, full_width, full_height); } else { - num_channels = 3; - output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - output_image.pitch[1] = is_roi_valid ? roi_width : widths[1]; - output_image.pitch[2] = is_roi_valid ? roi_width : widths[2]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); - channel_sizes[1] = align(output_image.pitch[1] * (is_roi_valid ? roi_height : heights[1]), mem_alignment); - channel_sizes[2] = align(output_image.pitch[2] * (is_roi_valid ? roi_height : heights[2]), mem_alignment); + switch (subsampling) { + case ROCJPEG_CSS_444: + case ROCJPEG_CSS_440: + case ROCJPEG_CSS_422: + case ROCJPEG_CSS_420: + num_channels = 3U; + set_channel(0U, full_width, full_height); + set_channel(1U, is_roi_valid ? roi_width : widths[1], is_roi_valid ? roi_height : heights[1]); + set_channel(2U, is_roi_valid ? roi_width : widths[2], is_roi_valid ? roi_height : heights[2]); + break; + case ROCJPEG_CSS_400: + break; + case ROCJPEG_CSS_411: + case ROCJPEG_CSS_UNKNOWN: + std::cout << "Unknown chroma subsampling!" << std::endl; + return EXIT_FAILURE; + } } break; case ROCJPEG_OUTPUT_Y: - num_channels = 1; - output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? 
roi_height : heights[0]), mem_alignment); + num_channels = 1U; + set_channel(0U, full_width, full_height); break; case ROCJPEG_OUTPUT_RGB: - num_channels = 1; - output_image.pitch[0] = (is_roi_valid ? roi_width : widths[0]) * 3; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + num_channels = 1U; + set_channel(0U, full_width * 3U, full_height); break; case ROCJPEG_OUTPUT_RGB_PLANAR: - num_channels = 3; - output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + num_channels = 3U; + set_channel(0U, full_width, full_height); + set_channel(1U, full_width, full_height); + set_channel(2U, full_width, full_height); break; - default: + case ROCJPEG_OUTPUT_FORMAT_MAX: std::cout << "Unknown output format!" << std::endl; return EXIT_FAILURE; } + std::copy(pitches.begin(), pitches.end(), std::begin(output_image.pitch)); return EXIT_SUCCESS; } private: - static const int mem_alignment = 4 * 1024 * 1024; + static constexpr uint32_t mem_alignment = 4U * 1024U * 1024U; /** * @brief Aligns a value to a specified alignment. * * This function takes a value and aligns it to the specified alignment. It returns the aligned value. * - * @param value The value to be aligned. + * @param pitch The pitch of the channel in bytes. + * @param height The channel height in rows. * @param alignment The alignment value. * @return The aligned value. 
*/ - static inline int align(int value, int alignment) { - return (value + alignment - 1) & ~(alignment - 1); + static inline uint32_t AlignSize(uint32_t pitch, uint32_t height, uint32_t alignment) { + const auto size = static_cast(pitch) * static_cast(height); + const auto aligned = (size + alignment - 1U) & ~(static_cast(alignment) - 1U); + return static_cast(aligned); } }; -#endif //ROC_PY_JPEG_UTILS \ No newline at end of file +#endif //ROC_PY_JPEG_UTILS diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9befe73c..26c5b6c3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -80,46 +80,36 @@ set(ROCPYJPEG_BUILD_PY_DIR "${CMAKE_BINARY_DIR}/rocpyjpegdecode_${PY_VERSION_SUF # check installed or build-tree pybind bindings set(VIDEO_DECODE_BINDINGS_INSTALLED OFF) -if(EXISTS "${ROCPYDECODE_PY_DIR}") +file(GLOB ROCPYDECODE_PYMODULES "${ROCPYDECODE_BUILD_LIB_DIR}/rocpydecode*.so" "${ROCPYDECODE_BUILD_LIB_DIR}/rocpydecode*.pyd") +if(ROCPYDECODE_PYMODULES AND EXISTS "${ROCPYDECODE_BUILD_PY_DIR}") + set(VIDEO_DECODE_BINDINGS_INSTALLED ON) + set(ROCPYDECODE_LIB_DIR "${ROCPYDECODE_BUILD_LIB_DIR}") + set(ROCPYDECODE_PY_DIR "${ROCPYDECODE_BUILD_PY_DIR}") + message("-- ${White}${PROJECT_NAME}: using in-tree rocPyDecode build outputs at ${ROCPYDECODE_LIB_DIR}${ColourReset}") +elseif(EXISTS "${ROCPYDECODE_PY_DIR}") set(VIDEO_DECODE_BINDINGS_INSTALLED ON) message("-- ${White}${PROJECT_NAME}: rocPyDecode found at ${ROCPYDECODE_PY_DIR}${ColourReset}") -endif() -if(VIDEO_DECODE_BINDINGS_INSTALLED) file(GLOB ROCPYDECODE_PYMODULES "${ROCPYDECODE_LIB_DIR}/rocpydecode*.so" "${ROCPYDECODE_LIB_DIR}/rocpydecode*.pyd") if(NOT ROCPYDECODE_PYMODULES) set(VIDEO_DECODE_BINDINGS_INSTALLED OFF) - message("-- ${Yellow}${PROJECT_NAME}: rocPyDecode bindings directory present but shared object not found in ${ROCPYDECODE_LIB_DIR}; will try build-tree outputs.${ColourReset}") - endif() -endif() -if(NOT VIDEO_DECODE_BINDINGS_INSTALLED) - file(GLOB ROCPYDECODE_PYMODULES 
"${ROCPYDECODE_BUILD_LIB_DIR}/rocpydecode*.so" "${ROCPYDECODE_BUILD_LIB_DIR}/rocpydecode*.pyd") - if(ROCPYDECODE_PYMODULES AND EXISTS "${ROCPYDECODE_BUILD_PY_DIR}") - set(VIDEO_DECODE_BINDINGS_INSTALLED ON) - set(ROCPYDECODE_LIB_DIR "${ROCPYDECODE_BUILD_LIB_DIR}") - set(ROCPYDECODE_PY_DIR "${ROCPYDECODE_BUILD_PY_DIR}") - message("-- ${White}${PROJECT_NAME}: using in-tree rocPyDecode build outputs at ${ROCPYDECODE_LIB_DIR}${ColourReset}") + message("-- ${Yellow}${PROJECT_NAME}: rocPyDecode bindings directory present but shared object not found in ${ROCPYDECODE_LIB_DIR}.${ColourReset}") endif() endif() set(JPEG_DECODE_PYBIND_SCRIPTS OFF) -if(EXISTS "${ROCPYJPEG_PY_DIR}") +file(GLOB ROCPYJPEG_PYMODULES "${ROCPYJPEG_BUILD_LIB_DIR}/rocpyjpegdecode*.so" "${ROCPYJPEG_BUILD_LIB_DIR}/rocpyjpegdecode*.pyd") +if(ROCPYJPEG_PYMODULES AND EXISTS "${ROCPYJPEG_BUILD_PY_DIR}") + set(JPEG_DECODE_PYBIND_SCRIPTS ON) + set(ROCPYJPEG_LIB_DIR "${ROCPYJPEG_BUILD_LIB_DIR}") + set(ROCPYJPEG_PY_DIR "${ROCPYJPEG_BUILD_PY_DIR}") + message("-- ${White}${PROJECT_NAME}: using in-tree rocPyJPEG build outputs at ${ROCPYJPEG_LIB_DIR}${ColourReset}") +elseif(EXISTS "${ROCPYJPEG_PY_DIR}") set(JPEG_DECODE_PYBIND_SCRIPTS ON) message("-- ${White}${PROJECT_NAME}: rocPyJPEG found at ${ROCPYJPEG_PY_DIR}${ColourReset}") -endif() -if(JPEG_DECODE_PYBIND_SCRIPTS) file(GLOB ROCPYJPEG_PYMODULES "${ROCPYJPEG_LIB_DIR}/rocpyjpegdecode*.so" "${ROCPYJPEG_LIB_DIR}/rocpyjpegdecode*.pyd") if(NOT ROCPYJPEG_PYMODULES) set(JPEG_DECODE_PYBIND_SCRIPTS OFF) - message("-- ${Yellow}${PROJECT_NAME}: rocPyJPEG bindings directory present but shared object not found in ${ROCPYJPEG_LIB_DIR}; will try build-tree outputs.${ColourReset}") - endif() -endif() -if(NOT JPEG_DECODE_PYBIND_SCRIPTS) - file(GLOB ROCPYJPEG_PYMODULES "${ROCPYJPEG_BUILD_LIB_DIR}/rocpyjpegdecode*.so" "${ROCPYJPEG_BUILD_LIB_DIR}/rocpyjpegdecode*.pyd") - if(ROCPYJPEG_PYMODULES AND EXISTS "${ROCPYJPEG_BUILD_PY_DIR}") - set(JPEG_DECODE_PYBIND_SCRIPTS ON) - 
set(ROCPYJPEG_LIB_DIR "${ROCPYJPEG_BUILD_LIB_DIR}") - set(ROCPYJPEG_PY_DIR "${ROCPYJPEG_BUILD_PY_DIR}") - message("-- ${White}${PROJECT_NAME}: using in-tree rocPyJPEG build outputs at ${ROCPYJPEG_LIB_DIR}${ColourReset}") + message("-- ${Yellow}${PROJECT_NAME}: rocPyJPEG bindings directory present but shared object not found in ${ROCPYJPEG_LIB_DIR}.${ColourReset}") endif() endif() @@ -149,8 +139,8 @@ else() endif() # find requirements (optional to allow skipping tests) -find_package(rocdecode 1.0.0 QUIET) -find_package(rocjpeg 1.0.0 QUIET) +find_package(rocdecode QUIET) +find_package(rocjpeg QUIET) find_library(rocdecode_HOST_LIBRARY NAMES rocdecodehost rocdecode-host PATHS ${ROCM_PATH}/lib) set(RUN_ROCPYDECODE_TESTS OFF) set(RUN_ROCPYJPEG_TESTS OFF) @@ -194,6 +184,12 @@ if(EXISTS "${ROCM_PATH}/lib/rocm_sysdeps/lib") set(USING_THE_ROCK ON) endif() +function(set_rocpy_test_env test_name pythonpath) + set_property(TEST ${test_name} PROPERTY ENVIRONMENT + "PYTHONPATH=${pythonpath}" + "LD_LIBRARY_PATH=${ROCM_PATH}/lib:$ENV{LD_LIBRARY_PATH}") +endfunction() + if(RUN_ROCPYDECODE_TESTS OR RUN_ROCPYJPEG_TESTS) # Tests execute_process(COMMAND ${Python3_EXECUTABLE} -c "import torch" RESULT_VARIABLE TORCH_PYTHON_RESULT OUTPUT_QUIET ERROR_QUIET) @@ -212,7 +208,7 @@ if(RUN_ROCPYDECODE_TESTS) -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST rocpydecode_test_decoders PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(rocpydecode_test_decoders "${ROCPYDECODE_PYTHONPATH}") # 2 - decodercpu test if(RUN_ROCPYDECODE_HOST_TESTS) add_test(NAME rocpydecode_test_decodercpu @@ -220,7 +216,7 @@ if(RUN_ROCPYDECODE_TESTS) -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST rocpydecode_test_decodercpu PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + 
set_rocpy_test_env(rocpydecode_test_decodercpu "${ROCPYDECODE_PYTHONPATH}") endif() # 3 - demuxer test add_test(NAME rocpydecode_test_demuxer @@ -228,7 +224,7 @@ if(RUN_ROCPYDECODE_TESTS) -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST rocpydecode_test_demuxer PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(rocpydecode_test_demuxer "${ROCPYDECODE_PYTHONPATH}") else() message("-- ${Yellow}${PROJECT_NAME}: FFmpeg not available; skipping decoder/demuxer/CPU sample tests.${ColourReset}") endif() @@ -237,7 +233,7 @@ if(RUN_ROCPYDECODE_TESTS) COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/types_test.py WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST rocpydecode_test_types PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(rocpydecode_test_types "${ROCPYDECODE_PYTHONPATH}") if(ROCPYDECODE_USE_FFMPEG) # 5 - video_decode_python_H265 test add_test(NAME video_decode_python_H265 @@ -245,20 +241,14 @@ if(RUN_ROCPYDECODE_TESTS) -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H265.mp4 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_python_H265 PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_python_H265 "${ROCPYDECODE_PYTHONPATH}") # 6 - video_decode_perf_python_H265 test add_test(NAME video_decode_perf_python_H265 COMMAND ${Python3_EXECUTABLE} ${ROCPYDECODE_SAMPLE_DIR}/videodecodeperf.py -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H265.mp4 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - if(USING_THE_ROCK) - set_property(TEST video_decode_perf_python_H265 PROPERTY ENVIRONMENT - "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$ENV{PYTHONPATH}" - "LD_LIBRARY_PATH=${ROCM_PATH}/lib:$ENV{LD_LIBRARY_PATH}") - else() - set_property(TEST video_decode_perf_python_H265 
PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") - endif() + set_rocpy_test_env(video_decode_perf_python_H265 "${ROCPYDECODE_PYTHONPATH}") # 7 - video_decode_rgb_python_H265 test add_test(NAME video_decode_rgb_python_H265 COMMAND ${Python3_EXECUTABLE} ${ROCPYDECODE_SAMPLE_DIR}/videodecodergb.py @@ -266,41 +256,35 @@ if(RUN_ROCPYDECODE_TESTS) -of 3 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_rgb_python_H265 PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_rgb_python_H265 "${ROCPYDECODE_PYTHONPATH}") # 8 - video_decode_python_H264 test add_test(NAME video_decode_python_H264 COMMAND ${Python3_EXECUTABLE} ${ROCPYDECODE_SAMPLE_DIR}/videodecode.py -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 -resize 640 360 -p yes WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_python_H264 PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_python_H264 "${ROCPYDECODE_PYTHONPATH}") # 9 - video_decode_perf_python_H264 test add_test(NAME video_decode_perf_python_H264 COMMAND ${Python3_EXECUTABLE} ${ROCPYDECODE_SAMPLE_DIR}/videodecodeperf.py -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - if(USING_THE_ROCK) - set_property(TEST video_decode_perf_python_H264 PROPERTY ENVIRONMENT - "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$ENV{PYTHONPATH}" - "LD_LIBRARY_PATH=${ROCM_PATH}/lib:$ENV{LD_LIBRARY_PATH}") - else() - set_property(TEST video_decode_perf_python_H264 PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") - endif() + set_rocpy_test_env(video_decode_perf_python_H264 "${ROCPYDECODE_PYTHONPATH}") # 10 - video_decode_python_AV1 test add_test(NAME video_decode_python_AV1 COMMAND ${Python3_EXECUTABLE} ${ROCPYDECODE_SAMPLE_DIR}/videodecode.py -i 
${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-AV1.mp4 -resize 640 360 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_python_AV1 PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_python_AV1 "${ROCPYDECODE_PYTHONPATH}") # 11 - video_decode_python_AV9 test add_test(NAME video_decode_python_AV9 COMMAND ${Python3_EXECUTABLE} ${ROCPYDECODE_SAMPLE_DIR}/videodecode.py -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-VP9.ivf -resize 640 360 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_python_AV9 PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_python_AV9 "${ROCPYDECODE_PYTHONPATH}") if(TORCH_PYTHON_RESULT EQUAL 0 AND RUN_ROCPYDECODE_HOST_TESTS) # 12 - video_decode_python_ffmpeg(torch) test add_test(NAME video_decode_python_ffmpeg_torch @@ -308,7 +292,7 @@ if(RUN_ROCPYDECODE_TESTS) -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 -m 2 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_python_ffmpeg_torch PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_python_ffmpeg_torch "${ROCPYDECODE_PYTHONPATH}") endif() # 13 - video_decode_python_ffmpeg test if(RUN_ROCPYDECODE_HOST_TESTS) @@ -317,7 +301,7 @@ if(RUN_ROCPYDECODE_TESTS) -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 -m 2 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_python_ffmpeg PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_python_ffmpeg "${ROCPYDECODE_PYTHONPATH}") endif() endif() # 14 - video_decode_raw_python (Annex-B raw bitstream) @@ -326,7 +310,7 @@ if(RUN_ROCPYDECODE_TESTS) -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.264 WORKING_DIRECTORY 
${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_raw_python PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_raw_python "${ROCPYDECODE_PYTHONPATH}") # 15 - video_decode_raw_python_h265 (Annex-B raw HEVC bitstream) add_test(NAME video_decode_raw_python_h265 COMMAND ${Python3_EXECUTABLE} ${ROCPYDECODE_SAMPLE_DIR}/videodecoderaw.py @@ -334,14 +318,14 @@ if(RUN_ROCPYDECODE_TESTS) --codec h265 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST video_decode_raw_python_h265 PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(video_decode_raw_python_h265 "${ROCPYDECODE_PYTHONPATH}") # 16 - all_other_decoder_apis test add_test(NAME all_other_decoder_apis COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/decoder_api_test.py -i ${ROCM_PATH}/share/rocdecode/video/AMD_driving_virtual_20-H264.mp4 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) - set_property(TEST all_other_decoder_apis PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYDECODE_PYTHONPATH}:$PYTHONPATH") + set_rocpy_test_env(all_other_decoder_apis "${ROCPYDECODE_PYTHONPATH}") endif() if(RUN_ROCPYJPEG_TESTS) @@ -351,24 +335,12 @@ if(RUN_ROCPYJPEG_TESTS) COMMAND ${Python3_EXECUTABLE} ${ROCPYJPEG_SAMPLE_DIR}/jpegdecode.py -i ${ROCM_PATH}/share/rocjpeg/images/mug_420.jpg WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - if(USING_THE_ROCK) - set_property(TEST jpeg_decode_python PROPERTY ENVIRONMENT - "PYTHONPATH=${ROCPYJPEG_PYTHONPATH}:$ENV{PYTHONPATH}" - "LD_LIBRARY_PATH=${ROCM_PATH}/lib:$ENV{LD_LIBRARY_PATH}") - else() - set_property(TEST jpeg_decode_python PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYJPEG_PYTHONPATH}:$PYTHONPATH") - endif() + set_rocpy_test_env(jpeg_decode_python "${ROCPYJPEG_PYTHONPATH}") endif(TORCH_PYTHON_RESULT EQUAL 0) # 18 - jpegdecodebatched_test add_test(NAME jpeg_decode_batched_python COMMAND ${Python3_EXECUTABLE} ${ROCPYJPEG_SAMPLE_DIR}/jpegdecodebatched.py -i 
${ROCM_PATH}/share/rocjpeg/images/ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - if(USING_THE_ROCK) - set_property(TEST jpeg_decode_batched_python PROPERTY ENVIRONMENT - "PYTHONPATH=${ROCPYJPEG_PYTHONPATH}:$ENV{PYTHONPATH}" - "LD_LIBRARY_PATH=${ROCM_PATH}/lib:$ENV{LD_LIBRARY_PATH}") - else() - set_property(TEST jpeg_decode_batched_python PROPERTY ENVIRONMENT "PYTHONPATH=${ROCPYJPEG_PYTHONPATH}:$PYTHONPATH") - endif() + set_rocpy_test_env(jpeg_decode_batched_python "${ROCPYJPEG_PYTHONPATH}") endif() From 5e6c078e882b015dc69c0c70abeeb954d4b7ef7c Mon Sep 17 00:00:00 2001 From: Essam Aly Date: Mon, 6 Apr 2026 11:18:42 -0700 Subject: [PATCH 2/4] 2nd round testing on TheRock, fixing warnings cause --- src/common/roc_pybuffer.cpp | 20 ++++++++++++++------ src/rocdecode/roc_pyvideodecode.cpp | 4 +++- src/rocdecode/roc_pyvideodecode.h | 19 +++++++++++-------- src/rocdecode/roc_pyvideodecodecpu.cpp | 4 +++- src/rocdecode/roc_pyvideodecodecpu.h | 10 +++++++++- src/rocjpeg/roc_pyjpeg_images.cpp | 20 ++++++++++++++++++++ src/rocjpeg/roc_pyjpeg_images.h | 7 ++----- src/rocjpeg/roc_pyjpeg_utils.h | 19 +++++++++++++++++++ 8 files changed, 81 insertions(+), 22 deletions(-) diff --git a/src/common/roc_pybuffer.cpp b/src/common/roc_pybuffer.cpp index 62df3964..a81be525 100644 --- a/src/common/roc_pybuffer.cpp +++ b/src/common/roc_pybuffer.cpp @@ -48,6 +48,15 @@ void ResetTensorMetadata(DLTensor &tensor) { delete[] tensor.strides; tensor.strides = nullptr; } + +std::unique_ptr MakeTensorMetadataArray(const std::vector &values, const char *context) { + std::vector converted(values.size()); + std::transform(values.begin(), values.end(), converted.begin(), + [context](size_t value) { return CheckedNumericCast(value, context); }); + auto data = std::make_unique(values.size()); + std::copy(converted.begin(), converted.end(), data.get()); + return data; +} } // namespace static void CheckValidBuffer(const void *ptr) { @@ -204,21 +213,20 @@ int BufferInterface::LoadDLPack(const 
std::vector& _shape, const std::ve m_dlTensor->ndim = ndim; // Convert shape - auto shape = std::make_unique(static_cast(ndim)); - for (size_t i = 0; i < _shape.size(); ++i) { - shape[i] = CheckedNumericCast(_shape[i], "shape dimension"); - } + auto shape = MakeTensorMetadataArray(_shape, "shape dimension"); m_dlTensor->shape = shape.release(); // Convert strides - auto strides = std::make_unique(static_cast(ndim)); + std::vector stride_values; + stride_values.reserve(_stride.size()); for (size_t i = 0; i < _stride.size(); ++i) { const auto stride_bytes = CheckedNumericCast(_stride[i], "stride"); if (stride_bytes % item_size_dt != 0) { throw std::runtime_error("Stride must be a multiple of the element size in bytes"); } - strides[i] = stride_bytes / item_size_dt; + stride_values.push_back(static_cast(stride_bytes / item_size_dt)); } + auto strides = MakeTensorMetadataArray(stride_values, "stride element"); m_dlTensor->strides = strides.release(); return 0; } diff --git a/src/rocdecode/roc_pyvideodecode.cpp b/src/rocdecode/roc_pyvideodecode.cpp index 9f4a2cad..f7941072 100644 --- a/src/rocdecode/roc_pyvideodecode.cpp +++ b/src/rocdecode/roc_pyvideodecode.cpp @@ -146,6 +146,8 @@ PyRocVideoDecoder::~PyRocVideoDecoder() { } } +void PyRocVideoDecoder::VTableAnchor() {} + int PyRocVideoDecoder::PyDecodeFrame(PyPacketData& packet) { if(packet.bitstream_size == 0) packet.pkt_flags |= ROCDEC_PKT_ENDOFSTREAM; @@ -291,7 +293,7 @@ uintptr_t PyRocVideoDecoder::PyResizeFrame(PyPacketData& packet, Dim *resized_di return 0; } } - memcpy(resized_surf_info, surf_info, sizeof(OutputSurfaceInfo)); + *resized_surf_info = *surf_info; resized_surf_info->output_width = resized_width; resized_surf_info->output_height = resized_height; resized_surf_info->output_pitch = resized_width * surf_info->bytes_per_pixel; diff --git a/src/rocdecode/roc_pyvideodecode.h b/src/rocdecode/roc_pyvideodecode.h index c2ee2db3..a232e844 100644 --- a/src/rocdecode/roc_pyvideodecode.h +++ 
b/src/rocdecode/roc_pyvideodecode.h @@ -44,16 +44,19 @@ int PyReconfigureFlushCallback(void *p_viddec_obj, uint32_t flush_mode, void * p // // AMD Video Decoder Python Interface class // +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wweak-vtables" +#endif class PyRocVideoDecoder : public RocVideoDecoder { public: PyRocVideoDecoder(int device_id, int mem_type, rocDecVideoCodec codec, bool force_zero_latency = false, const Rect *p_crop_rect = nullptr, int max_width = 0, int max_height = 0, uint32_t clk_rate = 0) : RocVideoDecoder(device_id, static_cast(mem_type), codec, force_zero_latency, - p_crop_rect, false, 0U, max_width, max_height, clk_rate) { - InitConfigStructure(); - device_id_ = device_id; } - ~PyRocVideoDecoder(); + p_crop_rect, false, 0U, max_width, max_height, clk_rate) { + InitConfigStructure(); } + ~PyRocVideoDecoder() override; // for python binding int PyDecodeFrame(PyPacketData& packet); @@ -113,6 +116,7 @@ class PyRocVideoDecoder : public RocVideoDecoder { py::object PyGetDecoderSessionOverHead(std::uintptr_t session_id); #endif private: + virtual void VTableAnchor(); std::shared_ptr configInfo; void InitConfigStructure(); @@ -128,8 +132,7 @@ class PyRocVideoDecoder : public RocVideoDecoder { uint8_t *frame_ptr_resized = nullptr; size_t resized_image_size_in_bytes = 0; OutputSurfaceInfo *resized_surf_info = nullptr; - - private: - int device_id_ = 0; - [[maybe_unused]] int padding_ = 0; }; +#if defined(__clang__) +#pragma clang diagnostic pop +#endif diff --git a/src/rocdecode/roc_pyvideodecodecpu.cpp b/src/rocdecode/roc_pyvideodecodecpu.cpp index fe5a54ea..db99d025 100644 --- a/src/rocdecode/roc_pyvideodecodecpu.cpp +++ b/src/rocdecode/roc_pyvideodecodecpu.cpp @@ -92,6 +92,8 @@ PyRocVideoDecoderCpu::~PyRocVideoDecoderCpu() { } } +void PyRocVideoDecoderCpu::VTableAnchor() {} + int PyRocVideoDecoderCpu::PyDecodeFrame(PyPacketData& packet) { if(packet.bitstream_size == 0) packet.pkt_flags |= 
ROCDEC_PKT_ENDOFSTREAM; @@ -237,7 +239,7 @@ uintptr_t PyRocVideoDecoderCpu::PyResizeFrame(PyPacketData& packet, Dim *resized return 0; } } - memcpy(resized_surf_info, surf_info, sizeof(OutputSurfaceInfo)); + *resized_surf_info = *surf_info; resized_surf_info->output_width = resized_dim->w; resized_surf_info->output_height = resized_dim->h; resized_surf_info->output_pitch = resized_dim->w * surf_info->bytes_per_pixel; diff --git a/src/rocdecode/roc_pyvideodecodecpu.h b/src/rocdecode/roc_pyvideodecodecpu.h index 51ced086..fcb0f37e 100644 --- a/src/rocdecode/roc_pyvideodecodecpu.h +++ b/src/rocdecode/roc_pyvideodecodecpu.h @@ -31,13 +31,17 @@ THE SOFTWARE. // // AMD Video Decoder Python Interface class // +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wweak-vtables" +#endif class PyRocVideoDecoderCpu : public FFMpegVideoDecoder { public: PyRocVideoDecoderCpu(int device_id, int mem_type = OUT_SURFACE_MEM_HOST_COPIED, rocDecVideoCodec codec = rocDecVideoCodec_HEVC, bool force_zero_latency = false, const Rect *p_crop_rect = nullptr, int max_width = 0, int max_height = 0, uint32_t clk_rate = 1000) : FFMpegVideoDecoder(device_id, static_cast(mem_type), codec, force_zero_latency, p_crop_rect, false, 0, max_width, max_height, clk_rate) { InitConfigStructure(); } - ~PyRocVideoDecoderCpu(); + ~PyRocVideoDecoderCpu() override; // for python binding int PyDecodeFrame(PyPacketData& packet); @@ -94,6 +98,7 @@ class PyRocVideoDecoderCpu : public FFMpegVideoDecoder { py::object PyGetDecoderSessionOverHead(std::uintptr_t session_id); #endif private: + virtual void VTableAnchor(); std::shared_ptr configInfo; void InitConfigStructure(); @@ -106,3 +111,6 @@ class PyRocVideoDecoderCpu : public FFMpegVideoDecoder { size_t resized_image_size_in_bytes = 0; OutputSurfaceInfo *resized_surf_info = nullptr; }; +#if defined(__clang__) +#pragma clang diagnostic pop +#endif diff --git a/src/rocjpeg/roc_pyjpeg_images.cpp 
b/src/rocjpeg/roc_pyjpeg_images.cpp index e86c39ea..c7799181 100644 --- a/src/rocjpeg/roc_pyjpeg_images.cpp +++ b/src/rocjpeg/roc_pyjpeg_images.cpp @@ -34,6 +34,11 @@ using namespace std; #include +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcovered-switch-default" +#endif + void PyJpegImages::ExportToPython(py::module& m) { // PyJpegImages py::class_>(m, "PyJpegImages", py::module_local()) @@ -137,6 +142,9 @@ bool PyJpegImages::GetOutputDims(std::vector& widths, std::vector& widths, std::vector& widths, std::vectorLoadDLPack(shape, stride, bit_depth, type_str, static_cast(channels[0]), device_id); // device_id was set/saved at the constructor } break; + default: + return false; } return true; } + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif diff --git a/src/rocjpeg/roc_pyjpeg_images.h b/src/rocjpeg/roc_pyjpeg_images.h index 455df4fb..bab454e3 100644 --- a/src/rocjpeg/roc_pyjpeg_images.h +++ b/src/rocjpeg/roc_pyjpeg_images.h @@ -40,9 +40,6 @@ class PyJpegImages { ext_buf.push_back(std::make_shared()); ext_buf.push_back(std::make_shared()); ext_buf.push_back(std::make_shared()); - // default, reset - memset(&decode_params, 0, sizeof(RocJpegDecodeParams)); - memset(&output_image, 0, sizeof(RocJpegImage)); num_channels = 0; subsampling = ROCJPEG_CSS_UNKNOWN; } @@ -59,8 +56,8 @@ class PyJpegImages { // not exposed to outside uint32_t num_channels = 0; - RocJpegImage output_image; - RocJpegDecodeParams decode_params; + RocJpegImage output_image{}; + RocJpegDecodeParams decode_params{}; bool ToDlpackTensor(RocJpegOutputFormat output_format, int device_id); private: diff --git a/src/rocjpeg/roc_pyjpeg_utils.h b/src/rocjpeg/roc_pyjpeg_utils.h index 895099aa..a61fd327 100644 --- a/src/rocjpeg/roc_pyjpeg_utils.h +++ b/src/rocjpeg/roc_pyjpeg_utils.h @@ -70,6 +70,10 @@ namespace fs = std::experimental::filesystem; * This class provides utility functions such as getting file paths, initializing HIP device, * 
getting chroma subsampling string, getting channel pitch and sizes, getting output file extension, and saving images. */ +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcovered-switch-default" +#endif class PyRocJpegUtils { public: using ChannelArray = std::array; @@ -138,6 +142,9 @@ class PyRocJpegUtils { case ROCJPEG_CSS_UNKNOWN: chroma_sub_sampling = "UNKNOWN"; break; + default: + chroma_sub_sampling = "UNKNOWN"; + break; } } @@ -207,6 +214,9 @@ class PyRocJpegUtils { case ROCJPEG_CSS_UNKNOWN: std::cout << "Unknown chroma subsampling!" << std::endl; return EXIT_FAILURE; + default: + std::cout << "Unknown chroma subsampling!" << std::endl; + return EXIT_FAILURE; } break; case ROCJPEG_OUTPUT_YUV_PLANAR: @@ -230,6 +240,9 @@ class PyRocJpegUtils { case ROCJPEG_CSS_UNKNOWN: std::cout << "Unknown chroma subsampling!" << std::endl; return EXIT_FAILURE; + default: + std::cout << "Unknown chroma subsampling!" << std::endl; + return EXIT_FAILURE; } } break; @@ -250,6 +263,9 @@ class PyRocJpegUtils { case ROCJPEG_OUTPUT_FORMAT_MAX: std::cout << "Unknown output format!" << std::endl; return EXIT_FAILURE; + default: + std::cout << "Unknown output format!" 
<< std::endl; + return EXIT_FAILURE; } std::copy(pitches.begin(), pitches.end(), std::begin(output_image.pitch)); return EXIT_SUCCESS; @@ -273,5 +289,8 @@ class PyRocJpegUtils { return static_cast(aligned); } }; +#if defined(__clang__) +#pragma clang diagnostic pop +#endif #endif //ROC_PY_JPEG_UTILS From 8a9d4724b0e91e1a91305c606a614af9305c0751 Mon Sep 17 00:00:00 2001 From: Essam Aly Date: Mon, 6 Apr 2026 13:28:21 -0700 Subject: [PATCH 3/4] Fix bug for CI build --- src/common/roc_pybuffer.cpp | 22 +++++++++++++++++++--- src/common/roc_pydlpack.cpp | 19 ++++++++++++++++++- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/common/roc_pybuffer.cpp b/src/common/roc_pybuffer.cpp index a81be525..e8e36f2e 100644 --- a/src/common/roc_pybuffer.cpp +++ b/src/common/roc_pybuffer.cpp @@ -25,6 +25,7 @@ THE SOFTWARE. #include #include #include +#include #include #include @@ -35,9 +36,24 @@ using namespace py::literals; namespace { template Target CheckedNumericCast(Source value, const char *context) { - using Limit = std::numeric_limits; - if (value > static_cast(Limit::max())) { - throw std::runtime_error(std::string(context) + " is too large"); + if constexpr (std::is_signed_v && std::is_signed_v) { + if (value < static_cast(std::numeric_limits::min()) || + value > static_cast(std::numeric_limits::max())) { + throw std::runtime_error(std::string(context) + " is out of range"); + } + } else if constexpr (std::is_signed_v && !std::is_signed_v) { + using UnsignedSource = std::make_unsigned_t; + if (value < 0 || + static_cast(value) > std::numeric_limits::max()) { + throw std::runtime_error(std::string(context) + " is out of range"); + } + } else if constexpr (!std::is_signed_v && std::is_signed_v) { + using UnsignedTarget = std::make_unsigned_t; + if (value > static_cast(std::numeric_limits::max())) { + throw std::runtime_error(std::string(context) + " is out of range"); + } + } else if (value > std::numeric_limits::max()) { + throw 
std::runtime_error(std::string(context) + " is out of range"); } return static_cast(value); } diff --git a/src/common/roc_pydlpack.cpp b/src/common/roc_pydlpack.cpp index a24a71ec..be7aaef2 100644 --- a/src/common/roc_pydlpack.cpp +++ b/src/common/roc_pydlpack.cpp @@ -30,6 +30,7 @@ namespace py = pybind11; #include #include #include +#include #include namespace { @@ -42,7 +43,23 @@ void ReleaseTensorMetadata(DLManagedTensor *self) { template Target CheckedNumericCast(Source value, const char *context) { - if (value < 0 || value > static_cast(std::numeric_limits::max())) { + if constexpr (std::is_signed_v && std::is_signed_v) { + if (value < static_cast(std::numeric_limits::min()) || + value > static_cast(std::numeric_limits::max())) { + throw std::runtime_error(std::string(context) + " is out of range"); + } + } else if constexpr (std::is_signed_v && !std::is_signed_v) { + using UnsignedSource = std::make_unsigned_t; + if (value < 0 || + static_cast(value) > std::numeric_limits::max()) { + throw std::runtime_error(std::string(context) + " is out of range"); + } + } else if constexpr (!std::is_signed_v && std::is_signed_v) { + using UnsignedTarget = std::make_unsigned_t; + if (value > static_cast(std::numeric_limits::max())) { + throw std::runtime_error(std::string(context) + " is out of range"); + } + } else if (value > std::numeric_limits::max()) { throw std::runtime_error(std::string(context) + " is out of range"); } return static_cast(value); From 261a61d9527eba39467a7f77395ce0c6121cf51b Mon Sep 17 00:00:00 2001 From: Essam Aly Date: Mon, 6 Apr 2026 19:55:06 -0700 Subject: [PATCH 4/4] small test/runtime bug fixes --- samples/rocjpeg/jpegdecode.py | 6 ++---- tests/CMakeLists.txt | 12 +++++------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/samples/rocjpeg/jpegdecode.py b/samples/rocjpeg/jpegdecode.py index 178ef92c..4a25aada 100644 --- a/samples/rocjpeg/jpegdecode.py +++ b/samples/rocjpeg/jpegdecode.py @@ -18,7 +18,6 @@ # OUT OF OR IN 
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. -import torch import pyRocJpegDecode.decoder as jdec import rocpyjpegdecode.jpegTypes as jpegt import argparse @@ -57,8 +56,7 @@ def jpeg_decode( # example how to save the decoded image as a file if (output_file_path is not None): filename = output_file_path.strip() + ".png" - img1 = torch.from_numpy(img_tensor.to_numpy()) - arr = img1.cpu().numpy() + arr = img_tensor.to_numpy() img = Image.fromarray(arr.astype(np.uint8)) img.save(filename) print(f"Image saved as: {filename}") @@ -121,4 +119,4 @@ def jpeg_decode( print("ERROR: input passed with -i must be an existing file.") exit() - jpeg_decode(input_file_path, output_format, device_id, backend, output_file_path) \ No newline at end of file + jpeg_decode(input_file_path, output_format, device_id, backend, output_file_path) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 26c5b6c3..af029e1c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -330,13 +330,11 @@ endif() if(RUN_ROCPYJPEG_TESTS) # 17 - jpeg_decode_single_file_test - if(TORCH_PYTHON_RESULT EQUAL 0) - add_test(NAME jpeg_decode_python - COMMAND ${Python3_EXECUTABLE} ${ROCPYJPEG_SAMPLE_DIR}/jpegdecode.py - -i ${ROCM_PATH}/share/rocjpeg/images/mug_420.jpg - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - set_rocpy_test_env(jpeg_decode_python "${ROCPYJPEG_PYTHONPATH}") - endif(TORCH_PYTHON_RESULT EQUAL 0) + add_test(NAME jpeg_decode_python + COMMAND ${Python3_EXECUTABLE} ${ROCPYJPEG_SAMPLE_DIR}/jpegdecode.py + -i ${ROCM_PATH}/share/rocjpeg/images/mug_420.jpg + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + set_rocpy_test_env(jpeg_decode_python "${ROCPYJPEG_PYTHONPATH}") # 18 - jpegdecodebatched_test add_test(NAME jpeg_decode_batched_python COMMAND ${Python3_EXECUTABLE} ${ROCPYJPEG_SAMPLE_DIR}/jpegdecodebatched.py