diff --git a/.gitmodules b/.gitmodules index 89b551a769..c70439d0ab 100644 --- a/.gitmodules +++ b/.gitmodules @@ -24,3 +24,6 @@ [submodule "external/pybind11"] path = external/pybind11 url = https://github.com/jrmadsen/pybind11.git +[submodule "external/sqlite"] + path = external/sqlite + url = https://github.com/sqlite/sqlite.git diff --git a/CHANGELOG.md b/CHANGELOG.md index 263c1572c8..88de0f6a25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Full documentation for ROCm Systems Profiler is available at [https://rocm.docs. - How-to document for VCN and JPEG activity sampling and tracing. - Support for tracing Fortran applications. - Support for tracing MPI API in Fortran. +- Initial support for rocPD database output with the `ROCPROFSYS_USE_ROCPD` configuration setting. - By default, group "kernel dispatch" and "memory copy" events by HIP stream ID in Perfetto traces. - Add the "ROCPROFSYS_ROCM_GROUP_BY_QUEUE" configuration setting to group events by queue, instead. diff --git a/CMakeLists.txt b/CMakeLists.txt index 04bb87d551..605b070f0d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -221,6 +221,12 @@ rocprofiler_systems_add_option(ROCPROFSYS_BUILD_CODECOV "Build for code coverage rocprofiler_systems_add_option(ROCPROFSYS_INSTALL_PERFETTO_TOOLS "Install perfetto tools (i.e. 
traced, perfetto, etc.)" OFF ) +rocprofiler_systems_add_option(ROCPROFILER_BUILD_SQLITE3 + "Enable building sqlite3 library internally" OFF +) +rocprofiler_systems_add_option(ROCPROFSYS_ENABLE_BENCHMARK + "Enable performance benchmarking capabilities for the project" OFF +) if(ROCPROFSYS_USE_PAPI) rocprofiler_systems_add_option(ROCPROFSYS_BUILD_PAPI "Build PAPI from submodule" ON) @@ -328,6 +334,10 @@ if(ROCPROFSYS_BUILD_TESTING OR "$ENV{ROCPROFSYS_CI}" MATCHES "[1-9]+|ON|on|y|yes include(CTest) endif() +if(ROCPROFSYS_ENABLE_BENCHMARK) + add_compile_definitions(ROCPROFSYS_USE_BENCHMARK=1) # bare NAME=value: the command adds the -D itself +endif() + # ------------------------------------------------------------------------------# # # library and executables diff --git a/cmake/Packages.cmake b/cmake/Packages.cmake index f24c2c653a..fd9b302a0b 100644 --- a/cmake/Packages.cmake +++ b/cmake/Packages.cmake @@ -53,6 +53,9 @@ rocprofiler_systems_add_interface_library(rocprofiler-systems-python rocprofiler_systems_add_interface_library(rocprofiler-systems-perfetto "Enables Perfetto support" ) +rocprofiler_systems_add_interface_library(rocprofiler-systems-sqlite3 + "Use SQLite3 for rocpd data storage" +) rocprofiler_systems_add_interface_library(rocprofiler-systems-timemory "Provides timemory libraries" ) @@ -532,6 +535,14 @@ rocprofiler_systems_checkout_git_submodule( include(Perfetto) +# ----------------------------------------------------------------------------------------# +# +# SQLite3 +# +# ----------------------------------------------------------------------------------------# + +include(SQLite3) + # ----------------------------------------------------------------------------------------# # # ELFIO diff --git a/cmake/SQLite3.cmake b/cmake/SQLite3.cmake new file mode 100644 index 0000000000..44669c3280 --- /dev/null +++ b/cmake/SQLite3.cmake @@ -0,0 +1,48 @@ +include_guard(GLOBAL) + +if(ROCPROFILER_BUILD_SQLITE3) + message(STATUS "Building SQLite3 from source!") + execute_process( + COMMAND ${CMAKE_COMMAND} -E 
make_directory ${PROJECT_BINARY_DIR}/external/sqlite + ) + # checkout submodule if not already checked out or clone repo if no .gitmodules file + rocprofiler_systems_checkout_git_submodule( + RELATIVE_PATH external/sqlite + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + TEST_FILE configure + REPO_URL https://github.com/sqlite/sqlite.git + REPO_BRANCH "version-3.45.3" + ) + + find_program(MAKE_COMMAND NAMES make gmake PATH_SUFFIXES bin REQUIRED) + + include(ExternalProject) + ExternalProject_Add( + rocprofiler-systems-sqlite-build + PREFIX ${PROJECT_BINARY_DIR}/external/sqlite/build + SOURCE_DIR ${PROJECT_SOURCE_DIR}/external/sqlite + BUILD_IN_SOURCE 0 + CONFIGURE_COMMAND + <SOURCE_DIR>/configure --prefix=${PROJECT_BINARY_DIR}/external/sqlite/install + --libdir=${PROJECT_BINARY_DIR}/external/sqlite/install/lib --disable-shared + --with-tempstore=yes --enable-all --disable-tcl CFLAGS=-O3\ -g1 + BUILD_COMMAND ${MAKE_COMMAND} install -s + INSTALL_COMMAND "" # make install already runs as the build step + ) + + target_link_libraries( + rocprofiler-systems-sqlite3 + INTERFACE + $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/external/sqlite/install/lib/libsqlite3.a> + ) + target_include_directories( + rocprofiler-systems-sqlite3 + SYSTEM + INTERFACE $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/external/sqlite/install/include> + ) + add_dependencies(rocprofiler-systems-sqlite3 rocprofiler-systems-sqlite-build) +else() + message(STATUS "Using system SQLite3 library") + find_package(SQLite3 REQUIRED) + target_link_libraries(rocprofiler-systems-sqlite3 INTERFACE SQLite::SQLite3) +endif() diff --git a/source/bin/rocprof-sys-avail/avail.cpp b/source/bin/rocprof-sys-avail/avail.cpp index 93128f8651..9313a06fe9 100644 --- a/source/bin/rocprof-sys-avail/avail.cpp +++ b/source/bin/rocprof-sys-avail/avail.cpp @@ -118,7 +118,7 @@ write_hw_counter_info(std::ostream&, const array_t& = {}, namespace { -// initialize HIP before main so that librocprof-sys is not HSA_TOOLS_LIB -int gpu_count = rocprofsys::gpu::device_count(); +// assigned in main() after rocprofsys_init_library() +int gpu_count = 0; // statically allocated shared_ptrs to prevent use after free errors auto timemory_manager = tim::manager::master_instance(); @@ -138,6 +138,7 @@ main(int argc, char** 
argv) tim::unwind::set_bfd_verbose(3); tim::set_env("ROCPROFSYS_INIT_TOOLING", "OFF", 1); rocprofsys_init_library(); + gpu_count = rocprofsys::gpu::device_count(); std::set _category_options = component_categories{}(); { diff --git a/source/lib/CMakeLists.txt b/source/lib/CMakeLists.txt index 14e81112e7..e6699c833d 100644 --- a/source/lib/CMakeLists.txt +++ b/source/lib/CMakeLists.txt @@ -40,6 +40,7 @@ target_link_libraries( $ $ $ + $ $ $ $ diff --git a/source/lib/common/CMakeLists.txt b/source/lib/common/CMakeLists.txt index d096ebf933..8ea90f2aed 100644 --- a/source/lib/common/CMakeLists.txt +++ b/source/lib/common/CMakeLists.txt @@ -26,6 +26,8 @@ target_sources( ${CMAKE_CURRENT_SOURCE_DIR}/invoke.hpp ${CMAKE_CURRENT_SOURCE_DIR}/join.hpp ${CMAKE_CURRENT_SOURCE_DIR}/setup.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/traits.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/md5sum.hpp ${CMAKE_CURRENT_SOURCE_DIR}/static_object.hpp ${CMAKE_CURRENT_SOURCE_DIR}/synchronized.hpp ) diff --git a/source/lib/common/md5sum.hpp b/source/lib/common/md5sum.hpp new file mode 100644 index 0000000000..6de09280e6 --- /dev/null +++ b/source/lib/common/md5sum.hpp @@ -0,0 +1,469 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "traits.hpp" + +namespace rocprofsys +{ +inline namespace common +{ + +class md5sum /* MD5 hashing state: bit count, pending partial block, running state words and the final digest */ +{ +public: + using size_type = uint32_t; // must be 32bit + using raw_digest_t = std::array; + static constexpr int blocksize = 64; + + template + explicit md5sum(Tp&& arg, Args&&... args); /* passes each argument to update(), then calls finalize() */ + + md5sum() = default; + ~md5sum() = default; + md5sum(const md5sum&) = default; + md5sum(md5sum&&) = default; + + md5sum& operator=(const md5sum&) = default; + md5sum& operator=(md5sum&&) = default; + + md5sum& update(std::string_view inp); + md5sum& update(const unsigned char* buf, size_type length); + md5sum& update(const char* buf, size_type length); + md5sum& finalize(); + std::string hexdigest() const; + std::string hexliteral() const; + raw_digest_t rawdigest() const { return digest; } + + template ::value, int>> + md5sum& update(Tp inp); /* arithmetic types only: hashes the sizeof(Tp) bytes of inp */ + + friend std::ostream& operator<<(std::ostream&, md5sum md5); + +private: + void transform(const uint8_t block[blocksize]); + + bool finalized = false; + // 64bit counter for number of bits (lo, hi) + std::array count = { 0, 0 }; + std::array buffer{}; // overflow bytes from last 64 byte chunk + // digest so far, initialized to magic initialization constants. + std::array state = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 }; + std::array digest{}; // result +}; + +template +md5sum::md5sum(Tp&& arg, Args&&... 
args) /* NOTE(review): unless the template header (not visible in this view) constrains Tp, this forwarding constructor is a better match than the copy constructor for a non-const md5sum lvalue - confirm */ +{ + auto _update = [&](auto&& _val) { + using value_type = + std::remove_reference_t>>; + static_assert(!std::is_pointer::value, + "constructor cannot be called with pointer argument"); + update(std::forward(_val)); + }; + + _update(std::forward(arg)); + (_update(std::forward(args)), ...); /* fold expression: remaining arguments are hashed left to right */ + finalize(); +} + +template +md5sum& +md5sum::update(Tp inp) +{ + static_assert(std::is_arithmetic::value, "expected arithmetic type"); + return update(reinterpret_cast(&inp), sizeof(Tp)); /* hashes the in-memory bytes of inp, so the digest presumably depends on host byte order - confirm if digests must be portable */ +} + +template