From d8adfc9ba850700bddc848f1d107ca0e81f9a233 Mon Sep 17 00:00:00 2001 From: Mythreya Date: Sat, 19 Apr 2025 01:48:50 +0000 Subject: [PATCH 1/8] Add RCCL args serialization --- .../include/rocprofiler-sdk/buffer_tracing.h | 21 ++ .../rocprofiler-sdk/cxx/enum_string.hpp | 3 +- .../include/rocprofiler-sdk/cxx/perfetto.hpp | 1 + .../cxx/serialization/save.hpp | 10 + .../source/include/rocprofiler-sdk/fwd.h | 3 + .../source/lib/output/buffered_output.hpp | 2 +- .../source/lib/output/generateCSV.cpp | 8 +- .../source/lib/output/generateCSV.hpp | 8 +- .../source/lib/output/generateJSON.cpp | 4 +- .../source/lib/output/generateJSON.hpp | 2 +- .../source/lib/output/generateOTF2.cpp | 27 ++- .../source/lib/output/generateOTF2.hpp | 2 +- .../source/lib/output/generatePerfetto.cpp | 24 ++- .../source/lib/output/generatePerfetto.hpp | 2 +- .../source/lib/output/generateRocpd.cpp | 2 +- .../source/lib/output/generateRocpd.hpp | 2 +- .../source/lib/output/generateStats.cpp | 4 +- .../source/lib/output/generateStats.hpp | 6 +- .../source/lib/rocprofiler-sdk-tool/tool.cpp | 12 +- .../lib/rocprofiler-sdk/buffer_tracing.cpp | 11 + .../lib/rocprofiler-sdk/callback_tracing.cpp | 11 +- .../lib/rocprofiler-sdk/rccl/defines.hpp | 26 ++- .../rccl/details/CMakeLists.txt | 9 + .../lib/rocprofiler-sdk/rccl/details/fmt.hpp | 127 ++++++++++++ .../source/lib/rocprofiler-sdk/rccl/rccl.cpp | 191 ++++++++++++++---- .../lib/rocprofiler-sdk/rccl/rccl.def.cpp | 20 +- .../source/lib/rocprofiler-sdk/rccl/rccl.hpp | 18 +- .../source/lib/rocprofiler-sdk/rccl/utils.hpp | 71 +++++++ 28 files changed, 524 insertions(+), 103 deletions(-) create mode 100644 projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/details/CMakeLists.txt create mode 100644 projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/details/fmt.hpp create mode 100644 projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/utils.hpp diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h index 7f976bc62bc..fc98721ee17 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include @@ -208,6 +210,25 @@ typedef struct rocprofiler_buffer_tracing_rccl_api_record_t /// @brief Specification of the API function, e.g., ::rocprofiler_rccl_api_id_t } rocprofiler_buffer_tracing_rccl_api_record_t; +typedef struct rocprofiler_buffer_tracing_rccl_api_ext_record_t +{ + uint64_t size; ///< size of this struct + rocprofiler_buffer_tracing_kind_t kind; + rocprofiler_rccl_api_id_t operation; + rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record + rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds + rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds + rocprofiler_thread_id_t thread_id; ///< id for thread generating this record + rocprofiler_rccl_api_args_t args; ///< arguments of function call + rocprofiler_rccl_api_retval_t retval; ///< return value of function call + + /// @var kind + /// @brief ::ROCPROFILER_BUFFER_TRACING_RCCL_API_EXT + /// @var operation + /// @brief Specification of the API function (@see + /// ::rocprofiler_rccl_api_id_t) +} rocprofiler_buffer_tracing_rccl_api_ext_record_t; + /** * @brief ROCProfiler Buffer rocDecode API Record. */ diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp index 424b237e474..85f3daa5c1a 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp @@ -1491,7 +1491,8 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_EVENT_DROPPED_EVENTS); ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_PAGE_MIGRATE); ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_PAGE_FAULT); ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_QUEUE); -static_assert(ROCPROFILER_BUFFER_TRACING_LAST == 33); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_RCCL_API_EXT); +static_assert(ROCPROFILER_BUFFER_TRACING_LAST == 34); // rocprofiler_code_object_operation_t ROCPROFILER_ENUM_LABEL(ROCPROFILER_CODE_OBJECT_NONE); diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/perfetto.hpp b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/perfetto.hpp index a83fb2cd372..31be0bb760e 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/perfetto.hpp +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/perfetto.hpp @@ -222,6 +222,7 @@ ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(KERNEL_DISPATCH, kernel_dispatch) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(SCRATCH_MEMORY, scratch_memory) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(CORRELATION_ID_RETIREMENT, none) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(RCCL_API, rccl_api) +ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(RCCL_API_EXT, rccl_api) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(OMPT, openmp) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(RUNTIME_INITIALIZATION, none) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(ROCDECODE_API, rocdecode_api) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/serialization/save.hpp b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/serialization/save.hpp index 8af9a760315..da5a562c024 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/serialization/save.hpp +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/serialization/save.hpp @@ -608,6 +608,16 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_rccl_api_record_t data) save_buffer_tracing_api_record(ar, data); } +template +void +save(ArchiveT& ar, rocprofiler_buffer_tracing_rccl_api_ext_record_t data) +{ + save_buffer_tracing_api_record(ar, data); + auto args = sdk::serialization::get_buffer_tracing_args(data); + ROCP_SDK_SAVE_VALUE("args", args); + ROCP_SDK_SAVE_DATA_FIELD(retval); +} + template void save(ArchiveT& ar, rocprofiler_buffer_tracing_rocdecode_api_record_t data) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h index d8e37057462..375119d76b7 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h @@ -228,6 +228,7 @@ typedef enum rocprofiler_buffer_tracing_kind_t // NOLINT(performance-enum-size) ROCPROFILER_BUFFER_TRACING_KFD_QUEUE, ///< @see rocprofiler_kfd_queue_operation_t ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API, ///< @see ///< ::rocprofiler_marker_core_range_api_id_t + ROCPROFILER_BUFFER_TRACING_RCCL_API_EXT, ROCPROFILER_BUFFER_TRACING_LAST, /// @var ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT @@ -238,6 +239,8 @@ typedef enum rocprofiler_buffer_tracing_kind_t // NOLINT(performance-enum-size) /// contains the function argument(s) and return value /// @var ROCPROFILER_BUFFER_TRACING_ROCDECODE_API_EXT /// @brief Similar to ROCPROFILER_BUFFER_TRACING_ROCDECODE_API except the buffer record + /// @var ROCPROFILER_BUFFER_TRACING_RCCL_API_EXT + /// @brief Similar to ROCPROFILER_BUFFER_TRACING_RCCL_API except the buffer record /// contains the function argument(s) and return value } rocprofiler_buffer_tracing_kind_t; diff --git a/projects/rocprofiler-sdk/source/lib/output/buffered_output.hpp b/projects/rocprofiler-sdk/source/lib/output/buffered_output.hpp index efe238eb7eb..bd4b659ce2f 100644 --- a/projects/rocprofiler-sdk/source/lib/output/buffered_output.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/buffered_output.hpp @@ -169,7 +169,7 @@ using hsa_buffered_output_t = using marker_buffered_output_t = buffered_output; using rccl_buffered_output_t = - buffered_output; + buffered_output; using counter_collection_buffered_output_t = buffered_output; using scratch_memory_buffered_output_t = diff --git a/projects/rocprofiler-sdk/source/lib/output/generateCSV.cpp b/projects/rocprofiler-sdk/source/lib/output/generateCSV.cpp index cccdfc834ab..13bdebe1501 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateCSV.cpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateCSV.cpp @@ -706,10 +706,10 @@ generate_csv(const output_config& } void -generate_csv(const output_config& cfg, - const metadata& tool_metadata, - const generator& data, - const stats_entry_t& stats) +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats) { if(data.empty()) return; diff --git a/projects/rocprofiler-sdk/source/lib/output/generateCSV.hpp b/projects/rocprofiler-sdk/source/lib/output/generateCSV.hpp index 0f75f487841..cde5f53b774 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateCSV.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateCSV.hpp @@ -82,10 +82,10 @@ generate_csv(const output_config& const stats_entry_t& stats); void -generate_csv(const output_config& cfg, - const metadata& tool_metadata, - const generator& data, - const stats_entry_t& stats); +generate_csv(const output_config& cfg, + const metadata& tool_metadata, + const generator& data, + const stats_entry_t& stats); void generate_csv(const output_config& cfg, diff --git a/projects/rocprofiler-sdk/source/lib/output/generateJSON.cpp b/projects/rocprofiler-sdk/source/lib/output/generateJSON.cpp index 7eb8b243ff1..f54e754ff70 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateJSON.cpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateJSON.cpp @@ -193,7 +193,7 @@ write_json( const generator& counter_collection_gen, const generator& marker_api_gen, const generator& scratch_memory_gen, - const generator& rccl_api_gen, + const generator& rccl_api_ext_gen, const generator& memory_allocation_gen, const generator& rocdecode_api_gen, const generator& rocjpeg_api_gen, @@ -234,7 +234,7 @@ write_json( json_ar(cereal::make_nvp("hip_api", hip_api_gen)); json_ar(cereal::make_nvp("hsa_api", hsa_api_gen)); json_ar(cereal::make_nvp("marker_api", marker_api_gen)); - json_ar(cereal::make_nvp("rccl_api", rccl_api_gen)); + json_ar(cereal::make_nvp("rccl_api", rccl_api_ext_gen)); json_ar(cereal::make_nvp("memory_copy", memory_copy_gen)); json_ar(cereal::make_nvp("memory_allocation", memory_allocation_gen)); json_ar(cereal::make_nvp("scratch_memory", scratch_memory_gen)); diff --git a/projects/rocprofiler-sdk/source/lib/output/generateJSON.hpp b/projects/rocprofiler-sdk/source/lib/output/generateJSON.hpp index 4ffc78bd584..55112ce490e 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateJSON.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateJSON.hpp @@ -93,7 +93,7 @@ write_json( const generator& counter_collection_gen, const generator& marker_api_gen, const generator& scratch_memory_gen, - const generator& rccl_api_gen, + const generator& rccl_api_ext_gen, const generator& memory_allocation_gen, const generator& rocdecode_api_gen, const generator& rocjpeg_api_gen, diff --git a/projects/rocprofiler-sdk/source/lib/output/generateOTF2.cpp b/projects/rocprofiler-sdk/source/lib/output/generateOTF2.cpp index d95894fecba..a5a1092888e 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateOTF2.cpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateOTF2.cpp @@ -365,7 +365,7 @@ write_otf2(const output_config& cfg, std::deque* memory_copy_data, std::deque* marker_api_data, std::deque* /*scratch_memory_data*/, - std::deque* rccl_api_data, + std::deque* rccl_api_ext_data, std::deque* memory_allocation_data, std::deque* rocdecode_api_data, std::deque* rocjpeg_api_data) @@ -417,7 +417,7 @@ write_otf2(const output_config& cfg, tids.emplace(itr.thread_id); for(auto itr : *marker_api_data) tids.emplace(itr.thread_id); - for(auto itr : *rccl_api_data) + for(auto itr : *rccl_api_ext_data) tids.emplace(itr.thread_id); for(auto itr : *rocdecode_api_data) tids.emplace(itr.thread_id); @@ -615,7 +615,6 @@ write_otf2(const output_config& cfg, add_event_data(hsa_api_data, sdk::category::hsa_api{}); add_event_data(hip_api_data, sdk::category::hip_api{}); add_event_data(marker_api_data, sdk::category::marker_api{}); - add_event_data(rccl_api_data, sdk::category::rccl_api{}); add_event_data(rocjpeg_api_data, sdk::category::rocjpeg_api{}); } @@ -641,6 +640,28 @@ write_otf2(const output_config& cfg, nullptr}); } + for(auto itr : *rccl_api_ext_data) + { + auto name = buffer_names.at(itr.kind, itr.operation); + _hash_data.emplace( + get_hash_id(name), + region_info{std::string{name}, OTF2_REGION_ROLE_FUNCTION, OTF2_PARADIGM_USER}); + + auto& _evt_info = thread_event_info.at(itr.thread_id); + _evt_info.event_count += 1; + + _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER, + name, + _evt_info.get_location(), + itr.start_timestamp, + get_attr(sdk::category::rccl_api{})}); + _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT, + name, + _evt_info.get_location(), + itr.end_timestamp, + nullptr}); + } + for(auto itr : *memory_copy_data) { auto name = buffer_names.at(itr.kind, itr.operation); diff --git a/projects/rocprofiler-sdk/source/lib/output/generateOTF2.hpp b/projects/rocprofiler-sdk/source/lib/output/generateOTF2.hpp index 1c3b94b03b9..2d3eee610c8 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateOTF2.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateOTF2.hpp @@ -45,7 +45,7 @@ write_otf2(const output_config& cfg, std::deque* memory_copy_data, std::deque* marker_api_data, std::deque* scratch_memory_data, - std::deque* rccl_api_data, + std::deque* rccl_api_ext_data, std::deque* memory_allocation_data, std::deque* rocdecode_api_data, std::deque* rocjpeg_api_data); diff --git a/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.cpp b/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.cpp index 315fa7a6420..606a8bf4dee 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.cpp +++ b/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.cpp @@ -75,7 +75,7 @@ write_perfetto( const generator& counter_collection_gen, const generator& marker_api_gen, const generator& scratch_memory_gen, - const generator& rccl_api_gen, + const generator& rccl_api_ext_gen, const generator& memory_allocation_gen, const generator& rocdecode_api_gen, const generator& rocjpeg_api_gen) @@ -175,8 +175,8 @@ write_perfetto( for(auto ditr : marker_api_gen) for(auto itr : marker_api_gen.get(ditr)) tids.emplace(itr.thread_id); - for(auto ditr : rccl_api_gen) - for(auto itr : rccl_api_gen.get(ditr)) + for(auto ditr : rccl_api_ext_gen) + for(auto itr : rccl_api_ext_gen.get(ditr)) tids.emplace(itr.thread_id); for(auto ditr : rocdecode_api_gen) for(auto itr : rocdecode_api_gen.get(ditr)) @@ -424,11 +424,12 @@ write_perfetto( tracing_session->FlushBlocking(); } - for(auto ditr : rccl_api_gen) - for(auto itr : rccl_api_gen.get(ditr)) + for(auto ditr : rccl_api_ext_gen) + for(auto itr : rccl_api_ext_gen.get(ditr)) { - auto name = buffer_names.at(itr.kind, itr.operation); - auto& track = thread_tracks.at(itr.thread_id); + auto name = buffer_names.at(itr.kind, itr.operation); + auto& track = thread_tracks.at(itr.thread_id); + auto rccl_args = sdk::serialization::get_buffer_tracing_args(itr); TRACE_EVENT_BEGIN(sdk::perfetto_category::name, ::perfetto::StaticString(name.data()), @@ -450,7 +451,14 @@ write_perfetto( "corr_id", itr.correlation_id.internal, "ancestor_id", - itr.correlation_id.ancestor); + itr.correlation_id.ancestor, + [&](::perfetto::EventContext ctx) { + for(const auto& rccl_arg : rccl_args) + { + sdk::add_perfetto_annotation( + ctx, rccl_arg.name, rccl_arg.value); + } + }); TRACE_EVENT_END(sdk::perfetto_category::name, track, itr.end_timestamp); diff --git a/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.hpp b/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.hpp index 15fa43a7aeb..7d8c74e96a7 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/generatePerfetto.hpp @@ -47,7 +47,7 @@ write_perfetto( const generator& counter_collection_gen, const generator& marker_api_gen, const generator& scratch_memory_gen, - const generator& rccl_api_gen, + const generator& rccl_api_ext_gen, const generator& memory_allocation_gen, const generator& rocdecode_api_gen, const generator& rocjpeg_api_gen); diff --git a/projects/rocprofiler-sdk/source/lib/output/generateRocpd.cpp b/projects/rocprofiler-sdk/source/lib/output/generateRocpd.cpp index a08bdbf6e32..17cfd500815 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateRocpd.cpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateRocpd.cpp @@ -559,7 +559,7 @@ write_rocpd( const generator& marker_api_gen, const generator& memory_alloc_gen, const generator& scratch_memory_gen, - const generator& rccl_api_gen, + const generator& rccl_api_gen, const generator& rocdecode_api_gen, const generator& counter_collection_gen) { diff --git a/projects/rocprofiler-sdk/source/lib/output/generateRocpd.hpp b/projects/rocprofiler-sdk/source/lib/output/generateRocpd.hpp index 1ed8fac9d15..bc6ac5dae4e 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateRocpd.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateRocpd.hpp @@ -47,7 +47,7 @@ write_rocpd( const generator& marker_api_gen, const generator& memory_alloc_gen, const generator& scratch_memory_gen, - const generator& rccl_api_gen, + const generator& rccl_api_gen, const generator& rocdecode_api_gen, const generator& counter_collection_gen); diff --git a/projects/rocprofiler-sdk/source/lib/output/generateStats.cpp b/projects/rocprofiler-sdk/source/lib/output/generateStats.cpp index 6bd72e271d3..0e48ad62320 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateStats.cpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateStats.cpp @@ -212,8 +212,8 @@ generate_stats(const output_config& /*cfg*/, stats_entry_t generate_stats(const output_config& /*cfg*/, - const metadata& tool_metadata, - const generator& data) + const metadata& tool_metadata, + const generator& data) { auto rccl_stats = stats_map_t{}; for(auto ditr : data) diff --git a/projects/rocprofiler-sdk/source/lib/output/generateStats.hpp b/projects/rocprofiler-sdk/source/lib/output/generateStats.hpp index a75095c6f40..26bc0300207 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateStats.hpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateStats.hpp @@ -67,9 +67,9 @@ generate_stats(const output_config& const generator& data); stats_entry_t -generate_stats(const output_config& cfg, - const metadata& tool_metadata, - const generator& data); +generate_stats(const output_config& cfg, + const metadata& tool_metadata, + const generator& data); stats_entry_t generate_stats(const output_config& cfg, diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp index 14a0ec8ae84..b3c31d8e80b 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -191,7 +191,7 @@ struct buffer_ids rocprofiler_buffer_id_t memory_allocation_trace = {}; rocprofiler_buffer_id_t counter_collection = {}; rocprofiler_buffer_id_t scratch_memory = {}; - rocprofiler_buffer_id_t rccl_api_trace = {}; + rocprofiler_buffer_id_t rccl_api_ext_trace = {}; rocprofiler_buffer_id_t pc_sampling_host_trap = {}; rocprofiler_buffer_id_t rocdecode_api_trace = {}; rocprofiler_buffer_id_t rocjpeg_api_trace = {}; @@ -206,7 +206,7 @@ struct buffer_ids memory_allocation_trace, counter_collection, scratch_memory, - rccl_api_trace, + rccl_api_ext_trace, pc_sampling_host_trap, rocdecode_api_trace, rocjpeg_api_trace, @@ -1061,10 +1061,10 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/, tool::tool_buffer_tracing_hip_api_ext_record_t{*record, stream_id}, domain_type::HIP); } - else if(header->kind == ROCPROFILER_BUFFER_TRACING_RCCL_API) + else if(header->kind == ROCPROFILER_BUFFER_TRACING_RCCL_API_EXT) { auto* record = - static_cast(header->payload); + static_cast(header->payload); tool::write_ring_buffer(*record, domain_type::RCCL); } @@ -1882,8 +1882,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT, get_buffers().hip_api_trace}, buffer_service_config{tool::get_config().rccl_api_trace, - ROCPROFILER_BUFFER_TRACING_RCCL_API, - get_buffers().rccl_api_trace}, + ROCPROFILER_BUFFER_TRACING_RCCL_API_EXT, + get_buffers().rccl_api_ext_trace}, buffer_service_config{tool::get_config().memory_allocation_trace, ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION, get_buffers().memory_allocation_trace}, diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/buffer_tracing.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/buffer_tracing.cpp index 1e72623ab2d..d26363fcd06 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/buffer_tracing.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/buffer_tracing.cpp @@ -94,6 +94,7 @@ ROCPROFILER_BUFFER_TRACING_KIND_STRING(KERNEL_DISPATCH) ROCPROFILER_BUFFER_TRACING_KIND_STRING(SCRATCH_MEMORY) ROCPROFILER_BUFFER_TRACING_KIND_STRING(CORRELATION_ID_RETIREMENT) ROCPROFILER_BUFFER_TRACING_KIND_STRING(RCCL_API) +ROCPROFILER_BUFFER_TRACING_KIND_STRING(RCCL_API_EXT) ROCPROFILER_BUFFER_TRACING_KIND_STRING(OMPT) ROCPROFILER_BUFFER_TRACING_KIND_STRING(RUNTIME_INITIALIZATION) ROCPROFILER_BUFFER_TRACING_KIND_STRING(ROCDECODE_API) @@ -282,6 +283,7 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_ break; } case ROCPROFILER_BUFFER_TRACING_RCCL_API: + case ROCPROFILER_BUFFER_TRACING_RCCL_API_EXT: { val = rocprofiler::rccl::name_by_id(operation); break; @@ -445,6 +447,7 @@ rocprofiler_iterate_buffer_tracing_kind_operations( break; } case ROCPROFILER_BUFFER_TRACING_RCCL_API: + case ROCPROFILER_BUFFER_TRACING_RCCL_API_EXT: { ops = rocprofiler::rccl::get_ids(); break; @@ -574,6 +577,14 @@ rocprofiler_iterate_buffer_tracing_record_args( _payload->operation, _payload->args, callback, user_data); return ROCPROFILER_STATUS_SUCCESS; } + case ROCPROFILER_BUFFER_TRACING_RCCL_API_EXT: + { + auto* _payload = + static_cast(record.payload); + rocprofiler::rccl::iterate_args( + _payload->operation, _payload->args, callback, user_data); + return ROCPROFILER_STATUS_SUCCESS; + } } return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/callback_tracing.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/callback_tracing.cpp index 99d8a5a9cba..4a1f72437fc 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/callback_tracing.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/callback_tracing.cpp @@ -597,12 +597,21 @@ rocprofiler_iterate_callback_tracing_kind_operation_args( user_data); return ROCPROFILER_STATUS_SUCCESS; } + case ROCPROFILER_CALLBACK_TRACING_RCCL_API: + { + rocprofiler::rccl::iterate_args( + record.operation, + static_cast(record.payload)->args, + callback, + max_deref, + user_data); + return ROCPROFILER_STATUS_SUCCESS; + } case ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY: case ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT: case ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH: case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY: case ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION: - case ROCPROFILER_CALLBACK_TRACING_RCCL_API: case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION: case ROCPROFILER_CALLBACK_TRACING_ROCJPEG_API: case ROCPROFILER_CALLBACK_TRACING_HIP_STREAM: diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/defines.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/defines.hpp index 34d8c91ca3e..1f0fd0571a7 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/defines.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/defines.hpp @@ -46,6 +46,8 @@ using domain_type::retval_type; \ using domain_type::callback_data_type; \ \ + static constexpr auto get_args_type() { return common::mpl::type_list<>{}; } \ + \ static constexpr auto offset() \ { \ return offsetof(rccl_table_lookup::type, RCCL_FUNC_PTR); \ @@ -68,8 +70,7 @@ { \ if constexpr(std::is_pointer::value) \ { \ - assert(_table != nullptr && "nullptr to MARKER table for " #RCCL_FUNC \ - " function"); \ + assert(_table != nullptr && "nullptr to RCCL table for " #RCCL_FUNC " function"); \ return _table->RCCL_FUNC_PTR; \ } \ else \ @@ -92,9 +93,13 @@ return &base_type::functor; \ } \ \ - static std::vector as_arg_addr(callback_data_type) { return std::vector{}; } \ + static std::vector as_arg_addr(rocprofiler_rccl_api_args_t) \ + { \ + return std::vector{}; \ + } \ \ - static std::vector as_arg_list(callback_data_type, int32_t) \ + static std::vector as_arg_list(rocprofiler_rccl_api_args_t, \ + int32_t) \ { \ return {}; \ } \ @@ -147,8 +152,7 @@ { \ if constexpr(std::is_pointer::value) \ { \ - assert(_table != nullptr && "nullptr to MARKER table for " #RCCL_FUNC \ - " function"); \ + assert(_table != nullptr && "nullptr to RCCL table for " #RCCL_FUNC " function"); \ return _table->RCCL_FUNC_PTR; \ } \ else \ @@ -171,10 +175,16 @@ return &base_type::functor; \ } \ \ - static std::vector as_arg_addr(callback_data_type trace_data) \ + static std::vector as_arg_addr(rocprofiler_rccl_api_args_t args) \ { \ return std::vector{ \ - GET_ADDR_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)}; \ + GET_ADDR_MEMBER_FIELDS(get_api_data_args(args), __VA_ARGS__)}; \ + } \ + \ + static auto as_arg_list(rocprofiler_rccl_api_args_t args, int32_t max_deref) \ + { \ + return utils::stringize( \ + max_deref, GET_NAMED_MEMBER_FIELDS(get_api_data_args(args), __VA_ARGS__)); \ } \ }; \ } \ diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/details/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/details/CMakeLists.txt new file mode 100644 index 00000000000..97be8f1841c --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/details/CMakeLists.txt @@ -0,0 +1,9 @@ +# +# +# +set(ROCPROFILER_LIB_RCCL_DETAILS_SOURCES) +set(ROCPROFILER_LIB_RCCL_DETAILS_HEADERS fmt.hpp) + +target_sources( + rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_RCCL_DETAILS_SOURCES} + ${ROCPROFILER_LIB_RCCL_DETAILS_HEADERS}) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/details/fmt.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/details/fmt.hpp new file mode 100644 index 00000000000..9c4e98b5658 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/details/fmt.hpp @@ -0,0 +1,127 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include +#include + +#include "lib/common/stringize_arg.hpp" + +#include +#include + +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace rccl +{ +namespace utils +{ +template +struct handle_formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + auto format(const Tp& v, Ctx& ctx) const + { + return fmt::format_to(ctx.out(), "handle={}", v.handle); + } +}; + +template +struct handle_formatter : handle_formatter +{}; +} // namespace utils +} // namespace rccl +} // namespace rocprofiler + +namespace fmt +{ +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + auto format(const ncclUniqueId v, Ctx& ctx) const + { + static_assert(sizeof(v) == 128 * sizeof(char), "NCCL ID type changed. Expected char[128]"); + + return fmt::format_to(ctx.out(), "0x{:0x}", fmt::join(v.internal, "")); + } +}; + +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + auto format(const ncclConfig_t& v, Ctx& ctx) const + { + return fmt::format_to(ctx.out(), + "blocking={}, cgaClusterSize={}, minCTAs={}, maxCTAs={}, " + "netName={}, splitShare={}", + v.blocking, + v.cgaClusterSize, + v.minCTAs, + v.maxCTAs, + v.netName, + v.splitShare); + } +}; + +template <> +struct formatter +{ + template + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template + auto format(const ncclComm_t& v, Ctx& ctx) const + { + return fmt::format_to(ctx.out(), "0x{:0x}", v); + } +}; + +} // namespace fmt diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.cpp index 322ccfe63e3..e50840fd72f 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.cpp @@ -22,13 +22,15 @@ #include "lib/rocprofiler-sdk/rccl/rccl.hpp" #include "lib/common/defines.hpp" +#include "lib/common/mpl.hpp" #include "lib/common/static_object.hpp" +#include "lib/common/string_entry.hpp" #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" -#include "lib/rocprofiler-sdk/hip/hip.hpp" -#include "lib/rocprofiler-sdk/hip/utils.hpp" +#include "lib/rocprofiler-sdk/rccl/utils.hpp" #include "lib/rocprofiler-sdk/registration.hpp" +#include "lib/rocprofiler-sdk/tracing/fwd.hpp" #include "lib/rocprofiler-sdk/tracing/tracing.hpp" #include @@ -36,11 +38,6 @@ #include #include -#include -#include -// must be included after runtime api -#include - #include #include #include @@ -90,6 +87,30 @@ set_data_retval(DataT& _data, Tp _val) } } +template +decltype(auto) +convert_arg_type(Tp&& val) +{ + using data_type = common::mpl::unqualified_type_t; + // TODO(mkuriche): add size_t* backing, like for string + if constexpr(common::mpl::is_string_type::value) + { + if constexpr(std::is_same::value) + { + return common::get_string_entry(val)->c_str(); + } + else + { + static_assert(common::mpl::assert_false::value, + "argument type is a string type. preceding if constexpr is incorrect"); + } + } + else + { + return std::forward(val); + } +} + template Tp* get_table_impl() @@ -136,7 +157,7 @@ rccl_api_impl::exec(FuncT&& _func, Args&&... args) } using info_type = rccl_api_info; - ROCP_ERROR << "nullptr to next hip function for " << info_type::name << " (" + ROCP_ERROR << "nullptr to next rccl function for " << info_type::name << " (" << info_type::operation_idx << ")"; return get_default_retval(); @@ -150,6 +171,7 @@ rccl_api_impl::functor(Args... args) using info_type = rccl_api_info; using callback_api_data_t = typename rccl_domain_info::callback_data_type; using buffered_api_data_t = typename rccl_domain_info::buffer_data_type; + using buffered_ext_data_t = typename rccl_domain_info::buffered_ext_data_type; constexpr auto external_corr_id_domain_idx = rccl_domain_info::external_correlation_id_domain_idx; @@ -167,6 +189,7 @@ rccl_api_impl::functor(Args... args) auto thr_id = common::get_tid(); auto callback_contexts = tracing::callback_context_data_vec_t{}; auto buffered_contexts = tracing::buffered_context_data_vec_t{}; + auto extended_contexts = tracing::buffered_context_data_vec_t{}; auto external_corr_ids = tracing::external_correlation_id_map_t{}; tracing::populate_contexts(info_type::callback_domain_idx, @@ -176,7 +199,12 @@ rccl_api_impl::functor(Args... args) buffered_contexts, external_corr_ids); - if(callback_contexts.empty() && buffered_contexts.empty()) + tracing::populate_contexts(info_type::buffered_ext_domain_idx, + info_type::operation_idx, + extended_contexts, + external_corr_ids); + + if(callback_contexts.empty() && buffered_contexts.empty() && extended_contexts.empty()) { [[maybe_unused]] auto _ret = exec(info_type::get_table_func(), std::forward(args)...); if constexpr(!std::is_void::value) @@ -186,10 +214,11 @@ rccl_api_impl::functor(Args... args) } auto buffer_record = common::init_public_api_struct(buffered_api_data_t{}); + auto extended_record = common::init_public_api_struct(buffered_ext_data_t{}); auto tracer_data = common::init_public_api_struct(callback_api_data_t{}); auto* corr_id = tracing::correlation_service::construct(ref_count); auto internal_corr_id = corr_id->internal; - auto ancestor_corr_id = corr_id->internal; + auto ancestor_corr_id = corr_id->ancestor; tracing::populate_external_correlation_ids(external_corr_ids, thr_id, @@ -197,11 +226,16 @@ rccl_api_impl::functor(Args... args) info_type::operation_idx, internal_corr_id); + // set the arguments + if(!callback_contexts.empty() || !extended_contexts.empty()) + { + set_data_args(info_type::get_api_data_args(tracer_data.args), + convert_arg_type(std::forward(args))...); + } + // invoke the callbacks if(!callback_contexts.empty()) { - set_data_args(info_type::get_api_data_args(tracer_data.args), std::forward(args)...); - tracing::execute_phase_enter_callbacks(callback_contexts, thr_id, internal_corr_id, @@ -217,7 +251,7 @@ rccl_api_impl::functor(Args... args) external_corr_ids, thr_id, external_corr_id_domain_idx); // record the start timestamp as close to the function call as possible - if(!buffered_contexts.empty()) + if(!buffered_contexts.empty() || !extended_contexts.empty()) { buffer_record.start_timestamp = common::timestamp_ns(); } @@ -228,15 +262,18 @@ rccl_api_impl::functor(Args... args) auto _ret = exec(info_type::get_table_func(), std::forward(args)...); // record the end timestamp as close to the function call as possible - if(!buffered_contexts.empty()) + if(!buffered_contexts.empty() || !extended_contexts.empty()) { buffer_record.end_timestamp = common::timestamp_ns(); } - if(!callback_contexts.empty()) + if(!callback_contexts.empty() || !extended_contexts.empty()) { set_data_retval(tracer_data.retval, _ret); + } + if(!callback_contexts.empty()) + { tracing::execute_phase_exit_callbacks(callback_contexts, external_corr_ids, info_type::callback_domain_idx, @@ -256,6 +293,23 @@ rccl_api_impl::functor(Args... args) buffer_record); } + if(!extended_contexts.empty()) + { + extended_record.start_timestamp = buffer_record.start_timestamp; + extended_record.end_timestamp = buffer_record.end_timestamp; + extended_record.args = tracer_data.args; + extended_record.retval = tracer_data.retval; + + tracing::execute_buffer_record_emplace(extended_contexts, + thr_id, + internal_corr_id, + external_corr_ids, + ancestor_corr_id, + info_type::buffered_ext_domain_idx, + info_type::operation_idx, + extended_record); + } + // decrement the reference count after usage in the callback/buffers corr_id->sub_ref_count(); @@ -324,13 +378,13 @@ get_names(std::vector& _name_list, std::index_sequence(_name_list, std::index_sequence{}); } -template +template void -iterate_args(const uint32_t id, - const DataT& data, - rocprofiler_callback_tracing_operation_args_cb_t func, - int32_t max_deref, - void* user_data, +iterate_args(const uint32_t id, + const DataT& data, + FuncT func, + int32_t max_deref, + void* user_data, std::index_sequence) { if(OpIdx == id) @@ -340,16 +394,42 @@ iterate_args(const uint32_t id, auto&& arg_addr = info_type::as_arg_addr(data); for(size_t i = 0; i < std::min(arg_list.size(), arg_addr.size()); ++i) { - auto ret = func(info_type::callback_domain_idx, // kind - id, // operation - i, // arg_number - arg_addr.at(i), // arg_value_addr - arg_list.at(i).indirection_level, // indirection - arg_list.at(i).type, // arg_type - arg_list.at(i).name, // arg_name - arg_list.at(i).value.c_str(), // arg_value_str - arg_list.at(i).dereference_count, // num deref in str - user_data); + using return_type = typename common::mpl::function_traits::result_type; + + auto ret = return_type{}; + if constexpr(std::is_same::value) + { + ret = func(info_type::callback_domain_idx, // kind + id, // operation + i, // arg_number + arg_addr.at(i), // arg_value_addr + arg_list.at(i).indirection_level, // indirection + arg_list.at(i).type, // arg_type + arg_list.at(i).name, // arg_name + arg_list.at(i).value.c_str(), // arg_value_str + arg_list.at(i).dereference_count, // num deref in str + user_data); + } + else if constexpr(std::is_same::value) + { + ret = func(info_type::buffered_ext_domain_idx, // kind + id, // operation + i, // arg_number + arg_addr.at(i), // arg_value_addr + arg_list.at(i).indirection_level, // indirection + arg_list.at(i).type, // arg_type + arg_list.at(i).name, // arg_name + arg_list.at(i).value.c_str(), // arg_value_str + user_data); + } + else + { + static_assert(common::mpl::assert_false::value, + "Error! unsupported callback type"); + } + if(ret != 0) break; } return; @@ -362,6 +442,7 @@ iterate_args(const uint32_t id, bool should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain, rocprofiler_buffer_tracing_kind_t _buffered_domain, + rocprofiler_buffer_tracing_kind_t _buffered_ext_domain, int _operation) { // we loop over all the *registered* contexts and see if any of them, at any point in time, @@ -379,6 +460,11 @@ should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain, if(itr->buffered_tracer && itr->buffered_tracer->domains(_buffered_domain) && itr->buffered_tracer->domains(_buffered_domain, _operation)) return true; + + // if there is a buffered tracer enabled for the given domain and op, we need to wrap + if(itr->buffered_tracer && itr->buffered_tracer->domains(_buffered_ext_domain) && + itr->buffered_tracer->domains(_buffered_ext_domain, _operation)) + return true; } return false; } @@ -436,9 +522,11 @@ update_table(Tp* _orig, std::integral_constant) // make sure we don't access a field that doesn't exist in input table if(_info.offset() >= _orig->size) return; - // check to see if there are any contexts which enable this operation in the HIP API domain - if(!should_wrap_functor( - _info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx)) + // check to see if there are any contexts which enable this operation in the RCCL API domain + if(!should_wrap_functor(_info.callback_domain_idx, + _info.buffered_domain_idx, + _info.buffered_ext_domain_idx, + _info.operation_idx)) return; ROCP_TRACE << "updating table entry for " << _info.name; @@ -510,11 +598,11 @@ get_names() template void -iterate_args(uint32_t id, - const rocprofiler_callback_tracing_hip_api_data_t& data, - rocprofiler_callback_tracing_operation_args_cb_t callback, - int32_t max_deref, - void* user_data) +iterate_args(uint32_t id, + const rocprofiler_rccl_api_args_t& data, + rocprofiler_callback_tracing_operation_args_cb_t callback, + int32_t max_deref, + void* user_data) { if(callback) iterate_args(id, @@ -525,6 +613,22 @@ iterate_args(uint32_t id, std::make_index_sequence::last>{}); } +template +void +iterate_args(uint32_t id, + const rocprofiler_rccl_api_args_t& data, + rocprofiler_buffer_tracing_operation_args_cb_t callback, + void* user_data) +{ + if(callback) + iterate_args(id, + data, + callback, + 0, + user_data, + std::make_index_sequence::last>{}); +} + template void copy_table(TableT* _orig, uint64_t _tbl_instance) @@ -544,8 +648,9 @@ update_table(TableT* _orig) update_table(_orig, std::make_index_sequence::last>{}); } -using rccl_api_data_t = rocprofiler_callback_tracing_rccl_api_data_t; +using rccl_api_data_t = rocprofiler_rccl_api_args_t; using rccl_op_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t; +using rccl_op_args_bf_t = rocprofiler_buffer_tracing_operation_args_cb_t; #define INSTANTIATE_RCCL_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \ template void copy_table(TABLE_TYPE * _tbl, uint64_t _instv); \ @@ -553,7 +658,11 @@ using rccl_op_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t; template const char* name_by_id(uint32_t); \ template uint32_t id_by_name(const char*); \ template std::vector get_ids(); \ - template std::vector get_names(); + template std::vector get_names(); \ + template void iterate_args( \ + uint32_t, const rccl_api_data_t&, rccl_op_args_cb_t, int32_t, void*); \ + template void iterate_args( \ + uint32_t, const rccl_api_data_t&, rccl_op_args_bf_t, void*); INSTANTIATE_RCCL_TABLE_FUNC(rccl_api_func_table_t, ROCPROFILER_RCCL_TABLE_ID) } // namespace rccl diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.def.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.def.cpp index 324ad8c0f95..a2fb2fbf072 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.def.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.def.cpp @@ -35,21 +35,23 @@ namespace rccl template <> struct rccl_domain_info { - using args_type = rocprofiler_rccl_api_args_t; - using retval_type = rocprofiler_rccl_api_retval_t; - using callback_data_type = rocprofiler_callback_tracing_rccl_api_data_t; - using buffer_data_type = rocprofiler_buffer_tracing_rccl_api_record_t; + using args_type = rocprofiler_rccl_api_args_t; + using retval_type = rocprofiler_rccl_api_retval_t; + using callback_data_type = rocprofiler_callback_tracing_rccl_api_data_t; + using buffer_data_type = rocprofiler_buffer_tracing_rccl_api_record_t; + using buffered_ext_data_type = rocprofiler_buffer_tracing_rccl_api_ext_record_t; }; template <> struct rccl_domain_info : rccl_domain_info { - using enum_type = rocprofiler_marker_core_api_id_t; - static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_RCCL_API; - static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_RCCL_API; - static constexpr auto none = ROCPROFILER_RCCL_API_ID_NONE; - static constexpr auto last = ROCPROFILER_RCCL_API_ID_LAST; + using enum_type = rocprofiler_marker_core_api_id_t; + static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_RCCL_API; + static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_RCCL_API; + static constexpr auto buffered_ext_domain_idx = ROCPROFILER_BUFFER_TRACING_RCCL_API_EXT; + static constexpr auto none = ROCPROFILER_RCCL_API_ID_NONE; + static constexpr auto last = ROCPROFILER_RCCL_API_ID_LAST; static constexpr auto external_correlation_id_domain_idx = ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_RCCL_API; }; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.hpp index 1b9881cf5c1..721149f0178 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.hpp @@ -42,6 +42,7 @@ # include #endif +#include #include #include @@ -106,11 +107,18 @@ get_ids(); template void -iterate_args(uint32_t id, - const rocprofiler_callback_tracing_rccl_api_data_t& data, - rocprofiler_callback_tracing_operation_args_cb_t callback, - int32_t max_deref, - void* user_data); +iterate_args(uint32_t id, + const rocprofiler_rccl_api_args_t& data, + rocprofiler_callback_tracing_operation_args_cb_t callback, + int32_t max_deref, + void* user_data); + +template +void +iterate_args(uint32_t id, + const rocprofiler_rccl_api_args_t& data, + rocprofiler_buffer_tracing_operation_args_cb_t callback, + void* user_data); template void diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/utils.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/utils.hpp new file mode 100644 index 00000000000..62f4d111781 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/utils.hpp @@ -0,0 +1,71 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include + +#include "lib/common/mpl.hpp" +#include "lib/common/stringize_arg.hpp" +#include "lib/rocprofiler-sdk/rccl/details/fmt.hpp" + +#include +#include + +#include +#include + +namespace rocprofiler +{ +namespace rccl +{ +namespace utils +{ +template +auto +stringize_impl(const Tp& _v) +{ + using value_type = std::decay_t; + + if constexpr(fmt::is_formattable::value && !std::is_pointer::value) + { + return fmt::format("{}", _v); + } + else + { + auto _ss = std::stringstream{}; + _ss << _v; + return _ss.str(); + } +} + +template +auto +stringize(int32_t max_deref, Args... args) +{ + using array_type = common::stringified_argument_array_t; + return array_type{common::stringize_arg( + max_deref, args, [](const auto& _v) { return stringize_impl(_v); })...}; +} +} // namespace utils +} // namespace rccl +} // namespace rocprofiler From 5f6e2ad8acf106d5fd7e71ce9a221e826046468e Mon Sep 17 00:00:00 2001 From: Mythreya Date: Wed, 25 Jun 2025 02:35:25 +0000 Subject: [PATCH 2/8] update fail regex --- projects/rocprofiler-sdk/cmake/rocprofiler_options.cmake | 2 +- projects/rocprofiler-sdk/samples/common/CMakeLists.txt | 2 +- projects/rocprofiler-sdk/tests/common/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/rocprofiler-sdk/cmake/rocprofiler_options.cmake b/projects/rocprofiler-sdk/cmake/rocprofiler_options.cmake index 08d10326351..cb841dac8c3 100644 --- a/projects/rocprofiler-sdk/cmake/rocprofiler_options.cmake +++ b/projects/rocprofiler-sdk/cmake/rocprofiler_options.cmake @@ -156,7 +156,7 @@ include(rocprofiler_memcheck) # default FAIL_REGULAR_EXPRESSION for tests set(ROCPROFILER_DEFAULT_FAIL_REGEX - "threw an exception|Permission denied|Could not create logging file|failed with error code|Subprocess aborted" + "threw an exception|terminate called after throwing|Permission denied|Could not create logging file|failed with error code|Subprocess aborted" CACHE INTERNAL "Default FAIL_REGULAR_EXPRESSION for tests" FORCE) # this should be defaulted to OFF by ROCm 7.0.1 or 7.1 this should only used to disable diff --git a/projects/rocprofiler-sdk/samples/common/CMakeLists.txt b/projects/rocprofiler-sdk/samples/common/CMakeLists.txt index a8f8a1977b7..eef89c9307f 100644 --- a/projects/rocprofiler-sdk/samples/common/CMakeLists.txt +++ b/projects/rocprofiler-sdk/samples/common/CMakeLists.txt @@ -9,7 +9,7 @@ find_package(libdw REQUIRED) # default FAIL_REGULAR_EXPRESSION for tests set(ROCPROFILER_DEFAULT_FAIL_REGEX - "threw an exception|Permission denied|Could not create logging file|failed with error code|Subprocess aborted" + "threw an exception|terminate called after throwing|Permission denied|Could not create logging file|failed with error code|Subprocess aborted" CACHE INTERNAL "Default FAIL_REGULAR_EXPRESSION for tests") # build flags diff --git a/projects/rocprofiler-sdk/tests/common/CMakeLists.txt b/projects/rocprofiler-sdk/tests/common/CMakeLists.txt index 7e3c26b1d96..eea244c770a 100644 --- a/projects/rocprofiler-sdk/tests/common/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/common/CMakeLists.txt @@ -9,7 +9,7 @@ set(FETCHCONTENT_BASE_DIR ${PROJECT_BINARY_DIR}/external) # default FAIL_REGULAR_EXPRESSION for tests set(ROCPROFILER_DEFAULT_FAIL_REGEX - "threw an exception|Permission denied|Could not create logging file|failed with error code|Subprocess aborted" + "threw an exception|terminate called after throwing|Permission denied|Could not create logging file|failed with error code|Subprocess aborted" CACHE INTERNAL "Default FAIL_REGULAR_EXPRESSION for tests") set(DEFAULT_GPU_TARGETS From a629b5e38ce3ed6981ccbd2c6667bd4e7506bc60 Mon Sep 17 00:00:00 2001 From: Mythreya Date: Wed, 25 Jun 2025 02:39:42 +0000 Subject: [PATCH 3/8] json-tool: add `get_filename` --- .../rocprofiler-sdk/tests/tools/json-tool.cpp | 42 +++++++++++++++++-- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/projects/rocprofiler-sdk/tests/tools/json-tool.cpp b/projects/rocprofiler-sdk/tests/tools/json-tool.cpp index ed5cbf94525..9ab6d1cecda 100644 --- a/projects/rocprofiler-sdk/tests/tools/json-tool.cpp +++ b/projects/rocprofiler-sdk/tests/tools/json-tool.cpp @@ -2213,11 +2213,46 @@ tool_fini(void* tool_data) delete _call_stack; } +std::string +get_filename(std::string fname, std::string extension) +{ + auto ofname = fname + "." + extension; + if(auto* eofname = getenv("ROCPROFILER_TOOL_OUTPUT_FILE")) ofname = eofname; + + if(getenv("ROCPROFILER_TOOL_OUTPUT_FILE_APPEND_PID")) + { + const auto fpath = common::fs::path(ofname); + std::string filename_buf(512, '\0'); + + if(fpath.has_extension()) + { + std::snprintf(filename_buf.data(), + filename_buf.size() - 1, + "%s-%d.%s", + fpath.stem().c_str(), + getpid(), + fpath.extension().c_str()); + } + else + { + std::snprintf(filename_buf.data(), + filename_buf.size() - 1, + "%s-%d.json", + fpath.stem().c_str(), + getpid()); + } + return filename_buf; + } + else + { + return ofname; + } +} + void write_json(call_stack_t* _call_stack) { - auto ofname = std::string{"rocprofiler-tool-results.json"}; - if(auto* eofname = getenv("ROCPROFILER_TOOL_OUTPUT_FILE")) ofname = eofname; + auto ofname = get_filename("rocprofiler-tool-results", "json"); std::ostream* ofs = nullptr; auto cleanup = std::function{}; @@ -2914,8 +2949,7 @@ write_perfetto() if(!trace_data.empty()) { - auto ofname = std::string{"rocprofiler-tool-results.pftrace"}; - if(auto* eofname = getenv("ROCPROFILER_TOOL_OUTPUT_FILE")) ofname = eofname; + auto ofname = get_filename("rocprofiler-tool-results", "pftrace"); auto jpos = ofname.find(".json"); if(jpos != std::string::npos) ofname = ofname.substr(0, jpos) + std::string{".pftrace"}; From 36e56f34aab558f6c1b2ffa93e5dcd57844b1e07 Mon Sep 17 00:00:00 2001 From: Mythreya Date: Wed, 25 Jun 2025 07:12:41 +0000 Subject: [PATCH 4/8] json-tool: add rccl ext records --- .../rocprofiler-sdk/tests/tools/json-tool.cpp | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/projects/rocprofiler-sdk/tests/tools/json-tool.cpp b/projects/rocprofiler-sdk/tests/tools/json-tool.cpp index 9ab6d1cecda..65339af2d27 100644 --- a/projects/rocprofiler-sdk/tests/tools/json-tool.cpp +++ b/projects/rocprofiler-sdk/tests/tools/json-tool.cpp @@ -906,7 +906,7 @@ auto memory_allocation_bf_records = auto scratch_memory_records = std::deque{}; auto corr_id_retire_records = std::deque{}; -auto rccl_api_bf_records = std::deque{}; +auto rccl_api_ext_bf_records = std::deque{}; auto rocdecode_api_bf_records = std::deque{}; auto rocdecode_api_ext_bf_records = std::deque{}; @@ -1023,12 +1023,12 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/, corr_id_retire_records.emplace_back(*record); } - else if(header->kind == ROCPROFILER_BUFFER_TRACING_RCCL_API) + else if(header->kind == ROCPROFILER_BUFFER_TRACING_RCCL_API_EXT) { auto* record = - static_cast(header->payload); + static_cast(header->payload); - rccl_api_bf_records.emplace_back(*record); + rccl_api_ext_bf_records.emplace_back(*record); } else if(header->kind == ROCPROFILER_BUFFER_TRACING_OMPT) { @@ -1214,7 +1214,7 @@ rocprofiler_context_id_t memory_copy_callback_ctx = {0}; rocprofiler_context_id_t memory_copy_buffered_ctx = {0}; rocprofiler_context_id_t memory_allocation_callback_ctx = {0}; rocprofiler_context_id_t memory_allocation_buffered_ctx = {0}; -rocprofiler_context_id_t rccl_api_buffered_ctx = {0}; +rocprofiler_context_id_t rccl_api_ext_buffered_ctx = {0}; rocprofiler_context_id_t ompt_buffered_ctx = {0}; rocprofiler_context_id_t counter_collection_ctx = {0}; rocprofiler_context_id_t scratch_memory_ctx = {0}; @@ -1248,7 +1248,7 @@ rocprofiler_buffer_id_t memory_allocation_buffer = {}; rocprofiler_buffer_id_t counter_collection_buffer = {}; rocprofiler_buffer_id_t scratch_memory_buffer = {}; rocprofiler_buffer_id_t corr_id_retire_buffer = {}; -rocprofiler_buffer_id_t rccl_api_buffered_buffer = {}; +rocprofiler_buffer_id_t rccl_api_ext_buffered_buffer = {}; rocprofiler_buffer_id_t rocdecode_api_buffer = {}; rocprofiler_buffer_id_t rocdecode_api_ext_buffer = {}; rocprofiler_buffer_id_t rocjpeg_api_buffer = {}; @@ -1283,7 +1283,7 @@ auto contexts = std::unordered_map{ {"COUNTER_COLLECTION", &counter_collection_ctx}, {"SCRATCH_MEMORY", &scratch_memory_ctx}, {"CORRELATION_ID_RETIREMENT", &corr_id_retire_ctx}, - {"RCCL_API_BUFFERED", &rccl_api_buffered_ctx}, + {"RCCL_API_EXT_BUFFERED", &rccl_api_ext_buffered_ctx}, {"ROCDECODE_API_CALLBACK", &rocdecode_api_callback_ctx}, {"ROCDECODE_API_BUFFERED", &rocdecode_api_buffered_ctx}, {"ROCDECODE_API_EXT_BUFFERED", &rocdecode_api_ext_buffered_ctx}, @@ -1310,7 +1310,7 @@ auto buffers = std::array{&runtime_init_buffered_b &scratch_memory_buffer, &counter_collection_buffer, &corr_id_retire_buffer, - &rccl_api_buffered_buffer, + &rccl_api_ext_buffered_buffer, &ompt_buffered_buffer, &rocdecode_api_buffer, &rocdecode_api_ext_buffer, @@ -1625,13 +1625,13 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) &counter_collection_buffer), "buffer creation"); - ROCPROFILER_CALL(rocprofiler_create_buffer(rccl_api_buffered_ctx, + ROCPROFILER_CALL(rocprofiler_create_buffer(rccl_api_ext_buffered_ctx, buffer_size, watermark, ROCPROFILER_BUFFER_POLICY_LOSSLESS, tool_tracing_buffered, tool_data, - &rccl_api_buffered_buffer), + &rccl_api_ext_buffered_buffer), "buffer creation"); ROCPROFILER_CALL(rocprofiler_create_buffer(rocdecode_api_buffered_ctx, @@ -1984,11 +1984,11 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) "buffer tracing service for memory copy configure"); ROCPROFILER_CALL( - rocprofiler_configure_buffer_tracing_service(rccl_api_buffered_ctx, - ROCPROFILER_BUFFER_TRACING_RCCL_API, + rocprofiler_configure_buffer_tracing_service(rccl_api_ext_buffered_ctx, + ROCPROFILER_BUFFER_TRACING_RCCL_API_EXT, nullptr, 0, - rccl_api_buffered_buffer), + rccl_api_ext_buffered_buffer), "buffer tracing service for rccl api configure"); ROCPROFILER_CALL( @@ -2188,7 +2188,7 @@ tool_fini(void* tool_data) << ", hip_api_bf_records=" << hip_api_bf_records.size() << ", marker_api_bf_records=" << marker_api_bf_records.size() << ", corr_id_retire_records=" << corr_id_retire_records.size() - << ", rccl_api_bf_records=" << rccl_api_bf_records.size() + << ", rccl_api_ext_bf_records=" << rccl_api_ext_bf_records.size() << ", ompt_bf_records=" << ompt_bf_records.size() << ", counter_collection_value_records=" << counter_collection_bf_records.size() << ", rocdecode_api_callback_records=" << rocdecode_api_cb_records.size() @@ -2358,7 +2358,7 @@ write_json(call_stack_t* _call_stack) json_ar(cereal::make_nvp("hsa_api_traces", hsa_api_bf_records)); json_ar(cereal::make_nvp("hip_api_traces", hip_api_bf_records)); json_ar(cereal::make_nvp("marker_api_traces", marker_api_bf_records)); - json_ar(cereal::make_nvp("rccl_api_traces", rccl_api_bf_records)); + json_ar(cereal::make_nvp("rccl_api_traces", rccl_api_ext_bf_records)); json_ar(cereal::make_nvp("ompt_traces", ompt_bf_records)); json_ar(cereal::make_nvp("retired_correlation_ids", corr_id_retire_records)); json_ar(cereal::make_nvp("counter_collection", counter_collection_bf_records)); @@ -2437,7 +2437,7 @@ write_perfetto() tids.emplace(itr.thread_id); for(const auto& itr : marker_api_bf_records) tids.emplace(itr.thread_id); - for(const auto& itr : rccl_api_bf_records) + for(const auto& itr : rccl_api_ext_bf_records) tids.emplace(itr.thread_id); for(const auto& itr : ompt_bf_records) tids.emplace(itr.thread_id); @@ -2671,7 +2671,7 @@ write_perfetto() itr.end_timestamp); } - for(const auto& itr : rccl_api_bf_records) + for(const auto& itr : rccl_api_ext_bf_records) { auto name = buffer_names.at(itr.kind, itr.operation); auto& track = thread_tracks.at(itr.thread_id); From e6d37e7b5a2933d561c387edc9cd58b104a05ce1 Mon Sep 17 00:00:00 2001 From: Mythreya Date: Wed, 25 Jun 2025 07:10:32 +0000 Subject: [PATCH 5/8] add `rocprofiler_add_test` --- projects/rocprofiler-sdk/.cmake-format.yaml | 19 +++++ .../tests/common/CMakeLists.txt | 76 +++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/projects/rocprofiler-sdk/.cmake-format.yaml b/projects/rocprofiler-sdk/.cmake-format.yaml index 4e40836786a..7217077aa9e 100644 --- a/projects/rocprofiler-sdk/.cmake-format.yaml +++ b/projects/rocprofiler-sdk/.cmake-format.yaml @@ -262,6 +262,25 @@ parse: DIRECTORY: '*' TARGET: '*' SOURCE: '*' + rocprofiler_add_test: + flags: + - WILL_FAIL + kwargs: + NAME: 1 + TARGET: 1 + WORKING_DIRECTORY: 1 + DEPENDS: 1 + TIMEOUT: 1 + LABELS: 1 + DISABLED: 1 + ARGS: '*' + COMMAND: '*' + ATTACHED_FILES: '*' + ATTACHED_FILES_ON_FAIL: '*' + ENVIRONMENT: '*' + PASS_REGULAR_EXPRESSION: '*' + FAIL_REGULAR_EXPRESSION: '*' + SKIP_REGULAR_EXPRESSION: '*' override_spec: {} vartags: [] proptags: [] diff --git a/projects/rocprofiler-sdk/tests/common/CMakeLists.txt b/projects/rocprofiler-sdk/tests/common/CMakeLists.txt index eea244c770a..c5cc7e362b8 100644 --- a/projects/rocprofiler-sdk/tests/common/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/common/CMakeLists.txt @@ -180,3 +180,79 @@ function(rocprofiler_configure_pytest_files) ${RCPF_OUTPUT_DIRECTORY}/${FILENAME} @ONLY) endforeach() endfunction() + +function(rocprofiler_add_test) + cmake_parse_arguments( + arg + "WILL_FAIL" # + "NAME;TARGET;WORKING_DIRECTORY;DEPENDS;TIMEOUT;LABELS;DISABLED" # + "ARGS;COMMAND;ATTACHED_FILES;ATTACHED_FILES_ON_FAIL;ENVIRONMENT;PASS_REGULAR_EXPRESSION;FAIL_REGULAR_EXPRESSION;SKIP_REGULAR_EXPRESSION" # + ${ARGN}) + + # Helper to set a property only if the arg is set + function(_rocprofiler_set_test_property_if_arg TEST_NAME PROPERTY_NAME) + if(DEFINED "arg_${PROPERTY_NAME}") + set_tests_properties(${TEST_NAME} PROPERTIES ${PROPERTY_NAME} + "${arg_${PROPERTY_NAME}}") + endif() + endfunction() + + if(DEFINED arg_COMMAND AND DEFINED arg_TARGET) + message(SEND_ERROR "Cannot specify both TARGET and COMMAND arguments at once") + return() + elseif(NOT DEFINED arg_COMMAND AND NOT DEFINED arg_TARGET) + message(SEND_ERROR "One of COMMAND or TARGET must be specified") + return() + endif() + + if(DEFINED arg_COMMAND AND NOT DEFINED arg_DEPENDS) + message(SEND_ERROR "COMMAND signature without DEPENDS specified") + + elseif( + DEFINED arg_COMMAND + AND DEFINED arg_DEPENDS + AND TARGET ${arg_DEPENDS}) + + list(JOIN arg_COMMAND " " joined_CMD) + + add_test(NAME "${arg_NAME}" COMMAND ${arg_COMMAND}) + _rocprofiler_set_test_property_if_arg(${arg_NAME} TIMEOUT) + _rocprofiler_set_test_property_if_arg(${arg_NAME} LABELS) + _rocprofiler_set_test_property_if_arg(${arg_NAME} WORKING_DIRECTORY) + _rocprofiler_set_test_property_if_arg(${arg_NAME} ENVIRONMENT) + _rocprofiler_set_test_property_if_arg(${arg_NAME} PASS_REGULAR_EXPRESSION) + _rocprofiler_set_test_property_if_arg(${arg_NAME} FAIL_REGULAR_EXPRESSION) + _rocprofiler_set_test_property_if_arg(${arg_NAME} SKIP_REGULAR_EXPRESSION) + _rocprofiler_set_test_property_if_arg(${arg_NAME} ATTACHED_FILES) + _rocprofiler_set_test_property_if_arg(${arg_NAME} ATTACHED_FILES_ON_FAIL) + _rocprofiler_set_test_property_if_arg(${arg_NAME} DEPENDS) + _rocprofiler_set_test_property_if_arg(${arg_NAME} DISABLED) + + elseif(DEFINED arg_TARGET AND TARGET ${arg_TARGET}) + list(JOIN arg_ARGS " " joined_ARGS) + + add_test(NAME "${arg_NAME}" COMMAND $ ${arg_ARGS}) + _rocprofiler_set_test_property_if_arg(${arg_NAME} TIMEOUT) + _rocprofiler_set_test_property_if_arg(${arg_NAME} LABELS) + _rocprofiler_set_test_property_if_arg(${arg_NAME} WORKING_DIRECTORY) + _rocprofiler_set_test_property_if_arg(${arg_NAME} ENVIRONMENT) + _rocprofiler_set_test_property_if_arg(${arg_NAME} PASS_REGULAR_EXPRESSION) + _rocprofiler_set_test_property_if_arg(${arg_NAME} FAIL_REGULAR_EXPRESSION) + _rocprofiler_set_test_property_if_arg(${arg_NAME} SKIP_REGULAR_EXPRESSION) + _rocprofiler_set_test_property_if_arg(${arg_NAME} ATTACHED_FILES) + _rocprofiler_set_test_property_if_arg(${arg_NAME} ATTACHED_FILES_ON_FAIL) + _rocprofiler_set_test_property_if_arg(${arg_NAME} DEPENDS) + _rocprofiler_set_test_property_if_arg(${arg_NAME} DISABLED) + else() + add_test( + NAME "${arg_NAME}" + COMMAND + ${CMAKE_COMMAND} -E echo + "'${arg_TARGET}' with args '${arg_ARGS}' could not be executed because the target does not exist" + ) + + # If this target is accidentally run, report it as a failed test + set_tests_properties("${arg_NAME}" PROPERTIES FAIL_REGULAR_EXPRESSION ".*" + DISABLED ${arg_DISABLED}) + endif() +endfunction() From d3d8e1e90e6bb1ba55fdc553b2960cc9279290ee Mon Sep 17 00:00:00 2001 From: Mythreya Date: Wed, 25 Jun 2025 08:08:01 +0000 Subject: [PATCH 6/8] Add tests --- projects/rocprofiler-sdk/tests/CMakeLists.txt | 1 + .../rocprofiler-sdk/tests/bin/CMakeLists.txt | 1 + .../tests/bin/rccl/CMakeLists.txt | 8 + .../bin/rccl/single-process/CMakeLists.txt | 37 ++ .../tests/bin/rccl/single-process/main.cpp | 173 ++++++++ .../rocprofiler-sdk/tests/rccl/CMakeLists.txt | 72 ++++ .../rocprofiler-sdk/tests/rccl/conftest.py | 32 ++ .../rocprofiler-sdk/tests/rccl/pytest.ini | 5 + .../tests/rccl/validate-single-process.py | 403 ++++++++++++++++++ .../tests/rocprofv3/CMakeLists.txt | 1 + .../tests/rocprofv3/rccl-trace/CMakeLists.txt | 70 +++ .../tests/rocprofv3/rccl-trace/conftest.py | 54 +++ .../tests/rocprofv3/rccl-trace/pytest.ini | 4 + .../rccl-trace/validate-single-process.py | 181 ++++++++ 14 files changed, 1042 insertions(+) create mode 100644 projects/rocprofiler-sdk/tests/bin/rccl/CMakeLists.txt create mode 100644 projects/rocprofiler-sdk/tests/bin/rccl/single-process/CMakeLists.txt create mode 100644 projects/rocprofiler-sdk/tests/bin/rccl/single-process/main.cpp create mode 100644 projects/rocprofiler-sdk/tests/rccl/CMakeLists.txt create mode 100644 projects/rocprofiler-sdk/tests/rccl/conftest.py create mode 100644 projects/rocprofiler-sdk/tests/rccl/pytest.ini create mode 100644 projects/rocprofiler-sdk/tests/rccl/validate-single-process.py create mode 100644 projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/CMakeLists.txt create mode 100644 projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/conftest.py create mode 100644 projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/pytest.ini create mode 100644 projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/validate-single-process.py diff --git a/projects/rocprofiler-sdk/tests/CMakeLists.txt b/projects/rocprofiler-sdk/tests/CMakeLists.txt index dcdc3f410e8..f93e4c63b86 100644 --- a/projects/rocprofiler-sdk/tests/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/CMakeLists.txt @@ -82,6 +82,7 @@ add_subdirectory(counter-collection) add_subdirectory(openmp-tools) add_subdirectory(rocdecode) add_subdirectory(rocjpeg) +add_subdirectory(rccl) # rocpd validation tests add_subdirectory(rocpd) diff --git a/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt index 9f50f8c0144..e575640f1fc 100644 --- a/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt @@ -40,3 +40,4 @@ endif() add_subdirectory(hsa-code-object) add_subdirectory(hip-streams) add_subdirectory(hip-streams-per-thread) +add_subdirectory(rccl) diff --git a/projects/rocprofiler-sdk/tests/bin/rccl/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/rccl/CMakeLists.txt new file mode 100644 index 00000000000..7cce6623522 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/rccl/CMakeLists.txt @@ -0,0 +1,8 @@ +find_package(rccl) +find_package(Threads REQUIRED) + +if(rccl_FOUND) + add_subdirectory(single-process) +else() + message(STATUS "RCCL package was not found, not building RCCL tests") +endif() diff --git a/projects/rocprofiler-sdk/tests/bin/rccl/single-process/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/rccl/single-process/CMakeLists.txt new file mode 100644 index 00000000000..7e777c07536 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/rccl/single-process/CMakeLists.txt @@ -0,0 +1,37 @@ +# +# +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +if(NOT CMAKE_HIP_COMPILER) + find_program( + amdclangpp_EXECUTABLE + NAMES amdclang++ + HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATH_SUFFIXES bin llvm/bin NO_CACHE) + mark_as_advanced(amdclangpp_EXECUTABLE) + + if(amdclangpp_EXECUTABLE) + set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}") + endif() +endif() + +project(rocprofiler-tests-bin-rccl LANGUAGES CXX HIP) + +foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO) + if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "") + set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}") + endif() +endforeach() + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_HIP_STANDARD 17) +set(CMAKE_HIP_EXTENSIONS OFF) +set(CMAKE_HIP_STANDARD_REQUIRED ON) + +set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) +add_executable(rccl-single-process "main.cpp") +target_link_libraries(rccl-single-process PRIVATE rccl Threads::Threads) diff --git a/projects/rocprofiler-sdk/tests/bin/rccl/single-process/main.cpp b/projects/rocprofiler-sdk/tests/bin/rccl/single-process/main.cpp new file mode 100644 index 00000000000..b54f7150f10 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/rccl/single-process/main.cpp @@ -0,0 +1,173 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +template +void +TEST_EXPECT(T&& arg, std::string_view message) +{ + if(!arg) + { + std::cerr << "Error: " << message << " (" + << "\n"; + } +} + +#define HIPCHECK(cmd) \ + do \ + { \ + hipError_t err = cmd; \ + if(err != hipSuccess) \ + { \ + printf("Failed: HIP error %s:%d '%s'\n", __FILE__, __LINE__, hipGetErrorString(err)); \ + exit(EXIT_FAILURE); \ + } \ + } while(0) + +#define NCCLCHECK(cmd) \ + do \ + { \ + ncclResult_t res = cmd; \ + if(res != ncclSuccess) \ + { \ + printf( \ + "Failed, NCCL error %s:%d '%s'\n", __FILE__, __LINE__, ncclGetErrorString(res)); \ + exit(EXIT_FAILURE); \ + } \ + } while(0) + +int +main(int argc, const char* argv[]) +{ + if(argc != 2) + { + fprintf(stderr, "Usage: %s allocation size in MiB\n", argv[0]); + exit(EXIT_FAILURE); + } + + const size_t alloc_size = std::atoll(argv[1]) * 1024UL * 1024UL; + size_t nelems = alloc_size / sizeof(float); + printf("%s: Allocating %lu Bytes (%lu elements)\n", argv[0], alloc_size, nelems); + + int device_count{}; + HIPCHECK(hipGetDeviceCount(&device_count)); + TEST_EXPECT(device_count != 0, "Device count is zero"); + + for(int i = 0; i < device_count; ++i) + { + hipDeviceProp_t props{}; + HIPCHECK(hipGetDeviceProperties(&props, i)); + + printf("GFX arch: '%s'\n", props.gcnArchName); + + if(std::string_view{props.gcnArchName} == std::string_view{"gfx906"}) + { + printf("SKIP - %s\n", props.gcnArchName); + return 0; + } + } + + std::vector devs(device_count); + std::iota(devs.begin(), devs.end(), 0); // 0, 1, 2, 3 ... + for(uint32_t i = 0; i < devs.size(); ++i) + { + printf("dev[%d]: %d\n", i, devs[i]); + } + + std::vector comms(device_count); + std::vector streams(device_count); + + // allocating and initializing device buffers + std::vector sendbuff(device_count); + std::vector recvbuff(device_count); + + for(int i = 0; i < device_count; ++i) + { + HIPCHECK(hipSetDevice(i)); + HIPCHECK(hipMalloc(&sendbuff[i], alloc_size)); + HIPCHECK(hipMalloc(&recvbuff[i], alloc_size)); + HIPCHECK(hipMemset(sendbuff[i], 1, alloc_size)); + HIPCHECK(hipMemset(recvbuff[i], 0, alloc_size)); + HIPCHECK(hipStreamCreate(&streams[i])); + } + + // initializing NCCL + NCCLCHECK(ncclCommInitAll(comms.data(), device_count, devs.data())); + + // calling NCCL communication API. Group API is required when using + // multiple devices per thread + { + NCCLCHECK(ncclGroupStart()); + for(int i = 0; i < device_count; ++i) + { + NCCLCHECK(ncclAllReduce((const void*) sendbuff[i], + (void*) recvbuff[i], + nelems, + ncclFloat, + ncclSum, + comms[i], + streams[i])); + } + NCCLCHECK(ncclGroupEnd()); + } + + // synchronizing on CUDA streams to wait for completion of NCCL operation + for(int i = 0; i < device_count; ++i) + { + HIPCHECK(hipSetDevice(i)); + HIPCHECK(hipStreamSynchronize(streams[i])); + } + + // free device buffers + for(int i = 0; i < device_count; ++i) + { + HIPCHECK(hipSetDevice(i)); + HIPCHECK(hipFree(sendbuff[i])); + HIPCHECK(hipFree(recvbuff[i])); + } + + // finalizing NCCL + for(int i = 0; i < device_count; ++i) + { + ncclCommDestroy(comms[i]); + } + + for(int i = 0; i < device_count; ++i) + { + HIPCHECK(hipSetDevice(i)); + HIPCHECK(hipStreamSynchronize(streams[i])); + } + + printf("Success \n"); + return 0; +} diff --git a/projects/rocprofiler-sdk/tests/rccl/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rccl/CMakeLists.txt new file mode 100644 index 00000000000..a3891e4b14e --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rccl/CMakeLists.txt @@ -0,0 +1,72 @@ +# +# +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +project( + rocprofiler-tests-rccl-tracing + LANGUAGES CXX + VERSION 0.0.0) + +find_package(rocprofiler-sdk REQUIRED) +find_package(rccl) + +# copy to binary directory +rocprofiler_configure_pytest_files(COPY validate-single-process.py conftest.py + CONFIG pytest.ini) + +if(ROCPROFILER_MEMCHECK_PRELOAD_ENV) + set(PRELOAD_ENV + "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}:$") +else() + set(PRELOAD_ENV "LD_PRELOAD=$") +endif() + +set(UNSUPPORTED_GFX OFF) +# disable when system has a vega gpu +if("gfx906" IN_LIST rocprofiler-sdk-tests-gfx-info) + set(UNSUPPORTED_GFX ON) +endif() + +find_program(BASH_EXE "bash" REQUIRED) + +set(rccl-tracing-env + "${PRELOAD_ENV}" + "ROCPROFILER_TOOL_OUTPUT_FILE=rccl-tracing-test.json" + "ROCPROFILER_TOOL_CONTEXTS_EXCLUDE=HSA_API_CALLBACK:HSA_API_BUFFERED" + "ROCPROFILER_DISABLE_PERFETTO_ANNOTATIONS=1" + "LD_LIBRARY_PATH=$:$ENV{LD_LIBRARY_PATH}" + ) + +rocprofiler_add_test( + NAME test-rccl-tracing-single-process-execute + TARGET rccl-single-process + ARGS 64 + TIMEOUT 240 + LABELS "integration-tests" + ENVIRONMENT "${rccl-tracing-env}" + FAIL_REGULAR_EXPRESSION ${ROCPROFILER_DEFAULT_FAIL_REGEX} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DISABLED $,$>) + +add_test( + NAME test-rccl-tracing-single-process-validate + COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate-single-process.py + --input ${CMAKE_CURRENT_BINARY_DIR}/rccl-tracing-test.json) + +set_tests_properties( + test-rccl-tracing-single-process-validate + PROPERTIES TIMEOUT + 240 + LABELS + "integration-tests" + DEPENDS + test-rccl-tracing-single-process-execute + FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}" + WORKING_DIRECTORY + ${CMAKE_CURRENT_BINARY_DIR} + ATTACHED_FILES_ON_FAIL + ${CMAKE_CURRENT_BINARY_DIR}/rccl-tracing-test.json + DISABLED + $,$>) diff --git a/projects/rocprofiler-sdk/tests/rccl/conftest.py b/projects/rocprofiler-sdk/tests/rccl/conftest.py new file mode 100644 index 00000000000..ef3562b2f5d --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rccl/conftest.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +import json +import glob +import pytest + +from rocprofiler_sdk.pytest_utils.dotdict import dotdict +from rocprofiler_sdk.pytest_utils import collapse_dict_list + + +def pytest_addoption(parser): + parser.addoption( + "--input", + action="store", + default="rccl-tracing-test.json", + help="Input JSON", + ) + parser.addoption( + "--input-dir", + action="store", + help="Input JSON directory", + ) + + +@pytest.fixture +def input_data(request): + filename = request.config.getoption("--input") + data = None + with open(filename, "r") as inp: + data = json.load(inp) + + return data diff --git a/projects/rocprofiler-sdk/tests/rccl/pytest.ini b/projects/rocprofiler-sdk/tests/rccl/pytest.ini new file mode 100644 index 00000000000..99d19a96f35 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rccl/pytest.ini @@ -0,0 +1,5 @@ + +[pytest] +addopts = --durations=20 -rA -s -vv +testpaths = [ validate-single-process.py ] +pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages diff --git a/projects/rocprofiler-sdk/tests/rccl/validate-single-process.py b/projects/rocprofiler-sdk/tests/rccl/validate-single-process.py new file mode 100644 index 00000000000..2719a906ee6 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rccl/validate-single-process.py @@ -0,0 +1,403 @@ +#!/usr/bin/env python3 + +from collections import defaultdict +import os +import sys +import pytest + + +# helper function +def node_exists(name, data, min_len=1): + assert name in data + assert data[name] is not None + if isinstance(data[name], (list, tuple, dict, set)): + assert len(data[name]) >= min_len + + +def to_dict(key_values): + a = defaultdict() + for kv in key_values: + a[kv["key"]] = kv["value"] + return a + + +def get_operation(record, kind_name, op_name=None): + for idx, itr in enumerate(record["names"]): + if kind_name == itr["kind"]: + if op_name is None: + return idx, itr["operations"] + else: + for oidx, oname in enumerate(itr["operations"]): + if op_name == oname: + return oidx + + return None + + +def dict_from_value_key(d): + ret_d = defaultdict() + + for k, v in d.items(): + assert v not in ret_d + ret_d[v] = k + return ret_d + + +def sort_by_timestamp(lines): + timestamp_line_map = {} + + for log_line in lines: + timestamp = log_line.split(" ")[1] + timestamp_line_map[timestamp] = log_line + + timestamps_sorted = sorted([l.split(" ")[1] for l in lines]) + return timestamps_sorted, timestamp_line_map + + +# ------------------------------ Tests ------------------------------ # + + +def test_data_structure(input_data): + """verify minimum amount of expected data is present""" + data = input_data + + node_exists("rocprofiler-sdk-json-tool", data) + + sdk_data = data["rocprofiler-sdk-json-tool"] + + node_exists("metadata", sdk_data) + node_exists("pid", sdk_data["metadata"]) + node_exists("main_tid", sdk_data["metadata"]) + node_exists("init_time", sdk_data["metadata"]) + node_exists("fini_time", sdk_data["metadata"]) + + node_exists("agents", sdk_data) + node_exists("call_stack", sdk_data) + node_exists("callback_records", sdk_data) + node_exists("buffer_records", sdk_data) + + node_exists("names", sdk_data["callback_records"]) + node_exists("code_objects", sdk_data["callback_records"]) + node_exists("kernel_symbols", sdk_data["callback_records"]) + # Disabled for rccl + # node_exists("hsa_api_traces", sdk_data["callback_records"]) + node_exists("hip_api_traces", sdk_data["callback_records"], 0) + node_exists("marker_api_traces", sdk_data["callback_records"], 0) + node_exists("rccl_api_traces", sdk_data["callback_records"], 0) + + node_exists("names", sdk_data["buffer_records"]) + node_exists("kernel_dispatch", sdk_data["buffer_records"]) + node_exists("memory_copies", sdk_data["buffer_records"], 0) + # Disabled for rccl + # node_exists("hsa_api_traces", sdk_data["buffer_records"]) + node_exists("hip_api_traces", sdk_data["buffer_records"], 0) + node_exists("marker_api_traces", sdk_data["buffer_records"], 0) + node_exists("rccl_api_traces", sdk_data["buffer_records"], 0) + node_exists("retired_correlation_ids", sdk_data["buffer_records"]) + + +def test_timestamps(input_data): + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + cb_start = {} + cb_end = {} + for titr in [ + "hsa_api_traces", + "marker_api_traces", + "hip_api_traces", + "rccl_api_traces", + ]: + for itr in sdk_data["callback_records"][titr]: + cid = itr["correlation_id"]["internal"] + phase = itr["phase"] + if phase == 1: + cb_start[cid] = itr["timestamp"] + elif phase == 2: + cb_end[cid] = itr["timestamp"] + assert cb_start[cid] <= itr["timestamp"] + else: + assert phase == 1 or phase == 2 + + for itr in sdk_data["buffer_records"][titr]: + assert itr["start_timestamp"] <= itr["end_timestamp"] + + for titr in ["kernel_dispatch", "memory_copies"]: + for itr in sdk_data["buffer_records"][titr]: + assert itr["start_timestamp"] < itr["end_timestamp"] + assert itr["correlation_id"]["internal"] > 0 + assert itr["correlation_id"]["external"] > 0 + assert sdk_data["metadata"]["init_time"] < itr["start_timestamp"] + assert sdk_data["metadata"]["init_time"] < itr["end_timestamp"] + assert sdk_data["metadata"]["fini_time"] > itr["start_timestamp"] + assert sdk_data["metadata"]["fini_time"] > itr["end_timestamp"] + + # api_start = cb_start[itr["correlation_id"]["internal"]] + # api_end = cb_end[itr["correlation_id"]["internal"]] + # assert api_start < itr["start_timestamp"] + # assert api_end <= itr["end_timestamp"] + + +def test_internal_correlation_ids(input_data): + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + api_corr_ids = [] + for titr in [ + # "hsa_api_traces", + "marker_api_traces", + "hip_api_traces", + "rccl_api_traces", + ]: + for itr in sdk_data["callback_records"][titr]: + api_corr_ids.append(itr["correlation_id"]["internal"]) + + for itr in sdk_data["buffer_records"][titr]: + api_corr_ids.append(itr["correlation_id"]["internal"]) + + api_corr_ids_sorted = sorted(api_corr_ids) + api_corr_ids_unique = list(set(api_corr_ids)) + + for itr in sdk_data["buffer_records"]["kernel_dispatch"]: + assert itr["correlation_id"]["internal"] in api_corr_ids_unique + + for itr in sdk_data["buffer_records"]["memory_copies"]: + assert itr["correlation_id"]["internal"] in api_corr_ids_unique + + for itr in sdk_data["buffer_records"]["memory_allocations"]: + assert itr["correlation_id"]["internal"] in api_corr_ids_unique + + len_corr_id_unq = len(api_corr_ids_unique) + assert len(api_corr_ids) != len_corr_id_unq + assert max(api_corr_ids_sorted) == len_corr_id_unq + + +def test_external_correlation_ids(input_data): + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + extern_corr_ids = [] + for titr in [ + # "hsa_api_traces", + "marker_api_traces", + "hip_api_traces", + "rccl_api_traces", + ]: + for itr in sdk_data["callback_records"][titr]: + assert itr["correlation_id"]["external"] > 0 + assert itr["thread_id"] == itr["correlation_id"]["external"] + extern_corr_ids.append(itr["correlation_id"]["external"]) + + extern_corr_ids = list(set(sorted(extern_corr_ids))) + for titr in [ + # "hsa_api_traces", + "marker_api_traces", + "hip_api_traces", + "rccl_api_traces", + ]: + for itr in sdk_data["buffer_records"][titr]: + assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}" + assert ( + itr["thread_id"] == itr["correlation_id"]["external"] + ), f"[{titr}] {itr}" + assert itr["thread_id"] in extern_corr_ids, f"[{titr}] {itr}" + assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}" + + for titr in ["kernel_dispatch", "memory_copies", "memory_allocations"]: + for itr in sdk_data["buffer_records"][titr]: + assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}" + assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}" + + for itr in sdk_data["callback_records"][titr]: + assert itr["correlation_id"]["external"] > 0, f"[{titr}] {itr}" + assert itr["correlation_id"]["external"] in extern_corr_ids, f"[{titr}] {itr}" + + +def test_kernel_ids(input_data): + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + symbol_info = {} + for itr in sdk_data["callback_records"]["kernel_symbols"]: + phase = itr["phase"] + payload = itr["payload"] + kern_id = payload["kernel_id"] + + assert phase == 1 or phase == 2 + assert kern_id > 0 + if phase == 1: + assert len(payload["kernel_name"]) > 0 + symbol_info[kern_id] = payload + elif phase == 2: + assert payload["kernel_id"] in symbol_info.keys() + assert payload["kernel_name"] == symbol_info[kern_id]["kernel_name"] + + for itr in sdk_data["buffer_records"]["kernel_dispatch"]: + assert itr["dispatch_info"]["kernel_id"] in symbol_info.keys() + + for itr in sdk_data["callback_records"]["kernel_dispatch"]: + assert itr["payload"]["dispatch_info"]["kernel_id"] in symbol_info.keys() + + +def test_rccl_sp_api_traces(input_data): + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + gpu_count = len([x for x in sdk_data["agents"] if x["type"] == 2]) + + callback_records = sdk_data["callback_records"] + buffer_records = sdk_data["buffer_records"] + + rccl_bf_traces = sdk_data["buffer_records"]["rccl_api_traces"] + rccl_api_bf_ops = get_operation(buffer_records, "RCCL_API_EXT") + assert len(rccl_api_bf_ops[1]) == 38 + + rccl_cb_traces = sdk_data["callback_records"]["rccl_api_traces"] + rccl_api_cb_ops = get_operation(callback_records, "RCCL_API") + + assert rccl_api_bf_ops[1] == rccl_api_cb_ops[1] and len(rccl_api_cb_ops[1]) == 38 + + # check that buffer and callback records agree + phase_enter_count = 0 + phase_end_count = 0 + + api_calls = [] + + for api_call in rccl_cb_traces: + if api_call["phase"] == 1: + phase_enter_count += 1 + api_calls.append(rccl_api_cb_ops[1][api_call["operation"]]) + if api_call["phase"] == 2: + phase_end_count += 1 + + assert phase_enter_count == phase_end_count == len(rccl_bf_traces) + + for call in [ + "ncclCommInitAll", + "ncclGetUniqueId", + "ncclGroupStart", + "ncclGroupEnd", + "ncclAllReduce", + "ncclCommDestroy", + ]: + assert call in api_calls + + # check for buffer args + # these checks must be in sync with json tool test's validate + alloc_size = 64 * 1024 * 1024 # 64MB in the test + elem_count = alloc_size // 4 # sizeof(float) + + def validate_nccl_count(value): + assert int(value) == elem_count, f"Expected {elem_count} elements, got {value}" + + def validate_nccl_dtype(value): + assert value == "7" # ncclFloat = 7 + + def validate_nccl_op(value): + assert value == "0" # ncclSum = 0 + + for record in rccl_bf_traces: + op_name = rccl_api_bf_ops[1][record["operation"]] + + if op_name == "ncclAllReduce": + checked_args = 0 + + for arg in record["args"]: + + if arg["name"] == "count": + checked_args += 1 + validate_nccl_count(arg["value"]) + + if arg["name"] == "datatype": + checked_args += 1 + validate_nccl_dtype(arg["value"]) + + if arg["name"] == "op": + checked_args += 1 + validate_nccl_op(arg["value"]) + + assert ( + checked_args == 3 + ), f"Expected to validate 3 args, found only {checked_args}" + + # check cakkback records args + for record in rccl_cb_traces: + op_name = rccl_api_cb_ops[1][record["operation"]] + + if op_name == "ncclAllReduce" and record["phase"] == 2: + checked_args = 0 + + for arg_name, value in record["args"].items(): + + if arg_name == "count": + checked_args += 1 + validate_nccl_count(value) + + if arg_name == "datatype": + checked_args += 1 + validate_nccl_dtype(value) + + if arg_name == "op": + checked_args += 1 + validate_nccl_op(value) + + assert ( + checked_args == 3 + ), f"Expected to validate 3 args, found only {checked_args}" + + +@pytest.mark.skip("Temporarily disabled") +def test_retired_correlation_ids(input_data): + data = input_data + sdk_data = data["rocprofiler-sdk-json-tool"] + + def _sort_dict(inp): + return dict(sorted(inp.items())) + + api_corr_ids = {} + for titr in [ + # "hsa_api_traces", + "marker_api_traces", + "hip_api_traces", + "rccl_api_traces", + ]: + for itr in sdk_data["buffer_records"][titr]: + corr_id = itr["correlation_id"]["internal"] + assert corr_id not in api_corr_ids.keys() + api_corr_ids[corr_id] = itr + + async_corr_ids = {} + for titr in ["kernel_dispatch", "memory_copies", "memory_allocation"]: + for itr in sdk_data["buffer_records"][titr]: + corr_id = itr["correlation_id"]["internal"] + assert corr_id not in async_corr_ids.keys() + async_corr_ids[corr_id] = itr + + retired_corr_ids = {} + for itr in sdk_data["buffer_records"]["retired_correlation_ids"]: + corr_id = itr["internal_correlation_id"] + assert corr_id not in retired_corr_ids.keys() + retired_corr_ids[corr_id] = itr + + api_corr_ids = _sort_dict(api_corr_ids) + async_corr_ids = _sort_dict(async_corr_ids) + retired_corr_ids = _sort_dict(retired_corr_ids) + + for cid, itr in async_corr_ids.items(): + assert cid in retired_corr_ids.keys() + retired_ts = retired_corr_ids[cid]["timestamp"] + end_ts = itr["end_timestamp"] + assert (retired_ts - end_ts) > 0, f"correlation-id: {cid}, data: {itr}" + + for cid, itr in api_corr_ids.items(): + assert cid in retired_corr_ids.keys() + retired_ts = retired_corr_ids[cid]["timestamp"] + end_ts = itr["end_timestamp"] + assert (retired_ts - end_ts) > 0, f"correlation-id: {cid}, data: {itr}" + + assert len(api_corr_ids.keys()) == (len(retired_corr_ids.keys())) + + +if __name__ == "__main__": + exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) + sys.exit(exit_code) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt index bea39c9d9fd..a78c6a7343f 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt @@ -47,3 +47,4 @@ add_subdirectory(minimum-bytes) add_subdirectory(conversion-script) add_subdirectory(python-bindings) add_subdirectory(rocpd) +add_subdirectory(rccl-trace) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/CMakeLists.txt new file mode 100644 index 00000000000..6f75cba5a07 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/CMakeLists.txt @@ -0,0 +1,70 @@ +# +# rocprofv3 tool test +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +project( + rocprofiler-tests-rocprofv3-rccl-tracing + LANGUAGES CXX + VERSION 0.0.0) + +find_package(rocprofiler-sdk REQUIRED) +find_package(rccl) + +rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate-single-process.py + conftest.py) + +string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV + "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}") + +set(rccl-tracing-env "${PRELOAD_ENV}") + +set(UNSUPPORTED_GFX OFF) +# disable when system has a vega gpu +if("gfx906" IN_LIST rocprofiler-sdk-tests-gfx-info) + set(UNSUPPORTED_GFX ON) +endif() + +set(TEST_NAME_SINGLE_PROCESS "rccl-tracing-single-process") + +find_program(BASH_EXE "bash" REQUIRED) +message(STATUS "ROCPROFv3 ENV: ${rccl-tracing-env}") +set(VALIDATION_DEPENDS_SINGLE_PROCESS) +foreach(_OUTPUT_FORMAT csv json pftrace) + rocprofiler_add_test( + NAME rocprofv3-test-${TEST_NAME_SINGLE_PROCESS}-${_OUTPUT_FORMAT}-execute + COMMAND + ${BASH_EXE} -c + "rm -fr ${CMAKE_CURRENT_BINARY_DIR}/${TEST_NAME_SINGLE_PROCESS}/*.${_OUTPUT_FORMAT} &&\ + $ --rccl-trace -o ${TEST_NAME_SINGLE_PROCESS}-%pid% \ + --output-format ${_OUTPUT_FORMAT} -d ${TEST_NAME_SINGLE_PROCESS} --log-level env -- $ 64" + DEPENDS rccl-single-process + TIMEOUT 120 + LABELS "integration-tests" + ENVIRONMENT "${rccl-tracing-env}" + FAIL_REGULAR_EXPRESSION "threw an exception" + DISABLED $,$>) + + list(APPEND VALIDATION_DEPENDS_SINGLE_PROCESS + rocprofv3-test-${TEST_NAME_SINGLE_PROCESS}-${_OUTPUT_FORMAT}-execute) +endforeach() + +add_test( + NAME rocprofv3-test-${TEST_NAME_SINGLE_PROCESS}-validate + COMMAND + ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate-single-process.py + --json-input-dir "${CMAKE_CURRENT_BINARY_DIR}/${TEST_NAME_SINGLE_PROCESS}" + --csv-input-dir "${CMAKE_CURRENT_BINARY_DIR}/${TEST_NAME_SINGLE_PROCESS}") + +set_tests_properties( + rocprofv3-test-${TEST_NAME_SINGLE_PROCESS}-validate + PROPERTIES TIMEOUT + 120 + LABELS + "integration-tests" + DEPENDS + "${VALIDATION_DEPENDS_SINGLE_PROCESS}" + FAIL_REGULAR_EXPRESSION + "AssertionError" + DISABLED + $,$>) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/conftest.py b/projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/conftest.py new file mode 100644 index 00000000000..eab01c16c4c --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/conftest.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 + +import csv +import json +import glob +import pytest +from pathlib import Path + +from rocprofiler_sdk.pytest_utils.dotdict import dotdict +from rocprofiler_sdk.pytest_utils import collapse_dict_list + + +def pytest_addoption(parser): + parser.addoption( + "--json-input-dir", + action="store", + default="rccl-tracing", + help="Input JSON", + ) + parser.addoption( + "--csv-input-dir", + action="store", + default="rccl-tracing", + help="Input CSV", + ) + + +@pytest.fixture +def json_sp_input_data(request): + dirname = request.config.getoption("--json-input-dir") + files = glob.glob(f"{dirname}/rccl-tracing-single-process*.json") + + data_items = [] + for file in files: + with open(file, "r") as inp: + data_items.append(dotdict(collapse_dict_list(json.load(inp)))) + return data_items + + +@pytest.fixture +def csv_sp_input_data(request): + dirname = request.config.getoption("--csv-input-dir") + files = glob.glob(f"{dirname}/*trace.csv") + + data_items = [] + for file in files: + data = [] + with open(file, "r") as inp: + reader = csv.DictReader(inp) + for row in reader: + data.append(row) + data_items.append(data) + + return data_items diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/pytest.ini b/projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/pytest.ini new file mode 100644 index 00000000000..4e17b32dcd2 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +addopts = --durations=20 -rA -s -vv +testpaths = [ validate-sp.py, validate-mp.py ] +pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/validate-single-process.py b/projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/validate-single-process.py new file mode 100644 index 00000000000..f60a85befb7 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/rccl-trace/validate-single-process.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python3 + +import sys +import pytest +import json + +from collections import defaultdict + + +# helper function +def node_exists(name, data, min_len=1): + assert name in data + assert data[name] is not None + if isinstance(data[name], (list, tuple, dict, set)): + assert len(data[name]) >= min_len + + +def get_operation(record, kind_name, op_name=None): + for idx, itr in enumerate(record["strings"]["buffer_records"]): + if kind_name == itr["kind"]: + if op_name is None: + return idx, itr["operations"] + else: + for oidx, oname in enumerate(itr["operations"]): + if op_name == oname: + return oidx + return None + + +def _test_rccl_api_json_traces(json_data): + data = json_data["rocprofiler-sdk-tool"] + + callback_records = data["callback_records"] + buffer_records = data["buffer_records"] + + rccl_bf_traces = buffer_records["rccl_api"] + rccl_api_bf_ops = get_operation(data, "RCCL_API") + assert len(rccl_api_bf_ops[1]) == 38 + + api_calls = [] + + for api_call in rccl_bf_traces: + api_calls.append(rccl_api_bf_ops[1][api_call["operation"]]) + + parent_process = False + if "ncclGetUniqueId" in api_calls: + parent_process = True + + for call in [ + "ncclCommInitAll", + "ncclGetUniqueId", + "ncclGroupStart", + "ncclGroupEnd", + "ncclAllReduce", + "ncclCommDestroy", + ]: + assert call in api_calls + + # check for buffer args + # these checks must be in sync with rocprofv3 test's validate + alloc_size = 64 * 1024 * 1024 # 64MB in the test + elem_count = alloc_size // 4 # sizeof(float) + + def validate_nccl_count(arg): + assert ( + int(arg["value"]) == elem_count + ), f'Expected {elem_count} elements, got {arg["value"]}' + + def validate_nccl_dtype(arg): + assert arg["value"] == "7" # ncclFloat = 7 + + def validate_nccl_op(arg): + assert arg["value"] == "0" # ncclSum = 0 + + for record in rccl_bf_traces: + op_name = rccl_api_bf_ops[1][record["operation"]] + + if op_name == "ncclAllReduce": + checked_args = 0 + + for arg in record["args"]: + + if arg["name"] == "count": + checked_args += 1 + validate_nccl_count(arg) + + if arg["name"] == "datatype": + checked_args += 1 + validate_nccl_dtype(arg) + + if arg["name"] == "op": + checked_args += 1 + validate_nccl_op(arg) + + assert ( + checked_args == 3 + ), f"Expected to validate 3 args, found only {checked_args}" + + return parent_process + + +def _test_rccl_api_csv_traces(csv_data): + assert len(csv_data) > 0, "Expected non-empty csv data" + + api_calls = [] + + for row in csv_data: + assert "Domain" in row, "'Domain' was not present in csv data for rccl-trace" + assert "Function" in row, "'Function' was not present in csv data for rccl-trace" + assert ( + "Process_Id" in row + ), "'Process_Id' was not present in csv data for rccl-trace" + assert ( + "Thread_Id" in row + ), "'Thread_Id' was not present in csv data for rccl-trace" + assert ( + "Correlation_Id" in row + ), "'Correlation_Id' was not present in csv data for rccl-trace" + assert ( + "Start_Timestamp" in row + ), "'Start_Timestamp' was not present in csv data for rccl-trace" + assert ( + "End_Timestamp" in row + ), "'End_Timestamp' was not present in csv data for rccl-trace" + + api_calls.append(row["Function"]) + + assert row["Domain"] == "RCCL_API_EXT" + assert int(row["Process_Id"]) > 0 + assert int(row["Thread_Id"]) > 0 + assert int(row["Start_Timestamp"]) > 0 + assert int(row["End_Timestamp"]) > 0 + assert int(row["Start_Timestamp"]) < int(row["End_Timestamp"]) + + for call in [ + "ncclAllReduce", + "ncclCommInitAll", + "ncclCommGetAsyncError", + ]: + assert call in api_calls + + parent_process = False + if "ncclGetUniqueId" in api_calls: + parent_process = True + + return parent_process + + +def test_rccl_sp_trace(json_sp_input_data, csv_sp_input_data): + assert len(json_sp_input_data) != 0, "Expected non-zero json output files" + assert len(csv_sp_input_data) != 0, "Expected non-zero csv output files" + assert len(json_sp_input_data) == len(csv_sp_input_data) + + def test_data(func, data): + """ + func: should return True if data is from a parent process + data: list of json or csv data to pass to func + """ + num_parents = 0 + num_children = 0 + + for _data in data: + is_parent = func(_data) + if is_parent: + num_parents += 1 + else: + num_children += 1 + + assert num_parents == 1, "Expected one parent process" + assert num_children + num_parents == len( + json_sp_input_data + ), "Expected parent + child processes to be same as number of files" + + test_data(_test_rccl_api_json_traces, json_sp_input_data) + test_data(_test_rccl_api_csv_traces, csv_sp_input_data) + + +if __name__ == "__main__": + print(sys.argv[1:], sys.stderr) + exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) + sys.exit(exit_code) From 5a23185d44812d8890228d8c78576f91029b9d63 Mon Sep 17 00:00:00 2001 From: Mythreya Date: Mon, 30 Jun 2025 19:14:24 +0000 Subject: [PATCH 7/8] Update schema --- .../source/docs/rocprofv3-schema.json | 78 ++++++++++++++++++- 1 file changed, 74 insertions(+), 4 deletions(-) diff --git a/projects/rocprofiler-sdk/source/docs/rocprofv3-schema.json b/projects/rocprofiler-sdk/source/docs/rocprofv3-schema.json index 918ca0a53b1..78f8bbdf151 100644 --- a/projects/rocprofiler-sdk/source/docs/rocprofv3-schema.json +++ b/projects/rocprofiler-sdk/source/docs/rocprofv3-schema.json @@ -1684,13 +1684,13 @@ "start_timestamp", "end_timestamp", "thread_id", - "agent_id", + "agent_id", "address", "allocation_size" ] - } - }, - "rocdecoder_api": { + } + }, + "rocdecoder_api": { "type": "array", "description": "ROCDecode API records.", "items": { @@ -1749,6 +1749,76 @@ "thread_id" ] } + }, + "rccl_api_traces": { + "type": "array", + "description": "RCCL API records.", + "items": { + "type": "object", + "properties": { + "size": { + "type": "integer", + "description": "Size of the RCCL API record." + }, + "kind": { + "type": "integer", + "description": "Kind of the RCCL API." + }, + "operation": { + "type": "integer", + "description": "Operation of the RCCL API." + }, + "correlation_id": { + "type": "object", + "description": "Correlation ID information.", + "properties": { + "internal": { + "type": "integer", + "description": "Internal correlation ID." + }, + "external": { + "type": "integer", + "description": "External correlation ID." + }, + "ancestor": { + "type": "integer", + "description": "Ancestor correlation ID." + } + }, + "required": [ + "internal", + "external", + "ancestor" + ] + }, + "start_timestamp": { + "type": "integer", + "description": "Start timestamp." + }, + "end_timestamp": { + "type": "integer", + "description": "End timestamp." + }, + "thread_id": { + "type": "integer", + "description": "Thread ID." + }, + "args": { + "type": "array", + "description": "RCCL API args" + } + }, + "required": [ + "size", + "kind", + "operation", + "correlation_id", + "start_timestamp", + "end_timestamp", + "thread_id", + "args" + ] + } } } } From 67fdb0cca5bee1cbc4eb3a8ab171e32424fefac0 Mon Sep 17 00:00:00 2001 From: Mythreya Date: Wed, 23 Jul 2025 00:18:22 +0000 Subject: [PATCH 8/8] Add null check --- .../rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.cpp index e50840fd72f..10ea09dc595 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.cpp @@ -97,7 +97,10 @@ convert_arg_type(Tp&& val) { if constexpr(std::is_same::value) { - return common::get_string_entry(val)->c_str(); + if(val) + return common::get_string_entry(val)->c_str(); + else + return std::remove_reference_t(val); } else {