From 588463774b1144cef27061f7b9f55f58ef499c7b Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 27 Jan 2026 13:09:29 -0800 Subject: [PATCH 01/39] Update to C++20. --- cmake/CMakeLists.txt | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 42432041a8b01..6b17389f7f4bf 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -35,12 +35,7 @@ include(CheckSymbolExists) include(GNUInstallDirs) # onnxruntime_providers_* require CMAKE_INSTALL_* variables if (NOT CMAKE_CXX_STANDARD) - # TODO: update this once all system adapt c++20 - if (CMAKE_SYSTEM_NAME STREQUAL "Darwin") - set(CMAKE_CXX_STANDARD 20) - else() - set(CMAKE_CXX_STANDARD 17) - endif() + set(CMAKE_CXX_STANDARD 20) endif() if (MSVC) From 98c31aef4bd50e6ab8febce2e7df5feb92a1074f Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 27 Jan 2026 13:10:31 -0800 Subject: [PATCH 02/39] Fix ostream::operator<< usage with wchar_t*. --- onnxruntime/core/session/utils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/session/utils.cc b/onnxruntime/core/session/utils.cc index a354cf26368d4..7ad09e1a2cd5e 100644 --- a/onnxruntime/core/session/utils.cc +++ b/onnxruntime/core/session/utils.cc @@ -549,7 +549,7 @@ Status LoadPluginOrProviderBridge(const std::string& registration_name, true, ProviderLibraryPathType::Absolute); bool is_provider_bridge = provider_library->Load() == Status::OK(); // library has GetProvider - LOGS_DEFAULT(INFO) << "Loading EP library: " << library_path + LOGS_DEFAULT(INFO) << "Loading EP library: " << resolved_library_path << (is_provider_bridge ? " as a provider bridge" : " as a plugin"); // create EpLibraryPlugin to ensure CreateEpFactories and ReleaseEpFactory are available From 9171d14ed45ae153e2c54cf7fb445fa396e2417e Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 27 Jan 2026 17:03:56 -0800 Subject: [PATCH 03/39] try to fix time_point output operator availability check --- .../onnxruntime/core/common/logging/logging.h | 30 ++----------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/include/onnxruntime/core/common/logging/logging.h b/include/onnxruntime/core/common/logging/logging.h index dc930ce52eaa9..38e3aa3725b71 100644 --- a/include/onnxruntime/core/common/logging/logging.h +++ b/include/onnxruntime/core/common/logging/logging.h @@ -58,33 +58,9 @@ namespace logging { using Timestamp = std::chrono::time_point; -// C++20 has operator<< in std::chrono for Timestamp type but mac builds need additional checks -// to ensure usage is valid. -// TODO: As we enable C++20 on other platforms we may need similar checks. -// define a temporary value to determine whether to use the std::chrono or date implementation. -#define ORT_USE_CXX20_STD_CHRONO __cplusplus >= 202002L - -// Apply constraints for mac builds -#if __APPLE__ -#include - -// Catalyst check must be first as it has both TARGET_OS_MACCATALYST and TARGET_OS_MAC set -#if TARGET_OS_MACCATALYST -// maccatalyst requires version 16.3 -#if (defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 160300) -#undef ORT_USE_CXX20_STD_CHRONO -#endif - -#elif TARGET_OS_MAC -// Xcode added support for C++20's std::chrono::operator<< in SDK version 14.4, -// but the target macOS version must also be >= 13.3 for it to be used. -#if (defined(__MAC_OS_X_VERSION_MAX_ALLOWED) && __MAC_OS_X_VERSION_MAX_ALLOWED < 140400) || \ - (defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 130300) -#undef ORT_USE_CXX20_STD_CHRONO -#endif - -#endif -#endif // __APPLE__ +// C++20 has operator<< in std::chrono for Timestamp type but we need to check if it is available. +// define temporary macro ORT_USE_CXX20_STD_CHRONO to determine whether to use the std::chrono or date implementation. +#define ORT_USE_CXX20_STD_CHRONO __cpp_lib_chrono >= 201803L #if ORT_USE_CXX20_STD_CHRONO namespace timestamp_ns = std::chrono; From 92b3334f0cd5b685501ae5baa59787f9063e80c6 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 27 Jan 2026 19:01:16 -0800 Subject: [PATCH 04/39] add back Apple stuff and handle IOS --- .../onnxruntime/core/common/logging/logging.h | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/include/onnxruntime/core/common/logging/logging.h b/include/onnxruntime/core/common/logging/logging.h index 38e3aa3725b71..329c0ed1bf8c5 100644 --- a/include/onnxruntime/core/common/logging/logging.h +++ b/include/onnxruntime/core/common/logging/logging.h @@ -58,10 +58,32 @@ namespace logging { using Timestamp = std::chrono::time_point; -// C++20 has operator<< in std::chrono for Timestamp type but we need to check if it is available. +// C++20 has operator<< in std::chrono for Timestamp type but we need to check if usage is valid. // define temporary macro ORT_USE_CXX20_STD_CHRONO to determine whether to use the std::chrono or date implementation. #define ORT_USE_CXX20_STD_CHRONO __cpp_lib_chrono >= 201803L +// Apply constraints for Apple builds +#if __APPLE__ +#include + +// iOS check must be first as it also has TARGET_OS_MAC set +#if TARGET_OS_IOS +// iOS requires version 16.3 +#if (defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 160300) +#undef ORT_USE_CXX20_STD_CHRONO +#endif + +#elif TARGET_OS_MAC +// Xcode added support for C++20's std::chrono::operator<< in SDK version 14.4, +// but the target macOS version must also be >= 13.3 for it to be used. +#if (defined(__MAC_OS_X_VERSION_MAX_ALLOWED) && __MAC_OS_X_VERSION_MAX_ALLOWED < 140400) || \ + (defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 130300) +#undef ORT_USE_CXX20_STD_CHRONO +#endif + +#endif +#endif // __APPLE__ + #if ORT_USE_CXX20_STD_CHRONO namespace timestamp_ns = std::chrono; #else From 831ae91ad7007826403f605cb8ce04bda29e1cc6 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 28 Jan 2026 20:34:16 -0800 Subject: [PATCH 05/39] make Timestamp a separate type and implement stream insertion operator for it --- .../onnxruntime/core/common/logging/logging.h | 26 +++++++++++++++---- .../core/common/logging/sinks/ostream_sink.cc | 6 +---- .../platform/apple/logging/apple_log_sink.mm | 4 +-- onnxruntime/test/common/logging/helpers.h | 2 -- .../test/util/include/test/capturing_sink.h | 2 -- 5 files changed, 23 insertions(+), 17 deletions(-) diff --git a/include/onnxruntime/core/common/logging/logging.h b/include/onnxruntime/core/common/logging/logging.h index 329c0ed1bf8c5..386a9421fc47e 100644 --- a/include/onnxruntime/core/common/logging/logging.h +++ b/include/onnxruntime/core/common/logging/logging.h @@ -56,9 +56,7 @@ struct OrtLogger; // opaque API type. is always an instance of Logger namespace onnxruntime { namespace logging { -using Timestamp = std::chrono::time_point; - -// C++20 has operator<< in std::chrono for Timestamp type but we need to check if usage is valid. +// C++20 has std::chrono::operator<< for std::chrono::system_clock::time_point but we need to check if usage is valid. // define temporary macro ORT_USE_CXX20_STD_CHRONO to determine whether to use the std::chrono or date implementation. #define ORT_USE_CXX20_STD_CHRONO __cpp_lib_chrono >= 201803L @@ -85,13 +83,31 @@ using Timestamp = std::chrono::time_point; #endif // __APPLE__ #if ORT_USE_CXX20_STD_CHRONO -namespace timestamp_ns = std::chrono; +namespace timestamp_stream_insertion_op_ns = std::chrono; #else -namespace timestamp_ns = ::date; +namespace timestamp_stream_insertion_op_ns = ::date; #endif #undef ORT_USE_CXX20_STD_CHRONO +// This class wraps `std::chrono::system_clock::time_point` and provides `operator<<`. +// It is a workaround for the inconsistent availability of `std::chrono::operator<<` for +// `std::chrono::system_clock::time_point`. +// When all builds support `std::chrono::operator<<`, we can simplify to this: +// `using Timestamp = std::chrono::system_clock::time_point;` +class Timestamp { + public: + using TimePoint = std::chrono::system_clock::time_point; + Timestamp(const TimePoint& time_point) noexcept : time_point_{time_point} {} + + friend std::ostream& operator<<(std::ostream& os, const Timestamp& time_stamp) { + return timestamp_stream_insertion_op_ns::operator<<(os, time_stamp.time_point_); + } + + private: + TimePoint time_point_{}; +}; + #ifndef NDEBUG ORT_ATTRIBUTE_UNUSED static bool vlog_enabled = true; // Set directly based on your needs. #else diff --git a/onnxruntime/core/common/logging/sinks/ostream_sink.cc b/onnxruntime/core/common/logging/sinks/ostream_sink.cc index 64441a2b20de2..f8fdb5e1906ed 100644 --- a/onnxruntime/core/common/logging/sinks/ostream_sink.cc +++ b/onnxruntime/core/common/logging/sinks/ostream_sink.cc @@ -23,9 +23,6 @@ struct Color { #ifndef _WIN32 void OStreamSink::SendImpl(const Timestamp& timestamp, const std::string& logger_id, const Capture& message) { - // operator for formatting of timestamp in ISO8601 format including microseconds - using timestamp_ns::operator<<; - // Two options as there may be multiple calls attempting to write to the same sink at once: // 1) Use mutex to synchronize access to the stream. // 2) Create the message in an ostringstream and output in one call. @@ -45,8 +42,7 @@ void OStreamSink::SendImpl(const Timestamp& timestamp, const std::string& logger } #endif - timestamp_ns::operator<<(msg, timestamp); // handle ambiguity with C++20 where date and std::chrono have operator<< - msg << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", " + msg << timestamp << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", " << message.Location().ToString() << "] " << message.Message(); #ifndef ORT_MINIMAL_BUILD diff --git a/onnxruntime/core/platform/apple/logging/apple_log_sink.mm b/onnxruntime/core/platform/apple/logging/apple_log_sink.mm index 6abbe76a7f151..862ea0bf3c825 100644 --- a/onnxruntime/core/platform/apple/logging/apple_log_sink.mm +++ b/onnxruntime/core/platform/apple/logging/apple_log_sink.mm @@ -11,11 +11,9 @@ namespace logging { void AppleLogSink::SendImpl(const Timestamp& timestamp, const std::string& logger_id, const Capture& message) { - using timestamp_ns::operator<<; std::ostringstream msg; - timestamp_ns::operator<<(msg, timestamp); // handle ambiguity with C++20 where date and std::chrono have operator<< - msg << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", " + msg << timestamp << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", " << message.Location().ToString() << "] " << message.Message(); NSLog(@"%s", msg.str().c_str()); } diff --git a/onnxruntime/test/common/logging/helpers.h b/onnxruntime/test/common/logging/helpers.h index 0b623fe9ee09a..bf4d30184b7f6 100644 --- a/onnxruntime/test/common/logging/helpers.h +++ b/onnxruntime/test/common/logging/helpers.h @@ -39,8 +39,6 @@ class MockEtwSink : public ::onnxruntime::logging::ISink { #endif ACTION(PrintArgs) { - using onnxruntime::logging::timestamp_ns::operator<<; - // const Timestamp ×tamp, const std::string &logger_id, const Message &message // arg0 arg1 arg2 std::cout << arg1 << "@" << arg0 << " " diff --git a/onnxruntime/test/util/include/test/capturing_sink.h b/onnxruntime/test/util/include/test/capturing_sink.h index 7d978d1bd1e56..37e1aecabdf25 100644 --- a/onnxruntime/test/util/include/test/capturing_sink.h +++ b/onnxruntime/test/util/include/test/capturing_sink.h @@ -14,8 +14,6 @@ using namespace ::onnxruntime::logging; class CapturingSink : public logging::ISink { public: void SendImpl(const Timestamp& timestamp, const std::string& logger_id, const Capture& message) override { - // operator for formatting of timestamp in ISO8601 format including microseconds - using timestamp_ns::operator<<; std::ostringstream msg; msg << timestamp << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", " From 1e2ad83b6b147bae1bc9a11d241c7479b81d7c5f Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 29 Jan 2026 10:28:08 -0800 Subject: [PATCH 06/39] move date.h dependency into logging.cc --- .../onnxruntime/core/common/logging/logging.h | 39 ++---------------- onnxruntime/core/common/logging/logging.cc | 41 +++++++++++++++++++ .../platform/posix/logging/syslog_sink.cc | 2 - .../core/platform/windows/logging/etw_sink.h | 1 - .../test/common/logging/logging_test.cc | 2 - .../test/util/include/capturing_sink.h | 4 -- 6 files changed, 44 insertions(+), 45 deletions(-) diff --git a/include/onnxruntime/core/common/logging/logging.h b/include/onnxruntime/core/common/logging/logging.h index 386a9421fc47e..ec6f5df38778d 100644 --- a/include/onnxruntime/core/common/logging/logging.h +++ b/include/onnxruntime/core/common/logging/logging.h @@ -17,7 +17,6 @@ #include "core/common/logging/macros.h" #include "core/common/logging/severity.h" #include "core/common/logging/sink_types.h" -#include "date/date.h" /* @@ -56,40 +55,6 @@ struct OrtLogger; // opaque API type. is always an instance of Logger namespace onnxruntime { namespace logging { -// C++20 has std::chrono::operator<< for std::chrono::system_clock::time_point but we need to check if usage is valid. -// define temporary macro ORT_USE_CXX20_STD_CHRONO to determine whether to use the std::chrono or date implementation. -#define ORT_USE_CXX20_STD_CHRONO __cpp_lib_chrono >= 201803L - -// Apply constraints for Apple builds -#if __APPLE__ -#include - -// iOS check must be first as it also has TARGET_OS_MAC set -#if TARGET_OS_IOS -// iOS requires version 16.3 -#if (defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 160300) -#undef ORT_USE_CXX20_STD_CHRONO -#endif - -#elif TARGET_OS_MAC -// Xcode added support for C++20's std::chrono::operator<< in SDK version 14.4, -// but the target macOS version must also be >= 13.3 for it to be used. -#if (defined(__MAC_OS_X_VERSION_MAX_ALLOWED) && __MAC_OS_X_VERSION_MAX_ALLOWED < 140400) || \ - (defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 130300) -#undef ORT_USE_CXX20_STD_CHRONO -#endif - -#endif -#endif // __APPLE__ - -#if ORT_USE_CXX20_STD_CHRONO -namespace timestamp_stream_insertion_op_ns = std::chrono; -#else -namespace timestamp_stream_insertion_op_ns = ::date; -#endif - -#undef ORT_USE_CXX20_STD_CHRONO - // This class wraps `std::chrono::system_clock::time_point` and provides `operator<<`. // It is a workaround for the inconsistent availability of `std::chrono::operator<<` for // `std::chrono::system_clock::time_point`. @@ -101,10 +66,12 @@ class Timestamp { Timestamp(const TimePoint& time_point) noexcept : time_point_{time_point} {} friend std::ostream& operator<<(std::ostream& os, const Timestamp& time_stamp) { - return timestamp_stream_insertion_op_ns::operator<<(os, time_stamp.time_point_); + return time_stamp.WriteToStream(os); } private: + std::ostream& WriteToStream(std::ostream& os) const; + TimePoint time_point_{}; }; diff --git a/onnxruntime/core/common/logging/logging.cc b/onnxruntime/core/common/logging/logging.cc index a79e7300cffce..3f80ee4bcec50 100644 --- a/onnxruntime/core/common/logging/logging.cc +++ b/onnxruntime/core/common/logging/logging.cc @@ -28,8 +28,49 @@ #include "logging.h" #endif +// C++20 has std::chrono::operator<< for std::chrono::system_clock::time_point but we need to check if usage is valid. +// define temporary macro ORT_USE_CXX20_STD_CHRONO to determine whether to use the std::chrono or date implementation. +#define ORT_USE_CXX20_STD_CHRONO __cpp_lib_chrono >= 201803L + +// Apply constraints for Apple builds +#if __APPLE__ +#include + +// iOS check must be first as it also has TARGET_OS_MAC set +#if TARGET_OS_IOS +// iOS requires version 16.3 +#if (defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 160300) +#undef ORT_USE_CXX20_STD_CHRONO +#endif + +#elif TARGET_OS_MAC +// Xcode added support for C++20's std::chrono::operator<< in SDK version 14.4, +// but the target macOS version must also be >= 13.3 for it to be used. +#if (defined(__MAC_OS_X_VERSION_MAX_ALLOWED) && __MAC_OS_X_VERSION_MAX_ALLOWED < 140400) || \ + (defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 130300) +#undef ORT_USE_CXX20_STD_CHRONO +#endif + +#endif +#endif // __APPLE__ + +#if ORT_USE_CXX20_STD_CHRONO +namespace timestamp_stream_insertion_op_ns = std::chrono; +#else +#include "date/date.h" + +namespace timestamp_stream_insertion_op_ns = ::date; +#endif + +#undef ORT_USE_CXX20_STD_CHRONO + namespace onnxruntime { namespace logging { + +std::ostream& Timestamp::WriteToStream(std::ostream& os) const { + return timestamp_stream_insertion_op_ns::operator<<(os, time_point_); +} + const char* Category::onnxruntime = "onnxruntime"; const char* Category::System = "System"; diff --git a/onnxruntime/core/platform/posix/logging/syslog_sink.cc b/onnxruntime/core/platform/posix/logging/syslog_sink.cc index 9fbd26f093498..e5b60cf4742ef 100644 --- a/onnxruntime/core/platform/posix/logging/syslog_sink.cc +++ b/onnxruntime/core/platform/posix/logging/syslog_sink.cc @@ -4,7 +4,6 @@ #include "core/common/logging/logging.h" #include "core/common/logging/capture.h" #include "syslog_sink.h" -#include "date/date.h" namespace onnxruntime { namespace logging { @@ -12,7 +11,6 @@ namespace logging { constexpr const char* SYSLOG_LEVEL = "76432"; void SysLogSink::SendImpl(const Timestamp& timestamp, const std::string& logger_id, const Capture& message) { - using date::operator<<; std::stringstream msg; // syslog has it own timestamp but not as accurate as our timestamp. So we are going to keep both, diff --git a/onnxruntime/core/platform/windows/logging/etw_sink.h b/onnxruntime/core/platform/windows/logging/etw_sink.h index 62b762886ca82..d0c08a2144c20 100644 --- a/onnxruntime/core/platform/windows/logging/etw_sink.h +++ b/onnxruntime/core/platform/windows/logging/etw_sink.h @@ -16,7 +16,6 @@ #ifdef ETW_TRACE_LOGGING_SUPPORTED -#include #include #include #include diff --git a/onnxruntime/test/common/logging/logging_test.cc b/onnxruntime/test/common/logging/logging_test.cc index d3af022f83e86..8e1817e777d9e 100644 --- a/onnxruntime/test/common/logging/logging_test.cc +++ b/onnxruntime/test/common/logging/logging_test.cc @@ -14,8 +14,6 @@ #if defined(_MSC_VER) && !defined(__clang__) #pragma warning(disable : 26400) #endif -// if we pull in the whole 'testing' namespace we get warnings from date.h as both use '_' in places. -// to avoid that we explicitly pull in the pieces we are using using testing::Eq; using testing::Field; using testing::Ge; diff --git a/onnxruntime/test/util/include/capturing_sink.h b/onnxruntime/test/util/include/capturing_sink.h index 39788947602df..37e1aecabdf25 100644 --- a/onnxruntime/test/util/include/capturing_sink.h +++ b/onnxruntime/test/util/include/capturing_sink.h @@ -6,8 +6,6 @@ #include "core/common/logging/logging.h" #include "core/common/logging/isink.h" -#include "date/date.h" - namespace onnxruntime { namespace test { @@ -16,8 +14,6 @@ using namespace ::onnxruntime::logging; class CapturingSink : public logging::ISink { public: void SendImpl(const Timestamp& timestamp, const std::string& logger_id, const Capture& message) override { - // operator for formatting of timestamp in ISO8601 format including microseconds - using date::operator<<; std::ostringstream msg; msg << timestamp << " [" << message.SeverityPrefix() << ":" << message.Category() << ":" << logger_id << ", " From dad1617c8abaf4c3f847ba564298efcb1cc58ad0 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:19:12 -0800 Subject: [PATCH 07/39] add exception for MSVC CUDA EP build for now... --- cmake/CMakeLists.txt | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 6b17389f7f4bf..8eca34f4f80e7 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -21,11 +21,6 @@ if("${CMAKE_CXX_COMPILER_ID}" MATCHES "IntelLLVM") endif() endif() -# Needed for Java -if (NOT CMAKE_CXX_STANDARD) - set(CMAKE_C_STANDARD 99) -endif() - include(CheckCXXCompilerFlag) include(CheckLanguage) include(CMakeDependentOption) @@ -34,10 +29,6 @@ include(CheckFunctionExists) include(CheckSymbolExists) include(GNUInstallDirs) # onnxruntime_providers_* require CMAKE_INSTALL_* variables -if (NOT CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 20) -endif() - if (MSVC) # Make sure Visual Studio sets __cplusplus macro correctly: https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:__cplusplus") @@ -249,6 +240,24 @@ option(onnxruntime_USE_OPENVINO_INTERFACE "Build ONNXRuntime shared lib which is option(onnxruntime_USE_VITISAI_INTERFACE "Build ONNXRuntime shared lib which is compatible with Vitis-AI EP interface" OFF) option(onnxruntime_USE_QNN_INTERFACE "Build ONNXRuntime shared lib which is compatible with QNN EP interface" OFF) +# Set C/C++ standard versions +if (NOT CMAKE_C_STANDARD) + # Needed for Java + set(CMAKE_C_STANDARD 99) +endif() + +if (NOT CMAKE_CXX_STANDARD) + # TODO move all builds to C++20 + if (MSVC AND onnxruntime_USE_CUDA) + # There's a compilation error from CUTLASS header "cute/tensor.hpp" when attempting to use C++20: + # cutlass-src\include\cute\stride.hpp(299,46): error C3545: 'Ints': parameter pack expects a non-type template + # argument + set(CMAKE_CXX_STANDARD 17) + else() + set(CMAKE_CXX_STANDARD 20) + endif() +endif() + if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 11.1) message(FATAL_ERROR "GCC version must be greater than or equal to 11.1") endif() From 7f34d3bf95466234b16a1a1adc5a674abb3c6a59 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 29 Jan 2026 18:48:11 -0800 Subject: [PATCH 08/39] add wostream overloads for Timestamp operator<< --- include/onnxruntime/core/common/logging/logging.h | 5 +++++ onnxruntime/core/common/logging/logging.cc | 4 ++++ onnxruntime/core/common/logging/sinks/ostream_sink.cc | 3 --- onnxruntime/core/session/inference_session.cc | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/onnxruntime/core/common/logging/logging.h b/include/onnxruntime/core/common/logging/logging.h index ec6f5df38778d..4e70f0414cb70 100644 --- a/include/onnxruntime/core/common/logging/logging.h +++ b/include/onnxruntime/core/common/logging/logging.h @@ -69,8 +69,13 @@ class Timestamp { return time_stamp.WriteToStream(os); } + friend std::wostream& operator<<(std::wostream& os, const Timestamp& time_stamp) { + return time_stamp.WriteToWStream(os); + } + private: std::ostream& WriteToStream(std::ostream& os) const; + std::wostream& WriteToWStream(std::wostream& os) const; TimePoint time_point_{}; }; diff --git a/onnxruntime/core/common/logging/logging.cc b/onnxruntime/core/common/logging/logging.cc index 3f80ee4bcec50..2f02199bde379 100644 --- a/onnxruntime/core/common/logging/logging.cc +++ b/onnxruntime/core/common/logging/logging.cc @@ -71,6 +71,10 @@ std::ostream& Timestamp::WriteToStream(std::ostream& os) const { return timestamp_stream_insertion_op_ns::operator<<(os, time_point_); } +std::wostream& Timestamp::WriteToWStream(std::wostream& os) const { + return timestamp_stream_insertion_op_ns::operator<<(os, time_point_); +} + const char* Category::onnxruntime = "onnxruntime"; const char* Category::System = "System"; diff --git a/onnxruntime/core/common/logging/sinks/ostream_sink.cc b/onnxruntime/core/common/logging/sinks/ostream_sink.cc index f8fdb5e1906ed..1c4968502eabb 100644 --- a/onnxruntime/core/common/logging/sinks/ostream_sink.cc +++ b/onnxruntime/core/common/logging/sinks/ostream_sink.cc @@ -62,9 +62,6 @@ void OStreamSink::SendImpl(const Timestamp& timestamp, const std::string& logger } #else void WOStreamSink::SendImpl(const Timestamp& timestamp, const std::string& logger_id, const Capture& message) { - // operator for formatting of timestamp in ISO8601 format including microseconds - using date::operator<<; - // Two options as there may be multiple calls attempting to write to the same sink at once: // 1) Use mutex to synchronize access to the stream. // 2) Create the message in an ostringstream and output in one call. diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 0944be87591e2..863ea76e7aa3d 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -113,7 +113,7 @@ inline const wchar_t* GetDateFormatString() { return L"%Y-%m-%d_%H-%M-%S"; } #endif -// TODO: use LoggingManager::GetTimestamp and date::operator<< +// TODO: use LoggingManager::GetTimestamp and operator<< // (see ostream_sink.cc for an example) // to simplify this and match the log file timestamp format. template From ad1aab8198f4be00bdeb9d2d63a62e888ee47567 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Fri, 30 Jan 2026 11:28:17 -0800 Subject: [PATCH 09/39] use std::filesystem::path:string instead of u8string in onnxruntime/core/providers/vitisai/imp/global_api.cc --- onnxruntime/core/providers/vitisai/imp/global_api.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/providers/vitisai/imp/global_api.cc b/onnxruntime/core/providers/vitisai/imp/global_api.cc index ec529c2ad1fc2..b74eb1cae4a16 100644 --- a/onnxruntime/core/providers/vitisai/imp/global_api.cc +++ b/onnxruntime/core/providers/vitisai/imp/global_api.cc @@ -238,7 +238,7 @@ vaip_core::DllSafe>> c if (s_library_vitisaiep.compile_onnx_model_vitisai_ep_v4) { Status status = Status::OK(); auto status_ptr = reinterpret_cast(&status); - auto ret = vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_vitisai_ep_v4(model_path.u8string(), graph_viewer.GetGraph(), options, status_ptr, change_status_with_error, logger), vaip_execution_provider_deletor); + auto ret = vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_vitisai_ep_v4(model_path.string(), graph_viewer.GetGraph(), options, status_ptr, change_status_with_error, logger), vaip_execution_provider_deletor); if (!status.IsOK()) { ORT_THROW(status); } @@ -246,7 +246,7 @@ vaip_core::DllSafe>> c } else if (s_library_vitisaiep.compile_onnx_model_vitisai_ep_v3) { Status status = Status::OK(); auto status_ptr = reinterpret_cast(&status); - auto ret = vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_vitisai_ep_v3(model_path.u8string(), graph_viewer.GetGraph(), options, status_ptr, change_status_with_error), vaip_execution_provider_deletor); + auto ret = vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_vitisai_ep_v3(model_path.string(), graph_viewer.GetGraph(), options, status_ptr, change_status_with_error), vaip_execution_provider_deletor); if (!status.IsOK()) { ORT_THROW(status); } @@ -254,13 +254,13 @@ vaip_core::DllSafe>> c } else if (s_library_vitisaiep.compile_onnx_model_vitisai_ep_with_error_handling) { Status status = Status::OK(); auto status_ptr = reinterpret_cast(&status); - auto ret = vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_vitisai_ep_with_error_handling(model_path.u8string(), graph_viewer.GetGraph(), options, status_ptr, change_status_with_error), vaip_execution_provider_deletor); + auto ret = vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_vitisai_ep_with_error_handling(model_path.string(), graph_viewer.GetGraph(), options, status_ptr, change_status_with_error), vaip_execution_provider_deletor); if (!status.IsOK()) { ORT_THROW(status); } return ret; } else { - return vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_with_options(model_path.u8string(), graph_viewer.GetGraph(), options), vaip_execution_provider_deletor); + return vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_with_options(model_path.string(), graph_viewer.GetGraph(), options), vaip_execution_provider_deletor); } } @@ -707,4 +707,4 @@ CreateExecutionProviderFromAnotherEp(const std::string& lib, const OrtSessionOpt std::ignore = provider->CreateIExecutionProvider(nullptr, nullptr, 0, const_cast(provider_options), session_options, *((OrtLogger*)nullptr), ret); return ret; -} \ No newline at end of file +} From 848c8b1757d3ac9dde84542cc40f67ab515f2257 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 2 Feb 2026 11:05:12 -0800 Subject: [PATCH 10/39] don't build android from Docker in minimal build workflow --- .github/workflows/linux_minimal_build.yml | 62 +++-------------------- 1 file changed, 6 insertions(+), 56 deletions(-) diff --git a/.github/workflows/linux_minimal_build.yml b/.github/workflows/linux_minimal_build.yml index 7d481475e7ded..1f78f56c412a7 100644 --- a/.github/workflows/linux_minimal_build.yml +++ b/.github/workflows/linux_minimal_build.yml @@ -530,43 +530,18 @@ jobs: --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF # Job 7: Extended minimal build with NNAPI EP for Android(arm64-v8a) and skip tests. - # NOTE: Keeping this as direct docker run due to custom volume mounts needed for Android SDK/NDK build_extended_minimal_android: name: 7. Build Extended Minimal (Android NNAPI) - needs: build_full_ort # Depends on Job 1 for test data runs-on: [ "self-hosted", "1ES.Pool=onnxruntime-github-Ubuntu2204-AMD-CPU", "JobId=build_extended_minimal_android-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}" ] - permissions: # Permissions needed for build-docker-image - contents: read - packages: write - id-token: write # If using OIDC for ACR login steps: - name: Checkout repository uses: actions/checkout@v6 with: submodules: false - - uses: actions/setup-node@v6 - with: - node-version: 20 - - name: Download Test Data Artifact - uses: actions/download-artifact@v7 - with: - name: test_data - path: ${{ runner.temp }}/.test_data/ - - - name: Get Docker Image using Action - uses: microsoft/onnxruntime-github-actions/build-docker-image@v0.0.9 - id: build_docker_image_step - with: - dockerfile: ${{ github.workspace }}/tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile - image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecpubuildcix64 - push: true - azure-container-registry-name: onnxruntimebuildcache - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Setup Android NDK uses: ./.github/actions/setup-android-ndk @@ -574,43 +549,18 @@ jobs: ndk-version: 28.0.13004108 # Use default android-sdk-root if not specified - - name: Run Build 7 (Using docker run) + - name: Run Build 7 shell: bash run: | - # Create the target dir for build output inside the runner's temp dir first - mkdir -p ${{ runner.temp }}/7 - - # Ensure ANDROID_NDK_HOME is available and get its real path - if [ -z "$ANDROID_NDK_HOME" ]; then - echo "ANDROID_NDK_HOME is not set." - exit 1 - fi - NDK_HOME_REALPATH=$(realpath $ANDROID_NDK_HOME) - - # Ensure ANDROID_HOME is available - if [ -z "$ANDROID_HOME" ]; then - echo "ANDROID_HOME is not set. Using default /usr/local/lib/android/sdk" - export ANDROID_HOME=/usr/local/lib/android/sdk - fi - - docker run --rm \ - --volume ${{ env.BUILD_SOURCES_DIRECTORY }}:/onnxruntime_src \ - --volume ${{ runner.temp }}:/build \ - --volume $ANDROID_HOME:/android_home \ - --volume $NDK_HOME_REALPATH:/ndk_home \ - -e ALLOW_RELEASED_ONNX_OPSET_ONLY=1 \ - -e NIGHTLY_BUILD=1 -e RUNNER_TEMP=/build \ - ${{ steps.build_docker_image_step.outputs.full-image-name }} \ - bash -c "python3 -m pip install -r /onnxruntime_src/tools/ci_build/requirements/pybind/requirements.txt \ - && python3 /onnxruntime_src/tools/ci_build/build.py \ - --build_dir /build/7 \ + python3 ./tools/ci_build/build.py \ + --build_dir ./build.extended_minimal.nnapi \ --cmake_generator Ninja \ --config MinSizeRel \ --skip_submodule_sync \ --parallel --use_binskim_compliant_compile_flags \ --android \ - --android_sdk_path /android_home \ - --android_ndk_path /ndk_home \ + --android_sdk_path "$ANDROID_HOME" \ + --android_ndk_path "$ANDROID_NDK_HOME" \ --android_abi=arm64-v8a \ --android_api=29 \ --use_nnapi \ @@ -618,5 +568,5 @@ jobs: --build_shared_lib \ --disable_ml_ops \ --disable_exceptions \ - --skip_tests" + --skip_tests working-directory: ${{ env.BUILD_SOURCES_DIRECTORY }} From 2c78745f754793203ea34df6fde23e0a0a8bef8d Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 2 Feb 2026 13:17:51 -0800 Subject: [PATCH 11/39] disable C++20 when onnxruntime_USE_CUDA is enabled for all builds, update comment --- cmake/CMakeLists.txt | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 8eca34f4f80e7..03a37764169df 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -247,11 +247,24 @@ if (NOT CMAKE_C_STANDARD) endif() if (NOT CMAKE_CXX_STANDARD) - # TODO move all builds to C++20 - if (MSVC AND onnxruntime_USE_CUDA) - # There's a compilation error from CUTLASS header "cute/tensor.hpp" when attempting to use C++20: - # cutlass-src\include\cute\stride.hpp(299,46): error C3545: 'Ints': parameter pack expects a non-type template - # argument + # TODO: enable C++20 for all builds + # set(CMAKE_CXX_STANDARD 20) + + if (onnxruntime_USE_CUDA) + # Known issues when updating from C++17 to C++20: + # - MSVC + onnxruntime_USE_CUDA: + # - Compilation error from CUTLASS header "cute/tensor.hpp" when attempting to use C++20: + # cutlass-src\include\cute\stride.hpp(299,46): error C3545: 'Ints': parameter pack expects a non-type + # template argument + # - GCC + onnxruntime_USE_CUDA: + # - Compilation error from onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc when adding an element to + # `std::vector` in + # onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc: + # /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/stl_construct.h:97:14: error: writing 1 byte into a + # region of size 0 [-Werror=stringop-overflow=] + # - Possibly a spurious warning + # + # When the CUDA EP becomes an independent plugin EP, we can keep building it with C++17 if needed. set(CMAKE_CXX_STANDARD 17) else() set(CMAKE_CXX_STANDARD 20) From a035d7f0f194d4326198f343b40bd61f8c4bfa48 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 2 Feb 2026 13:21:02 -0800 Subject: [PATCH 12/39] update comment again --- cmake/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 03a37764169df..3d3f19c829d85 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -264,7 +264,8 @@ if (NOT CMAKE_CXX_STANDARD) # region of size 0 [-Werror=stringop-overflow=] # - Possibly a spurious warning # - # When the CUDA EP becomes an independent plugin EP, we can keep building it with C++17 if needed. + # When the CUDA EP becomes an independent plugin EP, hopefully we can update all of onnxruntime to C++20. + # We can keep building the CUDA plugin EP with C++17 if needed. set(CMAKE_CXX_STANDARD 17) else() set(CMAKE_CXX_STANDARD 20) From 06fec4fa4eb28a5e79fd1f21c3e89d83e2fb4515 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 3 Feb 2026 18:15:05 -0800 Subject: [PATCH 13/39] tweak comment --- cmake/CMakeLists.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 3d3f19c829d85..6d8c09bea6f8e 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -253,13 +253,12 @@ if (NOT CMAKE_CXX_STANDARD) if (onnxruntime_USE_CUDA) # Known issues when updating from C++17 to C++20: # - MSVC + onnxruntime_USE_CUDA: - # - Compilation error from CUTLASS header "cute/tensor.hpp" when attempting to use C++20: + # - Compilation error from CUTLASS header cute/tensor.hpp: # cutlass-src\include\cute\stride.hpp(299,46): error C3545: 'Ints': parameter pack expects a non-type # template argument # - GCC + onnxruntime_USE_CUDA: # - Compilation error from onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc when adding an element to - # `std::vector` in - # onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc: + # `std::vector`: # /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/stl_construct.h:97:14: error: writing 1 byte into a # region of size 0 [-Werror=stringop-overflow=] # - Possibly a spurious warning From 7ec3538feff19a8f72d7ac18bc8276f02bbc1af6 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 9 Feb 2026 09:39:01 -0800 Subject: [PATCH 14/39] DEBUG: CMake generate tracing --- tools/ci_build/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index a0712af35e455..a2b361028ef36 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -364,7 +364,7 @@ def generate_build_tree( ): log.info("Generating CMake build tree") cmake_dir = os.path.join(source_dir, "cmake") - cmake_args = [cmake_path, cmake_dir] + cmake_args = [cmake_path, cmake_dir, "--trace-expand"] if not use_dev_mode(args): cmake_args += ["--compile-no-warning-as-error"] From 79de47aabebec2562ff23fe9351c793f81db181b Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 9 Feb 2026 11:29:56 -0800 Subject: [PATCH 15/39] try disabling module scan for Android QNN --- .../ci_build/github/android/default_qnn_aar_build_settings.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/ci_build/github/android/default_qnn_aar_build_settings.json b/tools/ci_build/github/android/default_qnn_aar_build_settings.json index 5ac49f582d23e..a260a2be4a2c5 100644 --- a/tools/ci_build/github/android/default_qnn_aar_build_settings.json +++ b/tools/ci_build/github/android/default_qnn_aar_build_settings.json @@ -12,6 +12,7 @@ "--build_java", "--build_shared_lib", "--use_qnn=static_lib", + "--cmake_extra_defines=CMAKE_CXX_SCAN_FOR_MODULES=OFF", "--cmake_extra_defines=onnxruntime_BUILD_UNIT_TESTS=OFF", "--skip_tests" From 2ef0bdbf6472058228d844cffc9d9ff887143152 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 9 Feb 2026 11:30:39 -0800 Subject: [PATCH 16/39] Revert "DEBUG: CMake generate tracing" This reverts commit 7ec3538feff19a8f72d7ac18bc8276f02bbc1af6. --- tools/ci_build/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index a2b361028ef36..a0712af35e455 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -364,7 +364,7 @@ def generate_build_tree( ): log.info("Generating CMake build tree") cmake_dir = os.path.join(source_dir, "cmake") - cmake_args = [cmake_path, cmake_dir, "--trace-expand"] + cmake_args = [cmake_path, cmake_dir] if not use_dev_mode(args): cmake_args += ["--compile-no-warning-as-error"] From 4d22c087a90f123b43033f8b9ec3aa8c724092df Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Tue, 24 Feb 2026 12:50:58 -0800 Subject: [PATCH 17/39] add cutlass issue link --- cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 50aa4ee84518d..ea2d33e42dd8b 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -258,7 +258,7 @@ if (NOT CMAKE_CXX_STANDARD) if (onnxruntime_USE_CUDA) # Known issues when updating from C++17 to C++20: # - MSVC + onnxruntime_USE_CUDA: - # - Compilation error from CUTLASS header cute/tensor.hpp: + # - Compilation error from CUTLASS header cute/tensor.hpp (https://github.com/NVIDIA/cutlass/issues/3065): # cutlass-src\include\cute\stride.hpp(299,46): error C3545: 'Ints': parameter pack expects a non-type # template argument # - GCC + onnxruntime_USE_CUDA: From 282545bde1d2c4fbfc270d2c50c3d5cdc3559d6b Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 25 Feb 2026 09:20:24 -0800 Subject: [PATCH 18/39] move cmake code around so that options and setting of CMAKE_CXX_STANDARD are earlier --- cmake/CMakeLists.txt | 65 ++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index ea2d33e42dd8b..33bbbcfeeb088 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -10,39 +10,7 @@ cmake_policy(SET CMP0104 OLD) # Project project(onnxruntime C CXX ASM) -# Disable fast-math for Intel oneAPI compiler -if("${CMAKE_CXX_COMPILER_ID}" MATCHES "IntelLLVM") - if("${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC-like") - # Using icx-cl compiler driver with MSVC-like arguments - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:precise") - else() - # Using icpx compiler driver - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-fast-math") - endif() -endif() - -include(CheckCXXCompilerFlag) -include(CheckLanguage) include(CMakeDependentOption) -include(FetchContent) -include(CheckFunctionExists) -include(CheckSymbolExists) -include(GNUInstallDirs) # onnxruntime_providers_* require CMAKE_INSTALL_* variables - -if (MSVC) - # Make sure Visual Studio sets __cplusplus macro correctly: https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:__cplusplus") -endif() - -set_property(GLOBAL PROPERTY USE_FOLDERS ON) -# NOTE: POSITION INDEPENDENT CODE hurts performance, and it only make sense on POSIX systems -set(CMAKE_POSITION_INDEPENDENT_CODE ON) - -enable_testing() -if (NOT CMAKE_BUILD_TYPE) - message(STATUS "Build type not set - using RelWithDebInfo") - set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Choose build type: Debug Release RelWithDebInfo MinSizeRel." FORCE) -endif() # Options option(onnxruntime_USE_VCPKG "Build with the vcpkg package manager" OFF) @@ -276,6 +244,39 @@ if (NOT CMAKE_CXX_STANDARD) endif() endif() +# Disable fast-math for Intel oneAPI compiler +if("${CMAKE_CXX_COMPILER_ID}" MATCHES "IntelLLVM") + if("${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC-like") + # Using icx-cl compiler driver with MSVC-like arguments + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:precise") + else() + # Using icpx compiler driver + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-fast-math") + endif() +endif() + +include(CheckCXXCompilerFlag) +include(CheckLanguage) +include(FetchContent) +include(CheckFunctionExists) +include(CheckSymbolExists) +include(GNUInstallDirs) # onnxruntime_providers_* require CMAKE_INSTALL_* variables + +if (MSVC) + # Make sure Visual Studio sets __cplusplus macro correctly: https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:__cplusplus") +endif() + +set_property(GLOBAL PROPERTY USE_FOLDERS ON) +# NOTE: POSITION INDEPENDENT CODE hurts performance, and it only make sense on POSIX systems +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +enable_testing() +if (NOT CMAKE_BUILD_TYPE) + message(STATUS "Build type not set - using RelWithDebInfo") + set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Choose build type: Debug Release RelWithDebInfo MinSizeRel." FORCE) +endif() + if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 11.1) message(FATAL_ERROR "GCC version must be greater than or equal to 11.1") endif() From 9c21b3fbb1e1bc996afc12a09726da4c6dd04cd5 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Mon, 2 Mar 2026 13:34:08 -0800 Subject: [PATCH 19/39] vitisai global_api.cc - use onnxruntime::ToUTF8String on model path --- onnxruntime/core/providers/vitisai/imp/global_api.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/providers/vitisai/imp/global_api.cc b/onnxruntime/core/providers/vitisai/imp/global_api.cc index 0db57196c3c68..ad22187a75cd9 100644 --- a/onnxruntime/core/providers/vitisai/imp/global_api.cc +++ b/onnxruntime/core/providers/vitisai/imp/global_api.cc @@ -12,6 +12,7 @@ #endif #include "./vai_assert.h" +#include "core/common/common.h" #include "core/common/exceptions.h" #include "core/framework/error_code_helper.h" #include "core/providers/shared/common.h" @@ -233,12 +234,13 @@ void change_status_with_error(void* status_ptr, int error_code, const char* erro vaip_core::DllSafe>> compile_onnx_model( const onnxruntime::GraphViewer& graph_viewer, const onnxruntime::logging::Logger& logger, const onnxruntime::ProviderOptions& options) { - auto model_path = graph_viewer.ModelPath(); + const auto model_path_string = onnxruntime::ToUTF8String(graph_viewer.ModelPath().native()); + auto vaip_execution_provider_deletor = s_library_vitisaiep.vaip_execution_provider_deletor; if (s_library_vitisaiep.compile_onnx_model_vitisai_ep_v4) { Status status = Status::OK(); auto status_ptr = reinterpret_cast(&status); - auto ret = vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_vitisai_ep_v4(model_path.string(), graph_viewer.GetGraph(), options, status_ptr, change_status_with_error, logger), vaip_execution_provider_deletor); + auto ret = vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_vitisai_ep_v4(model_path_string, graph_viewer.GetGraph(), options, status_ptr, change_status_with_error, logger), vaip_execution_provider_deletor); if (!status.IsOK()) { ORT_THROW(status); } @@ -246,7 +248,7 @@ vaip_core::DllSafe>> c } else if (s_library_vitisaiep.compile_onnx_model_vitisai_ep_v3) { Status status = Status::OK(); auto status_ptr = reinterpret_cast(&status); - auto ret = vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_vitisai_ep_v3(model_path.string(), graph_viewer.GetGraph(), options, status_ptr, change_status_with_error), vaip_execution_provider_deletor); + auto ret = vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_vitisai_ep_v3(model_path_string, graph_viewer.GetGraph(), options, status_ptr, change_status_with_error), vaip_execution_provider_deletor); if (!status.IsOK()) { ORT_THROW(status); } @@ -254,13 +256,13 @@ vaip_core::DllSafe>> c } else if (s_library_vitisaiep.compile_onnx_model_vitisai_ep_with_error_handling) { Status status = Status::OK(); auto status_ptr = reinterpret_cast(&status); - auto ret = vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_vitisai_ep_with_error_handling(model_path.string(), graph_viewer.GetGraph(), options, status_ptr, change_status_with_error), vaip_execution_provider_deletor); + auto ret = vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_vitisai_ep_with_error_handling(model_path_string, graph_viewer.GetGraph(), options, status_ptr, change_status_with_error), vaip_execution_provider_deletor); if (!status.IsOK()) { ORT_THROW(status); } return ret; } else { - return vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_with_options(model_path.string(), graph_viewer.GetGraph(), options), vaip_execution_provider_deletor); + return vaip_core::DllSafe(s_library_vitisaiep.compile_onnx_model_with_options(model_path_string, graph_viewer.GetGraph(), options), vaip_execution_provider_deletor); } } From 554363655eb1c252dc733a9a4eae149ff9ba1ed4 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 4 Mar 2026 11:25:55 -0800 Subject: [PATCH 20/39] try C++20 for everything, pass /permissive to CUDA MSVC build --- cmake/CMakeLists.txt | 41 +++++++------------------- cmake/onnxruntime_providers_cuda.cmake | 8 +++++ 2 files changed, 18 insertions(+), 31 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index ec662d75c53bf..de16ca1ac5fbe 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -10,6 +10,16 @@ cmake_policy(SET CMP0104 OLD) # Project project(onnxruntime C CXX ASM) +# Set C/C++ standard versions +if (NOT CMAKE_C_STANDARD) + # Needed for Java + set(CMAKE_C_STANDARD 99) +endif() + +if (NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 20) +endif() + include(CMakeDependentOption) # Options @@ -213,37 +223,6 @@ option(onnxruntime_USE_OPENVINO_INTERFACE "Build ONNXRuntime shared lib which is option(onnxruntime_USE_VITISAI_INTERFACE "Build ONNXRuntime shared lib which is compatible with Vitis-AI EP interface" OFF) option(onnxruntime_USE_QNN_INTERFACE "Build ONNXRuntime shared lib which is compatible with QNN EP interface" OFF) -# Set C/C++ standard versions -if (NOT CMAKE_C_STANDARD) - # Needed for Java - set(CMAKE_C_STANDARD 99) -endif() - -if (NOT CMAKE_CXX_STANDARD) - # TODO: enable C++20 for all builds - # set(CMAKE_CXX_STANDARD 20) - - if (onnxruntime_USE_CUDA) - # Known issues when updating from C++17 to C++20: - # - MSVC + onnxruntime_USE_CUDA: - # - Compilation error from CUTLASS header cute/tensor.hpp (https://github.com/NVIDIA/cutlass/issues/3065): - # cutlass-src\include\cute\stride.hpp(299,46): error C3545: 'Ints': parameter pack expects a non-type - # template argument - # - GCC + onnxruntime_USE_CUDA: - # - Compilation error from onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc when adding an element to - # `std::vector`: - # /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/stl_construct.h:97:14: error: writing 1 byte into a - # region of size 0 [-Werror=stringop-overflow=] - # - Possibly a spurious warning - # - # When the CUDA EP becomes an independent plugin EP, hopefully we can update all of onnxruntime to C++20. - # We can keep building the CUDA plugin EP with C++17 if needed. - set(CMAKE_CXX_STANDARD 17) - else() - set(CMAKE_CXX_STANDARD 20) - endif() -endif() - # Disable fast-math for Intel oneAPI compiler if("${CMAKE_CXX_COMPILER_ID}" MATCHES "IntelLLVM") if("${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC-like") diff --git a/cmake/onnxruntime_providers_cuda.cmake b/cmake/onnxruntime_providers_cuda.cmake index 94dba5bcec93c..41ba18769e688 100644 --- a/cmake/onnxruntime_providers_cuda.cmake +++ b/cmake/onnxruntime_providers_cuda.cmake @@ -149,6 +149,14 @@ onnxruntime_add_shared_library_module(onnxruntime_providers_cuda ${onnxruntime_providers_cuda_all_srcs}) endif() + if (MSVC) + # Use /permissive to work around compilation error from CUTLASS header cute/tensor.hpp: + # cutlass-src\include\cute\stride.hpp(299,46): error C3545: 'Ints': parameter pack expects a non-type + # template argument + # See https://github.com/NVIDIA/cutlass/issues/3065 + target_compile_options(onnxruntime_providers_cuda PRIVATE "/permissive") + endif() + if(WIN32) # FILE_NAME preprocessor definition is used in onnxruntime_providers_cuda.rc target_compile_definitions(onnxruntime_providers_cuda PRIVATE FILE_NAME=\"onnxruntime_providers_cuda.dll\") From 1a8f8973772fa01b253b42bfd206204929186601 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 4 Mar 2026 13:47:49 -0800 Subject: [PATCH 21/39] try adding /permissive another way, update CUDA_STANDARD to 20 too --- cmake/CMakeLists.txt | 2 +- cmake/onnxruntime_providers_cuda.cmake | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index de16ca1ac5fbe..fd41fcbb1309c 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -1473,7 +1473,7 @@ configure_file(onnxruntime_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime_c get_property(onnxruntime_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) if (onnxruntime_USE_CUDA) - set(CMAKE_CUDA_STANDARD 17) + set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD}) if(onnxruntime_CUDA_HOME) file(TO_CMAKE_PATH ${onnxruntime_CUDA_HOME} CUDAToolkit_ROOT) endif() diff --git a/cmake/onnxruntime_providers_cuda.cmake b/cmake/onnxruntime_providers_cuda.cmake index 41ba18769e688..2cde0dd0dc269 100644 --- a/cmake/onnxruntime_providers_cuda.cmake +++ b/cmake/onnxruntime_providers_cuda.cmake @@ -154,7 +154,10 @@ # cutlass-src\include\cute\stride.hpp(299,46): error C3545: 'Ints': parameter pack expects a non-type # template argument # See https://github.com/NVIDIA/cutlass/issues/3065 - target_compile_options(onnxruntime_providers_cuda PRIVATE "/permissive") + target_compile_options(onnxruntime_providers_cuda PRIVATE + "$<$:/permissive>" + #"$<$:SHELL:-Xcompiler /permissive>" + ) endif() if(WIN32) From f2fcee0a6c7b82c4460a592f138647ca12026e4e Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 4 Mar 2026 14:18:26 -0800 Subject: [PATCH 22/39] add CUDA language /permissive too --- cmake/onnxruntime_providers_cuda.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/onnxruntime_providers_cuda.cmake b/cmake/onnxruntime_providers_cuda.cmake index 2cde0dd0dc269..e12757f86daef 100644 --- a/cmake/onnxruntime_providers_cuda.cmake +++ b/cmake/onnxruntime_providers_cuda.cmake @@ -156,7 +156,7 @@ # See https://github.com/NVIDIA/cutlass/issues/3065 target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$:/permissive>" - #"$<$:SHELL:-Xcompiler /permissive>" + "$<$:SHELL:-Xcompiler /permissive>" ) endif() From 5873aee8d65de58b0aeb4acf1a399ec07e31d5d3 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 4 Mar 2026 15:02:59 -0800 Subject: [PATCH 23/39] change GridDim::maxThreadsPerBlock and GridDim::maxElementsPerThread to be static constexpr data members instead of enum values --- onnxruntime/core/providers/cuda/cu_inc/common.cuh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/onnxruntime/core/providers/cuda/cu_inc/common.cuh b/onnxruntime/core/providers/cuda/cu_inc/common.cuh index ec794b46d3f0e..cf85ffbc92fa1 100644 --- a/onnxruntime/core/providers/cuda/cu_inc/common.cuh +++ b/onnxruntime/core/providers/cuda/cu_inc/common.cuh @@ -683,10 +683,8 @@ inline __host__ __device__ INT CeilDiv(INT a, INT2 b) // ceil(a/b) } struct GridDim { - enum : CUDA_LONG { - maxThreadsPerBlock = 256, // max threads per block - maxElementsPerThread = 4, // max element processed per thread - }; + static constexpr CUDA_LONG maxThreadsPerBlock = 256; // max threads per block + static constexpr CUDA_LONG maxElementsPerThread = 4; // max element processed per thread }; // aligned vector generates vectorized load/store on CUDA From 99f245fba90b4d704ed18f211705dd2b7a76e19b Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 4 Mar 2026 17:06:14 -0800 Subject: [PATCH 24/39] cutlass patch to work around error --- cmake/external/cutlass.cmake | 2 +- cmake/patches/cutlass/cutlass_4.2.1.patch | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cmake/external/cutlass.cmake b/cmake/external/cutlass.cmake index df554269dfc7f..83d8a156b630f 100644 --- a/cmake/external/cutlass.cmake +++ b/cmake/external/cutlass.cmake @@ -4,7 +4,7 @@ onnxruntime_fetchcontent_declare( URL ${DEP_URL_cutlass} URL_HASH SHA1=${DEP_SHA1_cutlass} EXCLUDE_FROM_ALL -PATCH_COMMAND ${Patch_EXECUTABLE} --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/cutlass/cutlass_4.2.1.patch + PATCH_COMMAND ${Patch_EXECUTABLE} --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/cutlass/cutlass_4.2.1.patch ) FetchContent_GetProperties(cutlass) diff --git a/cmake/patches/cutlass/cutlass_4.2.1.patch b/cmake/patches/cutlass/cutlass_4.2.1.patch index 3a3ec5ba103ef..e955327a7a494 100644 --- a/cmake/patches/cutlass/cutlass_4.2.1.patch +++ b/cmake/patches/cutlass/cutlass_4.2.1.patch @@ -11,6 +11,20 @@ index cb161369..2fdff179 100644 [&](auto init, auto i){ if constexpr (is_constant_v<0, decltype(get(flat_stride))>) { return append(init, i); } else { return init; } +diff --git a/include/cutlass/cuda_host_adapter.hpp b/include/cutlass/cuda_host_adapter.hpp +index a8af62be..74409054 100644 +--- a/include/cutlass/cuda_host_adapter.hpp ++++ b/include/cutlass/cuda_host_adapter.hpp +@@ -394,6 +394,9 @@ protected: + * Fills a buffer in Global Memory with a byte sequence copied from host memory. + * This function can be overridden to dispatch to the appropriate cuMemsetD*Async API + */ ++ // Patching to work around this error: ++ // include\cutlass/cuda_host_adapter.hpp(414): error #20011-D: calling a __host__ function("memsetDeviceImpl") from a __host__ __device__ function("memsetDevice") is not allowed ++ CUTLASS_HOST_DEVICE + virtual Status memsetDeviceImpl( + void* destination, ///< Device memory pointer to be filled + void const* fill_value, ///< Value to be filled in the buffer diff --git a/include/cutlass/exmy_base.h b/include/cutlass/exmy_base.h index be207a49..6028e01d 100644 --- a/include/cutlass/exmy_base.h From 042b504c71c14716ed620bde28446c631985ec93 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 4 Mar 2026 18:19:08 -0800 Subject: [PATCH 25/39] move #177 warning suppression to CUDA 12.8+ section --- cmake/onnxruntime_providers_cuda.cmake | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmake/onnxruntime_providers_cuda.cmake b/cmake/onnxruntime_providers_cuda.cmake index e12757f86daef..78be50473b56d 100644 --- a/cmake/onnxruntime_providers_cuda.cmake +++ b/cmake/onnxruntime_providers_cuda.cmake @@ -200,6 +200,10 @@ endif() # skip diagnosis error caused by cuda header files target_compile_options(${target} PRIVATE "$<$:--diag-suppress=221>") + + # suppress warnings like this: + # cutlass-src\include\cute/arch/mma_sm120.hpp(3128): error #177-D: variable "tidA" was declared but never referenced + target_compile_options(${target} PRIVATE "$<$:--diag-suppress=177>") endif() if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) @@ -292,7 +296,6 @@ target_compile_definitions(${target} PRIVATE COMPILE_HOPPER_TMA_GEMMS) if (MSVC) target_compile_options(${target} PRIVATE "$<$:SHELL:-Xcompiler /bigobj>") - target_compile_options(${target} PRIVATE "$<$:--diag-suppress=177>") target_compile_options(${target} PRIVATE "$<$:SHELL:-Xcompiler /wd4172>") endif() endif() From bacffa2f11186913d72287ffca4df1d480eacd03 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 4 Mar 2026 18:29:32 -0800 Subject: [PATCH 26/39] undo CMake code moving around --- cmake/CMakeLists.txt | 73 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index fd41fcbb1309c..234947470c9be 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -20,7 +20,43 @@ if (NOT CMAKE_CXX_STANDARD) set(CMAKE_CXX_STANDARD 20) endif() +# Disable fast-math for Intel oneAPI compiler +if("${CMAKE_CXX_COMPILER_ID}" MATCHES "IntelLLVM") + if("${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC-like") + # Using icx-cl compiler driver with MSVC-like arguments + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:precise") + else() + # Using icpx compiler driver + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-fast-math") + endif() +endif() + +include(CheckCXXCompilerFlag) +include(CheckLanguage) include(CMakeDependentOption) +include(FetchContent) +include(CheckFunctionExists) +include(CheckSymbolExists) +include(GNUInstallDirs) # onnxruntime_providers_* require CMAKE_INSTALL_* variables + +if (MSVC) + # Make sure Visual Studio sets __cplusplus macro correctly: https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:__cplusplus") + + # Prevents CMake from injecting '#pragma system_header', which results in warnings being disabled in projects that + # use precompiled headers. + set(CMAKE_PCH_PROLOGUE "") +endif() + +set_property(GLOBAL PROPERTY USE_FOLDERS ON) +# NOTE: POSITION INDEPENDENT CODE hurts performance, and it only make sense on POSIX systems +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +enable_testing() +if (NOT CMAKE_BUILD_TYPE) + message(STATUS "Build type not set - using RelWithDebInfo") + set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Choose build type: Debug Release RelWithDebInfo MinSizeRel." FORCE) +endif() # Options option(onnxruntime_USE_VCPKG "Build with the vcpkg package manager" OFF) @@ -223,43 +259,6 @@ option(onnxruntime_USE_OPENVINO_INTERFACE "Build ONNXRuntime shared lib which is option(onnxruntime_USE_VITISAI_INTERFACE "Build ONNXRuntime shared lib which is compatible with Vitis-AI EP interface" OFF) option(onnxruntime_USE_QNN_INTERFACE "Build ONNXRuntime shared lib which is compatible with QNN EP interface" OFF) -# Disable fast-math for Intel oneAPI compiler -if("${CMAKE_CXX_COMPILER_ID}" MATCHES "IntelLLVM") - if("${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC-like") - # Using icx-cl compiler driver with MSVC-like arguments - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:precise") - else() - # Using icpx compiler driver - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-fast-math") - endif() -endif() - -include(CheckCXXCompilerFlag) -include(CheckLanguage) -include(FetchContent) -include(CheckFunctionExists) -include(CheckSymbolExists) -include(GNUInstallDirs) # onnxruntime_providers_* require CMAKE_INSTALL_* variables - -if (MSVC) - # Make sure Visual Studio sets __cplusplus macro correctly: https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:__cplusplus") - - # Prevents CMake from injecting '#pragma system_header', which results in warnings being disabled in projects that - # use precompiled headers. - set(CMAKE_PCH_PROLOGUE "") -endif() - -set_property(GLOBAL PROPERTY USE_FOLDERS ON) -# NOTE: POSITION INDEPENDENT CODE hurts performance, and it only make sense on POSIX systems -set(CMAKE_POSITION_INDEPENDENT_CODE ON) - -enable_testing() -if (NOT CMAKE_BUILD_TYPE) - message(STATUS "Build type not set - using RelWithDebInfo") - set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Choose build type: Debug Release RelWithDebInfo MinSizeRel." FORCE) -endif() - if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 11.1) message(FATAL_ERROR "GCC version must be greater than or equal to 11.1") endif() From b4aca518206de3f027eca0737506427b3cf4db43 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 4 Mar 2026 18:39:37 -0800 Subject: [PATCH 27/39] break long line --- cmake/onnxruntime_providers_cuda.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/onnxruntime_providers_cuda.cmake b/cmake/onnxruntime_providers_cuda.cmake index 78be50473b56d..3e7d43e66b690 100644 --- a/cmake/onnxruntime_providers_cuda.cmake +++ b/cmake/onnxruntime_providers_cuda.cmake @@ -202,7 +202,8 @@ target_compile_options(${target} PRIVATE "$<$:--diag-suppress=221>") # suppress warnings like this: - # cutlass-src\include\cute/arch/mma_sm120.hpp(3128): error #177-D: variable "tidA" was declared but never referenced + # cutlass-src\include\cute/arch/mma_sm120.hpp(3128): error #177-D: variable "tidA" was declared but never + # referenced target_compile_options(${target} PRIVATE "$<$:--diag-suppress=177>") endif() From e6297ffe63fbc32a5e10f3f0a35e122e59e5fb1b Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 4 Mar 2026 18:42:39 -0800 Subject: [PATCH 28/39] update patch file for line length --- cmake/patches/cutlass/cutlass_4.2.1.patch | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/patches/cutlass/cutlass_4.2.1.patch b/cmake/patches/cutlass/cutlass_4.2.1.patch index e955327a7a494..e5af67eb25fcf 100644 --- a/cmake/patches/cutlass/cutlass_4.2.1.patch +++ b/cmake/patches/cutlass/cutlass_4.2.1.patch @@ -20,7 +20,8 @@ index a8af62be..74409054 100644 * This function can be overridden to dispatch to the appropriate cuMemsetD*Async API */ + // Patching to work around this error: -+ // include\cutlass/cuda_host_adapter.hpp(414): error #20011-D: calling a __host__ function("memsetDeviceImpl") from a __host__ __device__ function("memsetDevice") is not allowed ++ // include\cutlass/cuda_host_adapter.hpp(414): error #20011-D: calling a __host__ function("memsetDeviceImpl") ++ // from a __host__ __device__ function("memsetDevice") is not allowed + CUTLASS_HOST_DEVICE virtual Status memsetDeviceImpl( void* destination, ///< Device memory pointer to be filled From 6d7ff32dbc3bef5487d17029222a4c51b20be060 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 4 Mar 2026 19:00:22 -0800 Subject: [PATCH 29/39] fix patch file --- cmake/patches/cutlass/cutlass_4.2.1.patch | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/patches/cutlass/cutlass_4.2.1.patch b/cmake/patches/cutlass/cutlass_4.2.1.patch index e5af67eb25fcf..6776eded10640 100644 --- a/cmake/patches/cutlass/cutlass_4.2.1.patch +++ b/cmake/patches/cutlass/cutlass_4.2.1.patch @@ -12,10 +12,10 @@ index cb161369..2fdff179 100644 if constexpr (is_constant_v<0, decltype(get(flat_stride))>) { return append(init, i); } else { return init; } diff --git a/include/cutlass/cuda_host_adapter.hpp b/include/cutlass/cuda_host_adapter.hpp -index a8af62be..74409054 100644 +index a8af62be..22e7332d 100644 --- a/include/cutlass/cuda_host_adapter.hpp +++ b/include/cutlass/cuda_host_adapter.hpp -@@ -394,6 +394,9 @@ protected: +@@ -394,6 +394,10 @@ protected: * Fills a buffer in Global Memory with a byte sequence copied from host memory. * This function can be overridden to dispatch to the appropriate cuMemsetD*Async API */ From 8dbb80f9f5c766c041480ae3915c307340d999f3 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Wed, 4 Mar 2026 20:14:15 -0800 Subject: [PATCH 30/39] don't use deprecated move_iterator operator-> --- .../onnxruntime/core/common/gpu_profiler_common.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/include/onnxruntime/core/common/gpu_profiler_common.h b/include/onnxruntime/core/common/gpu_profiler_common.h index 00d5033ef2df4..999d74f196505 100644 --- a/include/onnxruntime/core/common/gpu_profiler_common.h +++ b/include/onnxruntime/core/common/gpu_profiler_common.h @@ -379,8 +379,8 @@ class GPUProfilerBase : public EpProfiler { void MergeEvents(std::map& events_to_merge, Events& events) { Events merged_events; - auto event_iter = std::make_move_iterator(events.begin()); - auto event_end = std::make_move_iterator(events.end()); + auto event_iter = events.begin(); + auto event_end = events.end(); for (auto& map_iter : events_to_merge) { if (map_iter.second.empty()) { continue; @@ -395,7 +395,7 @@ class GPUProfilerBase : public EpProfiler { (event_iter->ts == ts && (event_iter + 1) != event_end && (event_iter + 1)->ts == ts))) { - merged_events.emplace_back(*event_iter); + merged_events.emplace_back(*std::make_move_iterator(event_iter)); ++event_iter; } @@ -409,7 +409,7 @@ class GPUProfilerBase : public EpProfiler { copy_op_names = true; op_name = event_iter->args["op_name"]; parent_name = event_iter->name; - merged_events.emplace_back(*event_iter); + merged_events.emplace_back(*std::make_move_iterator(event_iter)); ++event_iter; } @@ -428,7 +428,9 @@ class GPUProfilerBase : public EpProfiler { } // move any remaining events - merged_events.insert(merged_events.end(), event_iter, event_end); + merged_events.insert(merged_events.end(), + std::make_move_iterator(event_iter), + std::make_move_iterator(event_end)); std::swap(events, merged_events); } From 6f801b2fafb776cea66f318720dda1fc92792e48 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 5 Mar 2026 11:03:32 -0800 Subject: [PATCH 31/39] try to suppress spurious stringop-overflow warning --- cmake/onnxruntime_config.h.in | 1 + .../contrib_ops/cuda/llm/cutlass_heuristic.cc | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/cmake/onnxruntime_config.h.in b/cmake/onnxruntime_config.h.in index a36f735c507ba..e5f759b9d705f 100644 --- a/cmake/onnxruntime_config.h.in +++ b/cmake/onnxruntime_config.h.in @@ -20,6 +20,7 @@ #cmakedefine HAS_PARENTHESES #cmakedefine HAS_REALLOCARRAY #cmakedefine HAS_SHORTEN_64_TO_32 +#cmakedefine HAS_STRINGOP_OVERFLOW #cmakedefine HAS_TAUTOLOGICAL_POINTER_COMPARE #cmakedefine HAS_UNUSED_BUT_SET_PARAMETER #cmakedefine HAS_UNUSED_BUT_SET_VARIABLE diff --git a/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc b/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc index d53fb558ba1a1..cb643997e543e 100644 --- a/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc +++ b/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc @@ -28,6 +28,7 @@ #include "cutlass/gemm/gemm.h" #include "cutlass/numeric_types.h" #include "core/common/common.h" +#include "onnxruntime_config.h" #include #include @@ -280,6 +281,13 @@ std::vector get_candidate_configs_sm90(CutlassGemmConfig::Can return candidate_configs; } +#ifdef __GNUC__ +#pragma GCC diagnostic push +#if defined(HAS_STRINGOP_OVERFLOW) +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif // defined(HAS_STRINGOP_OVERFLOW) +#endif // __GNUC__ + std::vector get_candidate_configs_sm100(CutlassGemmConfig::CandidateConfigTypeParam const config) { #ifdef FAST_BUILD // Fast build disables all configs except this one for SM100 @@ -354,7 +362,11 @@ std::vector get_candidate_configs_sm100(CutlassGemmConfig::Ca } #endif -} // namespace kernels +} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif // __GNUC__ std::vector get_candidate_configs( int sm, int const max_split_k, CutlassGemmConfig::CandidateConfigTypeParam const config_type_param) { From f5c4222d22b58f4423bc209c4ea0283a1884282d Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 5 Mar 2026 11:14:05 -0800 Subject: [PATCH 32/39] add comment --- onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc b/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc index cb643997e543e..2f758b55d1cbe 100644 --- a/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc +++ b/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc @@ -281,6 +281,16 @@ std::vector get_candidate_configs_sm90(CutlassGemmConfig::Can return candidate_configs; } +// Suppressing this warning from a Release build with GCC: +// +// In function ‘constexpr decltype (::new(void*(0)) _Tp) std::construct_at(_Tp*, _Args&& ...) [with _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}]’, +// inlined from ‘static constexpr void std::allocator_traits >::construct(allocator_type&, _Up*, _Args&& ...) [with _Up = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}; _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/alloc_traits.h:577:21, +// inlined from ‘constexpr std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::emplace_back(_Args&& ...) [with _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}; _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Alloc = std::allocator]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/vector.tcc:117:30, +// inlined from ‘constexpr void std::vector<_Tp, _Alloc>::push_back(value_type&&) [with _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Alloc = std::allocator]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/stl_vector.h:1301:21, +// inlined from ‘std::vector onnxruntime::llm::kernels::cutlass_kernels::get_candidate_configs_sm100(onnxruntime::llm::cutlass_extensions::CutlassGemmConfig::CandidateConfigTypeParam)’ at /onnxruntime_src/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc:298:34: +// /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/stl_construct.h:97:14: error: writing 1 byte into a region of size 0 [-Werror=stringop-overflow=] +// 97 | { return ::new((void*)__location) _Tp(std::forward<_Args>(__args)...); } +// | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef __GNUC__ #pragma GCC diagnostic push #if defined(HAS_STRINGOP_OVERFLOW) From 294dbd3ba5bfcc04c0bc284ac81f25cc2c222fee Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 5 Mar 2026 11:15:15 -0800 Subject: [PATCH 33/39] try to make suppression very local --- .../contrib_ops/cuda/llm/cutlass_heuristic.cc | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc b/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc index 2f758b55d1cbe..79d2cca792602 100644 --- a/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc +++ b/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc @@ -281,23 +281,6 @@ std::vector get_candidate_configs_sm90(CutlassGemmConfig::Can return candidate_configs; } -// Suppressing this warning from a Release build with GCC: -// -// In function ‘constexpr decltype (::new(void*(0)) _Tp) std::construct_at(_Tp*, _Args&& ...) [with _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}]’, -// inlined from ‘static constexpr void std::allocator_traits >::construct(allocator_type&, _Up*, _Args&& ...) [with _Up = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}; _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/alloc_traits.h:577:21, -// inlined from ‘constexpr std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::emplace_back(_Args&& ...) [with _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}; _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Alloc = std::allocator]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/vector.tcc:117:30, -// inlined from ‘constexpr void std::vector<_Tp, _Alloc>::push_back(value_type&&) [with _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Alloc = std::allocator]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/stl_vector.h:1301:21, -// inlined from ‘std::vector onnxruntime::llm::kernels::cutlass_kernels::get_candidate_configs_sm100(onnxruntime::llm::cutlass_extensions::CutlassGemmConfig::CandidateConfigTypeParam)’ at /onnxruntime_src/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc:298:34: -// /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/stl_construct.h:97:14: error: writing 1 byte into a region of size 0 [-Werror=stringop-overflow=] -// 97 | { return ::new((void*)__location) _Tp(std::forward<_Args>(__args)...); } -// | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#ifdef __GNUC__ -#pragma GCC diagnostic push -#if defined(HAS_STRINGOP_OVERFLOW) -#pragma GCC diagnostic ignored "-Wstringop-overflow" -#endif // defined(HAS_STRINGOP_OVERFLOW) -#endif // __GNUC__ - std::vector get_candidate_configs_sm100(CutlassGemmConfig::CandidateConfigTypeParam const config) { #ifdef FAST_BUILD // Fast build disables all configs except this one for SM100 @@ -313,8 +296,27 @@ std::vector get_candidate_configs_sm100(CutlassGemmConfig::Ca MainloopScheduleType::AUTO, EpilogueScheduleType::AUTO, ClusterShape::ClusterShape_2x1x1}); // candidate_configs.push_back(CutlassGemmConfig{CutlassTileConfigSM100::CtaShape128x256x128B, // MainloopScheduleType::AUTO, EpilogueScheduleType::AUTO, ClusterShape::ClusterShape_1x1x1}); + // Suppressing this warning from a Release build with GCC: +// +// In function ‘constexpr decltype (::new(void*(0)) _Tp) std::construct_at(_Tp*, _Args&& ...) [with _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}]’, +// inlined from ‘static constexpr void std::allocator_traits >::construct(allocator_type&, _Up*, _Args&& ...) [with _Up = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}; _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/alloc_traits.h:577:21, +// inlined from ‘constexpr std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::emplace_back(_Args&& ...) [with _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}; _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Alloc = std::allocator]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/vector.tcc:117:30, +// inlined from ‘constexpr void std::vector<_Tp, _Alloc>::push_back(value_type&&) [with _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Alloc = std::allocator]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/stl_vector.h:1301:21, +// inlined from ‘std::vector onnxruntime::llm::kernels::cutlass_kernels::get_candidate_configs_sm100(onnxruntime::llm::cutlass_extensions::CutlassGemmConfig::CandidateConfigTypeParam)’ at /onnxruntime_src/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc:298:34: +// /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/stl_construct.h:97:14: error: writing 1 byte into a region of size 0 [-Werror=stringop-overflow=] +// 97 | { return ::new((void*)__location) _Tp(std::forward<_Args>(__args)...); } +// | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#if defined(HAS_STRINGOP_OVERFLOW) +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif // defined(HAS_STRINGOP_OVERFLOW) +#endif // __GNUC__ candidate_configs.push_back(CutlassGemmConfig{CutlassTileConfigSM100::CtaShape128x256x128B, MainloopScheduleType::AUTO, EpilogueScheduleType::AUTO, ClusterShape::ClusterShape_1x2x1}); +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif // __GNUC__ candidate_configs.push_back(CutlassGemmConfig{CutlassTileConfigSM100::CtaShape256x64x128B, MainloopScheduleType::AUTO, EpilogueScheduleType::AUTO, ClusterShape::ClusterShape_2x1x1}); candidate_configs.push_back(CutlassGemmConfig{CutlassTileConfigSM100::CtaShape128x64x128B, @@ -374,10 +376,6 @@ std::vector get_candidate_configs_sm100(CutlassGemmConfig::Ca } -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif // __GNUC__ - std::vector get_candidate_configs( int sm, int const max_split_k, CutlassGemmConfig::CandidateConfigTypeParam const config_type_param) { if ((config_type_param & CutlassGemmConfig::FP4_ONLY) && !(config_type_param & CutlassGemmConfig::BLACKWELL)) { From bddc3aca9557a0e743bc2357c885b94956d8d035 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 5 Mar 2026 11:20:25 -0800 Subject: [PATCH 34/39] indent comment and formatting --- .../contrib_ops/cuda/llm/cutlass_heuristic.cc | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc b/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc index 79d2cca792602..570ef4d9bbcdf 100644 --- a/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc +++ b/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc @@ -296,16 +296,17 @@ std::vector get_candidate_configs_sm100(CutlassGemmConfig::Ca MainloopScheduleType::AUTO, EpilogueScheduleType::AUTO, ClusterShape::ClusterShape_2x1x1}); // candidate_configs.push_back(CutlassGemmConfig{CutlassTileConfigSM100::CtaShape128x256x128B, // MainloopScheduleType::AUTO, EpilogueScheduleType::AUTO, ClusterShape::ClusterShape_1x1x1}); + // Suppressing this warning from a Release build with GCC: -// -// In function ‘constexpr decltype (::new(void*(0)) _Tp) std::construct_at(_Tp*, _Args&& ...) [with _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}]’, -// inlined from ‘static constexpr void std::allocator_traits >::construct(allocator_type&, _Up*, _Args&& ...) [with _Up = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}; _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/alloc_traits.h:577:21, -// inlined from ‘constexpr std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::emplace_back(_Args&& ...) [with _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}; _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Alloc = std::allocator]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/vector.tcc:117:30, -// inlined from ‘constexpr void std::vector<_Tp, _Alloc>::push_back(value_type&&) [with _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Alloc = std::allocator]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/stl_vector.h:1301:21, -// inlined from ‘std::vector onnxruntime::llm::kernels::cutlass_kernels::get_candidate_configs_sm100(onnxruntime::llm::cutlass_extensions::CutlassGemmConfig::CandidateConfigTypeParam)’ at /onnxruntime_src/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc:298:34: -// /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/stl_construct.h:97:14: error: writing 1 byte into a region of size 0 [-Werror=stringop-overflow=] -// 97 | { return ::new((void*)__location) _Tp(std::forward<_Args>(__args)...); } -// | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // + // In function ‘constexpr decltype (::new(void*(0)) _Tp) std::construct_at(_Tp*, _Args&& ...) [with _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}]’, + // inlined from ‘static constexpr void std::allocator_traits >::construct(allocator_type&, _Up*, _Args&& ...) [with _Up = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}; _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/alloc_traits.h:577:21, + // inlined from ‘constexpr std::vector<_Tp, _Alloc>::reference std::vector<_Tp, _Alloc>::emplace_back(_Args&& ...) [with _Args = {onnxruntime::llm::cutlass_extensions::CutlassGemmConfig}; _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Alloc = std::allocator]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/vector.tcc:117:30, + // inlined from ‘constexpr void std::vector<_Tp, _Alloc>::push_back(value_type&&) [with _Tp = onnxruntime::llm::cutlass_extensions::CutlassGemmConfig; _Alloc = std::allocator]’ at /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/stl_vector.h:1301:21, + // inlined from ‘std::vector onnxruntime::llm::kernels::cutlass_kernels::get_candidate_configs_sm100(onnxruntime::llm::cutlass_extensions::CutlassGemmConfig::CandidateConfigTypeParam)’ at /onnxruntime_src/onnxruntime/contrib_ops/cuda/llm/cutlass_heuristic.cc:298:34: + // /opt/rh/gcc-toolset-14/root/usr/include/c++/14/bits/stl_construct.h:97:14: error: writing 1 byte into a region of size 0 [-Werror=stringop-overflow=] + // 97 | { return ::new((void*)__location) _Tp(std::forward<_Args>(__args)...); } + // | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #ifdef __GNUC__ #pragma GCC diagnostic push #if defined(HAS_STRINGOP_OVERFLOW) @@ -317,6 +318,7 @@ std::vector get_candidate_configs_sm100(CutlassGemmConfig::Ca #ifdef __GNUC__ #pragma GCC diagnostic pop #endif // __GNUC__ + candidate_configs.push_back(CutlassGemmConfig{CutlassTileConfigSM100::CtaShape256x64x128B, MainloopScheduleType::AUTO, EpilogueScheduleType::AUTO, ClusterShape::ClusterShape_2x1x1}); candidate_configs.push_back(CutlassGemmConfig{CutlassTileConfigSM100::CtaShape128x64x128B, @@ -373,7 +375,6 @@ std::vector get_candidate_configs_sm100(CutlassGemmConfig::Ca ORT_THROW("Not Implemented: SM100 GEMM candidates have not been defined."); } #endif - } std::vector get_candidate_configs( From 73019d4bf00014e34fea33f86a37e3f0f7f77ee7 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 5 Mar 2026 12:17:02 -0800 Subject: [PATCH 35/39] set CMAKE_CXX_SCAN_FOR_MODULES in CMakeLists.txt --- cmake/CMakeLists.txt | 7 +++++++ .../github/android/default_full_aar_build_settings.json | 1 - .../github/android/default_qnn_aar_build_settings.json | 1 - 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 234947470c9be..7db6a070ab9ea 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -20,6 +20,13 @@ if (NOT CMAKE_CXX_STANDARD) set(CMAKE_CXX_STANDARD 20) endif() +# We don't use C++20 modules yet. +# There are some known issues to address first: +# - Android builds from Linux Docker containers have trouble finding clang-scan-deps. +# - The MSVC /permissive option is needed for compiling some of the CUDA EP code which uses CUTLASS. +# This option is not compatible with C++20 modules. +set(CMAKE_CXX_SCAN_FOR_MODULES OFF) + # Disable fast-math for Intel oneAPI compiler if("${CMAKE_CXX_COMPILER_ID}" MATCHES "IntelLLVM") if("${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC-like") diff --git a/tools/ci_build/github/android/default_full_aar_build_settings.json b/tools/ci_build/github/android/default_full_aar_build_settings.json index 94c25d65a0937..bc3d02d65b167 100644 --- a/tools/ci_build/github/android/default_full_aar_build_settings.json +++ b/tools/ci_build/github/android/default_full_aar_build_settings.json @@ -17,7 +17,6 @@ "--use_nnapi", "--use_xnnpack", "--use_webgpu", - "--cmake_extra_defines=CMAKE_CXX_SCAN_FOR_MODULES=OFF", "--skip_tests" ] } diff --git a/tools/ci_build/github/android/default_qnn_aar_build_settings.json b/tools/ci_build/github/android/default_qnn_aar_build_settings.json index a260a2be4a2c5..5ac49f582d23e 100644 --- a/tools/ci_build/github/android/default_qnn_aar_build_settings.json +++ b/tools/ci_build/github/android/default_qnn_aar_build_settings.json @@ -12,7 +12,6 @@ "--build_java", "--build_shared_lib", "--use_qnn=static_lib", - "--cmake_extra_defines=CMAKE_CXX_SCAN_FOR_MODULES=OFF", "--cmake_extra_defines=onnxruntime_BUILD_UNIT_TESTS=OFF", "--skip_tests" From d3b2ff95ae3bc9a17c99ad2c0dc44d4c939b730a Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 5 Mar 2026 14:03:50 -0800 Subject: [PATCH 36/39] clarify comment about cxx_std_17 feature --- cmake/onnxruntime_fuzz_test.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/onnxruntime_fuzz_test.cmake b/cmake/onnxruntime_fuzz_test.cmake index eea411d938176..2935b58ffa61a 100644 --- a/cmake/onnxruntime_fuzz_test.cmake +++ b/cmake/onnxruntime_fuzz_test.cmake @@ -60,7 +60,7 @@ if (onnxruntime_FUZZ_ENABLED) # compile the executables onnxruntime_add_executable(onnxruntime_security_fuzz ${SEC_FUZ_SRC}) - # compile with c++17 + # compile with at least c++17 target_compile_features(onnxruntime_security_fuzz PUBLIC cxx_std_17) # Security fuzzing engine header file reference From 90b2c73a93135e103f86084e354c1e593fb52bc1 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 5 Mar 2026 14:21:16 -0800 Subject: [PATCH 37/39] update some lingering old CXX_STANDARD values. let's see what happens... --- cmake/winml.cmake | 9 ++++----- cmake/winml_unittests.cmake | 2 +- js/node/CMakeLists.txt | 2 +- js/react_native/android/CMakeLists.txt | 6 +++--- .../tools/tensorrt/perf/mem_test/CMakeLists.txt | 6 +++--- .../aarch64/python/cpu/scripts/install_protobuf.sh | 2 +- .../github/linux/docker/scripts/install_protobuf.sh | 2 +- tools/python/util/vcpkg_helpers.py | 11 +++++------ 8 files changed, 19 insertions(+), 21 deletions(-) diff --git a/cmake/winml.cmake b/cmake/winml.cmake index f2651d0cbc2b2..8f80299cc491c 100644 --- a/cmake/winml.cmake +++ b/cmake/winml.cmake @@ -316,8 +316,7 @@ if (onnxruntime_WINML_NAMESPACE_OVERRIDE STREQUAL "Windows") target_compile_definitions(winml_adapter PRIVATE "BUILD_INBOX=1") endif() -# will requires C++17 -set_target_properties(winml_adapter PROPERTIES CXX_STANDARD 17) +set_target_properties(winml_adapter PROPERTIES CXX_STANDARD 20) set_target_properties(winml_adapter PROPERTIES CXX_STANDARD_REQUIRED ON) # Compiler definitions @@ -645,7 +644,7 @@ onnxruntime_add_static_library(winml_lib_common ${winml_lib_common_dir}/CommonDeviceHelpers.cpp ) -set_target_properties(winml_lib_common PROPERTIES CXX_STANDARD 17) +set_target_properties(winml_lib_common PROPERTIES CXX_STANDARD 20) set_target_properties(winml_lib_common PROPERTIES CXX_STANDARD_REQUIRED ON) target_compile_options(winml_lib_common PRIVATE /GR- /await /bigobj /wd4238) target_link_libraries(winml_lib_common PRIVATE ${WIL_TARGET}) @@ -829,9 +828,9 @@ if (winml_is_inbox) target_link_libraries(${new_target} PRIVATE ${link_libraries}) target_link_options(${new_target} PRIVATE ${link_options}) - # Attempt to copy linker flags + # Attempt to copy linker flags get_target_property(link_flags ${target} LINK_FLAGS) - + if (NOT link_flags MATCHES ".*NOTFOUND") set_property(TARGET ${new_target} PROPERTY LINK_FLAGS "${link_flags}") endif() diff --git a/cmake/winml_unittests.cmake b/cmake/winml_unittests.cmake index d857a83f504a5..eb2d69e16223e 100644 --- a/cmake/winml_unittests.cmake +++ b/cmake/winml_unittests.cmake @@ -19,7 +19,7 @@ set(WINML_TEST_INC_DIR function(set_winml_target_properties target) set_target_properties(${target} PROPERTIES FOLDER "ONNXRuntimeTest/winml" - CXX_STANDARD 17 + CXX_STANDARD 20 CXX_STANDARD_REQUIRED YES CXX_EXTENSIONS NO ) diff --git a/js/node/CMakeLists.txt b/js/node/CMakeLists.txt index aedb1e35158ef..845d1a80b7b8f 100644 --- a/js/node/CMakeLists.txt +++ b/js/node/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.11) project (onnxruntime-node) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) add_compile_definitions(NAPI_VERSION=${napi_build_version}) add_compile_definitions(ORT_API_MANUAL_INIT) diff --git a/js/react_native/android/CMakeLists.txt b/js/react_native/android/CMakeLists.txt index 2f814e871ad77..0bcf552ff9e41 100644 --- a/js/react_native/android/CMakeLists.txt +++ b/js/react_native/android/CMakeLists.txt @@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 3.9.0) set(PACKAGE_NAME "onnxruntime-react-native") set(BUILD_DIR ${CMAKE_SOURCE_DIR}/build) set(CMAKE_VERBOSE_MAKEFILE ON) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) option(ORT_EXTENSIONS_ENABLED "Enable Ort Extensions" NO) option(USE_NNAPI "Use NNAPI" YES) @@ -80,10 +80,10 @@ add_library( ../cpp/SessionUtils.cpp ../cpp/TensorUtils.cpp) -# Configure C++ 17 +# Configure C++20 set_target_properties( onnxruntimejsi - PROPERTIES CXX_STANDARD 17 + PROPERTIES CXX_STANDARD 20 CXX_EXTENSIONS OFF POSITION_INDEPENDENT_CODE ON) diff --git a/onnxruntime/python/tools/tensorrt/perf/mem_test/CMakeLists.txt b/onnxruntime/python/tools/tensorrt/perf/mem_test/CMakeLists.txt index d77a763396f77..0f797255e918c 100644 --- a/onnxruntime/python/tools/tensorrt/perf/mem_test/CMakeLists.txt +++ b/onnxruntime/python/tools/tensorrt/perf/mem_test/CMakeLists.txt @@ -4,10 +4,10 @@ set(CMAKE_BUILD_TYPE Debug) cmake_minimum_required(VERSION 3.13) -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) -include_directories( +include_directories( /code/onnxruntime/include/onnxruntime/core/session/ /code/onnxruntime/include/onnxruntime/core/providers/tensorrt/ ) @@ -17,6 +17,6 @@ set(CMAKE_CXX_FLAGS "-fsanitize=address -fsanitize=leak -g ${CMAKE_CXX_FLAGS}") set(CMAKE_C_FLAGS "-fsanitize=address -fsanitize=leak -g ${CMAKE_C_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "-fsanitize=address -fsanitize=leak ${CMAKE_EXE_LINKER_FLAGS}") set(CMAKE_MODULE_LINKER_FLAGS "-fsanitize=address -fsanitize=leak ${CMAKE_MODULE_LINKER_FLAGS}") - + ADD_EXECUTABLE(onnx_memtest main.cpp) target_link_libraries(onnx_memtest onnxruntime) diff --git a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/install_protobuf.sh b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/install_protobuf.sh index 31b5ca6f9e69b..4b967c1f3ae3b 100755 --- a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/install_protobuf.sh +++ b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/install_protobuf.sh @@ -13,7 +13,7 @@ done -EXTRA_CMAKE_ARGS="-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CXX_STANDARD=17" +EXTRA_CMAKE_ARGS="-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CXX_STANDARD=20" case "$(uname -s)" in Darwin*) diff --git a/tools/ci_build/github/linux/docker/scripts/install_protobuf.sh b/tools/ci_build/github/linux/docker/scripts/install_protobuf.sh index 31b5ca6f9e69b..4b967c1f3ae3b 100755 --- a/tools/ci_build/github/linux/docker/scripts/install_protobuf.sh +++ b/tools/ci_build/github/linux/docker/scripts/install_protobuf.sh @@ -13,7 +13,7 @@ done -EXTRA_CMAKE_ARGS="-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CXX_STANDARD=17" +EXTRA_CMAKE_ARGS="-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CXX_STANDARD=20" case "$(uname -s)" in Darwin*) diff --git a/tools/python/util/vcpkg_helpers.py b/tools/python/util/vcpkg_helpers.py index f976e525aff93..966bfc41ce4c2 100644 --- a/tools/python/util/vcpkg_helpers.py +++ b/tools/python/util/vcpkg_helpers.py @@ -280,7 +280,7 @@ def generate_triplet_for_android( if ldflags: f.write(f'set(VCPKG_LINKER_FLAGS "{" ".join(ldflags)}")\n') - f.write("list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17)\n") + f.write("list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20)\n") add_build_type(f, config) add_port_configs( f, enable_exception, False, enable_minimal_build, use_full_protobuf=use_full_protobuf @@ -471,10 +471,9 @@ def generate_triplet_for_posix_platform( if ldflags: f.write(f'set(VCPKG_LINKER_FLAGS "{" ".join(ldflags)}")\n') - if os_name == "osx": - f.write("list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20)\n") - else: - f.write("list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=17)\n") + + f.write("list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20)\n") + add_build_type(f, config) add_port_configs( f, enable_exception, False, enable_minimal_build, use_full_protobuf=use_full_protobuf @@ -734,7 +733,7 @@ def generate_windows_triplets(build_dir: str, configs: set[str], toolset_version if cxxflags: f.write(f'set(VCPKG_CXX_FLAGS "{" ".join(cxxflags)}")\n') f.write( - "list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=17)\n" + "list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=20)\n" ) if ldflags: f.write(f'set(VCPKG_LINKER_FLAGS "{" ".join(ldflags)}")\n') From 0d613a3193e65f01e40ded6f7b86e16c7efc2e85 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 5 Mar 2026 14:54:57 -0800 Subject: [PATCH 38/39] move --diag-suppress=177 out of CUDA 12.8+ block --- cmake/onnxruntime_providers_cuda.cmake | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/onnxruntime_providers_cuda.cmake b/cmake/onnxruntime_providers_cuda.cmake index 3e7d43e66b690..7e62f72afaf97 100644 --- a/cmake/onnxruntime_providers_cuda.cmake +++ b/cmake/onnxruntime_providers_cuda.cmake @@ -191,6 +191,11 @@ target_compile_options(${target} PRIVATE "$<$:SHELL:--threads \"${onnxruntime_NVCC_THREADS}\">") endif() + # suppress warnings like this: + # cutlass-src\include\cute/arch/mma_sm120.hpp(3128): error #177-D: variable "tidA" was declared but never + # referenced + target_compile_options(${target} PRIVATE "$<$:--diag-suppress=177>") + # Since CUDA 12.8, compiling diagnostics become stricter if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8) target_compile_options(${target} PRIVATE "$<$:--static-global-template-stub=false>") @@ -200,11 +205,6 @@ endif() # skip diagnosis error caused by cuda header files target_compile_options(${target} PRIVATE "$<$:--diag-suppress=221>") - - # suppress warnings like this: - # cutlass-src\include\cute/arch/mma_sm120.hpp(3128): error #177-D: variable "tidA" was declared but never - # referenced - target_compile_options(${target} PRIVATE "$<$:--diag-suppress=177>") endif() if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) From 291f0a00c860f1f9826e0f74ca6746d5fadf86e8 Mon Sep 17 00:00:00 2001 From: edgchen1 <18449977+edgchen1@users.noreply.github.com> Date: Thu, 5 Mar 2026 16:57:06 -0800 Subject: [PATCH 39/39] update vcpkg_helpers.py to pass module scanning option too --- cmake/CMakeLists.txt | 1 + tools/python/util/vcpkg_helpers.py | 23 ++++++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 7db6a070ab9ea..d2967739cbde4 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -25,6 +25,7 @@ endif() # - Android builds from Linux Docker containers have trouble finding clang-scan-deps. # - The MSVC /permissive option is needed for compiling some of the CUDA EP code which uses CUTLASS. # This option is not compatible with C++20 modules. +# So we will skip module scanning for now. set(CMAKE_CXX_SCAN_FOR_MODULES OFF) # Disable fast-math for Intel oneAPI compiler diff --git a/tools/python/util/vcpkg_helpers.py b/tools/python/util/vcpkg_helpers.py index 966bfc41ce4c2..8d1c665f631d9 100644 --- a/tools/python/util/vcpkg_helpers.py +++ b/tools/python/util/vcpkg_helpers.py @@ -150,6 +150,21 @@ def add_build_type(f, build_type: str) -> None: ) +def _get_cxx_standard_cmake_configure_options_str() -> str: + # These should match what's specified in cmake/CMakeLists.txt. + options = [ + "-DCMAKE_CXX_STANDARD=20", + # We don't use C++20 modules yet. + # There are some known issues to address first: + # - Android builds from Linux Docker containers have trouble finding clang-scan-deps. + # - The MSVC /permissive option is needed for compiling some of the CUDA EP code which uses CUTLASS. + # This option is not compatible with C++20 modules. + # So we will skip module scanning for now. + "-DCMAKE_CXX_SCAN_FOR_MODULES=OFF", + ] + return " ".join(options) + + def generate_triplet_for_android( build_dir: str, configs: set[str], @@ -280,7 +295,9 @@ def generate_triplet_for_android( if ldflags: f.write(f'set(VCPKG_LINKER_FLAGS "{" ".join(ldflags)}")\n') - f.write("list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20)\n") + + f.write(f"list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS {_get_cxx_standard_cmake_configure_options_str()})\n") + add_build_type(f, config) add_port_configs( f, enable_exception, False, enable_minimal_build, use_full_protobuf=use_full_protobuf @@ -472,7 +489,7 @@ def generate_triplet_for_posix_platform( if ldflags: f.write(f'set(VCPKG_LINKER_FLAGS "{" ".join(ldflags)}")\n') - f.write("list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS -DCMAKE_CXX_STANDARD=20)\n") + f.write(f"list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS {_get_cxx_standard_cmake_configure_options_str()})\n") add_build_type(f, config) add_port_configs( @@ -733,7 +750,7 @@ def generate_windows_triplets(build_dir: str, configs: set[str], toolset_version if cxxflags: f.write(f'set(VCPKG_CXX_FLAGS "{" ".join(cxxflags)}")\n') f.write( - "list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error -DCMAKE_CXX_STANDARD=20)\n" + f"list(APPEND VCPKG_CMAKE_CONFIGURE_OPTIONS --compile-no-warning-as-error {_get_cxx_standard_cmake_configure_options_str()})\n" ) if ldflags: f.write(f'set(VCPKG_LINKER_FLAGS "{" ".join(ldflags)}")\n')