Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 37 additions & 10 deletions cmake/option.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,40 @@ macro(add_arch_flag FLAG VAR_NAME OPTION_NAME)
endif()
endmacro()

# Like add_arch_flag but tries -march=BASE+fp16 first, falls back to -march=BASE.
# FP16 NEON intrinsics (vfmaq_f16, etc.) in ailego math kernels require +fp16
# on GCC. Apple Clang enables FP16 by default on ARM64.
macro(add_arch_flag_with_fp16 BASE_ARCH VAR_NAME OPTION_NAME)
check_c_compiler_flag("-march=${BASE_ARCH}+fp16" COMPILER_SUPPORT_${VAR_NAME}_FP16)
if(COMPILER_SUPPORT_${VAR_NAME}_FP16)
add_arch_flag("-march=${BASE_ARCH}+fp16" ${VAR_NAME} ${OPTION_NAME})
message(STATUS "ARM64: using -march=${BASE_ARCH}+fp16 (FP16 NEON enabled)")
else()
add_arch_flag("-march=${BASE_ARCH}" ${VAR_NAME} ${OPTION_NAME})
message(STATUS "ARM64: using -march=${BASE_ARCH} (FP16 not supported by compiler)")
endif()
endmacro()

function(_detect_armv8_best)
set(_arm_flags
"armv8.6-a" "armv8.5-a" "armv8.4-a" "armv8.3-a" "armv8.2-a" "armv8.1-a" "armv8-a" "armv8"
)
foreach(_ver IN LISTS _arm_flags)
check_c_compiler_flag("-march=${_ver}" _COMP_SUPP_${_ver})
if(_COMP_SUPP_${_ver})
_AppendFlags(CMAKE_C_FLAGS "-march=${_ver}")
_AppendFlags(CMAKE_CXX_FLAGS "-march=${_ver}")
# Check if compiler supports +fp16 extension (required for FP16 NEON
# intrinsics used in ailego math kernels). Apple Clang enables FP16 by
# default on ARM64; GCC requires the explicit +fp16 flag.
check_c_compiler_flag("-march=${_ver}+fp16" _COMP_SUPP_${_ver}_fp16)
if(_COMP_SUPP_${_ver}_fp16)
set(_march_flag "-march=${_ver}+fp16")
message(STATUS "ARM64: using ${_march_flag} (FP16 NEON enabled)")
else()
set(_march_flag "-march=${_ver}")
message(STATUS "ARM64: using ${_march_flag} (FP16 not supported)")
endif()
_AppendFlags(CMAKE_C_FLAGS "${_march_flag}")
_AppendFlags(CMAKE_CXX_FLAGS "${_march_flag}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" PARENT_SCOPE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" PARENT_SCOPE)
return()
Expand Down Expand Up @@ -179,27 +204,29 @@ if(NOT AUTO_DETECT_ARCH)
endif()

# ARM (newest first — allow multiple? usually only one)
# But GCC allows only one -march=, so honor highest enabled
# But GCC allows only one -march=, so honor highest enabled.
# Try +fp16 first (required for FP16 NEON intrinsics in ailego math
# kernels), fall back to base flag if compiler doesn't support it.
if(ENABLE_ARMV8.6A)
add_arch_flag("-march=armv8.6-a" ARMV86A ENABLE_ARMV8.6A)
add_arch_flag_with_fp16("armv8.6-a" ARMV86A ENABLE_ARMV8.6A)
endif()
if(ENABLE_ARMV8.5A)
add_arch_flag("-march=armv8.5-a" ARMV85A ENABLE_ARMV8.5A)
add_arch_flag_with_fp16("armv8.5-a" ARMV85A ENABLE_ARMV8.5A)
endif()
if(ENABLE_ARMV8.4A)
add_arch_flag("-march=armv8.4-a" ARMV84A ENABLE_ARMV8.4A)
add_arch_flag_with_fp16("armv8.4-a" ARMV84A ENABLE_ARMV8.4A)
endif()
if(ENABLE_ARMV8.3A)
add_arch_flag("-march=armv8.3-a" ARMV83A ENABLE_ARMV8.3A)
add_arch_flag_with_fp16("armv8.3-a" ARMV83A ENABLE_ARMV8.3A)
endif()
if(ENABLE_ARMV8.2A)
add_arch_flag("-march=armv8.2-a" ARMV82A ENABLE_ARMV8.2A)
add_arch_flag_with_fp16("armv8.2-a" ARMV82A ENABLE_ARMV8.2A)
endif()
if(ENABLE_ARMV8.1A)
add_arch_flag("-march=armv8.1-a" ARMV81A ENABLE_ARMV8.1A)
add_arch_flag_with_fp16("armv8.1-a" ARMV81A ENABLE_ARMV8.1A)
endif()
if(ENABLE_ARMV8A)
add_arch_flag("-march=armv8-a" ARMV8A ENABLE_ARMV8A)
add_arch_flag_with_fp16("armv8-a" ARMV8A ENABLE_ARMV8A)
endif()
Comment on lines 210 to 230
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The manual ENABLE_ARMV8.X options (lines 197–217) unconditionally hardcode +fp16 and pass the combined flag to add_arch_flag. This creates a regression for users who explicitly enable one of these options on a compiler that supports the base architecture but not the +fp16 extension (e.g., GCC ≤7 or older cross-compilation toolchains).

When check_c_compiler_flag rejects the combined flag, add_arch_flag will emit:

FATAL_ERROR "Compiler does not support required flag: '-march=armv8.2-a+fp16' for ENABLE_ARMV8.2A"

The auto-detect path (_detect_armv8_best, lines 77–103) handles this correctly by probing the base arch first, then probing +fp16 separately, and falling back gracefully. The manual path should follow the same pattern:

if(ENABLE_ARMV8.2A)
  check_c_compiler_flag("-march=armv8.2-a+fp16" _COMP_SUPP_ARMV82A_fp16)
  if(_COMP_SUPP_ARMV82A_fp16)
    add_arch_flag("-march=armv8.2-a+fp16" ARMV82A ENABLE_ARMV8.2A)
  else()
    add_arch_flag("-march=armv8.2-a" ARMV82A ENABLE_ARMV8.2A)
  endif()
endif()

(Repeat for each ENABLE_ARMV8.XA option.)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated @greptile-apps

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perfect! The changes look great. You've implemented exactly what I suggested:

  1. Created the add_arch_flag_with_fp16 macro (lines 90-99) that probes for +fp16 support and falls back to the base architecture
  2. Updated all manual ARM options (lines 206-222) to use this new macro instead of hardcoding +fp16
  3. Added helpful status messages indicating whether FP16 is enabled or not

This prevents the FATAL_ERROR for users with older compilers (like GCC ≤7) that support ARMv8.X but not +fp16, while still enabling FP16 when available. The solution is clean, DRY, and matches the pattern already used in the auto-detect path.

✅ Issue resolved!


else()
Expand Down