From 52136f40d7361d4cb51fa9ca9354f62194e7db99 Mon Sep 17 00:00:00 2001 From: zebrapurring <> Date: Sat, 24 May 2025 13:01:06 +0200 Subject: [PATCH 1/2] fix: build error on Apple Silicon due to ambiguous call to `svwhilelt` functions --- crates/simd/cshim/aarch64.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/simd/cshim/aarch64.c b/crates/simd/cshim/aarch64.c index 614eaa55..0aec46d3 100644 --- a/crates/simd/cshim/aarch64.c +++ b/crates/simd/cshim/aarch64.c @@ -84,7 +84,7 @@ fp16_reduce_sum_of_xy_a2_fp16(f16 *restrict a, f16 *restrict b, size_t n) { __attribute__((target("+sve"))) float fp16_reduce_sum_of_xy_a3_512(f16 *restrict a, f16 *restrict b, size_t n) { svfloat16_t xy = svdup_f16(0.0); - for (size_t i = 0; i < n; i += svcnth()) { + for (uint64_t i = 0; i < n; i += svcnth()) { svbool_t mask = svwhilelt_b16(i, n); svfloat16_t x = svld1_f16(mask, a + i); svfloat16_t y = svld1_f16(mask, b + i); @@ -153,7 +153,7 @@ fp16_reduce_sum_of_d2_a2_fp16(f16 *restrict a, f16 *restrict b, size_t n) { __attribute__((target("+sve"))) float fp16_reduce_sum_of_d2_a3_512(f16 *restrict a, f16 *restrict b, size_t n) { svfloat16_t d2 = svdup_f16(0.0); - for (size_t i = 0; i < n; i += svcnth()) { + for (uint64_t i = 0; i < n; i += svcnth()) { svbool_t mask = svwhilelt_b16(i, n); svfloat16_t x = svld1_f16(mask, a + i); svfloat16_t y = svld1_f16(mask, b + i); @@ -166,7 +166,7 @@ fp16_reduce_sum_of_d2_a3_512(f16 *restrict a, f16 *restrict b, size_t n) { __attribute__((target("+sve"))) float fp32_reduce_sum_of_x_a3_256(float *restrict this, size_t n) { svfloat32_t sum = svdup_f32(0.0); - for (size_t i = 0; i < n; i += svcntw()) { + for (uint64_t i = 0; i < n; i += svcntw()) { svbool_t mask = svwhilelt_b32(i, n); svfloat32_t x = svld1_f32(mask, this + i); sum = svadd_f32_x(mask, sum, x); @@ -177,7 +177,7 @@ fp32_reduce_sum_of_x_a3_256(float *restrict this, size_t n) { __attribute__((target("+sve"))) float fp32_reduce_sum_of_abs_x_a3_256(float *restrict this, size_t n) { svfloat32_t sum = svdup_f32(0.0); - for (size_t i = 0; i < n; i += svcntw()) { + for (uint64_t i = 0; i < n; i += svcntw()) { svbool_t mask = svwhilelt_b32(i, n); svfloat32_t x = svld1_f32(mask, this + i); sum = svadd_f32_x(mask, sum, svabs_f32_x(mask, x)); @@ -188,7 +188,7 @@ fp32_reduce_sum_of_abs_x_a3_256(float *restrict this, size_t n) { __attribute__((target("+sve"))) float fp32_reduce_sum_of_x2_a3_256(float *restrict this, size_t n) { svfloat32_t sum = svdup_f32(0.0); - for (size_t i = 0; i < n; i += svcntw()) { + for (uint64_t i = 0; i < n; i += svcntw()) { svbool_t mask = svwhilelt_b32(i, n); svfloat32_t x = svld1_f32(mask, this + i); sum = svmla_f32_x(mask, sum, x, x); @@ -201,7 +201,7 @@ fp32_reduce_min_max_of_x_a3_256(float *restrict this, size_t n, float *out_min, float *out_max) { svfloat32_t min = svdup_f32(1.0 / 0.0); svfloat32_t max = svdup_f32(-1.0 / 0.0); - for (size_t i = 0; i < n; i += svcntw()) { + for (uint64_t i = 0; i < n; i += svcntw()) { svbool_t mask = svwhilelt_b32(i, n); svfloat32_t x = svld1_f32(mask, this + i); min = svmin_f32_x(mask, min, x); @@ -215,7 +215,7 @@ __attribute__((target("+sve"))) float fp32_reduce_sum_of_xy_a3_256(float *restrict lhs, float *restrict rhs, size_t n) { svfloat32_t sum = svdup_f32(0.0); - for (size_t i = 0; i < n; i += svcntw()) { + for (uint64_t i = 0; i < n; i += svcntw()) { svbool_t mask = svwhilelt_b32(i, n); svfloat32_t x = svld1_f32(mask, lhs + i); svfloat32_t y = svld1_f32(mask, rhs + i); @@ -228,7 +228,7 @@ __attribute__((target("+sve"))) float fp32_reduce_sum_of_d2_a3_256(float *restrict lhs, float *restrict rhs, size_t n) { svfloat32_t sum = svdup_f32(0.0); - for (size_t i = 0; i < n; i += svcntw()) { + for (uint64_t i = 0; i < n; i += svcntw()) { svbool_t mask = svwhilelt_b32(i, n); svfloat32_t x = svld1_f32(mask, lhs + i); svfloat32_t y = svld1_f32(mask, rhs + i); From fee45f6d53aa686deadec104ba83bc17f06d77b5 Mon Sep 17 00:00:00 2001 From: zebrapurring <> Date: Sat, 24 May 2025 13:45:21 +0200 Subject: [PATCH 2/2] fix: undefined symbols while linking on macOS for arm64 --- .cargo/config.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.cargo/config.toml b/.cargo/config.toml index 2196f5fc..b1866091 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,2 +1,5 @@ [env] CC = "clang" + +[target.'cfg(target_os="macos")'] +rustflags = ["-Clink-arg=-Wl,-undefined,dynamic_lookup"]