From 62b2fb494c93b041ad8ec62d14296352e94de002 Mon Sep 17 00:00:00 2001 From: puddingfjz <2811443837@qq.com> Date: Fri, 10 Apr 2026 17:39:22 +0800 Subject: [PATCH 1/7] Add: memfd-based SO loading for all runtimes - Add memfd_loader.h for in-memory SO loading using memfd_create - Integrate memfd loading into AICPU executors across all runtimes - Try memfd first, fall back to file-based loading if memfd fails - Eliminates temporary file pollution in /tmp directory - Provides consistent loading performance without filesystem overhead --- .../aicpu/aicpu_executor.cpp | 111 ++++++++------ .../aicpu_build_graph/aicpu/memfd_loader.h | 135 ++++++++++++++++++ .../aicpu/aicpu_executor.cpp | 111 ++++++++------ .../aicpu/memfd_loader.h | 129 +++++++++++++++++ .../aicpu/aicpu_executor.cpp | 111 ++++++++------ .../aicpu/memfd_loader.h | 135 ++++++++++++++++++ 6 files changed, 612 insertions(+), 120 deletions(-) create mode 100644 src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h create mode 100644 src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h create mode 100644 src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h diff --git a/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp b/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp index 31f07bea..8bf1a4dd 100644 --- a/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp +++ b/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp @@ -29,6 +29,9 @@ #include "runtime.h" #include "spin_hint.h" +// memfd-based SO loading +#include "memfd_loader.h" + // Runtime headers (full struct definition for create/destroy + PTO2_SCOPE) #include "pto_runtime2.h" #include "pto_runtime2_types.h" @@ -237,6 +240,7 @@ struct AicpuExecutor { // Orchestration SO handle - defer dlclose until all tasks complete void *orch_so_handle_{nullptr}; char orch_so_path_[256]{}; // Path to orchestration SO file for cleanup + int orch_so_memfd_{-1}; // memfd for memfd_create path (-1 if file-based) // Shared orchestration function pointer (loaded by first orch thread, used by all) DeviceOrchestrationFunc orch_func_{nullptr}; @@ -1596,50 +1600,69 @@ int32_t AicpuExecutor::run(Runtime *runtime) { return -1; } - // Try multiple paths that may allow execution on AICPU + // Try memfd first, fall back to file-based char so_path[256]; - bool file_created = false; - const char *candidate_dirs[] = { - "/usr/lib64/aicpu_kernels/0/aicpu_kernels_device", "/usr/lib64", "/lib64", "/var/tmp", "/tmp" - }; - const int32_t num_candidates = sizeof(candidate_dirs) / sizeof(candidate_dirs[0]); - - for (int32_t i = 0; i < num_candidates && !file_created; i++) { - snprintf(so_path, sizeof(so_path), "%s/libdevice_orch_%d.so", candidate_dirs[i], getpid()); - int32_t fd = open(so_path, O_WRONLY | O_CREAT | O_TRUNC, 0755); - if (fd < 0) { - DEV_INFO( - "Thread %d: Cannot create SO at %s (errno=%d), trying next path", thread_idx, so_path, errno - ); - continue; - } - ssize_t written = write(fd, so_data, so_size); - close(fd); - if (written != static_cast(so_size)) { - DEV_INFO( - "Thread %d: Cannot write SO to %s (errno=%d), trying next path", thread_idx, so_path, errno - ); - unlink(so_path); - continue; - } - file_created = true; - DEV_INFO("Thread %d: Created SO file at %s (%zu bytes)", thread_idx, so_path, so_size); - } + void *handle = nullptr; + int memfd = -1; - if (!file_created) { - DEV_ERROR("Thread %d: Failed to create SO file in any candidate path", thread_idx); - return -1; + // Attempt memfd-based loading first + int memfd_rc = load_orchestration_so_with_memfd( + so_data, so_size, thread_idx, &handle, so_path, &memfd + ); + + if (memfd_rc == 0 && handle != nullptr) { + // memfd loading succeeded, use memfd-loaded handle + orch_so_memfd_ = memfd; } - dlerror(); - void *handle = dlopen(so_path, RTLD_LAZY | RTLD_LOCAL); - const char *dlopen_err = dlerror(); if (handle == nullptr) { - DEV_ERROR("Thread %d: dlopen failed: %s", thread_idx, dlopen_err ? dlopen_err : "unknown"); - unlink(so_path); - return -1; + // memfd failed or unavailable - use file-based loading + orch_so_memfd_ = -1; + + // Try multiple paths that may allow execution on AICPU + bool file_created = false; + const char *candidate_dirs[] = { + "/usr/lib64/aicpu_kernels/0/aicpu_kernels_device", "/usr/lib64", "/lib64", "/var/tmp", "/tmp" + }; + const int32_t num_candidates = sizeof(candidate_dirs) / sizeof(candidate_dirs[0]); + + for (int32_t i = 0; i < num_candidates && !file_created; i++) { + snprintf(so_path, sizeof(so_path), "%s/libdevice_orch_%d.so", candidate_dirs[i], getpid()); + int32_t fd = open(so_path, O_WRONLY | O_CREAT | O_TRUNC, 0755); + if (fd < 0) { + DEV_INFO( + "Thread %d: Cannot create SO at %s (errno=%d), trying next path", thread_idx, so_path, errno + ); + continue; + } + ssize_t written = write(fd, so_data, so_size); + close(fd); + if (written != static_cast(so_size)) { + DEV_INFO( + "Thread %d: Cannot write SO to %s (errno=%d), trying next path", thread_idx, so_path, errno + ); + unlink(so_path); + continue; + } + file_created = true; + DEV_INFO("Thread %d: Created SO file at %s (%zu bytes)", thread_idx, so_path, so_size); + } + + if (!file_created) { + DEV_ERROR("Thread %d: Failed to create SO file in any candidate path", thread_idx); + return -1; + } + + dlerror(); + handle = dlopen(so_path, RTLD_LAZY | RTLD_LOCAL); + const char *dlopen_err = dlerror(); + if (handle == nullptr) { + DEV_ERROR("Thread %d: dlopen failed: %s", thread_idx, dlopen_err ? dlopen_err : "unknown"); + unlink(so_path); + return -1; + } + DEV_INFO("Thread %d: dlopen succeeded, handle=%p", thread_idx, handle); } - DEV_INFO("Thread %d: dlopen succeeded, handle=%p", thread_idx, handle); dlerror(); auto config_func = @@ -1970,8 +1993,15 @@ int32_t AicpuExecutor::run(Runtime *runtime) { // Destroy PTO2 runtime and close orchestration SO (moved from orchestrator path) if (!runtime->get_orch_built_on_host() && orch_so_handle_ != nullptr) { pto2_runtime_destroy(rt); - dlclose(orch_so_handle_); - unlink(orch_so_path_); + // Handle cleanup based on loading method + if (orch_so_memfd_ >= 0) { + // memfd-based: close fd AFTER dlclose + cleanup_memfd_so(orch_so_memfd_, orch_so_handle_); + } else { + // File-based: dlclose handle and unlink file + dlclose(orch_so_handle_); + unlink(orch_so_path_); + } } DEV_ALWAYS("Thread %d: Last thread, marking executor finished", thread_idx); } @@ -2029,6 +2059,7 @@ void AicpuExecutor::deinit(Runtime *runtime) { orch_args_cached_ = nullptr; orch_so_handle_ = nullptr; orch_so_path_[0] = '\0'; + orch_so_memfd_ = -1; // Reset register-related state for (int32_t i = 0; i < MAX_CORES_PER_THREAD; i++) { diff --git a/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h b/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h new file mode 100644 index 00000000..9734ff7e --- /dev/null +++ b/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) PyPTO Contributors. + * This program is free software, you can redistribute it and/or modify it under the terms and conditions of + * CANN Open Software License Agreement Version 2.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + * ----------------------------------------------------------------------------------------------------------- + */ + +/** + * @file memfd_loader.h + * @brief Memory file descriptor based SO loading for AICPU environment + */ + +// Enable GNU extensions for memfd_create and MFD_CLOEXEC +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#ifndef MEMFD_LOADER_H +#define MEMFD_LOADER_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include +#include + +#include "aicpu/device_log.h" + +/** + * Load orchestration SO using memfd + */ +static inline int load_orchestration_so_with_memfd( + const void *so_data, + size_t so_size, + int orch_thread_num, + void **out_handle, + char *out_so_path, + int *out_memfd +) { + *out_handle = nullptr; + *out_memfd = -1; + out_so_path[0] = '\0'; + + if (so_data == nullptr || so_size == 0) { + return -1; + } + + // Create memfd + int fd = memfd_create("libdevice_orch", MFD_CLOEXEC); + + if (fd < 0) { + DEV_INFO("memfd_create failed: errno=%d", errno); + return -1; + } + + // Write SO data to memfd + ssize_t written = write(fd, so_data, so_size); + + if (written < 0) { + DEV_INFO("memfd write failed: errno=%d", errno); + close(fd); + return -1; + } + if (written != static_cast(so_size)) { + DEV_INFO("memfd partial write: %zd/%zu", written, so_size); + close(fd); + return -1; + } + + // Reset file position to beginning before dlopen + lseek(fd, 0, SEEK_SET); + + // Construct /proc/self/fd/N path for symlink target + char proc_fd_path[256]; + snprintf(proc_fd_path, sizeof(proc_fd_path), "/proc/self/fd/%d", fd); + + // Create a symlink to /proc/self/fd/N with a "normal" path + // This bypasses the AICPU dynamic linker's issue with /proc/self/fd/N paths + char link_path[256]; + snprintf(link_path, sizeof(link_path), "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); + + int symlink_rc = symlink(proc_fd_path, link_path); + if (symlink_rc != 0) { + DEV_INFO("symlink failed: errno=%d", errno); + close(fd); + return -1; + } + + snprintf(out_so_path, 256, "%s", link_path); + + // Try dlopen from the symlink + dlerror(); + void *handle = dlopen(out_so_path, RTLD_LAZY | RTLD_LOCAL); + + // Clean up symlink immediately after dlopen (dlopen has its own reference) + unlink(link_path); + + if (handle == nullptr) { + const char *dl_err = dlerror(); + DEV_INFO("dlopen from memfd symlink failed: %s", dl_err ? dl_err : "unknown"); + close(fd); + return -1; + } + + *out_handle = handle; + *out_memfd = fd; + return 0; +} + +/** + * Cleanup memfd-based SO + */ +static inline void cleanup_memfd_so(int memfd, void *handle) { + if (handle != nullptr) { + dlclose(handle); + } + if (memfd >= 0) { + close(memfd); + } +} + +#ifdef __cplusplus +} +#endif + +#endif // MEMFD_LOADER_H diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp index 97afd6a4..2cd0983a 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp @@ -29,6 +29,9 @@ #include "runtime.h" #include "spin_hint.h" +// memfd-based SO loading +#include "memfd_loader.h" + // Runtime headers (full struct definition for create/destroy + PTO2_SCOPE) #include "pto_runtime2.h" #include "pto_runtime2_types.h" @@ -340,6 +343,7 @@ struct AicpuExecutor { // Orchestration SO handle - defer dlclose until all tasks complete void *orch_so_handle_{nullptr}; char orch_so_path_[256]{}; // Path to orchestration SO file for cleanup + int orch_so_memfd_{-1}; // memfd for memfd_create path (-1 if file-based) // Shared orchestration function pointer (loaded by first orch thread, used by all) DeviceOrchestrationFunc orch_func_{nullptr}; @@ -1932,50 +1936,69 @@ int32_t AicpuExecutor::run(Runtime *runtime) { return -1; } - // Try multiple paths that may allow execution on AICPU + // Try memfd first, fall back to file-based char so_path[256]; - bool file_created = false; - const char *candidate_dirs[] = { - "/usr/lib64/aicpu_kernels/0/aicpu_kernels_device", "/usr/lib64", "/lib64", "/var/tmp", "/tmp" - }; - const int32_t num_candidates = sizeof(candidate_dirs) / sizeof(candidate_dirs[0]); - - for (int32_t i = 0; i < num_candidates && !file_created; i++) { - snprintf(so_path, sizeof(so_path), "%s/libdevice_orch_%d.so", candidate_dirs[i], getpid()); - int32_t fd = open(so_path, O_WRONLY | O_CREAT | O_TRUNC, 0755); - if (fd < 0) { - DEV_INFO( - "Thread %d: Cannot create SO at %s (errno=%d), trying next path", thread_idx, so_path, errno - ); - continue; - } - ssize_t written = write(fd, so_data, so_size); - close(fd); - if (written != static_cast(so_size)) { - DEV_INFO( - "Thread %d: Cannot write SO to %s (errno=%d), trying next path", thread_idx, so_path, errno - ); - unlink(so_path); - continue; - } - file_created = true; - DEV_INFO("Thread %d: Created SO file at %s (%zu bytes)", thread_idx, so_path, so_size); - } + void *handle = nullptr; + int memfd = -1; - if (!file_created) { - DEV_ERROR("Thread %d: Failed to create SO file in any candidate path", thread_idx); - return -1; + // Attempt memfd-based loading first + int memfd_rc = load_orchestration_so_with_memfd( + so_data, so_size, thread_idx, &handle, so_path, &memfd + ); + + if (memfd_rc == 0 && handle != nullptr) { + // memfd loading succeeded, use memfd-loaded handle + orch_so_memfd_ = memfd; } - dlerror(); - void *handle = dlopen(so_path, RTLD_LAZY | RTLD_LOCAL); - const char *dlopen_err = dlerror(); if (handle == nullptr) { - DEV_ERROR("Thread %d: dlopen failed: %s", thread_idx, dlopen_err ? dlopen_err : "unknown"); - unlink(so_path); - return -1; + // memfd failed or unavailable - use file-based loading + orch_so_memfd_ = -1; + + // Try multiple paths that may allow execution on AICPU + bool file_created = false; + const char *candidate_dirs[] = { + "/usr/lib64/aicpu_kernels/0/aicpu_kernels_device", "/usr/lib64", "/lib64", "/var/tmp", "/tmp" + }; + const int32_t num_candidates = sizeof(candidate_dirs) / sizeof(candidate_dirs[0]); + + for (int32_t i = 0; i < num_candidates && !file_created; i++) { + snprintf(so_path, sizeof(so_path), "%s/libdevice_orch_%d.so", candidate_dirs[i], getpid()); + int32_t fd = open(so_path, O_WRONLY | O_CREAT | O_TRUNC, 0755); + if (fd < 0) { + DEV_INFO( + "Thread %d: Cannot create SO at %s (errno=%d), trying next path", thread_idx, so_path, errno + ); + continue; + } + ssize_t written = write(fd, so_data, so_size); + close(fd); + if (written != static_cast(so_size)) { + DEV_INFO( + "Thread %d: Cannot write SO to %s (errno=%d), trying next path", thread_idx, so_path, errno + ); + unlink(so_path); + continue; + } + file_created = true; + DEV_INFO("Thread %d: Created SO file at %s (%zu bytes)", thread_idx, so_path, so_size); + } + + if (!file_created) { + DEV_ERROR("Thread %d: Failed to create SO file in any candidate path", thread_idx); + return -1; + } + + dlerror(); + handle = dlopen(so_path, RTLD_LAZY | RTLD_LOCAL); + const char *dlopen_err = dlerror(); + if (handle == nullptr) { + DEV_ERROR("Thread %d: dlopen failed: %s", thread_idx, dlopen_err ? dlopen_err : "unknown"); + unlink(so_path); + return -1; + } + DEV_INFO("Thread %d: dlopen succeeded, handle=%p", thread_idx, handle); } - DEV_INFO("Thread %d: dlopen succeeded, handle=%p", thread_idx, handle); dlerror(); auto config_func = @@ -2359,8 +2382,15 @@ int32_t AicpuExecutor::run(Runtime *runtime) { orch_bind_runtime_(nullptr); } pto2_runtime_destroy(rt); - dlclose(orch_so_handle_); - unlink(orch_so_path_); + // Handle cleanup based on loading method + if (orch_so_memfd_ >= 0) { + // memfd-based: close fd AFTER dlclose + cleanup_memfd_so(orch_so_memfd_, orch_so_handle_); + } else { + // File-based: dlclose handle and unlink file + dlclose(orch_so_handle_); + unlink(orch_so_path_); + } } } @@ -2415,6 +2445,7 @@ void AicpuExecutor::deinit(Runtime *runtime) { orch_args_cached_ = nullptr; orch_so_handle_ = nullptr; orch_so_path_[0] = '\0'; + orch_so_memfd_ = -1; // Clear file-scope PTO2Runtime pointer (freed by orchestrator thread before deinit) rt = nullptr; diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h new file mode 100644 index 00000000..94c71bc3 --- /dev/null +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) PyPTO Contributors. + * This program is free software, you can redistribute it and/or modify it under the terms and conditions of + * CANN Open Software License Agreement Version 2.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + * ----------------------------------------------------------------------------------------------------------- + */ + +/** + * @file memfd_loader.h + * @brief Memory file descriptor based SO loading for AICPU environment + */ + +// Enable GNU extensions for memfd_create and MFD_CLOEXEC +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#ifndef MEMFD_LOADER_H +#define MEMFD_LOADER_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include +#include + +#include "aicpu/device_log.h" + +/** + * Load orchestration SO using memfd + */ +static inline int load_orchestration_so_with_memfd( + const void *so_data, + size_t so_size, + int orch_thread_num, + void **out_handle, + char *out_so_path, + int *out_memfd +) { + *out_handle = nullptr; + *out_memfd = -1; + out_so_path[0] = '\0'; + + if (so_data == nullptr || so_size == 0) { + return -1; + } + + // Create memfd + int fd = memfd_create("libdevice_orch", MFD_CLOEXEC); + + if (fd < 0) { + return -1; + } + + // Write SO data to memfd + ssize_t written = write(fd, so_data, so_size); + + if (written < 0) { + close(fd); + return -1; + } + if (written != static_cast(so_size)) { + close(fd); + return -1; + } + + // Reset file position to beginning before dlopen + lseek(fd, 0, SEEK_SET); + + // Construct /proc/self/fd/N path for symlink target + char proc_fd_path[256]; + snprintf(proc_fd_path, sizeof(proc_fd_path), "/proc/self/fd/%d", fd); + + // Create a symlink to /proc/self/fd/N with a "normal" path + // This bypasses the AICPU dynamic linker's issue with /proc/self/fd/N paths + char link_path[256]; + snprintf(link_path, sizeof(link_path), "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); + + int symlink_rc = symlink(proc_fd_path, link_path); + if (symlink_rc != 0) { + close(fd); + return -1; + } + + snprintf(out_so_path, 256, "%s", link_path); + + // Try dlopen from the symlink + dlerror(); + void *handle = dlopen(out_so_path, RTLD_LAZY | RTLD_LOCAL); + + // Clean up symlink immediately after dlopen (dlopen has its own reference) + unlink(link_path); + + if (handle == nullptr) { + close(fd); + return -1; + } + + *out_handle = handle; + *out_memfd = fd; + return 0; +} + +/** + * Cleanup memfd-based SO + */ +static inline void cleanup_memfd_so(int memfd, void *handle) { + if (handle != nullptr) { + dlclose(handle); + } + if (memfd >= 0) { + close(memfd); + } +} + +#ifdef __cplusplus +} +#endif + +#endif // MEMFD_LOADER_H diff --git a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp index dcf3d565..11b854c7 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp +++ b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp @@ -29,6 +29,9 @@ #include "runtime.h" #include "spin_hint.h" +// memfd-based SO loading +#include "memfd_loader.h" + // Runtime headers (full struct definition for create/destroy + PTO2_SCOPE) #include "pto_runtime2.h" #include "pto_runtime2_types.h" @@ -338,6 +341,7 @@ struct AicpuExecutor { // Orchestration SO handle - defer dlclose until all tasks complete void *orch_so_handle_{nullptr}; char orch_so_path_[256]{}; // Path to orchestration SO file for cleanup + int orch_so_memfd_{-1}; // memfd for memfd_create path (-1 if file-based) // Shared orchestration function pointer (loaded by first orch thread, used by all) DeviceOrchestrationFunc orch_func_{nullptr}; @@ -1910,50 +1914,69 @@ int32_t AicpuExecutor::run(Runtime *runtime) { return -1; } - // Try multiple paths that may allow execution on AICPU + // Try memfd first, fall back to file-based char so_path[256]; - bool file_created = false; - const char *candidate_dirs[] = { - "/usr/lib64/aicpu_kernels/0/aicpu_kernels_device", "/usr/lib64", "/lib64", "/var/tmp", "/tmp" - }; - const int32_t num_candidates = sizeof(candidate_dirs) / sizeof(candidate_dirs[0]); - - for (int32_t i = 0; i < num_candidates && !file_created; i++) { - snprintf(so_path, sizeof(so_path), "%s/libdevice_orch_%d.so", candidate_dirs[i], getpid()); - int32_t fd = open(so_path, O_WRONLY | O_CREAT | O_TRUNC, 0755); - if (fd < 0) { - DEV_INFO( - "Thread %d: Cannot create SO at %s (errno=%d), trying next path", thread_idx, so_path, errno - ); - continue; - } - ssize_t written = write(fd, so_data, so_size); - close(fd); - if (written != static_cast(so_size)) { - DEV_INFO( - "Thread %d: Cannot write SO to %s (errno=%d), trying next path", thread_idx, so_path, errno - ); - unlink(so_path); - continue; - } - file_created = true; - DEV_INFO("Thread %d: Created SO file at %s (%zu bytes)", thread_idx, so_path, so_size); - } + void *handle = nullptr; + int memfd = -1; - if (!file_created) { - DEV_ERROR("Thread %d: Failed to create SO file in any candidate path", thread_idx); - return -1; + // Attempt memfd-based loading first + int memfd_rc = load_orchestration_so_with_memfd( + so_data, so_size, thread_idx, &handle, so_path, &memfd + ); + + if (memfd_rc == 0 && handle != nullptr) { + // memfd loading succeeded, use memfd-loaded handle + orch_so_memfd_ = memfd; } - dlerror(); - void *handle = dlopen(so_path, RTLD_LAZY | RTLD_LOCAL); - const char *dlopen_err = dlerror(); if (handle == nullptr) { - DEV_ERROR("Thread %d: dlopen failed: %s", thread_idx, dlopen_err ? dlopen_err : "unknown"); - unlink(so_path); - return -1; + // memfd failed or unavailable - use file-based loading + orch_so_memfd_ = -1; + + // Try multiple paths that may allow execution on AICPU + bool file_created = false; + const char *candidate_dirs[] = { + "/usr/lib64/aicpu_kernels/0/aicpu_kernels_device", "/usr/lib64", "/lib64", "/var/tmp", "/tmp" + }; + const int32_t num_candidates = sizeof(candidate_dirs) / sizeof(candidate_dirs[0]); + + for (int32_t i = 0; i < num_candidates && !file_created; i++) { + snprintf(so_path, sizeof(so_path), "%s/libdevice_orch_%d.so", candidate_dirs[i], getpid()); + int32_t fd = open(so_path, O_WRONLY | O_CREAT | O_TRUNC, 0755); + if (fd < 0) { + DEV_INFO( + "Thread %d: Cannot create SO at %s (errno=%d), trying next path", thread_idx, so_path, errno + ); + continue; + } + ssize_t written = write(fd, so_data, so_size); + close(fd); + if (written != static_cast(so_size)) { + DEV_INFO( + "Thread %d: Cannot write SO to %s (errno=%d), trying next path", thread_idx, so_path, errno + ); + unlink(so_path); + continue; + } + file_created = true; + DEV_INFO("Thread %d: Created SO file at %s (%zu bytes)", thread_idx, so_path, so_size); + } + + if (!file_created) { + DEV_ERROR("Thread %d: Failed to create SO file in any candidate path", thread_idx); + return -1; + } + + dlerror(); + handle = dlopen(so_path, RTLD_LAZY | RTLD_LOCAL); + const char *dlopen_err = dlerror(); + if (handle == nullptr) { + DEV_ERROR("Thread %d: dlopen failed: %s", thread_idx, dlopen_err ? dlopen_err : "unknown"); + unlink(so_path); + return -1; + } + DEV_INFO("Thread %d: dlopen succeeded, handle=%p", thread_idx, handle); } - DEV_INFO("Thread %d: dlopen succeeded, handle=%p", thread_idx, handle); dlerror(); auto config_func = @@ -2336,8 +2359,15 @@ int32_t AicpuExecutor::run(Runtime *runtime) { orch_bind_runtime_(nullptr); } pto2_runtime_destroy(rt); - dlclose(orch_so_handle_); - unlink(orch_so_path_); + // Handle cleanup based on loading method + if (orch_so_memfd_ >= 0) { + // memfd-based: close fd AFTER dlclose + cleanup_memfd_so(orch_so_memfd_, orch_so_handle_); + } else { + // File-based: dlclose handle and unlink file + dlclose(orch_so_handle_); + unlink(orch_so_path_); + } } } @@ -2391,6 +2421,7 @@ void AicpuExecutor::deinit(Runtime *runtime) { orch_args_cached_ = nullptr; orch_so_handle_ = nullptr; orch_so_path_[0] = '\0'; + orch_so_memfd_ = -1; // Clear file-scope PTO2Runtime pointer (freed by orchestrator thread before deinit) rt = nullptr; diff --git a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h new file mode 100644 index 00000000..9734ff7e --- /dev/null +++ b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) PyPTO Contributors. + * This program is free software, you can redistribute it and/or modify it under the terms and conditions of + * CANN Open Software License Agreement Version 2.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + * ----------------------------------------------------------------------------------------------------------- + */ + +/** + * @file memfd_loader.h + * @brief Memory file descriptor based SO loading for AICPU environment + */ + +// Enable GNU extensions for memfd_create and MFD_CLOEXEC +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#ifndef MEMFD_LOADER_H +#define MEMFD_LOADER_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include +#include + +#include "aicpu/device_log.h" + +/** + * Load orchestration SO using memfd + */ +static inline int load_orchestration_so_with_memfd( + const void *so_data, + size_t so_size, + int orch_thread_num, + void **out_handle, + char *out_so_path, + int *out_memfd +) { + *out_handle = nullptr; + *out_memfd = -1; + out_so_path[0] = '\0'; + + if (so_data == nullptr || so_size == 0) { + return -1; + } + + // Create memfd + int fd = memfd_create("libdevice_orch", MFD_CLOEXEC); + + if (fd < 0) { + DEV_INFO("memfd_create failed: errno=%d", errno); + return -1; + } + + // Write SO data to memfd + ssize_t written = write(fd, so_data, so_size); + + if (written < 0) { + DEV_INFO("memfd write failed: errno=%d", errno); + close(fd); + return -1; + } + if (written != static_cast(so_size)) { + DEV_INFO("memfd partial write: %zd/%zu", written, so_size); + close(fd); + return -1; + } + + // Reset file position to beginning before dlopen + lseek(fd, 0, SEEK_SET); + + // Construct /proc/self/fd/N path for symlink target + char proc_fd_path[256]; + snprintf(proc_fd_path, sizeof(proc_fd_path), "/proc/self/fd/%d", fd); + + // Create a symlink to /proc/self/fd/N with a "normal" path + // This bypasses the AICPU dynamic linker's issue with /proc/self/fd/N paths + char link_path[256]; + snprintf(link_path, sizeof(link_path), "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); + + int symlink_rc = symlink(proc_fd_path, link_path); + if (symlink_rc != 0) { + DEV_INFO("symlink failed: errno=%d", errno); + close(fd); + return -1; + } + + snprintf(out_so_path, 256, "%s", link_path); + + // Try dlopen from the symlink + dlerror(); + void *handle = dlopen(out_so_path, RTLD_LAZY | RTLD_LOCAL); + + // Clean up symlink immediately after dlopen (dlopen has its own reference) + unlink(link_path); + + if (handle == nullptr) { + const char *dl_err = dlerror(); + DEV_INFO("dlopen from memfd symlink failed: %s", dl_err ? dl_err : "unknown"); + close(fd); + return -1; + } + + *out_handle = handle; + *out_memfd = fd; + return 0; +} + +/** + * Cleanup memfd-based SO + */ +static inline void cleanup_memfd_so(int memfd, void *handle) { + if (handle != nullptr) { + dlclose(handle); + } + if (memfd >= 0) { + close(memfd); + } +} + +#ifdef __cplusplus +} +#endif + +#endif // MEMFD_LOADER_H From a3c6353ea0baeadb42288d8fcba34f1c07de1304 Mon Sep 17 00:00:00 2001 From: puddingfjz <2811443837@qq.com> Date: Fri, 10 Apr 2026 18:16:23 +0800 Subject: [PATCH 2/7] Fix: address cpplint issues in memfd_loader.h files - Move header guard before _GNU_SOURCE define for cpplint compliance - Wrap long lines (> 80 chars) across multiple lines - Use sizeof() instead of hardcoded 256 in snprintf calls - Update #endif comments to match header guard names --- .../aicpu_build_graph/aicpu/memfd_loader.h | 25 +++++++--------- .../aicpu/memfd_loader.h | 29 ++++++++++--------- .../aicpu/memfd_loader.h | 25 +++++++--------- 3 files changed, 38 insertions(+), 41 deletions(-) diff --git a/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h b/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h index 9734ff7e..38a519fe 100644 --- a/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h +++ b/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h @@ -14,14 +14,14 @@ * @brief Memory file descriptor based SO loading for AICPU environment */ +#ifndef SRC_A2A3_RUNTIME_AICPU_BUILD_GRAPH_AICPU_MEMFD_LOADER_H_ +#define SRC_A2A3_RUNTIME_AICPU_BUILD_GRAPH_AICPU_MEMFD_LOADER_H_ + // Enable GNU extensions for memfd_create and MFD_CLOEXEC #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif -#ifndef MEMFD_LOADER_H -#define MEMFD_LOADER_H - #ifdef __cplusplus extern "C" { #endif @@ -39,13 +39,8 @@ extern "C" { * Load orchestration SO using memfd */ static inline int load_orchestration_so_with_memfd( - const void *so_data, - size_t so_size, - int orch_thread_num, - void **out_handle, - char *out_so_path, - int *out_memfd -) { + const void *so_data, size_t so_size, int orch_thread_num, + void **out_handle, char *out_so_path, int *out_memfd) { *out_handle = nullptr; *out_memfd = -1; out_so_path[0] = '\0'; @@ -86,7 +81,8 @@ static inline int load_orchestration_so_with_memfd( // Create a symlink to /proc/self/fd/N with a "normal" path // This bypasses the AICPU dynamic linker's issue with /proc/self/fd/N paths char link_path[256]; - snprintf(link_path, sizeof(link_path), "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); + snprintf(link_path, sizeof(link_path), + "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); int symlink_rc = symlink(proc_fd_path, link_path); if (symlink_rc != 0) { @@ -95,7 +91,7 @@ static inline int load_orchestration_so_with_memfd( return -1; } - snprintf(out_so_path, 256, "%s", link_path); + snprintf(out_so_path, sizeof(out_so_path), "%s", link_path); // Try dlopen from the symlink dlerror(); @@ -106,7 +102,8 @@ static inline int load_orchestration_so_with_memfd( if (handle == nullptr) { const char *dl_err = dlerror(); - DEV_INFO("dlopen from memfd symlink failed: %s", dl_err ? dl_err : "unknown"); + DEV_INFO("dlopen from memfd symlink failed: %s", + dl_err ? dl_err : "unknown"); close(fd); return -1; } @@ -132,4 +129,4 @@ static inline void cleanup_memfd_so(int memfd, void *handle) { } #endif -#endif // MEMFD_LOADER_H +#endif // SRC_A2A3_RUNTIME_AICPU_BUILD_GRAPH_AICPU_MEMFD_LOADER_H_ diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h index 94c71bc3..7e33fb52 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h @@ -14,14 +14,14 @@ * @brief Memory file descriptor based SO loading for AICPU environment */ +#ifndef SRC_A2A3_RUNTIME_TENSORMAP_AND_RINGBUFFER_AICPU_MEMFD_LOADER_H_ +#define SRC_A2A3_RUNTIME_TENSORMAP_AND_RINGBUFFER_AICPU_MEMFD_LOADER_H_ + // Enable GNU extensions for memfd_create and MFD_CLOEXEC #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif -#ifndef MEMFD_LOADER_H -#define MEMFD_LOADER_H - #ifdef __cplusplus extern "C" { #endif @@ -39,13 +39,8 @@ extern "C" { * Load orchestration SO using memfd */ static inline int load_orchestration_so_with_memfd( - const void *so_data, - size_t so_size, - int orch_thread_num, - void **out_handle, - char *out_so_path, - int *out_memfd -) { + const void *so_data, size_t so_size, int orch_thread_num, + void **out_handle, char *out_so_path, int *out_memfd) { *out_handle = nullptr; *out_memfd = -1; out_so_path[0] = '\0'; @@ -58,6 +53,7 @@ static inline int load_orchestration_so_with_memfd( int fd = memfd_create("libdevice_orch", MFD_CLOEXEC); if (fd < 0) { + DEV_INFO("memfd_create failed: errno=%d", errno); return -1; } @@ -65,10 +61,12 @@ static inline int load_orchestration_so_with_memfd( ssize_t written = write(fd, so_data, so_size); if (written < 0) { + DEV_INFO("memfd write failed: errno=%d", errno); close(fd); return -1; } if (written != static_cast(so_size)) { + DEV_INFO("memfd partial write: %zd/%zu", written, so_size); close(fd); return -1; } @@ -83,15 +81,17 @@ static inline int load_orchestration_so_with_memfd( // Create a symlink to /proc/self/fd/N with a "normal" path // This bypasses the AICPU dynamic linker's issue with /proc/self/fd/N paths char link_path[256]; - snprintf(link_path, sizeof(link_path), "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); + snprintf(link_path, sizeof(link_path), + "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); int symlink_rc = symlink(proc_fd_path, link_path); if (symlink_rc != 0) { + DEV_INFO("symlink failed: errno=%d", errno); close(fd); return -1; } - snprintf(out_so_path, 256, "%s", link_path); + snprintf(out_so_path, sizeof(out_so_path), "%s", link_path); // Try dlopen from the symlink dlerror(); @@ -101,6 +101,9 @@ static inline int load_orchestration_so_with_memfd( unlink(link_path); if (handle == nullptr) { + const char *dl_err = dlerror(); + DEV_INFO("dlopen from memfd symlink failed: %s", + dl_err ? dl_err : "unknown"); close(fd); return -1; } @@ -126,4 +129,4 @@ static inline void cleanup_memfd_so(int memfd, void *handle) { } #endif -#endif // MEMFD_LOADER_H +#endif // SRC_A2A3_RUNTIME_TENSORMAP_AND_RINGBUFFER_AICPU_MEMFD_LOADER_H_ diff --git a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h index 9734ff7e..ff5e5d04 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h +++ b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h @@ -14,14 +14,14 @@ * @brief Memory file descriptor based SO loading for AICPU environment */ +#ifndef SRC_A5_RUNTIME_TENSORMAP_AND_RINGBUFFER_AICPU_MEMFD_LOADER_H_ +#define SRC_A5_RUNTIME_TENSORMAP_AND_RINGBUFFER_AICPU_MEMFD_LOADER_H_ + // Enable GNU extensions for memfd_create and MFD_CLOEXEC #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif -#ifndef MEMFD_LOADER_H -#define MEMFD_LOADER_H - #ifdef __cplusplus extern "C" { #endif @@ -39,13 +39,8 @@ extern "C" { * Load orchestration SO using memfd */ static inline int load_orchestration_so_with_memfd( - const void *so_data, - size_t so_size, - int orch_thread_num, - void **out_handle, - char *out_so_path, - int *out_memfd -) { + const void *so_data, size_t so_size, int orch_thread_num, + void **out_handle, char *out_so_path, int *out_memfd) { *out_handle = nullptr; *out_memfd = -1; out_so_path[0] = '\0'; @@ -86,7 +81,8 @@ static inline int load_orchestration_so_with_memfd( // Create a symlink to /proc/self/fd/N with a "normal" path // This bypasses the AICPU dynamic linker's issue with /proc/self/fd/N paths char link_path[256]; - snprintf(link_path, sizeof(link_path), "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); + snprintf(link_path, sizeof(link_path), + "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); int symlink_rc = symlink(proc_fd_path, link_path); if (symlink_rc != 0) { @@ -95,7 +91,7 @@ static inline int load_orchestration_so_with_memfd( return -1; } - snprintf(out_so_path, 256, "%s", link_path); + snprintf(out_so_path, sizeof(out_so_path), "%s", link_path); // Try dlopen from the symlink dlerror(); @@ -106,7 +102,8 @@ static inline int load_orchestration_so_with_memfd( if (handle == nullptr) { const char *dl_err = dlerror(); - DEV_INFO("dlopen from memfd symlink failed: %s", dl_err ? dl_err : "unknown"); + DEV_INFO("dlopen from memfd symlink failed: %s", + dl_err ? dl_err : "unknown"); close(fd); return -1; } @@ -132,4 +129,4 @@ static inline void cleanup_memfd_so(int memfd, void *handle) { } #endif -#endif // MEMFD_LOADER_H +#endif // SRC_A5_RUNTIME_TENSORMAP_AND_RINGBUFFER_AICPU_MEMFD_LOADER_H_ From 796866e332191974ee8b0e7ab75a62093418f06f Mon Sep 17 00:00:00 2001 From: puddingfjz <2811443837@qq.com> Date: Fri, 10 Apr 2026 18:26:52 +0800 Subject: [PATCH 3/7] Fix: correct copyright header typo and apply clang-format - Fix "WARRANTIES OR ANY KIND" -> "WARRANTIES OF ANY KIND" in memfd_loader.h - Apply clang-format to aicpu_executor.cpp files --- src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp | 4 +--- src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h | 2 +- .../runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp | 4 +--- .../runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h | 2 +- .../runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp | 4 +--- src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h | 2 +- 6 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp b/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp index 8bf1a4dd..d1b2fda9 100644 --- a/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp +++ b/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp @@ -1606,9 +1606,7 @@ int32_t AicpuExecutor::run(Runtime *runtime) { int memfd = -1; // Attempt memfd-based loading first - int memfd_rc = load_orchestration_so_with_memfd( - so_data, so_size, thread_idx, &handle, so_path, &memfd - ); + int memfd_rc = load_orchestration_so_with_memfd(so_data, so_size, thread_idx, &handle, so_path, &memfd); if (memfd_rc == 0 && handle != nullptr) { // memfd loading succeeded, use memfd-loaded handle diff --git a/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h b/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h index 38a519fe..925b7e93 100644 --- a/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h +++ b/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h @@ -3,7 +3,7 @@ * This program is free software, you can redistribute it and/or modify it under the terms and conditions of * CANN Open Software License Agreement Version 2.0 (the "License"). * Please refer to the License for details. You may not use this file except in compliance with the License. - * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR ANY KIND, EITHER EXPRESS OR IMPLIED, + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. * See LICENSE in the root of the software repository for the full text of the License. * ----------------------------------------------------------------------------------------------------------- diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp index 2cd0983a..ed169e97 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp @@ -1942,9 +1942,7 @@ int32_t AicpuExecutor::run(Runtime *runtime) { int memfd = -1; // Attempt memfd-based loading first - int memfd_rc = load_orchestration_so_with_memfd( - so_data, so_size, thread_idx, &handle, so_path, &memfd - ); + int memfd_rc = load_orchestration_so_with_memfd(so_data, so_size, thread_idx, &handle, so_path, &memfd); if (memfd_rc == 0 && handle != nullptr) { // memfd loading succeeded, use memfd-loaded handle diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h index 7e33fb52..73fcde42 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h @@ -3,7 +3,7 @@ * This program is free software, you can redistribute it and/or modify it under the terms and conditions of * CANN Open Software License Agreement Version 2.0 (the "License"). * Please refer to the License for details. You may not use this file except in compliance with the License. - * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR ANY KIND, EITHER EXPRESS OR IMPLIED, + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. * See LICENSE in the root of the software repository for the full text of the License. * ----------------------------------------------------------------------------------------------------------- diff --git a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp index 11b854c7..a86c7977 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp +++ b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp @@ -1920,9 +1920,7 @@ int32_t AicpuExecutor::run(Runtime *runtime) { int memfd = -1; // Attempt memfd-based loading first - int memfd_rc = load_orchestration_so_with_memfd( - so_data, so_size, thread_idx, &handle, so_path, &memfd - ); + int memfd_rc = load_orchestration_so_with_memfd(so_data, so_size, thread_idx, &handle, so_path, &memfd); if (memfd_rc == 0 && handle != nullptr) { // memfd loading succeeded, use memfd-loaded handle diff --git a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h index ff5e5d04..2156ca97 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h +++ b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h @@ -3,7 +3,7 @@ * This program is free software, you can redistribute it and/or modify it under the terms and conditions of * CANN Open Software License Agreement Version 2.0 (the "License"). * Please refer to the License for details. You may not use this file except in compliance with the License. - * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR ANY KIND, EITHER EXPRESS OR IMPLIED, + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. * See LICENSE in the root of the software repository for the full text of the License. * ----------------------------------------------------------------------------------------------------------- From 6f94963f4d5401ce1a204481e13234cf183253d2 Mon Sep 17 00:00:00 2001 From: puddingfjz <2811443837@qq.com> Date: Fri, 10 Apr 2026 18:32:49 +0800 Subject: [PATCH 4/7] Fix: guard memfd-related code with __linux__ for macOS compatibility - memfd_create and MFD_CLOEXEC are Linux-specific APIs - Add #if defined(__linux__) guards around: - memfd_loader.h includes - memfd loading attempts - memfd cleanup calls - Non-Linux platforms fall back to file-based loading --- .../aicpu_build_graph/aicpu/aicpu_executor.cpp | 16 ++++++++++++++-- .../aicpu/aicpu_executor.cpp | 16 ++++++++++++++-- .../aicpu/aicpu_executor.cpp | 16 ++++++++++++++-- 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp b/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp index d1b2fda9..b6737240 100644 --- a/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp +++ b/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp @@ -29,8 +29,10 @@ #include "runtime.h" #include "spin_hint.h" -// memfd-based SO loading +// memfd-based SO loading (Linux only) +#if defined(__linux__) #include "memfd_loader.h" +#endif // Runtime headers (full struct definition for create/destroy + PTO2_SCOPE) #include "pto_runtime2.h" @@ -1600,11 +1602,12 @@ int32_t AicpuExecutor::run(Runtime *runtime) { return -1; } - // Try memfd first, fall back to file-based + // Try memfd first (Linux only), fall back to file-based char so_path[256]; void *handle = nullptr; int memfd = -1; +#if defined(__linux__) // Attempt memfd-based loading first int memfd_rc = load_orchestration_so_with_memfd(so_data, so_size, thread_idx, &handle, so_path, &memfd); @@ -1612,10 +1615,13 @@ int32_t AicpuExecutor::run(Runtime *runtime) { // memfd loading succeeded, use memfd-loaded handle orch_so_memfd_ = memfd; } +#endif if (handle == nullptr) { // memfd failed or unavailable - use file-based loading +#if defined(__linux__) orch_so_memfd_ = -1; +#endif // Try multiple paths that may allow execution on AICPU bool file_created = false; @@ -1992,6 +1998,7 @@ int32_t AicpuExecutor::run(Runtime *runtime) { if (!runtime->get_orch_built_on_host() && orch_so_handle_ != nullptr) { pto2_runtime_destroy(rt); // Handle cleanup based on loading method +#if defined(__linux__) if (orch_so_memfd_ >= 0) { // memfd-based: close fd AFTER dlclose cleanup_memfd_so(orch_so_memfd_, orch_so_handle_); @@ -2000,6 +2007,11 @@ int32_t AicpuExecutor::run(Runtime *runtime) { dlclose(orch_so_handle_); unlink(orch_so_path_); } +#else + // Non-Linux: only file-based loading + dlclose(orch_so_handle_); + unlink(orch_so_path_); +#endif } DEV_ALWAYS("Thread %d: Last thread, marking executor finished", thread_idx); } diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp index ed169e97..631394a9 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp @@ -29,8 +29,10 @@ #include "runtime.h" #include "spin_hint.h" -// memfd-based SO loading +// memfd-based SO loading (Linux only) +#if defined(__linux__) #include "memfd_loader.h" +#endif // Runtime headers (full struct definition for create/destroy + PTO2_SCOPE) #include "pto_runtime2.h" @@ -1936,11 +1938,12 @@ int32_t AicpuExecutor::run(Runtime *runtime) { return -1; } - // Try memfd first, fall back to file-based + // Try memfd first (Linux only), fall back to file-based char so_path[256]; void *handle = nullptr; int memfd = -1; +#if defined(__linux__) // Attempt memfd-based loading first int memfd_rc = load_orchestration_so_with_memfd(so_data, so_size, thread_idx, &handle, so_path, &memfd); @@ -1948,10 +1951,13 @@ int32_t AicpuExecutor::run(Runtime *runtime) { // memfd loading succeeded, use memfd-loaded handle orch_so_memfd_ = memfd; } +#endif if (handle == nullptr) { // memfd failed or unavailable - use file-based loading +#if defined(__linux__) orch_so_memfd_ = -1; +#endif // Try multiple paths that may allow execution on AICPU bool file_created = false; @@ -2381,6 +2387,7 @@ int32_t AicpuExecutor::run(Runtime *runtime) { } pto2_runtime_destroy(rt); // Handle cleanup based on loading method +#if defined(__linux__) if (orch_so_memfd_ >= 0) { // memfd-based: close fd AFTER dlclose cleanup_memfd_so(orch_so_memfd_, orch_so_handle_); @@ -2389,6 +2396,11 @@ int32_t AicpuExecutor::run(Runtime *runtime) { dlclose(orch_so_handle_); unlink(orch_so_path_); } +#else + // Non-Linux: only file-based loading + dlclose(orch_so_handle_); + unlink(orch_so_path_); +#endif } } diff --git a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp index a86c7977..5dc48221 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp +++ b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp @@ -29,8 +29,10 @@ #include "runtime.h" #include "spin_hint.h" -// memfd-based SO loading +// memfd-based SO loading (Linux only) +#if defined(__linux__) #include "memfd_loader.h" +#endif // Runtime headers (full struct definition for create/destroy + PTO2_SCOPE) #include "pto_runtime2.h" @@ -1914,11 +1916,12 @@ int32_t AicpuExecutor::run(Runtime *runtime) { return -1; } - // Try memfd first, fall back to file-based + // Try memfd first (Linux only), fall back to file-based char so_path[256]; void *handle = nullptr; int memfd = -1; +#if defined(__linux__) // Attempt memfd-based loading first int memfd_rc = load_orchestration_so_with_memfd(so_data, so_size, thread_idx, &handle, so_path, &memfd); @@ -1926,10 +1929,13 @@ int32_t AicpuExecutor::run(Runtime *runtime) { // memfd loading succeeded, use memfd-loaded handle orch_so_memfd_ = memfd; } +#endif if (handle == nullptr) { // memfd failed or unavailable - use file-based loading +#if defined(__linux__) orch_so_memfd_ = -1; +#endif // Try multiple paths that may allow execution on AICPU bool file_created = false; @@ -2358,6 +2364,7 @@ int32_t AicpuExecutor::run(Runtime *runtime) { } pto2_runtime_destroy(rt); // Handle cleanup based on loading method +#if defined(__linux__) if (orch_so_memfd_ >= 0) { // memfd-based: close fd AFTER dlclose cleanup_memfd_so(orch_so_memfd_, orch_so_handle_); @@ -2366,6 +2373,11 @@ int32_t AicpuExecutor::run(Runtime *runtime) { dlclose(orch_so_handle_); unlink(orch_so_path_); } +#else + // Non-Linux: only file-based loading + dlclose(orch_so_handle_); + unlink(orch_so_path_); +#endif } } From 3b8cd836e818f85349b5d1f20e50d1e2cecfac9b Mon Sep 17 00:00:00 2001 From: puddingfjz <2811443837@qq.com> Date: Fri, 10 Apr 2026 18:35:37 +0800 Subject: [PATCH 5/7] Fix: use constant buffer size instead of sizeof(pointer) out_so_path is a char* parameter, not an array. sizeof(out_so_path) returns pointer size (8 bytes) not buffer size (256 bytes). Use constant 256 for buffer size. --- src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h | 2 +- src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h | 2 +- src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h b/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h index 925b7e93..d78d3217 100644 --- a/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h +++ b/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h @@ -91,7 +91,7 @@ static inline int load_orchestration_so_with_memfd( return -1; } - snprintf(out_so_path, sizeof(out_so_path), "%s", link_path); + snprintf(out_so_path, 256, "%s", link_path); // Try dlopen from the symlink dlerror(); diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h index 73fcde42..59ff2ecd 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h @@ -91,7 +91,7 @@ static inline int load_orchestration_so_with_memfd( return -1; } - snprintf(out_so_path, sizeof(out_so_path), "%s", link_path); + snprintf(out_so_path, 256, "%s", link_path); // Try dlopen from the symlink dlerror(); diff --git a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h index 2156ca97..034a0feb 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h +++ b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h @@ -91,7 +91,7 @@ static inline int load_orchestration_so_with_memfd( return -1; } - snprintf(out_so_path, sizeof(out_so_path), "%s", link_path); + snprintf(out_so_path, 256, "%s", link_path); // Try dlopen from the symlink dlerror(); From 6d6b31707ed0ce0d881fbda883583cadcaac4307 Mon Sep 17 00:00:00 2001 From: puddingfjz <2811443837@qq.com> Date: Fri, 10 Apr 2026 18:39:33 +0800 Subject: [PATCH 6/7] Fix: move memfd variable inside #if defined(__linux__) block On non-Linux platforms, memfd variable was declared but unused, causing -Wunused-variable error with -Werror. --- src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp | 2 +- .../runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp | 2 +- .../runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp b/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp index b6737240..7e468fce 100644 --- a/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp +++ b/src/a2a3/runtime/aicpu_build_graph/aicpu/aicpu_executor.cpp @@ -1605,9 +1605,9 @@ int32_t AicpuExecutor::run(Runtime *runtime) { // Try memfd first (Linux only), fall back to file-based char so_path[256]; void *handle = nullptr; - int memfd = -1; #if defined(__linux__) + int memfd = -1; // Attempt memfd-based loading first int memfd_rc = load_orchestration_so_with_memfd(so_data, so_size, thread_idx, &handle, so_path, &memfd); diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp index 631394a9..b53dff6d 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp @@ -1941,9 +1941,9 @@ int32_t AicpuExecutor::run(Runtime *runtime) { // Try memfd first (Linux only), fall back to file-based char so_path[256]; void *handle = nullptr; - int memfd = -1; #if defined(__linux__) + int memfd = -1; // Attempt memfd-based loading first int memfd_rc = load_orchestration_so_with_memfd(so_data, so_size, thread_idx, &handle, so_path, &memfd); diff --git a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp index 5dc48221..86efe239 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp +++ b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp @@ -1919,9 +1919,9 @@ int32_t AicpuExecutor::run(Runtime *runtime) { // Try memfd first (Linux only), fall back to file-based char so_path[256]; void *handle = nullptr; - int memfd = -1; #if defined(__linux__) + int memfd = -1; // Attempt memfd-based loading first int memfd_rc = load_orchestration_so_with_memfd(so_data, so_size, thread_idx, &handle, so_path, &memfd); From 0bfa0e6166052074e2652a789feecec326b067b3 Mon Sep 17 00:00:00 2001 From: puddingfjz <2811443837@qq.com> Date: Fri, 10 Apr 2026 18:41:19 +0800 Subject: [PATCH 7/7] Apply: clang-format formatting changes - Put function parameters on single line (per clang-format) - Put snprintf arguments on single line (per clang-format) --- .../runtime/aicpu_build_graph/aicpu/memfd_loader.h | 10 ++++------ .../tensormap_and_ringbuffer/aicpu/memfd_loader.h | 10 ++++------ .../tensormap_and_ringbuffer/aicpu/memfd_loader.h | 10 ++++------ 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h b/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h index d78d3217..d95d00e6 100644 --- a/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h +++ b/src/a2a3/runtime/aicpu_build_graph/aicpu/memfd_loader.h @@ -39,8 +39,8 @@ extern "C" { * Load orchestration SO using memfd */ static inline int load_orchestration_so_with_memfd( - const void *so_data, size_t so_size, int orch_thread_num, - void **out_handle, char *out_so_path, int *out_memfd) { + const void *so_data, size_t so_size, int orch_thread_num, void **out_handle, char *out_so_path, int *out_memfd +) { *out_handle = nullptr; *out_memfd = -1; out_so_path[0] = '\0'; @@ -81,8 +81,7 @@ static inline int load_orchestration_so_with_memfd( // Create a symlink to /proc/self/fd/N with a "normal" path // This bypasses the AICPU dynamic linker's issue with /proc/self/fd/N paths char link_path[256]; - snprintf(link_path, sizeof(link_path), - "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); + snprintf(link_path, sizeof(link_path), "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); int symlink_rc = symlink(proc_fd_path, link_path); if (symlink_rc != 0) { @@ -102,8 +101,7 @@ static inline int load_orchestration_so_with_memfd( if (handle == nullptr) { const char *dl_err = dlerror(); - DEV_INFO("dlopen from memfd symlink failed: %s", - dl_err ? dl_err : "unknown"); + DEV_INFO("dlopen from memfd symlink failed: %s", dl_err ? dl_err : "unknown"); close(fd); return -1; } diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h index 59ff2ecd..e7249b35 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h @@ -39,8 +39,8 @@ extern "C" { * Load orchestration SO using memfd */ static inline int load_orchestration_so_with_memfd( - const void *so_data, size_t so_size, int orch_thread_num, - void **out_handle, char *out_so_path, int *out_memfd) { + const void *so_data, size_t so_size, int orch_thread_num, void **out_handle, char *out_so_path, int *out_memfd +) { *out_handle = nullptr; *out_memfd = -1; out_so_path[0] = '\0'; @@ -81,8 +81,7 @@ static inline int load_orchestration_so_with_memfd( // Create a symlink to /proc/self/fd/N with a "normal" path // This bypasses the AICPU dynamic linker's issue with /proc/self/fd/N paths char link_path[256]; - snprintf(link_path, sizeof(link_path), - "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); + snprintf(link_path, sizeof(link_path), "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); int symlink_rc = symlink(proc_fd_path, link_path); if (symlink_rc != 0) { @@ -102,8 +101,7 @@ static inline int load_orchestration_so_with_memfd( if (handle == nullptr) { const char *dl_err = dlerror(); - DEV_INFO("dlopen from memfd symlink failed: %s", - dl_err ? dl_err : "unknown"); + DEV_INFO("dlopen from memfd symlink failed: %s", dl_err ? dl_err : "unknown"); close(fd); return -1; } diff --git a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h index 034a0feb..6acabc1f 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h +++ b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/memfd_loader.h @@ -39,8 +39,8 @@ extern "C" { * Load orchestration SO using memfd */ static inline int load_orchestration_so_with_memfd( - const void *so_data, size_t so_size, int orch_thread_num, - void **out_handle, char *out_so_path, int *out_memfd) { + const void *so_data, size_t so_size, int orch_thread_num, void **out_handle, char *out_so_path, int *out_memfd +) { *out_handle = nullptr; *out_memfd = -1; out_so_path[0] = '\0'; @@ -81,8 +81,7 @@ static inline int load_orchestration_so_with_memfd( // Create a symlink to /proc/self/fd/N with a "normal" path // This bypasses the AICPU dynamic linker's issue with /proc/self/fd/N paths char link_path[256]; - snprintf(link_path, sizeof(link_path), - "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); + snprintf(link_path, sizeof(link_path), "/tmp/libdevice_orch_%d_%d.so", getpid(), orch_thread_num); int symlink_rc = symlink(proc_fd_path, link_path); if (symlink_rc != 0) { @@ -102,8 +101,7 @@ static inline int load_orchestration_so_with_memfd( if (handle == nullptr) { const char *dl_err = dlerror(); - DEV_INFO("dlopen from memfd symlink failed: %s", - dl_err ? dl_err : "unknown"); + DEV_INFO("dlopen from memfd symlink failed: %s", dl_err ? dl_err : "unknown"); close(fd); return -1; }