From 34c6d52ee3f478c284638624e44cc61230df142d Mon Sep 17 00:00:00 2001 From: yanghaoran29 Date: Sat, 11 Apr 2026 00:49:27 +0800 Subject: [PATCH] Fix: assert/LOG paths failing when pto2_current_runtime() is null on AICPU Problem: assert_impl and related code in orchestration/common.cpp use LOG_* macros that dereference pto2_current_runtime()->ops. On the device path, pto2_submit_mixed_task and other runtime code run in libaicpu_kernel.so, but only the orchestration plugin (libdevice_orch_*.so) had g_pto2_current_runtime set via dlsym(pto2_framework_bind_runtime). Each .so carries its own copy of common.cpp, so the AICPU image's g_pto2_current_runtime stayed nullptr and assertion reporting could crash or misbehave. Fix: In AicpuExecutor, call the link-resolved pto2_framework_bind_runtime(rt) for this DSO before orch_bind_runtime_, and pto2_framework_bind_runtime(nullptr) before pto2_runtime_destroy, mirroring the orchestration SO bind. Same change for a5 tensormap_and_ringbuffer. --- .../tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp | 9 +++++++-- .../tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp index 97afd6a4..4b703a1e 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp @@ -73,6 +73,10 @@ typedef void (*DeviceOrchestrationBindRuntimeFunc)(PTO2Runtime *rt); // Config function exported by orchestration .so typedef PTO2OrchestrationConfig (*DeviceOrchestrationConfigFunc)(const ChipStorageTaskArgs &orch_args); +// From orchestration/common.cpp linked into this DSO — updates g_pto2_current_runtime here (distinct from +// pto2_framework_bind_runtime in the dlopen'd libdevice_orch_*.so). 
+extern "C" void pto2_framework_bind_runtime(PTO2Runtime *rt); + constexpr int32_t MAX_AICPU_THREADS = PLATFORM_MAX_AICPU_THREADS; constexpr int32_t MAX_CORES_PER_THREAD = PLATFORM_MAX_CORES_PER_THREAD; @@ -2112,6 +2116,7 @@ int32_t AicpuExecutor::run(Runtime *runtime) { #if PTO2_PROFILING orch_cycle_start = get_sys_cnt_aicpu(); #endif + pto2_framework_bind_runtime(rt); if (orch_bind_runtime_ != nullptr) { orch_bind_runtime_(rt); } @@ -2353,8 +2358,8 @@ int32_t AicpuExecutor::run(Runtime *runtime) { finished_.store(true, std::memory_order_release); // Destroy PTO2 runtime and close orchestration SO (moved from orchestrator path) if (!runtime->get_orch_built_on_host() && orch_so_handle_ != nullptr) { - // Clear the borrowed pointer in the orchestration SO before destroying - // rt, so g_pto2_current_runtime never points to freed memory. + // Clear g_pto2_current_runtime in this DSO and in the orchestration SO before destroying rt. + pto2_framework_bind_runtime(nullptr); if (orch_bind_runtime_ != nullptr) { orch_bind_runtime_(nullptr); } diff --git a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp index dcf3d565..1f3de190 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp +++ b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp @@ -73,6 +73,10 @@ typedef void (*DeviceOrchestrationBindRuntimeFunc)(PTO2Runtime *rt); // Config function exported by orchestration .so typedef PTO2OrchestrationConfig (*DeviceOrchestrationConfigFunc)(const ChipStorageTaskArgs &orch_args); +// From orchestration/common.cpp linked into this DSO — updates g_pto2_current_runtime here (distinct from +// pto2_framework_bind_runtime in the dlopen'd libdevice_orch_*.so). 
+extern "C" void pto2_framework_bind_runtime(PTO2Runtime *rt); + constexpr int32_t MAX_AICPU_THREADS = PLATFORM_MAX_AICPU_THREADS; constexpr int32_t MAX_CORES_PER_THREAD = PLATFORM_MAX_CORES_PER_THREAD; @@ -2091,6 +2095,7 @@ int32_t AicpuExecutor::run(Runtime *runtime) { #if PTO2_PROFILING orch_cycle_start = get_sys_cnt_aicpu(); #endif + pto2_framework_bind_runtime(rt); if (orch_bind_runtime_ != nullptr) { orch_bind_runtime_(rt); } @@ -2330,8 +2335,8 @@ int32_t AicpuExecutor::run(Runtime *runtime) { finished_.store(true, std::memory_order_release); // Destroy PTO2 runtime and close orchestration SO (moved from orchestrator path) if (!runtime->get_orch_built_on_host() && orch_so_handle_ != nullptr) { - // Clear the borrowed pointer in the orchestration SO before destroying - // rt, so g_pto2_current_runtime never points to freed memory. + // Clear g_pto2_current_runtime in this DSO and in the orchestration SO before destroying rt. + pto2_framework_bind_runtime(nullptr); if (orch_bind_runtime_ != nullptr) { orch_bind_runtime_(nullptr); }