From ac7ef1f1396bfe6a4ba3d4a9837b5dcf393f795d Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Thu, 14 May 2026 23:15:27 +0000 Subject: [PATCH 01/19] ze: use zeCommandListGetContextHandle in _get_profiling_event Replace the FIND_AND_DEL_ZE_OBJ + read-stored-context + ADD_ZE_OBJ dance with a direct introspection call. The hot path (every NULL-signal Append) no longer touches the cmdlist hash. --- backends/ze/tracer_ze_helpers.include.c | 27 +++++++++++-------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index 0b78a1f8a..2ff0336aa 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -356,32 +356,32 @@ static inline void _register_ze_event(ze_event_handle_t event, } static struct _ze_event_h *_get_profiling_event(ze_command_list_handle_t command_list) { - struct _ze_obj_h *o_h = NULL; struct _ze_event_h *e_w; - FIND_AND_DEL_ZE_OBJ(&command_list, o_h); - if (!o_h) { - THAPI_DBGLOG("Could not get command list: %p", command_list); + ze_context_handle_t context = NULL; + ze_result_t res = ZE_COMMAND_LIST_GET_CONTEXT_HANDLE_PTR(command_list, &context); + if (res != ZE_RESULT_SUCCESS || !context) { + THAPI_DBGLOG("zeCommandListGetContextHandle failed with %d, for command list: %p", res, + command_list); return NULL; } - ze_context_handle_t context = ((struct _ze_command_list_obj_data *)(o_h->obj_data))->context; GET_ZE_EVENT(&context, e_w); if (e_w) { e_w->command_list = command_list; - goto cleanup; + return e_w; } GET_ZE_EVENT_WRAPPER(e_w); if (!e_w) { THAPI_DBGLOG("Could not create a new event wrapper for command list: %p", command_list); - goto cleanup; + return NULL; } e_w->command_list = command_list; ze_event_pool_desc_t desc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, NULL, ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP | ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1}; - ze_result_t res = ZE_EVENT_POOL_CREATE_PTR(context, &desc, 0, NULL, 
&e_w->event_pool); + res = ZE_EVENT_POOL_CREATE_PTR(context, &desc, 0, NULL, &e_w->event_pool); if (res != ZE_RESULT_SUCCESS) { THAPI_DBGLOG("zeEventPoolCreate failed with %d, for command list: %p, context: %p", res, command_list, context); @@ -391,19 +391,16 @@ static struct _ze_event_h *_get_profiling_event(ze_command_list_handle_t command ZE_EVENT_SCOPE_FLAG_HOST}; res = ZE_EVENT_CREATE_PTR(e_w->event_pool, &e_desc, &e_w->event); if (res != ZE_RESULT_SUCCESS) { - THAPI_DBGLOG("zeEventCreate failed with %d, for event pool: %p, command list: %p, context: %p", - res, e_w->event_pool, command_list, context); + THAPI_DBGLOG("zeEventCreate failed with %d, for event pool: %p, context: %p", + res, e_w->event_pool, context); goto cleanup_ep; } - goto cleanup; + return e_w; cleanup_ep: ZE_EVENT_POOL_DESTROY_PTR(e_w->event_pool); cleanup_wrapper: PUT_ZE_EVENT_WRAPPER(e_w); - e_w = NULL; -cleanup: - ADD_ZE_OBJ(o_h); - return e_w; + return NULL; } static void _profile_event_results(ze_event_handle_t event); From c2963e124c857724e16f0b916dc90e4397f76b51 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Thu, 14 May 2026 23:29:37 +0000 Subject: [PATCH 02/19] ze: use zeCommandListGetContextHandle in _register_ze_event Replace the cl_data->context read with a direct introspection call. Keeps the FIND_AND_DEL_ZE_OBJ dance in place because cl_data->events (the per-cl event linked list) and cl_data->flags & _ZE_IMMEDIATE are still read here. 
--- backends/ze/tracer_ze_helpers.include.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index 2ff0336aa..ced751152 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -336,12 +336,19 @@ static inline void _register_ze_event(ze_event_handle_t event, _ze_event->flags = 0; } + ze_context_handle_t context = NULL; + ze_result_t res = ZE_COMMAND_LIST_GET_CONTEXT_HANDLE_PTR(command_list, &context); + if (res == ZE_RESULT_SUCCESS && context) + _ze_event->context = context; + else + THAPI_DBGLOG("zeCommandListGetContextHandle failed with %d for command list: %p", res, + command_list); + struct _ze_obj_h *o_h = NULL; struct _ze_command_list_obj_data *cl_data = NULL; FIND_AND_DEL_ZE_OBJ(&command_list, o_h); if (o_h) { cl_data = (struct _ze_command_list_obj_data *)(o_h->obj_data); - _ze_event->context = cl_data->context; if (cl_data->flags & _ZE_IMMEDIATE) _ze_event->flags |= _ZE_IMMEDIATE_CMD; } else From 0ec4dfcb9d4834859e1869fb2dc73d5daeb9caa0 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Thu, 14 May 2026 23:33:42 +0000 Subject: [PATCH 03/19] ze: use introspection in _dump_command_list_device_timer / _dump_memory_info Both helpers were doing FIND_ZE_OBJ on the cmdlist hash purely to read cl_data->device or cl_data->context. Direct introspection calls remove the lookup and let cl_data->device / cl_data->context become write-only (cleaned up in a later commit). 
--- backends/ze/tracer_ze_helpers.include.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index ced751152..671630192 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -688,12 +688,9 @@ static void _dump_device_timer(ze_device_handle_t hDevice) { } static void _dump_command_list_device_timer(ze_command_list_handle_t hCommandList) { - struct _ze_obj_h *o_h = NULL; - FIND_ZE_OBJ(&hCommandList, o_h); - if (o_h) { - ze_device_handle_t hDevice = ((struct _ze_command_list_obj_data *)(o_h->obj_data))->device; + ze_device_handle_t hDevice = NULL; + if (ZE_COMMAND_LIST_GET_DEVICE_HANDLE_PTR(hCommandList, &hDevice) == ZE_RESULT_SUCCESS && hDevice) _dump_device_timer(hDevice); - } } static void _dump_driver_device_properties(ze_driver_handle_t hDriver) { @@ -787,12 +784,10 @@ static inline void _dump_memory_info_ctx(ze_context_handle_t hContext, const voi } static inline void _dump_memory_info(ze_command_list_handle_t hCommandList, const void *ptr) { - struct _ze_obj_h *o_h = NULL; - FIND_ZE_OBJ(&hCommandList, o_h); - if (o_h) { - ze_context_handle_t hContext = ((struct _ze_command_list_obj_data *)(o_h->obj_data))->context; + ze_context_handle_t hContext = NULL; + if (ZE_COMMAND_LIST_GET_CONTEXT_HANDLE_PTR(hCommandList, &hContext) == ZE_RESULT_SUCCESS && + hContext) _dump_memory_info_ctx(hContext, ptr); - } } //////////////////////////////////////////// From 772ea8f53b5875da530ed975df8bd56a802885e4 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Thu, 14 May 2026 23:41:55 +0000 Subject: [PATCH 04/19] ze: drop dead device hash, cl_data device/context/driver fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the introspection swaps, nothing reads cl_data->device, cl_data->context, or cl_data->driver — they were only populated to serve callers that now use 
zeCommandList{Get,Is}* directly. Cascade: - struct _ze_command_list_obj_data: drop device/context/driver - _on_create_command_list: drop hContext/hDevice params and the device-hash lookup that only existed to populate driver - ze_model.rb: drop the hContext/hDevice args at the two call sites - struct _ze_device_obj_data and _register_ze_device: fully unused now that nothing reads the device hash. Drop them and their two epilogue registrations (zeDeviceGet, zeDeviceGetSubDevices). - enum _ze_obj_type: drop DRIVER and DEVICE values. Net: ~80 LOC removed; the cmdlist hash entry now stores only flags and the per-cl events list. --- backends/ze/tracer_ze_helpers.include.c | 65 +------------------------ backends/ze/ze_model.rb | 22 +-------- 2 files changed, 4 insertions(+), 83 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index 671630192..2e4a9acd9 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -23,13 +23,7 @@ #define THAPI_ATTRIBUTE_DESTRUCTOR #endif -enum _ze_obj_type { UNKNOWN = 0, DRIVER, DEVICE, COMMAND_LIST, EVENT }; - -struct _ze_device_obj_data { - ze_driver_handle_t driver; - ze_device_handle_t parent; - ze_device_properties_t properties; -}; +enum _ze_obj_type { UNKNOWN = 0, COMMAND_LIST, EVENT }; static int _do_profile = 0; static int _do_cleanup = 0; @@ -60,9 +54,6 @@ typedef _ze_command_list_flag_t _ze_command_list_flags_t; struct _ze_event_h; struct _ze_command_list_obj_data { - ze_device_handle_t device; - ze_context_handle_t context; - ze_driver_handle_t driver; _ze_command_list_flags_t flags; struct _ze_event_h *events; }; @@ -112,58 +103,9 @@ static inline void _delete_ze_obj(struct _ze_obj_h *o_h) { pthread_mutex_unlock(&_ze_objs_mutex); \ } while (0) -static inline void _register_ze_device(ze_device_handle_t device, - ze_driver_handle_t driver, - ze_device_handle_t parent) { - struct _ze_obj_h *o_h = NULL; - struct _ze_device_obj_data 
*d_data = NULL; - - FIND_ZE_OBJ(&device, o_h); - if (o_h) { - THAPI_DBGLOG("Device already registered: %p", device); - return; - } - - intptr_t mem = (intptr_t)calloc(1, sizeof(struct _ze_obj_h) + sizeof(struct _ze_device_obj_data)); - if (mem == 0) { - THAPI_DBGLOG_NO_ARGS("Failed to allocate memory"); - return; - } - - o_h = (struct _ze_obj_h *)mem; - d_data = (struct _ze_device_obj_data *)(mem + sizeof(struct _ze_obj_h)); - - o_h->ptr = (void *)device; - o_h->type = DEVICE; - d_data->driver = driver; - d_data->parent = parent; - o_h->obj_data = (void *)d_data; - - d_data->properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; - ze_result_t res = ZE_DEVICE_GET_PROPERTIES_PTR(device, &(d_data->properties)); - if (res != ZE_RESULT_SUCCESS) { - free((void *)mem); - return; - } - - ADD_ZE_OBJ(o_h); -} - -static inline void _on_create_command_list(ze_command_list_handle_t command_list, - ze_context_handle_t context, - ze_device_handle_t device, - int immediate) { +static inline void _on_create_command_list(ze_command_list_handle_t command_list, int immediate) { struct _ze_obj_h *o_h = NULL; struct _ze_command_list_obj_data *cl_data = NULL; - ze_driver_handle_t driver; - - FIND_ZE_OBJ(&device, o_h); - if (!o_h) { - THAPI_DBGLOG("Could not find device: %p associated with command list: %p", device, - command_list); - return; - } - driver = ((struct _ze_device_obj_data *)(o_h->obj_data))->driver; FIND_ZE_OBJ(&command_list, o_h); if (o_h) { @@ -183,9 +125,6 @@ static inline void _on_create_command_list(ze_command_list_handle_t command_list o_h->ptr = (void *)command_list; o_h->type = COMMAND_LIST; - cl_data->device = device; - cl_data->context = context; - cl_data->driver = driver; if (immediate) cl_data->flags = _ZE_IMMEDIATE; diff --git a/backends/ze/ze_model.rb b/backends/ze/ze_model.rb index 1ee60800d..a3ceccea1 100644 --- a/backends/ze/ze_model.rb +++ b/backends/ze/ze_model.rb @@ -137,28 +137,10 @@ def upper_snake_case(str) end ZE_POINTER_NAMES = 
ze_pointer_names.to_h -register_epilogue 'zeDeviceGet', < Date: Fri, 15 May 2026 14:42:36 +0000 Subject: [PATCH 05/19] ze: use zeCommandListIsImmediate in _register_ze_event (only) Switches the immediate-flag source for events to introspection. Stops reading cl_data->flags & _ZE_IMMEDIATE here; the stored flag is still written/read in _on_destroy_command_list (see note below). Investigation of an attempted parallel swap in _on_destroy_command_list: _on_destroy_command_list runs as the EPILOGUE of zeCommandListDestroy. By the time it fires, the cmdlist handle has been freed by the driver, and zeCommandListIsImmediate(handle) segfaults. We must keep using the stored cl_data flag in that one place. A code comment makes this explicit so a future cleanup doesn't re-attempt the swap. Verified: 8/8 baseline iprof tests + sampled non-skipped extras, no regressions. --- backends/ze/tracer_ze_helpers.include.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index 2e4a9acd9..65525c871 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -283,15 +283,18 @@ static inline void _register_ze_event(ze_event_handle_t event, THAPI_DBGLOG("zeCommandListGetContextHandle failed with %d for command list: %p", res, command_list); + ze_bool_t is_immediate = 0; + if (ZE_COMMAND_LIST_IS_IMMEDIATE_PTR(command_list, &is_immediate) == ZE_RESULT_SUCCESS && + is_immediate) + _ze_event->flags |= _ZE_IMMEDIATE_CMD; + struct _ze_obj_h *o_h = NULL; struct _ze_command_list_obj_data *cl_data = NULL; FIND_AND_DEL_ZE_OBJ(&command_list, o_h); - if (o_h) { - cl_data = (struct _ze_command_list_obj_data *)(o_h->obj_data); - if (cl_data->flags & _ZE_IMMEDIATE) - _ze_event->flags |= _ZE_IMMEDIATE_CMD; - } else + if (!o_h) THAPI_DBGLOG("Could not get command list associated to event: %p", event); + else + cl_data = (struct 
_ze_command_list_obj_data *)(o_h->obj_data); /* only track our events, users are responsible for reseting/deleting their events */ if (cl_data && _ze_event->event_pool) @@ -563,6 +566,9 @@ static void _on_destroy_command_list(ze_command_list_handle_t command_list) { } if (_do_profile) { struct _ze_command_list_obj_data *cl_data = (struct _ze_command_list_obj_data *)(o_h->obj_data); + /* Note: do NOT call zeCommandListIsImmediate here — _on_destroy_command_list + * is the epilogue of zeCommandListDestroy, so command_list is already an + * invalid handle. Use the stored _ZE_IMMEDIATE flag instead. */ struct _ze_event_h *elt = NULL, *tmp = NULL; DL_FOREACH_SAFE(cl_data->events, elt, tmp) { DL_DELETE(cl_data->events, elt); From 5a0e8a146e73d6da46948e0cc0d767110c626e0e Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Fri, 15 May 2026 14:49:10 +0000 Subject: [PATCH 06/19] ze: collapse _ZE_IMMEDIATE into _ZE_EXECUTED on cl-side MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cl-side _ZE_IMMEDIATE flag existed only so destroy/reset drainers could ask "are this cl's events ready to query?" — true for immediate cls (which have no Execute step) and for regular cls that have been Executed. Stamping _ZE_EXECUTED at create time for immediate cls unifies both into a single check, lets _on_destroy_command_list drop its compound (immediate || executed) test, and removes the "epilogue can't introspect" foot-gun entirely from this path. 
--- backends/ze/tracer_ze_helpers.include.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index 65525c871..8861221c1 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -46,8 +46,7 @@ struct ze_closure { struct ze_closure *ze_closures = NULL; typedef enum _ze_command_list_flag { - _ZE_IMMEDIATE = ZE_BIT(0), - _ZE_EXECUTED = ZE_BIT(1) + _ZE_EXECUTED = ZE_BIT(0) } _ze_command_list_flag_t; typedef _ze_command_list_flag_t _ze_command_list_flags_t; @@ -125,8 +124,11 @@ static inline void _on_create_command_list(ze_command_list_handle_t command_list o_h->ptr = (void *)command_list; o_h->type = COMMAND_LIST; + /* Immediate cls have no Execute step; their appends run on the device the + * moment they're submitted. Treat them as already-executed so drainers + * (Reset/Destroy hooks) query their events via _ZE_EXECUTED uniformly. */ if (immediate) - cl_data->flags = _ZE_IMMEDIATE; + cl_data->flags = _ZE_EXECUTED; o_h->obj_data = (void *)cl_data; @@ -566,14 +568,10 @@ static void _on_destroy_command_list(ze_command_list_handle_t command_list) { } if (_do_profile) { struct _ze_command_list_obj_data *cl_data = (struct _ze_command_list_obj_data *)(o_h->obj_data); - /* Note: do NOT call zeCommandListIsImmediate here — _on_destroy_command_list - * is the epilogue of zeCommandListDestroy, so command_list is already an - * invalid handle. Use the stored _ZE_IMMEDIATE flag instead. 
*/ struct _ze_event_h *elt = NULL, *tmp = NULL; DL_FOREACH_SAFE(cl_data->events, elt, tmp) { DL_DELETE(cl_data->events, elt); - _unregister_ze_event(elt->event, - (cl_data->flags & _ZE_IMMEDIATE) || (cl_data->flags & _ZE_EXECUTED)); + _unregister_ze_event(elt->event, cl_data->flags & _ZE_EXECUTED); } } free(o_h); From 2fe2874e1779fdbe009988e35335a6aac948be7e Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Fri, 15 May 2026 14:59:08 +0000 Subject: [PATCH 07/19] ze: drop unused obj_data_free field and commented _delete_ze_obj --- backends/ze/tracer_ze_helpers.include.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index 8861221c1..e0a453bce 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -62,22 +62,11 @@ struct _ze_obj_h { UT_hash_handle hh; enum _ze_obj_type type; void *obj_data; - void (*obj_data_free)(void *obj_data); }; struct _ze_obj_h *_ze_objs = NULL; pthread_mutex_t _ze_objs_mutex = PTHREAD_MUTEX_INITIALIZER; -/* -static inline void _delete_ze_obj(struct _ze_obj_h *o_h) { - HASH_DEL(_ze_objs, o_h); - if (o_h->obj_data && o_h->obj_data_free) { - o_h->obj_data_free(o_h->obj_data); - } - free(o_h); -} -*/ - #define FIND_ZE_OBJ(key, val) \ do { \ pthread_mutex_lock(&_ze_objs_mutex); \ From df028b35fe6df39dab6244ec5b163cd86a192d43 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Fri, 15 May 2026 15:01:47 +0000 Subject: [PATCH 08/19] ze: drop write-only o_h->type field and _ze_obj_type enum --- backends/ze/tracer_ze_helpers.include.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index e0a453bce..dd36e6e90 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -23,8 +23,6 @@ #define THAPI_ATTRIBUTE_DESTRUCTOR #endif -enum _ze_obj_type { UNKNOWN = 0, COMMAND_LIST, EVENT }; - static 
int _do_profile = 0; static int _do_cleanup = 0; static int _do_chained_structs = 0; @@ -60,7 +58,6 @@ struct _ze_command_list_obj_data { struct _ze_obj_h { void *ptr; UT_hash_handle hh; - enum _ze_obj_type type; void *obj_data; }; @@ -112,7 +109,6 @@ static inline void _on_create_command_list(ze_command_list_handle_t command_list cl_data = (struct _ze_command_list_obj_data *)(mem + sizeof(struct _ze_obj_h)); o_h->ptr = (void *)command_list; - o_h->type = COMMAND_LIST; /* Immediate cls have no Execute step; their appends run on the device the * moment they're submitted. Treat them as already-executed so drainers * (Reset/Destroy hooks) query their events via _ZE_EXECUTED uniformly. */ @@ -362,7 +358,6 @@ static inline void _on_created_event(ze_event_handle_t event) { o_h = (struct _ze_obj_h *)mem; o_h->ptr = (void *)event; - o_h->type = EVENT; ADD_ZE_OBJ(o_h); #else From 647723551e5c394b9460533ff35d2af3000893b7 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Fri, 15 May 2026 15:08:01 +0000 Subject: [PATCH 09/19] ze: collapse _ze_obj_h indirection into _ze_command_list_obj_data After dropping the device hash entries and obj_data_free, _ze_objs only ever held command-list payloads (in non-debug builds) or no-op event-tag entries (debug-only). Both indirections are now dead weight. Changes: - Drop debug-only event-hash usage in _on_created_event / _on_destroy_event (removes the THAPI_DEBUG branches and the zeEventCreate epilogue registration). - Fold _ze_obj_h's ptr/hh into _ze_command_list_obj_data; drop the separate header struct, the trailing-data calloc, and the obj_data void* indirection. - Rename macros and global to reflect the now-monomorphic hash: _ze_objs -> _ze_cls, FIND/ADD/FIND_AND_DEL_ZE_OBJ -> *_ZE_CL. Net: ~55 LOC removed; one less struct, one less indirection. 
--- backends/ze/tracer_ze_helpers.include.c | 128 ++++++++---------------- backends/ze/ze_model.rb | 6 -- 2 files changed, 39 insertions(+), 95 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index dd36e6e90..5bdf87fb9 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -51,73 +51,62 @@ typedef _ze_command_list_flag_t _ze_command_list_flags_t; struct _ze_event_h; struct _ze_command_list_obj_data { + void *ptr; /* the ze_command_list_handle_t this entry tracks */ + UT_hash_handle hh; _ze_command_list_flags_t flags; struct _ze_event_h *events; }; -struct _ze_obj_h { - void *ptr; - UT_hash_handle hh; - void *obj_data; -}; +struct _ze_command_list_obj_data *_ze_cls = NULL; +pthread_mutex_t _ze_cls_mutex = PTHREAD_MUTEX_INITIALIZER; -struct _ze_obj_h *_ze_objs = NULL; -pthread_mutex_t _ze_objs_mutex = PTHREAD_MUTEX_INITIALIZER; - -#define FIND_ZE_OBJ(key, val) \ +#define FIND_ZE_CL(key, val) \ do { \ - pthread_mutex_lock(&_ze_objs_mutex); \ - HASH_FIND_PTR(_ze_objs, key, val); \ - pthread_mutex_unlock(&_ze_objs_mutex); \ + pthread_mutex_lock(&_ze_cls_mutex); \ + HASH_FIND_PTR(_ze_cls, key, val); \ + pthread_mutex_unlock(&_ze_cls_mutex); \ } while (0) -#define ADD_ZE_OBJ(val) \ +#define ADD_ZE_CL(val) \ do { \ - pthread_mutex_lock(&_ze_objs_mutex); \ - HASH_ADD_PTR(_ze_objs, ptr, val); \ - pthread_mutex_unlock(&_ze_objs_mutex); \ + pthread_mutex_lock(&_ze_cls_mutex); \ + HASH_ADD_PTR(_ze_cls, ptr, val); \ + pthread_mutex_unlock(&_ze_cls_mutex); \ } while (0) -#define FIND_AND_DEL_ZE_OBJ(key, val) \ +#define FIND_AND_DEL_ZE_CL(key, val) \ do { \ - pthread_mutex_lock(&_ze_objs_mutex); \ - HASH_FIND_PTR(_ze_objs, key, val); \ + pthread_mutex_lock(&_ze_cls_mutex); \ + HASH_FIND_PTR(_ze_cls, key, val); \ if (val) { \ - HASH_DEL(_ze_objs, val); \ + HASH_DEL(_ze_cls, val); \ } \ - pthread_mutex_unlock(&_ze_objs_mutex); \ + pthread_mutex_unlock(&_ze_cls_mutex); \ } while 
(0) static inline void _on_create_command_list(ze_command_list_handle_t command_list, int immediate) { - struct _ze_obj_h *o_h = NULL; struct _ze_command_list_obj_data *cl_data = NULL; - FIND_ZE_OBJ(&command_list, o_h); - if (o_h) { + FIND_ZE_CL(&command_list, cl_data); + if (cl_data) { THAPI_DBGLOG("Command list already registered: %p", command_list); return; } - intptr_t mem = - (intptr_t)calloc(1, sizeof(struct _ze_obj_h) + sizeof(struct _ze_command_list_obj_data)); - if (mem == 0) { + cl_data = (struct _ze_command_list_obj_data *)calloc(1, sizeof(*cl_data)); + if (!cl_data) { THAPI_DBGLOG_NO_ARGS("Failed to allocate memory"); return; } - o_h = (struct _ze_obj_h *)mem; - cl_data = (struct _ze_command_list_obj_data *)(mem + sizeof(struct _ze_obj_h)); - - o_h->ptr = (void *)command_list; + cl_data->ptr = (void *)command_list; /* Immediate cls have no Execute step; their appends run on the device the * moment they're submitted. Treat them as already-executed so drainers * (Reset/Destroy hooks) query their events via _ZE_EXECUTED uniformly. 
*/ if (immediate) cl_data->flags = _ZE_EXECUTED; - o_h->obj_data = (void *)cl_data; - - ADD_ZE_OBJ(o_h); + ADD_ZE_CL(cl_data); } typedef enum _ze_event_flag { @@ -275,20 +264,17 @@ static inline void _register_ze_event(ze_event_handle_t event, is_immediate) _ze_event->flags |= _ZE_IMMEDIATE_CMD; - struct _ze_obj_h *o_h = NULL; struct _ze_command_list_obj_data *cl_data = NULL; - FIND_AND_DEL_ZE_OBJ(&command_list, o_h); - if (!o_h) + FIND_AND_DEL_ZE_CL(&command_list, cl_data); + if (!cl_data) THAPI_DBGLOG("Could not get command list associated to event: %p", event); - else - cl_data = (struct _ze_command_list_obj_data *)(o_h->obj_data); /* only track our events, users are responsible for reseting/deleting their events */ if (cl_data && _ze_event->event_pool) DL_APPEND(cl_data->events, _ze_event); ADD_ZE_EVENT(_ze_event); - if (o_h) - ADD_ZE_OBJ(o_h); + if (cl_data) + ADD_ZE_CL(cl_data); } static struct _ze_event_h *_get_profiling_event(ze_command_list_handle_t command_list) { @@ -341,41 +327,9 @@ static struct _ze_event_h *_get_profiling_event(ze_command_list_handle_t command static void _profile_event_results(ze_event_handle_t event); -static inline void _on_created_event(ze_event_handle_t event) { -#ifdef THAPI_DEBUG - struct _ze_obj_h *o_h = NULL; - FIND_ZE_OBJ(&event, o_h); - if (o_h) { - THAPI_DBGLOG("Event already registered: %p", event); - return; - } - - intptr_t mem = (intptr_t)calloc(1, sizeof(struct _ze_obj_h)); - if (mem == 0) { - THAPI_DBGLOG_NO_ARGS("Failed to allocate memory"); - return; - } - - o_h = (struct _ze_obj_h *)mem; - o_h->ptr = (void *)event; - - ADD_ZE_OBJ(o_h); -#else - (void)event; -#endif -} - static inline void _on_destroy_event(ze_event_handle_t event) { struct _ze_event_h *ze_event = NULL; -#ifdef THAPI_DEBUG - struct _ze_obj_h *o_h = NULL; - FIND_AND_DEL_ZE_OBJ(&event, o_h); - if (!o_h) { - THAPI_DBGLOG("Could not find event: %p", event); - } -#endif - FIND_AND_DEL_ZE_EVENT(&event, ze_event); if (!ze_event) { return; @@ -505,60 
+459,56 @@ static void _on_destroy_context(ze_context_handle_t context) { } static void _on_reset_command_list(ze_command_list_handle_t command_list) { - struct _ze_obj_h *o_h = NULL; + struct _ze_command_list_obj_data *cl_data = NULL; - FIND_AND_DEL_ZE_OBJ(&command_list, o_h); - if (!o_h) { + FIND_AND_DEL_ZE_CL(&command_list, cl_data); + if (!cl_data) { THAPI_DBGLOG("Could not get command list: %p", command_list); return; } - struct _ze_command_list_obj_data *cl_data = (struct _ze_command_list_obj_data *)(o_h->obj_data); struct _ze_event_h *elt = NULL, *tmp = NULL; DL_FOREACH_SAFE(cl_data->events, elt, tmp) { DL_DELETE(cl_data->events, elt); _unregister_ze_event(elt->event, cl_data->flags & _ZE_EXECUTED); } cl_data->flags &= ~_ZE_EXECUTED; - ADD_ZE_OBJ(o_h); + ADD_ZE_CL(cl_data); } static void _on_execute_command_lists(uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists) { for (uint32_t i = 0; i < numCommandLists; i++) { - struct _ze_obj_h *o_h = NULL; - FIND_AND_DEL_ZE_OBJ(phCommandLists + i, o_h); - if (o_h) { - struct _ze_command_list_obj_data *cl_data = - (struct _ze_command_list_obj_data *)(o_h->obj_data); + struct _ze_command_list_obj_data *cl_data = NULL; + FIND_AND_DEL_ZE_CL(phCommandLists + i, cl_data); + if (cl_data) { /* dump events if they were executed */ if (cl_data->flags & _ZE_EXECUTED) { struct _ze_event_h *elt = NULL; DL_FOREACH(cl_data->events, elt) { _dump_and_reset_our_event(elt->event); } } else cl_data->flags |= _ZE_EXECUTED; - ADD_ZE_OBJ(o_h); + ADD_ZE_CL(cl_data); } else THAPI_DBGLOG("Could not get command list: %p", phCommandLists[i]); } } static void _on_destroy_command_list(ze_command_list_handle_t command_list) { - struct _ze_obj_h *o_h = NULL; + struct _ze_command_list_obj_data *cl_data = NULL; - FIND_AND_DEL_ZE_OBJ(&command_list, o_h); - if (!o_h) { + FIND_AND_DEL_ZE_CL(&command_list, cl_data); + if (!cl_data) { THAPI_DBGLOG("Could not get command list: %p", command_list); return; } if (_do_profile) { - struct 
_ze_command_list_obj_data *cl_data = (struct _ze_command_list_obj_data *)(o_h->obj_data); struct _ze_event_h *elt = NULL, *tmp = NULL; DL_FOREACH_SAFE(cl_data->events, elt, tmp) { DL_DELETE(cl_data->events, elt); _unregister_ze_event(elt->event, cl_data->flags & _ZE_EXECUTED); } } - free(o_h); + free(cl_data); } static pthread_once_t _init = PTHREAD_ONCE_INIT; diff --git a/backends/ze/ze_model.rb b/backends/ze/ze_model.rb index a3ceccea1..4b11d7add 100644 --- a/backends/ze/ze_model.rb +++ b/backends/ze/ze_model.rb @@ -194,12 +194,6 @@ def upper_snake_case(str) } EOF -register_epilogue 'zeEventCreate', < Date: Fri, 15 May 2026 15:18:39 +0000 Subject: [PATCH 10/19] ze: split _register_ze_event into our-event vs user-event functions _register_ze_event used a NULL-_ze_event-pointer as a discriminator between two different paths (tracer-injected event vs user event). Splitting them into _register_our_event and _register_user_event makes each path linear and removes the magic NULL dispatch. Two helpers extracted along the way: - _tag_event_from_cl: shared introspection (context + immediate) - _attach_event_to_cl: shared FIND_AND_DEL/ADD pattern that guards cl_data against a concurrent free in _on_destroy_command_list. Also: remove redundant zero-initializations of event_pool and flags in the user-event path. GET_ZE_EVENT_WRAPPER returns a fully-zeroed wrapper (calloc on first use, memset by PUT_ZE_EVENT_WRAPPER on recycle), so only the fields we want non-zero need explicit assignment. ze_model.rb's profiling_epilogue picks the right function based on whether _ewrapper is set (we injected) or NULL (user event). 
--- backends/ze/tracer_ze_helpers.include.c | 84 +++++++++++++++---------- backends/ze/ze_model.rb | 5 +- 2 files changed, 56 insertions(+), 33 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index 5bdf87fb9..6d50acb0a 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -226,31 +226,10 @@ static pthread_mutex_t _ze_event_wrappers_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_unlock(&_ze_event_wrappers_mutex); \ } while (0) -static inline void _register_ze_event(ze_event_handle_t event, - ze_command_list_handle_t command_list, - struct _ze_event_h *_ze_event) { - // If _ze_event, our event - if (!_ze_event) { - FIND_ZE_EVENT(&event, _ze_event); - if (_ze_event) { - if (_ze_event->flags & _ZE_IMMEDIATE_CMD) { - THAPI_DBGLOG("Event already registered: %p", event); - } - _ze_event->command_list = command_list; - return; - } - - GET_ZE_EVENT_WRAPPER(_ze_event); - if (!_ze_event) { - THAPI_DBGLOG("Could not get event wrapper for: %p", event); - return; - } - _ze_event->event = event; - _ze_event->command_list = command_list; - _ze_event->event_pool = NULL; - _ze_event->flags = 0; - } - +/* Tag an event wrapper with context + immediate flag from its cmdlist. + * Both reads use introspection — see project-ze-introspect for why. 
*/ +static inline void _tag_event_from_cl(struct _ze_event_h *_ze_event, + ze_command_list_handle_t command_list) { ze_context_handle_t context = NULL; ze_result_t res = ZE_COMMAND_LIST_GET_CONTEXT_HANDLE_PTR(command_list, &context); if (res == ZE_RESULT_SUCCESS && context) @@ -263,18 +242,59 @@ static inline void _register_ze_event(ze_event_handle_t event, if (ZE_COMMAND_LIST_IS_IMMEDIATE_PTR(command_list, &is_immediate) == ZE_RESULT_SUCCESS && is_immediate) _ze_event->flags |= _ZE_IMMEDIATE_CMD; +} +/* Append an event wrapper we own to its cmdlist's events list, under the + * cl-hash lock (the FIND_AND_DEL/ADD pattern guards cl_data against a + * concurrent free in _on_destroy_command_list). */ +static inline void _attach_event_to_cl(struct _ze_event_h *_ze_event, + ze_command_list_handle_t command_list) { struct _ze_command_list_obj_data *cl_data = NULL; FIND_AND_DEL_ZE_CL(&command_list, cl_data); - if (!cl_data) - THAPI_DBGLOG("Could not get command list associated to event: %p", event); + if (!cl_data) { + THAPI_DBGLOG("Could not get command list associated to event: %p", _ze_event->event); + return; + } + DL_APPEND(cl_data->events, _ze_event); + ADD_ZE_CL(cl_data); +} + +/* Register an injected (tracer-owned) event. Caller has already populated + * _ze_event->event and _ze_event->event_pool via _get_profiling_event. */ +static inline void _register_our_event(struct _ze_event_h *_ze_event, + ze_command_list_handle_t command_list) { + _ze_event->command_list = command_list; + _tag_event_from_cl(_ze_event, command_list); + _attach_event_to_cl(_ze_event, command_list); + ADD_ZE_EVENT(_ze_event); +} + +/* Register a user event (we don't own its lifetime). Look up or create the + * wrapper; users are responsible for reset/destroy, so we don't attach it + * to the cl's events list. 
*/ +static inline void _register_user_event(ze_event_handle_t event, + ze_command_list_handle_t command_list) { + struct _ze_event_h *_ze_event = NULL; + FIND_ZE_EVENT(&event, _ze_event); + if (_ze_event) { + /* already tracked — just migrate to the new cmdlist */ + _ze_event->command_list = command_list; + return; + } + + GET_ZE_EVENT_WRAPPER(_ze_event); + if (!_ze_event) { + THAPI_DBGLOG("Could not get event wrapper for: %p", event); + return; + } + /* GET_ZE_EVENT_WRAPPER returns a fully-zeroed wrapper (calloc on first use, + * memset by PUT_ZE_EVENT_WRAPPER on recycle), so event_pool and flags are + * already 0 — only set the fields we actually want non-zero. */ + _ze_event->event = event; + _ze_event->command_list = command_list; - /* only track our events, users are responsible for reseting/deleting their events */ - if (cl_data && _ze_event->event_pool) - DL_APPEND(cl_data->events, _ze_event); + _tag_event_from_cl(_ze_event, command_list); ADD_ZE_EVENT(_ze_event); - if (cl_data) - ADD_ZE_CL(cl_data); } static struct _ze_event_h *_get_profiling_event(ze_command_list_handle_t command_list) { diff --git a/backends/ze/ze_model.rb b/backends/ze/ze_model.rb index 4b11d7add..3d533cdf8 100644 --- a/backends/ze/ze_model.rb +++ b/backends/ze/ze_model.rb @@ -275,7 +275,10 @@ def upper_snake_case(str) < Date: Fri, 15 May 2026 15:51:48 +0000 Subject: [PATCH 11/19] Fix formatting --- backends/ze/tracer_ze_helpers.include.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index 6d50acb0a..48cf92c93 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -43,9 +43,7 @@ struct ze_closure { struct ze_closure *ze_closures = NULL; -typedef enum _ze_command_list_flag { - _ZE_EXECUTED = ZE_BIT(0) -} _ze_command_list_flag_t; +typedef enum _ze_command_list_flag { _ZE_EXECUTED = ZE_BIT(0) } _ze_command_list_flag_t; typedef 
_ze_command_list_flag_t _ze_command_list_flags_t; struct _ze_event_h; @@ -333,8 +331,8 @@ static struct _ze_event_h *_get_profiling_event(ze_command_list_handle_t command ZE_EVENT_SCOPE_FLAG_HOST}; res = ZE_EVENT_CREATE_PTR(e_w->event_pool, &e_desc, &e_w->event); if (res != ZE_RESULT_SUCCESS) { - THAPI_DBGLOG("zeEventCreate failed with %d, for event pool: %p, context: %p", - res, e_w->event_pool, context); + THAPI_DBGLOG("zeEventCreate failed with %d, for event pool: %p, context: %p", res, + e_w->event_pool, context); goto cleanup_ep; } return e_w; From e2e4df3058fb6565e83dadbf8cf322d85fdbfb44 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Fri, 15 May 2026 16:03:29 +0000 Subject: [PATCH 12/19] ze: fix bogus memory ref in _tag_event_from_cl comment The previous comment cited a 'project-ze-introspect' memory file that doesn't exist. Replace with the actual reason the immediate flag is snapshotted at register time: by _on_reset_event time the cmdlist may already be destroyed, so introspecting it would dereference a freed handle. --- backends/ze/tracer_ze_helpers.include.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index 48cf92c93..e8e961c25 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -224,8 +224,10 @@ static pthread_mutex_t _ze_event_wrappers_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_unlock(&_ze_event_wrappers_mutex); \ } while (0) -/* Tag an event wrapper with context + immediate flag from its cmdlist. - * Both reads use introspection — see project-ze-introspect for why. */ +/* Snapshot context + immediate-flag from cmdlist into the event wrapper. + * The immediate flag is read at register time (not at _on_reset_event + * time) because by reset time the cmdlist may already be destroyed and + * zeCommandListIsImmediate would dereference a freed handle. 
*/ static inline void _tag_event_from_cl(struct _ze_event_h *_ze_event, ze_command_list_handle_t command_list) { ze_context_handle_t context = NULL; From 92140075b649dfbcbfe6cbcaed848a6474471d64 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Fri, 15 May 2026 16:15:31 +0000 Subject: [PATCH 13/19] ze: don't profile zeCommandListAppendQueryKernelTimestamps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Query op has no kernel to time — its signal event records whatever the driver happened to sample, leading to nonsensical device-side durations (5.73min max observed under m_ooo_query_no_wait_stress_ze, total claimed 10.79h). Drop it from the profiling-injection list. Host-side timing of the API call itself is still tracked (it's still a regular ze_* call that goes through the host tracepoints). --- backends/ze/ze_model.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backends/ze/ze_model.rb b/backends/ze/ze_model.rb index 3d533cdf8..ec664445a 100644 --- a/backends/ze/ze_model.rb +++ b/backends/ze/ze_model.rb @@ -307,10 +307,11 @@ def upper_snake_case(str) zeCommandListAppendImageCopyRegion zeCommandListAppendImageCopyToMemory zeCommandListAppendImageCopyFromMemory - zeCommandListAppendQueryKernelTimestamps zeCommandListAppendWriteGlobalTimestamp zeCommandListAppendImageCopyToMemoryExt zeCommandListAppendImageCopyFromMemoryExt].each do |c| + # zeCommandListAppendQueryKernelTimestamps intentionally NOT in this list + # — it has no kernel to time register_prologue c, profiling_prologue.call('hSignalEvent') register_prologue c, paranoid_drift_prologue register_epilogue c, profiling_epilogue.call('hSignalEvent') From be6a75a1271d2ad0f58d8070eb258253cc07b9f8 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Fri, 15 May 2026 16:21:54 +0000 Subject: [PATCH 14/19] =?UTF-8?q?ze:=20drop=20=5FZE=5FPROFILED=20flag=20?= =?UTF-8?q?=E2=80=94=20never=20set=20anywhere?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit The _ZE_PROFILED bit was read in 5 places (always negated) and cleared in one, but never set. Every guarded `_profile_event_results` call therefore always fired, and the clear was a no-op. Drop the enum value, the 5 guards, and the clear. _ZE_IMMEDIATE_CMD is now the only remaining event flag. --- backends/ze/tracer_ze_helpers.include.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index e8e961c25..f72d81242 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -107,10 +107,7 @@ static inline void _on_create_command_list(ze_command_list_handle_t command_list ADD_ZE_CL(cl_data); } -typedef enum _ze_event_flag { - _ZE_PROFILED = ZE_BIT(0), - _ZE_IMMEDIATE_CMD = ZE_BIT(1) -} _ze_event_flag_t; +typedef enum _ze_event_flag { _ZE_IMMEDIATE_CMD = ZE_BIT(0) } _ze_event_flag_t; typedef _ze_event_flag_t _ze_event_flags_t; struct _ze_event_h { @@ -355,8 +352,7 @@ static inline void _on_destroy_event(ze_event_handle_t event) { return; } - if (!(ze_event->flags & _ZE_PROFILED)) - _profile_event_results(event); + _profile_event_results(event); PUT_ZE_EVENT_WRAPPER(ze_event); } @@ -369,7 +365,7 @@ static inline void _unregister_ze_event(ze_event_handle_t event, int get_results return; } - if (get_results && !(ze_event->flags & _ZE_PROFILED)) + if (get_results) _profile_event_results(event); if (ze_event->event_pool) PUT_ZE_EVENT(ze_event); @@ -386,8 +382,7 @@ static inline void _on_reset_event(ze_event_handle_t event) { return; } - if (!(ze_event->flags & _ZE_PROFILED)) - _profile_event_results(event); + _profile_event_results(event); if (!(ze_event->flags & _ZE_IMMEDIATE_CMD)) ADD_ZE_EVENT(ze_event); @@ -406,8 +401,6 @@ static inline void _dump_and_reset_our_event(ze_event_handle_t event) { _profile_event_results(event); ZE_EVENT_HOST_RESET_PTR(event); - - 
ze_event->flags &= ~_ZE_PROFILED; ADD_ZE_EVENT(ze_event); } @@ -431,7 +424,7 @@ static void _event_cleanup() { HASH_ITER(hh, _ze_events, ze_event, tmp) { HASH_DEL(_ze_events, ze_event); - if (ze_event->event && !(ze_event->flags & _ZE_PROFILED)) + if (ze_event->event) _profile_event_results(ze_event->event); if (ze_event->event_pool) { if (ze_event->event) @@ -449,7 +442,7 @@ static void _on_destroy_context(ze_context_handle_t context) { HASH_ITER(hh, _ze_events, ze_event, tmp) { if (ze_event->context == context) { HASH_DEL(_ze_events, ze_event); - if (ze_event->event && !(ze_event->flags & _ZE_PROFILED)) + if (ze_event->event) _profile_event_results(ze_event->event); if (ze_event->event_pool) { if (ze_event->event) From 11e97c999bacf87f705fc7fc2098870d35e3bed0 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Fri, 15 May 2026 16:26:13 +0000 Subject: [PATCH 15/19] ze: _unregister_ze_event takes the wrapper directly Both call sites already iterate cl_data->events and have the wrapper in hand. Passing _ze_event_h* instead of ze_event_handle_t skips a hash lookup per element on cl reset/destroy. The remaining FIND_AND_DEL_ZE_EVENT inside the function is just removing the wrapper from the global events hash; it's still required because external state outside this function (e.g. _on_reset_event) also indexes by event handle. 
--- backends/ze/tracer_ze_helpers.include.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index f72d81242..72dc02f6b 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -356,17 +356,16 @@ static inline void _on_destroy_event(ze_event_handle_t event) { PUT_ZE_EVENT_WRAPPER(ze_event); } -static inline void _unregister_ze_event(ze_event_handle_t event, int get_results) { - struct _ze_event_h *ze_event = NULL; - - FIND_AND_DEL_ZE_EVENT(&event, ze_event); - if (!ze_event) { - THAPI_DBGLOG("Could not find event: %p", event); - return; - } +/* Caller already holds the wrapper (e.g. iterating cl_data->events) and + * has removed it from any per-cl list. Drops it from the global events + * hash, optionally emits its timestamp tracepoint, and recycles. */ +static inline void _unregister_ze_event(struct _ze_event_h *ze_event, int get_results) { + struct _ze_event_h *evicted = NULL; + FIND_AND_DEL_ZE_EVENT(&ze_event->event, evicted); + /* evicted should be == ze_event; if not, our hash bookkeeping is corrupt. 
*/ if (get_results) - _profile_event_results(event); + _profile_event_results(ze_event->event); if (ze_event->event_pool) PUT_ZE_EVENT(ze_event); else @@ -482,7 +481,7 @@ static void _on_reset_command_list(ze_command_list_handle_t command_list) { struct _ze_event_h *elt = NULL, *tmp = NULL; DL_FOREACH_SAFE(cl_data->events, elt, tmp) { DL_DELETE(cl_data->events, elt); - _unregister_ze_event(elt->event, cl_data->flags & _ZE_EXECUTED); + _unregister_ze_event(elt, cl_data->flags & _ZE_EXECUTED); } cl_data->flags &= ~_ZE_EXECUTED; ADD_ZE_CL(cl_data); @@ -518,7 +517,7 @@ static void _on_destroy_command_list(ze_command_list_handle_t command_list) { struct _ze_event_h *elt = NULL, *tmp = NULL; DL_FOREACH_SAFE(cl_data->events, elt, tmp) { DL_DELETE(cl_data->events, elt); - _unregister_ze_event(elt->event, cl_data->flags & _ZE_EXECUTED); + _unregister_ze_event(elt, cl_data->flags & _ZE_EXECUTED); } } free(cl_data); From babb9e66e802b0822a4805d5f8095d9722c9c0c8 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Fri, 15 May 2026 16:29:51 +0000 Subject: [PATCH 16/19] ze: hoist _profile_event_results above its callers, drop forward decl Definition moved to where the callers can see it directly. Removes the forward declaration that existed only because the function was defined too far down the file. 
--- backends/ze/tracer_ze_helpers.include.c | 28 ++++++++++++------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index 72dc02f6b..8b24d7109 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -342,7 +342,19 @@ static struct _ze_event_h *_get_profiling_event(ze_command_list_handle_t command return NULL; } -static void _profile_event_results(ze_event_handle_t event); +static void _profile_event_results(ze_event_handle_t event) { + ze_kernel_timestamp_result_t res = {0}; + ze_result_t status; + ze_result_t timestamp_status; + + if (tracepoint_enabled(lttng_ust_ze_profiling, event_profiling_results)) { + status = ZE_EVENT_QUERY_STATUS_PTR(event); + timestamp_status = ZE_EVENT_QUERY_KERNEL_TIMESTAMP_PTR(event, &res); + do_tracepoint(lttng_ust_ze_profiling, event_profiling_results, event, status, timestamp_status, + res.global.kernelStart, res.global.kernelEnd, res.context.kernelStart, + res.context.kernelEnd); + } +} static inline void _on_destroy_event(ze_event_handle_t event) { struct _ze_event_h *ze_event = NULL; @@ -403,20 +415,6 @@ static inline void _dump_and_reset_our_event(ze_event_handle_t event) { ADD_ZE_EVENT(ze_event); } -static void _profile_event_results(ze_event_handle_t event) { - ze_kernel_timestamp_result_t res = {0}; - ze_result_t status; - ze_result_t timestamp_status; - - if (tracepoint_enabled(lttng_ust_ze_profiling, event_profiling_results)) { - status = ZE_EVENT_QUERY_STATUS_PTR(event); - timestamp_status = ZE_EVENT_QUERY_KERNEL_TIMESTAMP_PTR(event, &res); - do_tracepoint(lttng_ust_ze_profiling, event_profiling_results, event, status, timestamp_status, - res.global.kernelStart, res.global.kernelEnd, res.context.kernelStart, - res.context.kernelEnd); - } -} - static void _event_cleanup() { struct _ze_event_h *ze_event = NULL; struct _ze_event_h *tmp = NULL; From 
71920eb084fee21c3459c33cc0d73a745b8071e7 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Fri, 15 May 2026 16:34:44 +0000 Subject: [PATCH 17/19] ze: drop write-only command_list field from _ze_event_h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Field was set in 6 places, read in 0 — pure dead state. Cascade: - _register_user_event's 'already registered' early-return branch used to update command_list there; now it's a pure no-op return. - _register_our_event's first line (assigning command_list) gone. - _get_profiling_event's two assignments gone. - PUT_ZE_EVENT macro's redundant clear gone. The field's original purpose was probably "track which cmdlist this event most recently belonged to" but no consumer ever read it. The new tracer can introduce a real cl pointer if/when it needs one. --- backends/ze/tracer_ze_helpers.include.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index 8b24d7109..52da3e33d 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -113,7 +113,6 @@ typedef _ze_event_flag_t _ze_event_flags_t; struct _ze_event_h { ze_event_handle_t event; UT_hash_handle hh; - ze_command_list_handle_t command_list; ze_event_pool_handle_t event_pool; ze_context_handle_t context; _ze_event_flags_t flags; @@ -191,7 +190,6 @@ static pthread_mutex_t _ze_event_pools_mutex = PTHREAD_MUTEX_INITIALIZER; pool->context = val->context; \ HASH_ADD_PTR(_ze_event_pools, context, pool); \ } \ - val->command_list = NULL; \ val->flags = 0; \ ZE_EVENT_HOST_RESET_PTR(val->event); \ DL_PREPEND(pool->events, val); \ @@ -260,7 +258,6 @@ static inline void _attach_event_to_cl(struct _ze_event_h *_ze_event, * _ze_event->event and _ze_event->event_pool via _get_profiling_event. 
*/ static inline void _register_our_event(struct _ze_event_h *_ze_event, ze_command_list_handle_t command_list) { - _ze_event->command_list = command_list; _tag_event_from_cl(_ze_event, command_list); _attach_event_to_cl(_ze_event, command_list); ADD_ZE_EVENT(_ze_event); @@ -273,11 +270,8 @@ static inline void _register_user_event(ze_event_handle_t event, ze_command_list_handle_t command_list) { struct _ze_event_h *_ze_event = NULL; FIND_ZE_EVENT(&event, _ze_event); - if (_ze_event) { - /* already tracked — just migrate to the new cmdlist */ - _ze_event->command_list = command_list; - return; - } + if (_ze_event) + return; /* already tracked, nothing more to do */ GET_ZE_EVENT_WRAPPER(_ze_event); if (!_ze_event) { @@ -288,7 +282,6 @@ static inline void _register_user_event(ze_event_handle_t event, * memset by PUT_ZE_EVENT_WRAPPER on recycle), so event_pool and flags are * already 0 — only set the fields we actually want non-zero. */ _ze_event->event = event; - _ze_event->command_list = command_list; _tag_event_from_cl(_ze_event, command_list); ADD_ZE_EVENT(_ze_event); @@ -305,10 +298,8 @@ static struct _ze_event_h *_get_profiling_event(ze_command_list_handle_t command return NULL; } GET_ZE_EVENT(&context, e_w); - if (e_w) { - e_w->command_list = command_list; + if (e_w) return e_w; - } GET_ZE_EVENT_WRAPPER(e_w); if (!e_w) { @@ -316,7 +307,6 @@ static struct _ze_event_h *_get_profiling_event(ze_command_list_handle_t command return NULL; } - e_w->command_list = command_list; ze_event_pool_desc_t desc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, NULL, ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP | ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1}; From 7db1712bc2d3639e651184bda1573cf988e7b5c6 Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Fri, 15 May 2026 16:36:39 +0000 Subject: [PATCH 18/19] ze: recycle wrappers in _on_destroy_context instead of free() The /* should put? */ comment marked an unresolved decision. 
The neighboring sweep over the per-context free list (lines just below) already does PUT_ZE_EVENT_WRAPPER for the same struct type; doing the same here avoids alloc/free churn in workloads that create and destroy contexts in a loop (e.g. m_many_contexts_ze). No behavior change for single-context workloads. --- backends/ze/tracer_ze_helpers.include.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index 52da3e33d..a20af8192 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -436,8 +436,7 @@ static void _on_destroy_context(ze_context_handle_t context) { ZE_EVENT_DESTROY_PTR(ze_event->event); ZE_EVENT_POOL_DESTROY_PTR(ze_event->event_pool); } - /* should put? */ - free(ze_event); + PUT_ZE_EVENT_WRAPPER(ze_event); } } pthread_mutex_unlock(&_ze_events_mutex); From cfd21ae2fd4f62f1a86a8ee2e2df8593063ca3eb Mon Sep 17 00:00:00 2001 From: tapplencourt Date: Fri, 15 May 2026 17:25:50 +0000 Subject: [PATCH 19/19] [DRAFT] ze: extract _dispose_event_wrapper helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three wrapper-teardown sites duplicated the same sequence: 1. _event_cleanup (process exit destructor) 2. _on_destroy_context first sweep (live events for that context) 3. _on_destroy_context second sweep (per-context free list) All three: optionally dump timestamp tracepoint, destroy our injected event+pool if we own them, recycle the wrapper. Extracted into _dispose_event_wrapper(wrapper, do_dump) — site 3 passes do_dump=0 because its wrappers were already drained when they were recycled. This is the "tear down a wrapper" primitive PLAN_v2's per-scenario drain hooks will all call. Side effect: _event_cleanup now recycles wrappers via PUT_ZE_EVENT_WRAPPER instead of free()'ing them. Process is exiting either way, so no observable difference, but the code path is now uniform. 
[DRAFT]: compile-clean but bats not re-run — compute node was lost mid-iteration. Verify m_many_contexts_ze, m_concurrent_distinct_cls_ze, and baselines all green before merging. --- backends/ze/tracer_ze_helpers.include.c | 40 ++++++++++++------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/backends/ze/tracer_ze_helpers.include.c b/backends/ze/tracer_ze_helpers.include.c index a20af8192..8cfe31d7a 100644 --- a/backends/ze/tracer_ze_helpers.include.c +++ b/backends/ze/tracer_ze_helpers.include.c @@ -405,20 +405,27 @@ static inline void _dump_and_reset_our_event(ze_event_handle_t event) { ADD_ZE_EVENT(ze_event); } +/* Tear down a wrapper: optionally emit its timestamp tracepoint, then + * destroy the injected event+pool if we own them, then recycle the + * wrapper. Caller must have already removed it from any list/hash that + * references it. */ +static inline void _dispose_event_wrapper(struct _ze_event_h *ze_event, int do_dump) { + if (do_dump && ze_event->event) + _profile_event_results(ze_event->event); + if (ze_event->event_pool) { + if (ze_event->event) + ZE_EVENT_DESTROY_PTR(ze_event->event); + ZE_EVENT_POOL_DESTROY_PTR(ze_event->event_pool); + } + PUT_ZE_EVENT_WRAPPER(ze_event); +} + static void _event_cleanup() { struct _ze_event_h *ze_event = NULL; struct _ze_event_h *tmp = NULL; - HASH_ITER(hh, _ze_events, ze_event, tmp) { HASH_DEL(_ze_events, ze_event); - if (ze_event->event) - _profile_event_results(ze_event->event); - if (ze_event->event_pool) { - if (ze_event->event) - ZE_EVENT_DESTROY_PTR(ze_event->event); - ZE_EVENT_POOL_DESTROY_PTR(ze_event->event_pool); - } - free(ze_event); + _dispose_event_wrapper(ze_event, 1); } } @@ -429,14 +436,7 @@ static void _on_destroy_context(ze_context_handle_t context) { HASH_ITER(hh, _ze_events, ze_event, tmp) { if (ze_event->context == context) { HASH_DEL(_ze_events, ze_event); - if (ze_event->event) - _profile_event_results(ze_event->event); - if (ze_event->event_pool) { - if 
(ze_event->event) - ZE_EVENT_DESTROY_PTR(ze_event->event); - ZE_EVENT_POOL_DESTROY_PTR(ze_event->event_pool); - } - PUT_ZE_EVENT_WRAPPER(ze_event); + _dispose_event_wrapper(ze_event, 1); } } pthread_mutex_unlock(&_ze_events_mutex); @@ -448,9 +448,9 @@ static void _on_destroy_context(ze_context_handle_t context) { struct _ze_event_h *elt = NULL, *tmp = NULL; DL_FOREACH_SAFE(pool->events, elt, tmp) { DL_DELETE(pool->events, elt); - ZE_EVENT_DESTROY_PTR(elt->event); - ZE_EVENT_POOL_DESTROY_PTR(elt->event_pool); - PUT_ZE_EVENT_WRAPPER(elt); + /* Wrapper is in the free list — its event was already dumped+reset + * by whoever recycled it. Don't dump again, just tear down. */ + _dispose_event_wrapper(elt, 0); } free(pool); }