@@ -428,10 +428,8 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
428428 unique_cores .add (task ["core_id" ])
429429
430430 core_to_tid = {}
431- tid_counter = 1000
432431 for core_id in sorted (unique_cores ):
433- core_to_tid [core_id ] = tid_counter
434- tid_counter += 1
432+ core_to_tid [core_id ] = 10000 + core_id * 10
435433
436434 if verbose :
437435 print (f" Unique cores: { len (unique_cores )} " )
@@ -471,22 +469,11 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
471469 {"args" : {"name" : thread_name }, "cat" : "__metadata" , "name" : "thread_name" , "ph" : "M" , "pid" : 1 , "tid" : tid }
472470 )
473471
474- # Also add thread name for AICPU View if data exists
475- if has_aicpu_data :
476- events .append (
477- {
478- "args" : {"name" : thread_name },
479- "cat" : "__metadata" ,
480- "name" : "thread_name" ,
481- "ph" : "M" ,
482- "pid" : 2 ,
483- "tid" : tid ,
484- }
485- )
486-
487472 # Duration events (Complete events "X")
488473 # Build task_id -> event_id mapping for flow events
489474 task_to_event_id = {}
475+ task_to_aicpu_event_id : dict [int , int ] = {}
476+ task_to_aicpu_tid : dict [int , int ] = {}
490477 event_id = 0
491478
492479 for task in tasks :
@@ -530,15 +517,80 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
530517 event_id += 1
531518
532519 # AICPU View duration events (dispatch_time to finish_time)
520+ # Assign overlapping tasks on the same core to different tids so Perfetto
521+ # renders each bar on its own row (Perfetto requires strict nesting on a tid).
533522 if has_aicpu_data :
523+ # Build per-core sorted task lists and assign sub-lanes.
524+ # Each core gets a base tid from core_to_tid; a task that overlaps lane 0 goes to the first free sub-lane (base+1, base+2, ...).
525+ _core_aicpu_tasks : dict [int , list ] = defaultdict (list )
526+ for task in tasks :
527+ d = task .get ("dispatch_time_us" , 0 )
528+ f = task .get ("finish_time_us" , 0 )
529+ if d < 0 or f <= 0 :
530+ continue
531+ _core_aicpu_tasks [task ["core_id" ]].append (task )
532+ for ct_list in _core_aicpu_tasks .values ():
533+ ct_list .sort (key = lambda t : t ["dispatch_time_us" ])
534+
535+ aicpu_tid_set : set [int ] = set ()
536+ for core_id , ct_list in _core_aicpu_tasks .items ():
537+ base_tid = core_to_tid [core_id ]
538+ # Greedy lane assignment: track finish time per sub-lane
539+ lane_finish = [0.0 ] # lane 0 = base_tid
540+ for task in ct_list :
541+ d = task ["dispatch_time_us" ]
542+ assigned = - 1
543+ for lane_idx , lf in enumerate (lane_finish ):
544+ if lf <= d :
545+ assigned = lane_idx
546+ break
547+ if assigned < 0 :
548+ assigned = len (lane_finish )
549+ lane_finish .append (0.0 )
550+ lane_finish [assigned ] = task ["finish_time_us" ]
551+ tid = base_tid if assigned == 0 else base_tid + assigned
552+ task_to_aicpu_tid [task ["task_id" ]] = tid
553+ aicpu_tid_set .add (tid )
554+
555+ # Thread name metadata for AICPU View (one entry per unique tid used)
556+ for core_id , base_tid in core_to_tid .items ():
557+ ct_list = _core_aicpu_tasks .get (core_id )
558+ core_type_str = ct_list [0 ]["core_type" ].upper () if ct_list else "unknown"
559+ base_name = f"{ core_type_str } _{ core_id } "
560+ # Base lane gets metadata when it is in use, or when no AICPU tid was assigned at all
561+ if base_tid in aicpu_tid_set or not aicpu_tid_set :
562+ events .append (
563+ {
564+ "args" : {"name" : base_name },
565+ "cat" : "__metadata" ,
566+ "name" : "thread_name" ,
567+ "ph" : "M" ,
568+ "pid" : 2 ,
569+ "tid" : base_tid ,
570+ }
571+ )
572+ # Overflow lane: only base_tid + 1 is named (assumes dual-slot dispatch, i.e. max 2 concurrent tasks per core; any lane beyond base+1 gets no thread name)
573+ overflow_tid = base_tid + 1
574+ if overflow_tid in aicpu_tid_set :
575+ events .append (
576+ {
577+ "args" : {"name" : base_name },
578+ "cat" : "__metadata" ,
579+ "name" : "thread_name" ,
580+ "ph" : "M" ,
581+ "pid" : 2 ,
582+ "tid" : overflow_tid ,
583+ }
584+ )
585+
534586 for task in tasks :
535587 dispatch_us = task .get ("dispatch_time_us" , 0 )
536588 finish_us = task .get ("finish_time_us" , 0 )
537589 # 0us is a valid timestamp (base-time aligned); only reject negative/invalid values.
538590 if dispatch_us < 0 or finish_us <= 0 :
539591 continue
540592
541- tid = core_to_tid [task ["core_id" ]]
593+ tid = task_to_aicpu_tid . get ( task [ "task_id" ], core_to_tid [task ["core_id" ]])
542594 aicpu_dur = finish_us - dispatch_us
543595
544596 # Get function name if available
@@ -569,6 +621,7 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
569621 "dur" : aicpu_dur ,
570622 }
571623 )
624+ task_to_aicpu_event_id [task ["task_id" ]] = event_id
572625 event_id += 1
573626
574627 # Flow events (Flow events "s" and "f" for dependencies)
@@ -808,7 +861,8 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
808861 if src_finish_us < 0 :
809862 continue
810863
811- src_tid = core_to_tid [task ["core_id" ]]
864+ src_tid = task_to_aicpu_tid .get (task ["task_id" ], core_to_tid [task ["core_id" ]])
865+ src_aicpu_eid = task_to_aicpu_event_id .get (task ["task_id" ])
812866
813867 for succ_task_id in task ["fanout" ]:
814868 if succ_task_id not in task_map :
@@ -819,31 +873,36 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
819873 if dst_dispatch_us < 0 :
820874 continue
821875
822- dst_tid = core_to_tid [succ_task ["core_id" ]]
876+ dst_tid = task_to_aicpu_tid .get (succ_task_id , core_to_tid [succ_task ["core_id" ]])
877+ dst_aicpu_eid = task_to_aicpu_event_id .get (succ_task_id )
878+
879+ flow_s = {
880+ "cat" : "flow" ,
881+ "id" : flow_id ,
882+ "name" : "dependency" ,
883+ "ph" : "s" ,
884+ "pid" : 2 ,
885+ "tid" : src_tid ,
886+ "ts" : src_finish_us - 0.01 ,
887+ }
888+ if src_aicpu_eid is not None :
889+ flow_s ["bind_id" ] = src_aicpu_eid
890+ events .append (flow_s )
891+
892+ flow_f = {
893+ "cat" : "flow" ,
894+ "id" : flow_id ,
895+ "name" : "dependency" ,
896+ "ph" : "f" ,
897+ "pid" : 2 ,
898+ "tid" : dst_tid ,
899+ "ts" : dst_dispatch_us ,
900+ "bp" : "e" ,
901+ }
902+ if dst_aicpu_eid is not None :
903+ flow_f ["bind_id" ] = dst_aicpu_eid
904+ events .append (flow_f )
823905
824- events .append (
825- {
826- "cat" : "flow" ,
827- "id" : flow_id ,
828- "name" : "dependency" ,
829- "ph" : "s" ,
830- "pid" : 2 ,
831- "tid" : src_tid ,
832- "ts" : src_finish_us - 0.01 ,
833- }
834- )
835- events .append (
836- {
837- "cat" : "flow" ,
838- "id" : flow_id ,
839- "name" : "dependency" ,
840- "ph" : "f" ,
841- "pid" : 2 ,
842- "tid" : dst_tid ,
843- "ts" : dst_dispatch_us ,
844- "bp" : "e" ,
845- }
846- )
847906 flow_id += 1
848907
849908 # Scheduler DISPATCH → task execution arrows
@@ -894,6 +953,7 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
894953 if matched_thread is not None :
895954 sched_tid = 3000 + matched_thread
896955 core_tid = core_to_tid [task ["core_id" ]]
956+ aicpu_tid = task_to_aicpu_tid .get (task ["task_id" ], core_tid )
897957
898958 # Flow: scheduler DISPATCH → AICore View task start
899959 events .append (
@@ -922,6 +982,7 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
922982 flow_id += 1
923983
924984 # Flow: scheduler DISPATCH → AICPU View task start
985+ aicpu_eid = task_to_aicpu_event_id .get (task ["task_id" ])
925986 events .append (
926987 {
927988 "cat" : "flow" ,
@@ -933,18 +994,19 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
933994 "ts" : dispatch_us ,
934995 }
935996 )
936- events .append (
937- {
938- "cat" : "flow" ,
939- "id" : flow_id ,
940- "name" : "dispatch" ,
941- "ph" : "f" ,
942- "pid" : 2 ,
943- "tid" : core_tid ,
944- "ts" : dispatch_us ,
945- "bp" : "e" ,
946- }
947- )
997+ flow_f = {
998+ "cat" : "flow" ,
999+ "id" : flow_id ,
1000+ "name" : "dispatch" ,
1001+ "ph" : "f" ,
1002+ "pid" : 2 ,
1003+ "tid" : aicpu_tid ,
1004+ "ts" : dispatch_us ,
1005+ "bp" : "e" ,
1006+ }
1007+ if aicpu_eid is not None :
1008+ flow_f ["bind_id" ] = aicpu_eid
1009+ events .append (flow_f )
9481010 flow_id += 1
9491011
9501012 # Orchestrator → scheduler dispatch:
0 commit comments