Skip to content

Commit 019c2be

Browse files
authored
Fix: resolve AICPU View overlapping bars and misbound flow arrows in swimlane (#557)
Perfetto silently drops partially overlapping slices on the same tid and binds flow-finish events to the wrong enclosing slice when bars overlap.
- Assign per-core AICPU tids as 10000 + core_id * 10, with greedy lane assignment placing overlapping tasks on base_tid + 1 (dual-slot overflow).
- Add bind_id to the AICPU dependency and scheduler→task flow events so arrows attach to the correct slice regardless of overlap.
- Generate AICPU View thread metadata independently (not inside the AICore loop), since overflow lanes need separate entries.
1 parent 7abe560 commit 019c2be

1 file changed

Lines changed: 116 additions & 54 deletions

File tree

tools/swimlane_converter.py

Lines changed: 116 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -428,10 +428,8 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
428428
unique_cores.add(task["core_id"])
429429

430430
core_to_tid = {}
431-
tid_counter = 1000
432431
for core_id in sorted(unique_cores):
433-
core_to_tid[core_id] = tid_counter
434-
tid_counter += 1
432+
core_to_tid[core_id] = 10000 + core_id * 10
435433

436434
if verbose:
437435
print(f" Unique cores: {len(unique_cores)}")
@@ -471,22 +469,11 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
471469
{"args": {"name": thread_name}, "cat": "__metadata", "name": "thread_name", "ph": "M", "pid": 1, "tid": tid}
472470
)
473471

474-
# Also add thread name for AICPU View if data exists
475-
if has_aicpu_data:
476-
events.append(
477-
{
478-
"args": {"name": thread_name},
479-
"cat": "__metadata",
480-
"name": "thread_name",
481-
"ph": "M",
482-
"pid": 2,
483-
"tid": tid,
484-
}
485-
)
486-
487472
# Duration events (Complete events "X")
488473
# Build task_id -> event_id mapping for flow events
489474
task_to_event_id = {}
475+
task_to_aicpu_event_id: dict[int, int] = {}
476+
task_to_aicpu_tid: dict[int, int] = {}
490477
event_id = 0
491478

492479
for task in tasks:
@@ -530,15 +517,80 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
530517
event_id += 1
531518

532519
# AICPU View duration events (dispatch_time to finish_time)
520+
# Assign overlapping tasks on the same core to different tids so Perfetto
521+
# renders each bar on its own row (Perfetto requires strict nesting on a tid).
533522
if has_aicpu_data:
523+
# Build per-core sorted task lists and assign sub-lanes.
524+
# Each core gets a base tid from core_to_tid; overlapping tasks get base+1.
525+
_core_aicpu_tasks: dict[int, list] = defaultdict(list)
526+
for task in tasks:
527+
d = task.get("dispatch_time_us", 0)
528+
f = task.get("finish_time_us", 0)
529+
if d < 0 or f <= 0:
530+
continue
531+
_core_aicpu_tasks[task["core_id"]].append(task)
532+
for ct_list in _core_aicpu_tasks.values():
533+
ct_list.sort(key=lambda t: t["dispatch_time_us"])
534+
535+
aicpu_tid_set: set[int] = set()
536+
for core_id, ct_list in _core_aicpu_tasks.items():
537+
base_tid = core_to_tid[core_id]
538+
# Greedy lane assignment: track finish time per sub-lane
539+
lane_finish = [0.0] # lane 0 = base_tid
540+
for task in ct_list:
541+
d = task["dispatch_time_us"]
542+
assigned = -1
543+
for lane_idx, lf in enumerate(lane_finish):
544+
if lf <= d:
545+
assigned = lane_idx
546+
break
547+
if assigned < 0:
548+
assigned = len(lane_finish)
549+
lane_finish.append(0.0)
550+
lane_finish[assigned] = task["finish_time_us"]
551+
tid = base_tid if assigned == 0 else base_tid + assigned
552+
task_to_aicpu_tid[task["task_id"]] = tid
553+
aicpu_tid_set.add(tid)
554+
555+
# Thread name metadata for AICPU View (one entry per unique tid used)
556+
for core_id, base_tid in core_to_tid.items():
557+
ct_list = _core_aicpu_tasks.get(core_id)
558+
core_type_str = ct_list[0]["core_type"].upper() if ct_list else "unknown"
559+
base_name = f"{core_type_str}_{core_id}"
560+
# Base lane always gets metadata (even if no tasks, for consistency)
561+
if base_tid in aicpu_tid_set or not aicpu_tid_set:
562+
events.append(
563+
{
564+
"args": {"name": base_name},
565+
"cat": "__metadata",
566+
"name": "thread_name",
567+
"ph": "M",
568+
"pid": 2,
569+
"tid": base_tid,
570+
}
571+
)
572+
# Overflow lane (at most one: dual-slot dispatch means max 2 concurrent tasks per core)
573+
overflow_tid = base_tid + 1
574+
if overflow_tid in aicpu_tid_set:
575+
events.append(
576+
{
577+
"args": {"name": base_name},
578+
"cat": "__metadata",
579+
"name": "thread_name",
580+
"ph": "M",
581+
"pid": 2,
582+
"tid": overflow_tid,
583+
}
584+
)
585+
534586
for task in tasks:
535587
dispatch_us = task.get("dispatch_time_us", 0)
536588
finish_us = task.get("finish_time_us", 0)
537589
# 0us is a valid timestamp (base-time aligned); only reject negative/invalid values.
538590
if dispatch_us < 0 or finish_us <= 0:
539591
continue
540592

541-
tid = core_to_tid[task["core_id"]]
593+
tid = task_to_aicpu_tid.get(task["task_id"], core_to_tid[task["core_id"]])
542594
aicpu_dur = finish_us - dispatch_us
543595

544596
# Get function name if available
@@ -569,6 +621,7 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
569621
"dur": aicpu_dur,
570622
}
571623
)
624+
task_to_aicpu_event_id[task["task_id"]] = event_id
572625
event_id += 1
573626

574627
# Flow events (Flow events "s" and "f" for dependencies)
@@ -808,7 +861,8 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
808861
if src_finish_us < 0:
809862
continue
810863

811-
src_tid = core_to_tid[task["core_id"]]
864+
src_tid = task_to_aicpu_tid.get(task["task_id"], core_to_tid[task["core_id"]])
865+
src_aicpu_eid = task_to_aicpu_event_id.get(task["task_id"])
812866

813867
for succ_task_id in task["fanout"]:
814868
if succ_task_id not in task_map:
@@ -819,31 +873,36 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
819873
if dst_dispatch_us < 0:
820874
continue
821875

822-
dst_tid = core_to_tid[succ_task["core_id"]]
876+
dst_tid = task_to_aicpu_tid.get(succ_task_id, core_to_tid[succ_task["core_id"]])
877+
dst_aicpu_eid = task_to_aicpu_event_id.get(succ_task_id)
878+
879+
flow_s = {
880+
"cat": "flow",
881+
"id": flow_id,
882+
"name": "dependency",
883+
"ph": "s",
884+
"pid": 2,
885+
"tid": src_tid,
886+
"ts": src_finish_us - 0.01,
887+
}
888+
if src_aicpu_eid is not None:
889+
flow_s["bind_id"] = src_aicpu_eid
890+
events.append(flow_s)
891+
892+
flow_f = {
893+
"cat": "flow",
894+
"id": flow_id,
895+
"name": "dependency",
896+
"ph": "f",
897+
"pid": 2,
898+
"tid": dst_tid,
899+
"ts": dst_dispatch_us,
900+
"bp": "e",
901+
}
902+
if dst_aicpu_eid is not None:
903+
flow_f["bind_id"] = dst_aicpu_eid
904+
events.append(flow_f)
823905

824-
events.append(
825-
{
826-
"cat": "flow",
827-
"id": flow_id,
828-
"name": "dependency",
829-
"ph": "s",
830-
"pid": 2,
831-
"tid": src_tid,
832-
"ts": src_finish_us - 0.01,
833-
}
834-
)
835-
events.append(
836-
{
837-
"cat": "flow",
838-
"id": flow_id,
839-
"name": "dependency",
840-
"ph": "f",
841-
"pid": 2,
842-
"tid": dst_tid,
843-
"ts": dst_dispatch_us,
844-
"bp": "e",
845-
}
846-
)
847906
flow_id += 1
848907

849908
# Scheduler DISPATCH → task execution arrows
@@ -894,6 +953,7 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
894953
if matched_thread is not None:
895954
sched_tid = 3000 + matched_thread
896955
core_tid = core_to_tid[task["core_id"]]
956+
aicpu_tid = task_to_aicpu_tid.get(task["task_id"], core_tid)
897957

898958
# Flow: scheduler DISPATCH → AICore View task start
899959
events.append(
@@ -922,6 +982,7 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
922982
flow_id += 1
923983

924984
# Flow: scheduler DISPATCH → AICPU View task start
985+
aicpu_eid = task_to_aicpu_event_id.get(task["task_id"])
925986
events.append(
926987
{
927988
"cat": "flow",
@@ -933,18 +994,19 @@ def generate_chrome_trace_json( # noqa: PLR0912, PLR0915
933994
"ts": dispatch_us,
934995
}
935996
)
936-
events.append(
937-
{
938-
"cat": "flow",
939-
"id": flow_id,
940-
"name": "dispatch",
941-
"ph": "f",
942-
"pid": 2,
943-
"tid": core_tid,
944-
"ts": dispatch_us,
945-
"bp": "e",
946-
}
947-
)
997+
flow_f = {
998+
"cat": "flow",
999+
"id": flow_id,
1000+
"name": "dispatch",
1001+
"ph": "f",
1002+
"pid": 2,
1003+
"tid": aicpu_tid,
1004+
"ts": dispatch_us,
1005+
"bp": "e",
1006+
}
1007+
if aicpu_eid is not None:
1008+
flow_f["bind_id"] = aicpu_eid
1009+
events.append(flow_f)
9481010
flow_id += 1
9491011

9501012
# Orchestrator → scheduler dispatch:

0 commit comments

Comments
 (0)