uname -a
Linux 6.12.0-105.51.5.el9uek.x86_64 #1 SMP PREEMPT_DYNAMIC Tue Oct 14 19:55:41 PDT 2025 x86_64 x86_64 x86_64 GNU/Linux
A userspace process kindnetd appears stuck with its kernel stack inside fanotify_get_response() / wait_event_state(). However, crash dump analysis shows that the task is not sleeping in fanotify anymore.
The task is in TASK_RUNNING state and has task->on_rq = 1, but its CFS scheduling entity is not enqueued:
task->__state = 0 // TASK_RUNNING
task->on_rq = 1 // TASK_ON_RQ_QUEUED
task->on_cpu = 0
task->se.on_rq = 0
task->se.sched_delayed = 0
rq->nr_running = 0
rq->cfs.nr_running = 0
CPU 1 current = swapper/1
CPU 1 runqueue = empty
CPU 1, which is the task's recorded CPU, is idle and has no runnable RT or CFS tasks queued.
This suggests a scheduler state inconsistency: the task is marked as queued/runnable at the task_struct level, but its fair scheduling entity is not present in the CFS runqueue, and the CPU runqueue is empty.
crash> ps -m
[0 00:00:20.884] [RU] PID: 104412 TASK: ffff94c10a9b0000 CPU: 1 COMMAND: "kindnetd"
crash> bt
PID: 104412 TASK: ffff94c10a9b0000 CPU: 1 COMMAND: "kindnetd"
#0 [ffffb3b711e5b630] __schedule at ffffffffa7bbceba
#1 [ffffb3b711e5b698] schedule at ffffffffa7bbd207
#2 [ffffb3b711e5b6a8] fanotify_get_response at ffffffffa7145b87
#3 [ffffb3b711e5b700] fanotify_handle_event at ffffffffa7147569
#4 [ffffb3b711e5b758] send_to_group at ffffffffa713f93b
#5 [ffffb3b711e5b7b8] fsnotify at ffffffffa713ff77
#6 [ffffb3b711e5b880] __fsnotify_parent at ffffffffa71408d4
#7 [ffffb3b711e5b938] fsnotify_open_perm at ffffffffa72027ae
#8 [ffffb3b711e5b950] do_dentry_open at ffffffffa70ceb29
#9 [ffffb3b711e5b980] vfs_open at ffffffffa70d0e9e
#10 [ffffb3b711e5b9a8] do_open at ffffffffa70e88a4
#11 [ffffb3b711e5b9e8] path_openat at ffffffffa70ee700
#12 [ffffb3b711e5ba48] do_filp_open at ffffffffa70ee984
#13 [ffffb3b711e5bb78] do_open_execat at ffffffffa70e027c
#14 [ffffb3b711e5bba8] alloc_bprm at ffffffffa70e0cc1
#15 [ffffb3b711e5bbf8] do_execveat_common at ffffffffa70e1c02
#16 [ffffb3b711e5bc40] __x64_sys_execve at ffffffffa70e1fc6
crash> px &((struct fsnotify_group *)0xffff96b64500fc00)->fanotify_data.access_waitq
$4 = (wait_queue_head_t *) 0xffff96b64500fcb8
crash> waitq 0xffff96b64500fcb8
wait queue ffff96b64500fcb8 is empty
crash> px &((struct fsnotify_group *)0xffff96b64500fc00)->fanotify_data.access_list
$5 = (struct list_head *) 0xffff96b64500fca8
crash> list -H 0xffff96b64500fca8
(empty)
crash> px &((struct fsnotify_group *)0xffff96b64500fc00)->notification_list
$6 = (struct list_head *) 0xffff96b64500fc10
crash> list -H 0xffff96b64500fc10
(empty)
These structures do not indicate an active pending fanotify permission event. The wait queue is empty, and both fanotify event lists are empty.
PID: 104412 TASK: ffff94c10a9b0000 CPU: 1 COMMAND: "kindnetd"
__state = 0
on_rq = 1
on_cpu = 0
policy = 0
prio = 120
rt_priority = 0
sched_class = 0xffffffffa86072e8 <fair_sched_class>
jobctl = 0
crash> runq -c 1
CPU 1 RUNQUEUE: ffff94c36fab6940
CURRENT: PID: 0 TASK: ffff94c04089a200 COMMAND: "swapper/1"
RT PRIO_ARRAY: ffff94c36fab6bc0
[no tasks queued]
CFS RB_ROOT: ffff94c36fab6a10
[no tasks queued]
crash> runq -t -c 1
CPU 1: 277489455080
000000000000 PID: 0 TASK: ffff94c04089a200 COMMAND: "swapper/1"
crash> bt -c 1
PID: 0 TASK: ffff94c04089a200 CPU: 1 COMMAND: "swapper/1"
#0 [fffffe49fb980ea8] crash_nmi_callback at ffffffffa6ca262d
#1 [fffffe49fb980eb0] default_do_nmi at ffffffffa7bb3945
#2 [fffffe49fb980ed0] exc_nmi at ffffffffa7bb3bfb
#3 [fffffe49fb980ef0] end_repeat_nmi at ffffffffa7c01f1d
[exception RIP: pv_native_safe_halt+15]
RIP: ffffffffa7bb6a5f RSP: ffffb3b7000d7eb8 RFLAGS: 00000246
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffff94c04089a200 R8: 0000000000000000 R9: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
--- <NMI exception stack> ---
#4 [ffffb3b7000d7eb8] pv_native_safe_halt at ffffffffa7bb6a5f
#5 [ffffb3b7000d7eb8] default_idle at ffffffffa7bb88d9
#6 [ffffb3b7000d7ec0] default_idle_call at ffffffffa7bb8bb9
#7 [ffffb3b7000d7ec8] cpuidle_idle_call at ffffffffa6d6b590
#8 [ffffb3b7000d7f00] do_idle at ffffffffa6d6b65b
#9 [ffffb3b7000d7f18] cpu_startup_entry at ffffffffa6d6b8b9
#10 [ffffb3b7000d7f28] start_secondary at ffffffffa6ca54ee
#11 [ffffb3b7000d7f40] common_startup_64 at ffffffffa6c523ed
The task's CFS runqueue:
crash> struct cfs_rq 0xffff94c1b3bf7a00
struct cfs_rq {
load = {
weight = 0,
inv_weight = 0
},
nr_running = 0,
h_nr_queued = 0,
idle_nr_running = 0,
idle_h_nr_running = 0,
h_nr_delayed = 0,
avg_vruntime = 0,
avg_load = 0,
min_vruntime = 3402147,
forceidle_seq = 0,
min_vruntime_fi = 0,
tasks_timeline = {
rb_root = {
rb_node = 0x0
},
rb_leftmost = 0x0
},
curr = 0x0,
next = 0x0,
...
rq = 0xffff94c36fab6940,
tg = 0xffff94c13f358dc0,
idle = 0,
runtime_enabled = 1,
runtime_remaining = 940404,
throttled = 0,
throttle_count = 0,
...
h_nr_runnable = 0,
}
Direct CPU 1 rq fields:
crash> p ((struct rq *)0xffff94c36fab6940)->nr_running
$26 = 0
crash> p ((struct rq *)0xffff94c36fab6940)->cfs.nr_running
$29 = 0
crash> p ((struct rq *)0xffff94c36fab6940)->cfs.h_nr_queued
$30 = 0
crash> p ((struct rq *)0xffff94c36fab6940)->cfs.h_nr_runnable
$31 = 0
This confirms that CPU 1 has no runnable tasks according to its runqueue, even though kindnetd has task->__state = TASK_RUNNING and task->on_rq = 1.
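Why this is unlikely to be cgroup/CFS bandwidth throttling: bandwidth control is enabled on the task's cfs_rq, but it still has runtime remaining, it is not throttled, and no ancestor is throttled (throttle_count = 0):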
runtime_enabled = 1
runtime_remaining = 940404
throttled = 0
throttle_count = 0
nr_running = 0
h_nr_queued = 0
h_nr_runnable = 0
Why this is unlikely to be a CPU lockup on CPU 1:
CPU 1 is in the normal idle path; it is not stuck in a spinlock, an interrupt-disabled loop, a driver loop, or an RT task.
pv_native_safe_halt
default_idle
cpuidle_idle_call
do_idle
Is this a known issue?
A userspace process kindnetd appears stuck with its kernel stack inside fanotify_get_response() / wait_event_state(). However, crash dump analysis shows that the task is not sleeping in fanotify anymore.
The task is in TASK_RUNNING state and has task->on_rq = 1, but its CFS scheduling entity is not enqueued:
CPU 1, which is the task's recorded CPU, is idle and has no runnable RT or CFS tasks queued.
This suggests a scheduler state inconsistency: the task is marked as queued/runnable at the task_struct level, but its fair scheduling entity is not present in the CFS runqueue, and the CPU runqueue is empty.
These structures do not indicate an active pending fanotify permission event. The wait queue is empty, and both fanotify event lists are empty.
Direct CPU 1 rq fields:
This confirms that CPU 1 has no runnable tasks according to its runqueue, even though kindnetd has task->__state = TASK_RUNNING and task->on_rq = 1.
Why this is unlikely to be cgroup/CFS throttling:
Why this is unlikely to be a CPU lockup on CPU 1:
CPU 1 is in the normal idle path; it is not stuck in a spinlock, an interrupt-disabled loop, a driver loop, or an RT task.
Is this a known issue?