
OL9: CFS task stuck after fanotify_get_response() with inconsistent scheduler state #223

@artemartey

Description

uname -a
Linux 6.12.0-105.51.5.el9uek.x86_64 #1 SMP PREEMPT_DYNAMIC Tue Oct 14 19:55:41 PDT 2025 x86_64 x86_64 x86_64 GNU/Linux

A userspace process, kindnetd, appears stuck with its kernel stack inside fanotify_get_response() / wait_event_state(). However, crash dump analysis shows that the task is no longer sleeping in fanotify:
the task is in TASK_RUNNING state and has task->on_rq = 1, but its CFS scheduling entity is not enqueued:

task->__state       = 0        // TASK_RUNNING
task->on_rq         = 1        // TASK_ON_RQ_QUEUED
task->on_cpu        = 0
task->se.on_rq      = 0
task->se.sched_delayed = 0
rq->nr_running      = 0
rq->cfs.nr_running  = 0
CPU 1 current       = swapper/1
CPU 1 runqueue      = empty

CPU 1, which is the task's recorded CPU, is idle and has no runnable RT or CFS tasks queued.

This suggests a scheduler state inconsistency: the task is marked as queued/runnable at the task_struct level, but its fair scheduling entity is not present in the CFS runqueue, and the CPU runqueue is empty.

crash> ps -m
[0 00:00:20.884] [RU]  PID: 104412   TASK: ffff94c10a9b0000  CPU: 1    COMMAND: "kindnetd"
crash> bt
PID: 104412   TASK: ffff94c10a9b0000  CPU: 1    COMMAND: "kindnetd"
 #0 [ffffb3b711e5b630] __schedule at ffffffffa7bbceba
 #1 [ffffb3b711e5b698] schedule at ffffffffa7bbd207
 #2 [ffffb3b711e5b6a8] fanotify_get_response at ffffffffa7145b87
 #3 [ffffb3b711e5b700] fanotify_handle_event at ffffffffa7147569
 #4 [ffffb3b711e5b758] send_to_group at ffffffffa713f93b
 #5 [ffffb3b711e5b7b8] fsnotify at ffffffffa713ff77
 #6 [ffffb3b711e5b880] __fsnotify_parent at ffffffffa71408d4
 #7 [ffffb3b711e5b938] fsnotify_open_perm at ffffffffa72027ae
 #8 [ffffb3b711e5b950] do_dentry_open at ffffffffa70ceb29
 #9 [ffffb3b711e5b980] vfs_open at ffffffffa70d0e9e
#10 [ffffb3b711e5b9a8] do_open at ffffffffa70e88a4
#11 [ffffb3b711e5b9e8] path_openat at ffffffffa70ee700
#12 [ffffb3b711e5ba48] do_filp_open at ffffffffa70ee984
#13 [ffffb3b711e5bb78] do_open_execat at ffffffffa70e027c
#14 [ffffb3b711e5bba8] alloc_bprm at ffffffffa70e0cc1
#15 [ffffb3b711e5bbf8] do_execveat_common at ffffffffa70e1c02
#16 [ffffb3b711e5bc40] __x64_sys_execve at ffffffffa70e1fc6

crash> px &((struct fsnotify_group *)0xffff96b64500fc00)->fanotify_data.access_waitq
$4 = (wait_queue_head_t *) 0xffff96b64500fcb8

crash> waitq 0xffff96b64500fcb8
wait queue ffff96b64500fcb8 is empty
crash> px &((struct fsnotify_group *)0xffff96b64500fc00)->fanotify_data.access_list
$5 = (struct list_head *) 0xffff96b64500fca8

crash> list -H 0xffff96b64500fca8
(empty)
crash> px &((struct fsnotify_group *)0xffff96b64500fc00)->notification_list
$6 = (struct list_head *) 0xffff96b64500fc10

crash> list -H 0xffff96b64500fc10
(empty)

These structures do not indicate an active pending fanotify permission event. The wait queue is empty, and both fanotify event lists are empty.

PID: 104412   TASK: ffff94c10a9b0000  CPU: 1    COMMAND: "kindnetd"

__state = 0
on_rq = 1
on_cpu = 0
policy = 0
prio = 120
rt_priority = 0
sched_class = 0xffffffffa86072e8 <fair_sched_class>
jobctl = 0

crash> runq -c 1
CPU 1 RUNQUEUE: ffff94c36fab6940
  CURRENT: PID: 0      TASK: ffff94c04089a200  COMMAND: "swapper/1"
  RT PRIO_ARRAY: ffff94c36fab6bc0
     [no tasks queued]
  CFS RB_ROOT: ffff94c36fab6a10
     [no tasks queued]
crash> runq -t -c 1
 CPU 1: 277489455080
        000000000000  PID: 0      TASK: ffff94c04089a200  COMMAND: "swapper/1"
crash> bt -c 1
PID: 0        TASK: ffff94c04089a200  CPU: 1    COMMAND: "swapper/1"
 #0 [fffffe49fb980ea8] crash_nmi_callback at ffffffffa6ca262d
 #1 [fffffe49fb980eb0] default_do_nmi at ffffffffa7bb3945
 #2 [fffffe49fb980ed0] exc_nmi at ffffffffa7bb3bfb
 #3 [fffffe49fb980ef0] end_repeat_nmi at ffffffffa7c01f1d
    [exception RIP: pv_native_safe_halt+15]
    RIP: ffffffffa7bb6a5f  RSP: ffffb3b7000d7eb8  RFLAGS: 00000246
    RAX: 0000000000000000  RBX: 0000000000000000  RCX: 0000000000000000
    RDX: 0000000000000000  RSI: 0000000000000000  RDI: 0000000000000000
    RBP: ffff94c04089a200   R8: 0000000000000000   R9: 0000000000000000
    R10: 0000000000000000  R11: 0000000000000000  R12: 0000000000000000
    R13: 0000000000000000  R14: 0000000000000000  R15: 0000000000000000
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
--- <NMI exception stack> ---
 #4 [ffffb3b7000d7eb8] pv_native_safe_halt at ffffffffa7bb6a5f
 #5 [ffffb3b7000d7eb8] default_idle at ffffffffa7bb88d9
 #6 [ffffb3b7000d7ec0] default_idle_call at ffffffffa7bb8bb9
 #7 [ffffb3b7000d7ec8] cpuidle_idle_call at ffffffffa6d6b590
 #8 [ffffb3b7000d7f00] do_idle at ffffffffa6d6b65b
 #9 [ffffb3b7000d7f18] cpu_startup_entry at ffffffffa6d6b8b9
#10 [ffffb3b7000d7f28] start_secondary at ffffffffa6ca54ee
#11 [ffffb3b7000d7f40] common_startup_64 at ffffffffa6c523ed

The task's CFS runqueue:
crash> struct cfs_rq 0xffff94c1b3bf7a00
struct cfs_rq {
  load = {
    weight = 0,
    inv_weight = 0
  },
  nr_running = 0,
  h_nr_queued = 0,
  idle_nr_running = 0,
  idle_h_nr_running = 0,
  h_nr_delayed = 0,
  avg_vruntime = 0,
  avg_load = 0,
  min_vruntime = 3402147,
  forceidle_seq = 0,
  min_vruntime_fi = 0,
  tasks_timeline = {
    rb_root = {
      rb_node = 0x0
    },
    rb_leftmost = 0x0
  },
  curr = 0x0,
  next = 0x0,
  ...
  rq = 0xffff94c36fab6940,
  tg = 0xffff94c13f358dc0,
  idle = 0,
  runtime_enabled = 1,
  runtime_remaining = 940404,
  throttled = 0,
  throttle_count = 0,
  ...
  h_nr_runnable = 0,
}

Direct CPU 1 rq fields:

crash> p ((struct rq *)0xffff94c36fab6940)->nr_running
$26 = 0

crash> p ((struct rq *)0xffff94c36fab6940)->cfs.nr_running
$29 = 0

crash> p ((struct rq *)0xffff94c36fab6940)->cfs.h_nr_queued
$30 = 0

crash> p ((struct rq *)0xffff94c36fab6940)->cfs.h_nr_runnable
$31 = 0

This confirms that, according to its runqueue, CPU 1 has no runnable tasks, even though kindnetd has task->__state = TASK_RUNNING and task->on_rq = 1.
Why this is unlikely to be cgroup/CFS throttling:

runtime_enabled = 1
runtime_remaining = 940404
throttled = 0
throttle_count = 0
nr_running = 0
h_nr_queued = 0
h_nr_runnable = 0

Why this is unlikely to be a CPU lockup on CPU 1:
CPU 1 is in the normal idle path; it is not stuck in a spinlock, an interrupt-disabled loop, a driver loop, or an RT task.

pv_native_safe_halt
default_idle
cpuidle_idle_call
do_idle

Is this a known issue?
