From 33fc8009df3029cc654807b8505e97f5ea2f2323 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Wed, 6 May 2026 06:21:24 +0800 Subject: [PATCH] Harden /proc/self oom and fdinfo nodes procfs emulation now treats the OOM trio (oom_score_adj, legacy oom_adj, read-only oom_score) as one process-wide adjustment with per-path read and write semantics: legacy oom_adj scales to oom_score_adj on writes (special-casing OOM_DISABLE -> SCORE_ADJ_MIN and OOM_ADJUST_MAX -> SCORE_ADJ_MAX so the boundary intent survives the lossy multiply) and back-clamps to [-17, 15] on reads; oom_score is read-only with a stub zero. The OOM write path serializes the truncate+pwrite+lseek under a new oom_write_lock and publishes the global atomic only after the backing rewrite succeeds, so a partial-rewrite failure no longer leaves the process-wide value diverged from a returned -1. Zero-length writes short-circuit to success (matches Linux for proc nodes; sys_writev previously hit -EINVAL in the parser). Stat reports st_size 0 for every synthetic /proc file so callers that pre-size buffers from stat cannot truncate (a 256-byte cap had silently chopped /proc/cpuinfo on hosts with many CPUs; a 2-byte cap had reduced -1000 to -1 on oom_score_adj). A new read-intercept path mirrors the write side. proc_intercept_read and proc_intercept_readv let read/pread/readv/preadv on the OOM nodes return the live atomic value rather than the per-open temp file content, and sendfile/copy_file_range route through the same hook so proc-source byte counts stay consistent with the value an immediately following open would observe. /proc/self/fdinfo gains type-specific lines for the special fd classes elfuse implements: eventfd-count (16-char hex matching fs/eventfd.c), sigmask (16-char hex), and timerfd clockid/ticks/it_value/it_interval. 
The accessors live in src/syscall/fd.c (eventfd_fdinfo_snapshot, signalfd_fdinfo_snapshot, timerfd_fdinfo_snapshot) and read state under sfd_lock to prevent tearing across concurrent read/write/settime. The per-fd lseek probe now uses fd_to_host_dup so a concurrent close+reopen on another vCPU cannot redirect the probe to an unrelated host fd, and errno is saved/restored across the ESPIPE-prone lseek so non-seekable fds (sockets, pipes) do not pollute the caller's state. /proc/self/fdinfo and /proc/self/fd no longer share one static backing directory across opens. The previous design let a second open unlink and recreate entries while a sibling thread iterated its dirfd; both nodes now go through proc_open_fd_scratch, which mkdtemps a private directory per open, populates it from a fresh fd-table snapshot, and tracks the path in proc_scratch_dirs[] for atexit cleanup so the previously-leaked backing dirs are reaped at process exit. The unix-net visitor's buffer-tail margin grew from 128 to 256 bytes to fit the longest possible row (54 fixed + 108 sun_path + newline); the previous margin let the snprintf truncate the path and drop the trailing newline. Eight explicit /proc/<pid>/X cases collapsed into one general alias-and-recurse, so /proc/<pid>/maps, /oom_score_adj, /limits, etc. now route through the matching /proc/self handler. Locked in by tests/test-tier-b.c (35 cases including oom write persistence, out-of-range -EINVAL, oom_adj=15 -> 1000 scaling, oom_score read-only and write-rejected, zero-length writev, stat-size-zero, fdinfo eventfd-count hex, fdinfo sigmask, fdinfo timerfd next expiry for periodic timers, concurrent fdinfo enumeration, and a /proc/net/tcp sl-density regression that opens non-TCP sockets before TCP listeners so the iterator visits rejected sockets first; the post-fix dense sl=0,1,... output matches qemu Linux ground truth, and a manual bug reintroduction confirms the test catches the sparse-slot regression with sl=4 expected=0). 
tests/test-io-opt.c adds sendfile and copy_file_range coverage for the read-intercept path. --- src/runtime/procemu.c | 1666 ++++++++++++++++++++++++++--------------- src/runtime/procemu.h | 19 + src/syscall/fd.c | 150 +++- src/syscall/fd.h | 12 + src/syscall/fs.c | 18 +- src/syscall/io.c | 124 ++- src/syscall/syscall.c | 7 +- tests/test-io-opt.c | 103 +++ tests/test-netstat.c | 31 + tests/test-proc.c | 16 + tests/test-tier-b.c | 758 +++++++++++++++++++ 11 files changed, 2258 insertions(+), 646 deletions(-) diff --git a/src/runtime/procemu.c b/src/runtime/procemu.c index 04eb698..f35b163 100644 --- a/src/runtime/procemu.c +++ b/src/runtime/procemu.c @@ -19,6 +19,7 @@ */ #define MAPS_NAME_COLUMN 73 +#include #include #include #include @@ -47,17 +48,18 @@ #include "runtime/thread.h" #include "syscall/abi.h" +#include "syscall/fd.h" #include "syscall/internal.h" #include "syscall/proc.h" #include "syscall/sys.h" /* Return the shared /dev/shm emulation directory, creating it on first call. - * Linux POSIX shm names live in one namespace, so this must not be keyed by - * the host process id. + * Linux POSIX shm names live in one namespace, so this must not be keyed by the + * host process id. * - * Uses a mutex for thread-safe lazy initialization while still allowing - * retries after transient failures. The mkdir+lstat sequence has an inherent - * TOCTOU window, but the lstat ownership check limits the impact to directories + * Uses a mutex for thread-safe lazy initialization while still allowing retries + * after transient failures. The mkdir+lstat sequence has an inherent TOCTOU + * window, but the lstat ownership check limits the impact to directories * already owned by this UID. 
*/ static char shm_dir[128]; @@ -74,8 +76,273 @@ static pthread_mutex_t shm_dir_lock = PTHREAD_MUTEX_INITIALIZER; static char proc_tmpdir[128]; static bool proc_tmpdir_ok; static pthread_mutex_t proc_tmpdir_lock = PTHREAD_MUTEX_INITIALIZER; + +/* OOM range constants from Linux include/uapi/linux/oom.h. */ +#define LINUX_OOM_SCORE_ADJ_MIN (-1000) +#define LINUX_OOM_SCORE_ADJ_MAX 1000 +#define LINUX_OOM_DISABLE (-17) +#define LINUX_OOM_ADJUST_MAX 15 + +/* Process-wide stub for the OOM score adjustment. The legacy oom_adj interface, + * the modern oom_score_adj interface, and the read-only oom_score node all + * derive their displayed values from this single state. + */ static _Atomic int oom_score_adj_value = 0; +/* Serializes backing-fd rewrites so concurrent writers do not race the + * truncate+pwrite sequence that publishes the new value to a same-fd reader. + * The atomic store happens last so a failed rewrite leaves the global state + * unchanged. + */ +static pthread_mutex_t oom_write_lock = PTHREAD_MUTEX_INITIALIZER; + +enum { + OOM_PATH_NONE = 0, + OOM_PATH_SCORE_ADJ, /* /proc/self/oom_score_adj: writable, [-1000, 1000] */ + OOM_PATH_ADJ, /* /proc/self/oom_adj: legacy, writable, [-17, 15] */ + OOM_PATH_SCORE, /* /proc/self/oom_score: read-only computed score */ +}; + +static int proc_oom_path_kind(const char *path) +{ + if (!strcmp(path, "/proc/self/oom_score_adj")) + return OOM_PATH_SCORE_ADJ; + if (!strcmp(path, "/proc/self/oom_adj")) + return OOM_PATH_ADJ; + if (!strcmp(path, "/proc/self/oom_score")) + return OOM_PATH_SCORE; + return OOM_PATH_NONE; +} + +/* Linux fs/proc/base.c oom_adj_write: a write to oom_adj is scaled into the + * [-1000, 1000] oom_score_adj domain. The kernel special-cases both boundary + * values so the "disable" and "max" semantics survive the lossy multiply that + * would otherwise round 15*1000/17 to 882 and lose the "kill me first" intent. 
+ */ +static int oom_adj_to_score_adj(int v) +{ + if (v == LINUX_OOM_DISABLE) + return LINUX_OOM_SCORE_ADJ_MIN; + if (v == LINUX_OOM_ADJUST_MAX) + return LINUX_OOM_SCORE_ADJ_MAX; + return v * LINUX_OOM_SCORE_ADJ_MAX / -LINUX_OOM_DISABLE; +} + +/* Inverse of oom_adj_to_score_adj for legacy oom_adj reads. Clamp to the legacy + * [-17, 15] range so values outside the representable space (e.g. a guest that + * wrote -1000 to oom_score_adj) do not surprise readers. + */ +static int oom_score_adj_to_adj(int v) +{ + int s = v * -LINUX_OOM_DISABLE / LINUX_OOM_SCORE_ADJ_MAX; + if (s < LINUX_OOM_DISABLE) + s = LINUX_OOM_DISABLE; + if (s > LINUX_OOM_ADJUST_MAX) + s = LINUX_OOM_ADJUST_MAX; + return s; +} + +static int proc_oom_format_value(int kind, char *buf, size_t bufsz) +{ + int score_adj = atomic_load(&oom_score_adj_value); + int val = 0; + if (kind == OOM_PATH_SCORE_ADJ) + val = score_adj; + else if (kind == OOM_PATH_ADJ) + val = oom_score_adj_to_adj(score_adj); + return snprintf(buf, bufsz, "%d\n", val); +} + +static int proc_oom_copy_slice(char *dst, + size_t count, + int64_t offset, + const char *src, + size_t src_len, + ssize_t *read_out) +{ + if (offset < 0) { + errno = EINVAL; + return -1; + } + if ((uint64_t) offset >= src_len) { + *read_out = 0; + return 1; + } + + size_t avail = src_len - (size_t) offset; + size_t n = count < avail ? count : avail; + memcpy(dst, src + offset, n); + *read_out = (ssize_t) n; + return 1; +} + +typedef struct { + int fd; + int kind; +} proc_oom_live_fd_t; + +/* OOM proc nodes are opened on per-open temp files so lseek/pread semantics + * work naturally. After any successful write, republish the current formatted + * value into every still-open OOM fd so a later seek+read on another fd does + * not observe the stale snapshot that was materialized at open time. 
+ */ +static void proc_oom_refresh_live_fds_locked(void) +{ + proc_oom_live_fd_t live[FD_TABLE_SIZE]; + int nlive = 0; + + pthread_mutex_lock(&fd_lock); + for (int i = 0; i < FD_TABLE_SIZE; i++) { + int kind = proc_oom_path_kind(fd_table[i].proc_path); + if (kind == OOM_PATH_NONE || fd_table[i].type == FD_CLOSED) + continue; + + int dup_fd = dup(fd_table[i].host_fd); + if (dup_fd < 0) + continue; + + live[nlive].fd = dup_fd; + live[nlive].kind = kind; + nlive++; + } + pthread_mutex_unlock(&fd_lock); + + for (int i = 0; i < nlive; i++) { + char text[32]; + int len = proc_oom_format_value(live[i].kind, text, sizeof(text)); + if (len > 0 && (size_t) len < sizeof(text)) { + /* Rewrite the backing temp file as defense in depth for any code + * path that might bypass proc_intercept_read and fall through to + * host read(). The dup'd fd shares the open file description with + * the guest's fd, so a paired lseek to "restore" the offset would + * clobber a concurrent reader's position; skip the offset dance and + * let proc_intercept_read (which always pulls from the atomic) be + * the source of truth for offset-aware reads. + */ + if (ftruncate(live[i].fd, 0) == 0) + pwrite(live[i].fd, text, (size_t) len, 0); + } + close(live[i].fd); + } +} + +static int proc_open_dir_fd(const char *path, int linux_flags); +static int proc_lazy_mkdtemp(char *buf, size_t buf_size, const char *template); +static int append_proc_net_row(char *buf, + size_t bufsz, + int off, + bool want_tcp, + int sl, + const char laddr[33], + uint16_t lport, + const char raddr[33], + uint16_t rport, + int st); +static void format_proc_net_addr(char out[33], + const struct in_sockinfo *ini, + int local, + int v6); + +/* Per-open scratch dirs for /proc/self/fd and /proc/self/fdinfo. + * + * The previous design shared one host directory across every open, which meant + * a second open could unlink/recreate entries while the first opener was + * mid-getdents on its dirfd. 
Each open now allocates its own mkdtemp dir, so + * concurrent enumerations cannot mutate one another. + * + * The tracker keeps the paths so an atexit hook can rmdir them at process exit. + * The capacity is a soft cap: callers that exceed it leak the dir to /tmp + * (cleared on host reboot or by tmp janitors). + */ +#define PROC_SCRATCH_DIRS_MAX 128 +static char proc_scratch_dirs[PROC_SCRATCH_DIRS_MAX][80]; +static int proc_scratch_dirs_count; +static pthread_mutex_t proc_scratch_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_once_t proc_scratch_atexit_once = PTHREAD_ONCE_INIT; + +static void proc_scratch_remove_one(const char *dir) +{ + DIR *d = opendir(dir); + if (d) { + struct dirent *ent; + char path[160]; + while ((ent = readdir(d))) { + if (ent->d_name[0] == '.' && + (ent->d_name[1] == '\0' || + (ent->d_name[1] == '.' && ent->d_name[2] == '\0'))) + continue; + int n = snprintf(path, sizeof(path), "%s/%s", dir, ent->d_name); + if (n > 0 && (size_t) n < sizeof(path)) + unlink(path); + } + closedir(d); + } + rmdir(dir); +} + +static void proc_scratch_cleanup_atexit(void) +{ + pthread_mutex_lock(&proc_scratch_lock); + for (int i = 0; i < proc_scratch_dirs_count; i++) + proc_scratch_remove_one(proc_scratch_dirs[i]); + proc_scratch_dirs_count = 0; + pthread_mutex_unlock(&proc_scratch_lock); +} + +static void proc_scratch_register_atexit(void) +{ + atexit(proc_scratch_cleanup_atexit); +} + +/* Open a per-call scratch directory populated with one empty file per live + * guest fd. Returns a host dirfd on success, -1 on failure with errno set. + * + * The dirfd is the standard backing for getdents on this synthetic listing. + * Two concurrent openers get two independent dirs, so neither mutates the + * other's enumeration. 
+ */ +static int proc_open_fd_scratch(const char *prefix, int linux_flags) +{ + char dir[80]; + int n = snprintf(dir, sizeof(dir), "/tmp/%s-XXXXXX", prefix); + if (n < 0 || (size_t) n >= sizeof(dir)) { + errno = ENAMETOOLONG; + return -1; + } + if (!mkdtemp(dir)) + return -1; + + for (int i = 0; i < FD_TABLE_SIZE; i++) { + fd_entry_t snap; + if (!fd_snapshot(i, &snap)) + continue; + char entry[160]; + int en = snprintf(entry, sizeof(entry), "%s/%d", dir, i); + if (en <= 0 || (size_t) en >= sizeof(entry)) + continue; + int tfd = open(entry, O_CREAT | O_WRONLY, 0444); + if (tfd >= 0) + close(tfd); + } + + pthread_once(&proc_scratch_atexit_once, proc_scratch_register_atexit); + + pthread_mutex_lock(&proc_scratch_lock); + if (proc_scratch_dirs_count < PROC_SCRATCH_DIRS_MAX) { + str_copy_trunc(proc_scratch_dirs[proc_scratch_dirs_count++], dir, + sizeof(proc_scratch_dirs[0])); + } + pthread_mutex_unlock(&proc_scratch_lock); + + int fd = proc_open_dir_fd(dir, linux_flags); + if (fd < 0) { + int saved = errno; + proc_scratch_remove_one(dir); + errno = saved; + } + return fd; +} + /* atexit cleanup: remove snapshot files and the temp directory tree. */ static void proc_tmpdir_cleanup(void) { @@ -190,12 +457,12 @@ static int proc_synthetic_fd(const void *data, size_t len) return fd; } -/* Lazy mkdtemp into a caller-provided buffer. Returns 0 on success (buf - * holds the path), or -1 on failure (buf[0] reset to '\0'). +/* Lazy mkdtemp into a caller-provided buffer. Returns 0 on success (buf holds + * the path), or -1 on failure (buf[0] reset to '\0'). * - * Caller must hold the lock that protects buf, since the helper runs the - * "is buf empty?" check and mkdtemp non-atomically. The created directory - * is reused across calls until process exit. + * Caller must hold the lock that protects buf, since the helper runs the "is + * buf empty?" check and mkdtemp non-atomically. The created directory is reused + * across calls until process exit. 
*/ static int proc_lazy_mkdtemp(char *buf, size_t buf_size, const char *template) { @@ -222,6 +489,356 @@ static int proc_synthetic_fd_str(const char *buf, int snprintf_ret, size_t cap) return proc_synthetic_fd(buf, (size_t) snprintf_ret); } +/* Format a string into a stack buffer and return the synthetic fd in one + * step. Collapses the recurring three-line pattern: + * char buf[N]; + * int len = snprintf(buf, sizeof(buf), fmt, ...); + * return proc_synthetic_fd_str(buf, len, sizeof(buf)); + * 4096-byte cap is the largest formatted /proc payload elfuse emits via this + * helper (the few handlers that exceed it -- /proc/self/maps, /proc/net/tcp + * -- build their output incrementally and call proc_synthetic_fd directly). + */ +__attribute__((format(printf, 1, 2))) static int proc_emit_fmt(const char *fmt, + ...) +{ + char buf[4096]; + va_list ap; + va_start(ap, fmt); + int n = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + return proc_synthetic_fd_str(buf, n, sizeof(buf)); +} + +/* Emit a fixed string literal as a synthetic fd. Used for the handlers that + * return identical content every time (mountinfo, filesystems, /proc/sys + * constants); avoids allocating a stack buffer when there is nothing to format. + */ +static int proc_emit_literal(const char *s) +{ + return proc_synthetic_fd(s, strlen(s)); +} + +/* Return the basename of the loaded ELF binary, falling back to "elfuse" when + * the path is unavailable. Matches the comm-name semantic Linux uses for + * /proc//comm and the second field of /proc//stat. Storage is owned + * by proc_get_elf_path() (stable for process lifetime) or the literal fallback; + * caller must not free. + */ +static const char *proc_comm_name(void) +{ + const char *exe = proc_get_elf_path(); + if (!exe) + return "elfuse"; + const char *slash = strrchr(exe, '/'); + return slash ? slash + 1 : exe; +} + +/* Parse the numeric tail of a /proc/.../ or /dev/fd/ path. 
+ * prefix_len is the length of the leading literal that the caller already + * matched with strncmp. Returns the parsed fd on success, or -1 with errno set + * to errno_on_invalid for any malformed input or out-of-range index. + */ +static int proc_parse_fd_index(const char *path, + size_t prefix_len, + int errno_on_invalid) +{ + char *endp; + long n = strtol(path + prefix_len, &endp, 10); + if (endp == path + prefix_len || *endp != '\0' || n < 0 || + n >= FD_TABLE_SIZE) { + errno = errno_on_invalid; + return -1; + } + return (int) n; +} + +/* Resolve a /dev/shm/ guest path to a host path inside the per-UID shm + * dir. Rejects empty, traversing, or compound suffixes with EACCES; reports + * ENAMETOOLONG when the host path overflows. The same validation runs in + * proc_intercept_open and proc_intercept_stat, so the helper is one source of + * truth for the security gate. + */ +static int dev_shm_resolve_path(const char *guest_suffix, + char *host_path, + size_t host_path_sz) +{ + const char *shm = shm_dir_path(); + if (!shm) + return -1; + if (strstr(guest_suffix, "..") || strchr(guest_suffix, '/') || + guest_suffix[0] == '\0') { + errno = EACCES; + return -1; + } + int n = snprintf(host_path, host_path_sz, "%s/%s", shm, guest_suffix); + if (n < 0 || (size_t) n >= host_path_sz) { + errno = ENAMETOOLONG; + return -1; + } + return 0; +} + +/* Populate *st for a synthetic /proc directory entry. */ +static void stat_fill_proc_dir(struct stat *st, mode_t mode, nlink_t nlink) +{ + memset(st, 0, sizeof(*st)); + st->st_mode = S_IFDIR | mode; + st->st_nlink = nlink; +} + +/* Resolve a /dev/fd/ or /proc/self/fd/ path to a fresh dup() of the + * underlying host fd. prefix_len is the length of the matched literal (8 for + * "/dev/fd/", 14 for "/proc/self/fd/"). Returns the dup or -1 with errno=EBADF + * for malformed indices or closed slots. 
+ * + * fd_to_host_dup duplicates the host fd atomically under fd_lock so a + * concurrent close+reopen on another vCPU cannot redirect the dup to an + * unrelated host object that took the freed slot. + */ +static int dev_fd_dup(const char *path, size_t prefix_len) +{ + int n = proc_parse_fd_index(path, prefix_len, EBADF); + if (n < 0) + return -1; + int dup_fd = fd_to_host_dup(n); + if (dup_fd < 0) { + errno = EBADF; + return -1; + } + return dup_fd; +} + +/* If path matches /proc/[/...], rewrite into alias as /proc/self[...] + * Used by both proc_intercept_open and proc_intercept_stat so the explicit-pid + * form aliases through the same /proc/self handlers (Linux treats them + * equivalent for the calling process). The trailing-character constraint + * admits the bare /proc/ directory and /proc//X files alike. + * + * Returns 1 when alias was rewritten (caller should recurse on alias), 0 when + * path is not a self-alias (caller continues with other handlers), or -1 with + * errno=ENAMETOOLONG when the rewrite would overflow alias_sz (matches Linux + * semantics for paths > PATH_MAX rather than letting the intercept fall through + * to a host syscall that would silently fail). + */ +static int proc_alias_self(const char *path, char *alias, size_t alias_sz) +{ + if (strncmp(path, "/proc/", 6) != 0) + return 0; + char *endp; + long pid = strtol(path + 6, &endp, 10); + if (endp == path + 6 || pid != (long) proc_get_pid()) + return 0; + if (*endp != '\0' && *endp != '/') + return 0; + int n = snprintf(alias, alias_sz, "/proc/self%s", endp); + if (n < 0 || (size_t) n >= alias_sz) { + errno = ENAMETOOLONG; + return -1; + } + return 1; +} + +/* Populate *st for a synthetic /proc regular-file entry. Linux reports + * st_size = 0 for proc nodes; mirroring that forces readers to drain to EOF + * instead of pre-sizing buffers from a stale value. 
+ */ +static void stat_fill_proc_file(struct stat *st, mode_t mode) +{ + memset(st, 0, sizeof(*st)); + st->st_mode = S_IFREG | mode; + st->st_nlink = 1; + st->st_size = 0; + st->st_blksize = 4096; + st->st_blocks = 0; +} + +/* Visitor signature for proc_net_for_each_socket below. Returning false stops + * the iteration (used when the caller's output buffer is full). + * sinfo: kernel socket info for the current fd + * pid: pid that owns the fd (self or a fork child) + * fd_index: index within that pid's fdinfo list (used by /proc/net/unix + * to synthesize a fake-but-stable inode number) + * + * /proc/net/tcp's "sl" column must be dense, counting only emitted rows (not + * inspected sockets), so the iterator deliberately omits a global serial + * counter. Visitors that need one track it inside their own ctx and increment + * it only after a successful emit. + */ +typedef bool (*proc_net_socket_visitor)(const struct socket_fdinfo *sinfo, + pid_t pid, + int fd_index, + void *ctx); + +/* Walk every socket fd across self plus active fork children, invoking visit + * once per socket. Centralizes the proc_pidinfo + proc_pidfdinfo scaffolding + * shared by /proc/net/{tcp,udp,raw}{,6} and /proc/net/unix. 
+ */ +static void proc_net_for_each_socket(proc_net_socket_visitor visit, void *ctx) +{ + pid_t pids[PROC_TABLE_SIZE + 1]; + pids[0] = getpid(); + int npids = 1 + proc_get_child_pids(pids + 1, PROC_TABLE_SIZE); + + for (int p = 0; p < npids; p++) { + struct proc_fdinfo fdinfo[512]; + int fdsz = + proc_pidinfo(pids[p], PROC_PIDLISTFDS, 0, fdinfo, sizeof(fdinfo)); + if (fdsz <= 0) + continue; + int nfds = fdsz / (int) PROC_PIDLISTFD_SIZE; + for (int fi = 0; fi < nfds; fi++) { + if (fdinfo[fi].proc_fdtype != PROX_FDTYPE_SOCKET) + continue; + struct socket_fdinfo sinfo; + int sz = + proc_pidfdinfo(pids[p], fdinfo[fi].proc_fd, + PROC_PIDFDSOCKETINFO, &sinfo, sizeof(sinfo)); + if (sz < (int) sizeof(sinfo)) + continue; + if (!visit(&sinfo, pids[p], fi, ctx)) + return; + } + } +} + +/* Visitor context + callback for /proc/net/{tcp,udp,raw}{,6}. + * sl counts only emitted rows so the "sl" column stays dense even when the + * iterator visits other-family sockets that the visitor filters out. + */ +struct proc_net_inet_ctx { + char *buf; + size_t bufsz; + int off; + int sl; + int want_af; + int want_stype; + bool want_tcp; + bool want_v6; +}; + +/* Map macOS TSI_S_* socket states (returned in tcp_connection_info.state) + * to the 1-based hex values Linux /proc/net/tcp uses (ESTABLISHED=01, + * LISTEN=0A, etc.). Indexed by macOS state ordinal. + */ +static int proc_net_tcp_state_linux(int kstate) +{ + static const int state_map[] = { + 0x07, /* 0: CLOSED */ + 0x0A, /* 1: LISTEN */ + 0x02, /* 2: SYN_SENT */ + 0x03, /* 3: SYN_RECEIVED */ + 0x01, /* 4: ESTABLISHED */ + 0x08, /* 5: CLOSE_WAIT */ + 0x04, /* 6: FIN_WAIT_1 */ + 0x06, /* 7: CLOSING */ + 0x09, /* 8: LAST_ACK */ + 0x05, /* 9: FIN_WAIT_2 */ + 0x0B, /* 10: TIME_WAIT */ + }; + return RANGE_CHECK(kstate, 0, 11) ? 
state_map[kstate] : 0x07; +} + +static bool proc_net_inet_visit(const struct socket_fdinfo *sinfo, + pid_t pid, + int fd_index, + void *ctx_v) +{ + (void) pid; + (void) fd_index; + struct proc_net_inet_ctx *c = ctx_v; + if (c->off >= (int) c->bufsz - 256) + return false; + if (sinfo->psi.soi_family != c->want_af || + sinfo->psi.soi_type != c->want_stype) + return true; + + const struct in_sockinfo *ini = + c->want_tcp ? &sinfo->psi.soi_proto.pri_tcp.tcpsi_ini + : &sinfo->psi.soi_proto.pri_in; + char laddr[33], raddr[33]; + format_proc_net_addr(laddr, ini, 1, c->want_v6); + format_proc_net_addr(raddr, ini, 0, c->want_v6); + int st = + c->want_tcp + ? proc_net_tcp_state_linux(sinfo->psi.soi_proto.pri_tcp.tcpsi_state) + : 0x07; + c->off = append_proc_net_row(c->buf, c->bufsz, c->off, c->want_tcp, c->sl, + laddr, ntohs(ini->insi_lport), raddr, + ntohs(ini->insi_fport), st); + c->sl++; + return true; +} + +/* Visitor context + callback for /proc/net/unix. */ +struct proc_net_unix_ctx { + char *buf; + size_t bufsz; + int off; +}; + +/* Lock-protected handle to a persistent /tmp directory used to back synthetic + * /proc subdirectories whose contents must repopulate per open (e.g. + * /proc/self/task with its dynamic TID set). The static buffer + lazy mkdtemp + * pattern is shared by multiple handlers so the helper keeps one source of + * truth for the locking and creation order. + */ +typedef struct { + char path[128]; + pthread_mutex_t lock; + const char *template; +} proc_persistent_dir_t; + +#define PROC_PERSISTENT_DIR(prefix) \ + {.path = {0}, .lock = PTHREAD_MUTEX_INITIALIZER, .template = prefix} + +/* Acquire the persistent dir's lock and ensure the dir exists. Caller owns the + * lock until proc_persistent_dir_release(). Returns the directory path or NULL + * on failure (lock released, errno set). 
+ */ +static const char *proc_persistent_dir_acquire(proc_persistent_dir_t *d) +{ + pthread_mutex_lock(&d->lock); + if (proc_lazy_mkdtemp(d->path, sizeof(d->path), d->template) < 0) { + pthread_mutex_unlock(&d->lock); + return NULL; + } + return d->path; +} + +static void proc_persistent_dir_release(proc_persistent_dir_t *d) +{ + pthread_mutex_unlock(&d->lock); +} + +static bool proc_net_unix_visit(const struct socket_fdinfo *sinfo, + pid_t pid, + int fd_index, + void *ctx_v) +{ + (void) pid; + struct proc_net_unix_ctx *c = ctx_v; + /* A unix row is up to 56 bytes of fixed format plus a sun_path of + * up to 108 bytes plus the trailing newline -- ~165 bytes worst + * case. The 128-byte margin previously inherited from the inline + * loop could leave a half-formatted row at the buffer tail; 256 + * matches the inet visitor and covers the longest possible path. + */ + if (c->off >= (int) c->bufsz - 256) + return false; + if (sinfo->psi.soi_family != AF_UNIX) + return true; + int stype = sinfo->psi.soi_type; + int lt = (stype == SOCK_STREAM) ? 1 + : (stype == SOCK_DGRAM) ? 2 + : (stype == SOCK_SEQPACKET) ? 5 + : 1; + const char *spath = sinfo->psi.soi_proto.pri_un.unsi_addr.ua_sun.sun_path; + c->off += snprintf(c->buf + c->off, c->bufsz - (size_t) c->off, + "%016X: %08X %08X %08X %04X %02X %5d %s\n", 0, 3, 0, 0, + lt, 3, 10000 + fd_index, spath[0] ? 
spath : ""); + return true; +} + static int append_proc_net_row(char *buf, size_t bufsz, int off, @@ -299,17 +916,6 @@ static int proc_open_numbered_dir(const char *dir, int64_t id, int linux_flags) return proc_open_dir_fd(path, linux_flags); } -static int proc_is_oom_writable(const char *path) -{ - return !strcmp(path, "/proc/self/oom_score_adj") || - !strcmp(path, "/proc/self/oom_adj"); -} - -static int proc_is_oom_path(const char *path) -{ - return proc_is_oom_writable(path) || !strcmp(path, "/proc/self/oom_score"); -} - static int copy_fd_to_path(int src_fd, const char *path) { int out = open(path, O_CREAT | O_TRUNC | O_WRONLY, 0444); @@ -366,6 +972,17 @@ static void populate_proc_snapshot(const guest_t *g, close(fd); } +static void populate_proc_placeholder(const char *dir, const char *name) +{ + char path[LINUX_PATH_MAX]; + if (snprintf(path, sizeof(path), "%s/%s", dir, name) >= (int) sizeof(path)) + return; + + int fd = open(path, O_CREAT | O_TRUNC | O_WRONLY, 0444); + if (fd >= 0) + close(fd); +} + static void format_proc_net_addr(char out[33], const struct in_sockinfo *ini, int local, @@ -427,6 +1044,16 @@ static const char *ensure_proc_tmpdir(const guest_t *g) snprintf(taskdir, sizeof(taskdir), "%s/task", piddir); mkdir(taskdir, 0755); + char netdir[128]; + snprintf(netdir, sizeof(netdir), "%s/net", proc_tmpdir); + if (mkdir(netdir, 0755) == 0 || errno == EEXIST) { + static const char *net_files[] = { + "tcp", "tcp6", "udp", "udp6", "raw", "raw6", "unix", NULL, + }; + for (const char **name = net_files; *name; name++) + populate_proc_placeholder(netdir, *name); + } + char exepath[128]; snprintf(exepath, sizeof(exepath), "%s/exe", piddir); const char *exe = proc_get_elf_path(); @@ -455,21 +1082,6 @@ static void proc_task_collect_cb(thread_entry_t *t, void *arg) c->tids[c->ntids++] = t->guest_tid; } -static char fddir[128]; -static pthread_mutex_t fddir_lock = PTHREAD_MUTEX_INITIALIZER; - -static void cleanup_fddir(void) -{ - if (fddir[0] != '\0') { - for 
(int i = 0; i < FD_TABLE_SIZE; i++) { - char entry[192]; - snprintf(entry, sizeof(entry), "%s/%d", fddir, i); - unlink(entry); - } - rmdir(fddir); - } -} - int proc_intercept_open(const guest_t *g, const char *path, int linux_flags, @@ -504,8 +1116,7 @@ int proc_intercept_open(const guest_t *g, */ int oflags = host_accmode | (translate_open_flags(linux_flags) & (O_NONBLOCK | O_CLOEXEC)); - int fd = open(host_dev, oflags); - return fd >= 0 ? fd : -1; + return open(host_dev, oflags); } /* /dev/shm -> tmpfs-backed host temp directory. @@ -513,32 +1124,19 @@ int proc_intercept_open(const guest_t *g, * Redirect to one shared host namespace so named shm works across elfuse * processes and fork children. */ - if (!strcmp(path, "/dev/shm") || !strncmp(path, "/dev/shm/", 9)) { + if (!strcmp(path, "/dev/shm")) { const char *shm = shm_dir_path(); - if (!shm) - return -1; - if (!strcmp(path, "/dev/shm")) - return proc_open_dir_fd(shm, linux_flags); - /* /dev/shm/name -> /tmp/elfuse-shm-UID/name - * Reject any path component traversal: "..", "/", or leading "/" - */ - const char *suffix = path + 9; - if (strstr(suffix, "..") || strchr(suffix, '/') || suffix[0] == '\0') { - errno = EACCES; - return -1; - } + return shm ? proc_open_dir_fd(shm, linux_flags) : -1; + } + if (!strncmp(path, "/dev/shm/", 9)) { char host_path[512]; - int n = snprintf(host_path, sizeof(host_path), "%s/%s", shm, suffix); - if (n < 0 || (size_t) n >= sizeof(host_path)) { - errno = ENAMETOOLONG; + if (dev_shm_resolve_path(path + 9, host_path, sizeof(host_path)) < 0) return -1; - } int oflags = translate_open_flags(linux_flags); /* O_NOFOLLOW: do not follow symlinks created by the guest inside the * shm directory (prevents symlink-based escape). */ - int fd = open(host_path, oflags | O_NOFOLLOW, mode); - return fd >= 0 ? 
fd : -1; + return open(host_path, oflags | O_NOFOLLOW, mode); } /* /dev/stdin -> dup(0), /dev/stdout -> dup(1), /dev/stderr -> dup(2) */ @@ -550,21 +1148,8 @@ int proc_intercept_open(const guest_t *g, return dup(STDERR_FILENO); /* /dev/fd/N -> dup(N) */ - if (!strncmp(path, "/dev/fd/", 8)) { - char *endptr; - long n = strtol(path + 8, &endptr, 10); - if (endptr == path + 8 || *endptr != '\0' || n < 0 || - n >= FD_TABLE_SIZE) { - errno = EBADF; - return -1; - } - int host_fd = fd_to_host((int) n); - if (host_fd < 0) { - errno = EBADF; - return -1; - } - return dup(host_fd); - } + if (!strncmp(path, "/dev/fd/", 8)) + return dev_fd_dup(path, 8); /* /proc -> synthetic directory with PID entries for busybox ps, top, etc. * Creates a temp dir once (cached for the process lifetime) with entries @@ -576,8 +1161,7 @@ int proc_intercept_open(const guest_t *g, const char *dir = ensure_proc_tmpdir(g); if (!dir) return -1; - int fd = proc_open_dir_fd(dir, linux_flags); - return fd >= 0 ? fd : -1; + return proc_open_dir_fd(dir, linux_flags); } /* /proc/self -> directory fd for the PID subdirectory */ @@ -585,87 +1169,53 @@ int proc_intercept_open(const guest_t *g, const char *dir = ensure_proc_tmpdir(g); if (!dir) return -1; - int fd = proc_open_numbered_dir(dir, proc_get_pid(), linux_flags); - return fd >= 0 ? fd : -1; + return proc_open_numbered_dir(dir, proc_get_pid(), linux_flags); } /* /proc/self/fd -> directory listing of guest-visible file descriptors. - * Use a persistent temp directory because macOS getdents-backed callers - * need real directory entries for fchdir/readdir to work. + * Each open gets its own scratch dir so concurrent enumerations cannot + * mutate one another (see proc_open_fd_scratch). 
*/ if (!strcmp(path, "/proc/self/fd") || !strcmp(path, "/proc/self/fd/")) { - pthread_mutex_lock(&fddir_lock); - if (fddir[0] == '\0') { - if (proc_lazy_mkdtemp(fddir, sizeof(fddir), - "/tmp/elfuse-fd-XXXXXX") < 0) { - pthread_mutex_unlock(&fddir_lock); - return -1; - } - atexit(cleanup_fddir); - } + return proc_open_fd_scratch("elfuse-fd", linux_flags); + } - for (int i = 0; i < FD_TABLE_SIZE; i++) { - char entry[192]; - snprintf(entry, sizeof(entry), "%s/%d", fddir, i); - fd_entry_t snap; - if (fd_snapshot(i, &snap)) { - int tfd = open(entry, O_CREAT | O_WRONLY, 0444); - if (tfd >= 0) - close(tfd); - } else { - unlink(entry); - } + if (!strcmp(path, "/proc/net") || !strcmp(path, "/proc/net/")) { + const char *dir = ensure_proc_tmpdir(g); + if (!dir) + return -1; + char netdir[LINUX_PATH_MAX]; + if (snprintf(netdir, sizeof(netdir), "%s/net", dir) >= + (int) sizeof(netdir)) { + errno = ENAMETOOLONG; + return -1; } + return proc_open_dir_fd(netdir, linux_flags); + } - int fd = proc_open_dir_fd(fddir, linux_flags); - pthread_mutex_unlock(&fddir_lock); - return fd >= 0 ? fd : -1; + /* /proc/[/...] -> /proc/self[...]. Returns -1 on + * ENAMETOOLONG so the guest sees the same error a real Linux kernel + * would produce instead of falling through to a host syscall. 
+ */ + { + char alias[LINUX_PATH_MAX]; + int aliased = proc_alias_self(path, alias, sizeof(alias)); + if (aliased < 0) + return -1; + if (aliased > 0) + return proc_intercept_open(g, alias, linux_flags, mode); } - /* /proc//stat -> redirect to /proc/self/stat for the current PID */ - if (!strncmp(path, "/proc/", 6)) { - char *endp; - long pid = strtol(path + 6, &endp, 10); - if (endp != path + 6 && pid == (long) proc_get_pid()) { - /* Rewrite /proc//X to /proc/self/X and recurse */ - if (!strncmp(endp, "/stat", 5) && endp[5] == '\0') - return proc_intercept_open(g, "/proc/self/stat", linux_flags, - mode); - if (!strncmp(endp, "/status", 7) && endp[7] == '\0') - return proc_intercept_open(g, "/proc/self/status", linux_flags, - mode); - if (!strncmp(endp, "/cmdline", 8) && endp[8] == '\0') - return proc_intercept_open(g, "/proc/self/cmdline", linux_flags, - mode); - if (!strncmp(endp, "/exe", 4) && endp[4] == '\0') - return proc_intercept_open(g, "/proc/self/exe", linux_flags, - mode); - if (!strncmp(endp, "/environ", 8) && endp[8] == '\0') - return proc_intercept_open(g, "/proc/self/environ", linux_flags, - mode); - if (!strncmp(endp, "/auxv", 5) && endp[5] == '\0') - return proc_intercept_open(g, "/proc/self/auxv", linux_flags, - mode); - if (!strncmp(endp, "/task", 5) && - (endp[5] == '\0' || endp[5] == '/')) { - char redir[128]; - snprintf(redir, sizeof(redir), "/proc/self/task%s", endp + 5); - return proc_intercept_open(g, redir, linux_flags, mode); - } - if (!strncmp(endp, "/fd", 3) && - (endp[3] == '\0' || endp[3] == '/')) { - char redir[128]; - snprintf(redir, sizeof(redir), "/proc/self/fd%s", endp + 3); - return proc_intercept_open(g, redir, linux_flags, mode); - } - if (!strcmp(endp, "") || !strcmp(endp, "/")) { - const char *dir = ensure_proc_tmpdir(g); - if (!dir) - return -1; - int fd = - proc_open_numbered_dir(dir, proc_get_pid(), linux_flags); - return fd >= 0 ? 
fd : -1; - } + int oom_kind = proc_oom_path_kind(path); + if (oom_kind == OOM_PATH_SCORE) { + /* Mirror the non-root Linux open contract for the 0444 proc node: + * reject writable opens immediately instead of letting the write path + * fail later against a synthetic temp file. + */ + int oom_accmode = translate_open_flags(linux_flags) & O_ACCMODE; + if (oom_accmode != O_RDONLY) { + errno = EACCES; + return -1; } } @@ -679,8 +1229,7 @@ int proc_intercept_open(const guest_t *g, errno = ENOENT; return -1; } - int fd = open(exe, O_RDONLY); - return fd >= 0 ? fd : -1; + return open(exe, O_RDONLY); } /* /proc/cpuinfo -> synthetic file with CPU count. @@ -734,20 +1283,10 @@ int proc_intercept_open(const guest_t *g, } vm_rss_kb /= 1024; - /* Extract basename from ELF path for the Name field (Linux uses the - * comm name, which is basename truncated to 15 chars) - */ - const char *exe = proc_get_elf_path(); - const char *name = "elfuse"; - if (exe) { - const char *slash = strrchr(exe, '/'); - name = slash ? slash + 1 : exe; - } - + /* Linux uses the comm name (basename truncated to 15 chars). */ + const char *name = proc_comm_name(); int threads = thread_active_count(); - char buf[2048]; - int len = snprintf( - buf, sizeof(buf), + return proc_emit_fmt( "Name:\t%.15s\n" "State:\tR (running)\n" "Tgid:\t%lld\n" @@ -764,7 +1303,6 @@ int proc_intercept_open(const guest_t *g, GUEST_UID, GUEST_GID, GUEST_GID, GUEST_GID, GUEST_GID, (unsigned long long) vm_size_kb, (unsigned long long) vm_size_kb, (unsigned long long) vm_rss_kb, threads); - return proc_synthetic_fd_str(buf, len, sizeof(buf)); } /* /proc/self/limits -> resource limits from prlimit64 cache */ @@ -812,31 +1350,25 @@ int proc_intercept_open(const guest_t *g, * dirs returns empty. Uses a static path cleaned up at exit. 
*/ if (!strcmp(path, "/proc/self/task") || !strcmp(path, "/proc/self/task/")) { - static char taskdir[128]; - static pthread_mutex_t taskdir_lock = PTHREAD_MUTEX_INITIALIZER; - - pthread_mutex_lock(&taskdir_lock); - if (proc_lazy_mkdtemp(taskdir, sizeof(taskdir), - "/tmp/elfuse-task-XXXXXX") < 0) { - pthread_mutex_unlock(&taskdir_lock); + static proc_persistent_dir_t taskdir = + PROC_PERSISTENT_DIR("/tmp/elfuse-task-XXXXXX"); + const char *dir = proc_persistent_dir_acquire(&taskdir); + if (!dir) return -1; - } int64_t tids[MAX_THREADS]; proc_task_collect_ctx_t ctx = {tids, 0}; thread_for_each(proc_task_collect_cb, &ctx); - for (int i = 0; i < ctx.ntids; i++) { char tidpath[128]; - snprintf(tidpath, sizeof(tidpath), "%s/%lld", taskdir, + snprintf(tidpath, sizeof(tidpath), "%s/%lld", dir, (long long) tids[i]); mkdir(tidpath, 0755); } - int fd = proc_open_dir_fd(taskdir, linux_flags); - pthread_mutex_unlock(&taskdir_lock); - - return fd >= 0 ? fd : -1; + int fd = proc_open_dir_fd(dir, linux_flags); + proc_persistent_dir_release(&taskdir); + return fd; } /* /proc/self/task//stat -> per-thread stat line */ @@ -853,75 +1385,51 @@ int proc_intercept_open(const guest_t *g, } if (!strcmp(endp, "/stat")) { - const char *exe = proc_get_elf_path(); - const char *name = "elfuse"; - if (exe) { - const char *slash = strrchr(exe, '/'); - name = slash ? 
slash + 1 : exe; - } - char buf[512]; - int len = - snprintf(buf, sizeof(buf), - "%ld (%.15s) R %lld %lld %lld 0 0 0 0 0 0 0 0 0 0 0 " - "20 0 %d 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " - "0 0 0 0 0 0 0 0\n", - tid, name, (long long) proc_get_ppid(), - (long long) proc_get_pid(), /* pgid */ - (long long) proc_get_sid(), thread_active_count()); - return proc_synthetic_fd_str(buf, len, sizeof(buf)); + return proc_emit_fmt( + "%ld (%.15s) R %lld %lld %lld 0 0 0 0 0 0 0 0 0 0 0 " + "20 0 %d 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + "0 0 0 0 0 0 0 0\n", + tid, proc_comm_name(), (long long) proc_get_ppid(), + (long long) proc_get_pid(), /* pgid */ + (long long) proc_get_sid(), thread_active_count()); } if (!strcmp(endp, "/status")) { - const char *exe = proc_get_elf_path(); - const char *name = "elfuse"; - if (exe) { - const char *slash = strrchr(exe, '/'); - name = slash ? slash + 1 : exe; - } - char buf[1024]; - int len = - snprintf(buf, sizeof(buf), - "Name:\t%.15s\n" - "State:\tR (running)\n" - "Tgid:\t%lld\n" - "Pid:\t%ld\n" - "PPid:\t%lld\n" - "Uid:\t%d\t%d\t%d\t%d\n" - "Gid:\t%d\t%d\t%d\t%d\n" - "Threads:\t%d\n", - name, (long long) proc_get_pid(), tid, - (long long) proc_get_ppid(), GUEST_UID, GUEST_UID, - GUEST_UID, GUEST_UID, GUEST_GID, GUEST_GID, GUEST_GID, - GUEST_GID, thread_active_count()); - return proc_synthetic_fd_str(buf, len, sizeof(buf)); + return proc_emit_fmt( + "Name:\t%.15s\n" + "State:\tR (running)\n" + "Tgid:\t%lld\n" + "Pid:\t%ld\n" + "PPid:\t%lld\n" + "Uid:\t%d\t%d\t%d\t%d\n" + "Gid:\t%d\t%d\t%d\t%d\n" + "Threads:\t%d\n", + proc_comm_name(), (long long) proc_get_pid(), tid, + (long long) proc_get_ppid(), GUEST_UID, GUEST_UID, GUEST_UID, + GUEST_UID, GUEST_GID, GUEST_GID, GUEST_GID, GUEST_GID, + thread_active_count()); } - /* /proc/self/task/ directory itself */ + /* /proc/self/task/ directory itself: synthesize a dir with + * stat/status placeholder entries. 
Persistent so getdents sees + * the entries on macOS (which cannot enumerate unlinked dirs). + */ if (*endp == '\0' || !strcmp(endp, "/")) { - /* Return a synthetic directory with stat/status placeholder - * entries. Uses a persistent temp dir (not cleaned until process - * exit) so getdents sees entries on macOS. - */ - static char tiddir_base[128]; - static pthread_mutex_t tiddir_lock = PTHREAD_MUTEX_INITIALIZER; - - pthread_mutex_lock(&tiddir_lock); - if (proc_lazy_mkdtemp(tiddir_base, sizeof(tiddir_base), - "/tmp/elfuse-tid-XXXXXX") < 0) { - pthread_mutex_unlock(&tiddir_lock); + static proc_persistent_dir_t tiddir = + PROC_PERSISTENT_DIR("/tmp/elfuse-tid-XXXXXX"); + const char *dir = proc_persistent_dir_acquire(&tiddir); + if (!dir) return -1; - } char p[160]; - snprintf(p, sizeof(p), "%s/stat", tiddir_base); + snprintf(p, sizeof(p), "%s/stat", dir); close(open(p, O_CREAT | O_WRONLY, 0444)); - snprintf(p, sizeof(p), "%s/status", tiddir_base); + snprintf(p, sizeof(p), "%s/status", dir); close(open(p, O_CREAT | O_WRONLY, 0444)); - int fd = proc_open_dir_fd(tiddir_base, linux_flags); - pthread_mutex_unlock(&tiddir_lock); - - return fd >= 0 ? fd : -1; + int fd = proc_open_dir_fd(dir, linux_flags); + proc_persistent_dir_release(&tiddir); + return fd; } return -2; /* unknown /proc/self/task//XXX */ @@ -1045,9 +1553,7 @@ int proc_intercept_open(const guest_t *g, gettimeofday(&now, NULL); double uptime = (double) (now.tv_sec - boottime.tv_sec) + (double) (now.tv_usec - boottime.tv_usec) / 1e6; - char buf[128]; - int len = snprintf(buf, sizeof(buf), "%.2f 0.00\n", uptime); - return proc_synthetic_fd_str(buf, len, sizeof(buf)); + return proc_emit_fmt("%.2f 0.00\n", uptime); } /* /proc/loadavg -> synthetic load averages. 
@@ -1056,11 +1562,9 @@ int proc_intercept_open(const guest_t *g, if (!strcmp(path, "/proc/loadavg")) { double loadavg[3] = {0}; getloadavg(loadavg, 3); - char buf[128]; - int len = - snprintf(buf, sizeof(buf), "%.2f %.2f %.2f 1/1 %lld\n", loadavg[0], - loadavg[1], loadavg[2], (long long) proc_get_pid()); - return proc_synthetic_fd_str(buf, len, sizeof(buf)); + return proc_emit_fmt("%.2f %.2f %.2f 1/1 %lld\n", loadavg[0], + loadavg[1], loadavg[2], + (long long) proc_get_pid()); } /* /var/run/utmp, /run/utmp -> synthetic utmp with current user. @@ -1097,171 +1601,70 @@ int proc_intercept_open(const guest_t *g, !strcmp(path, "/proc/net/raw") || !strcmp(path, "/proc/net/raw6")) { bool want_tcp = !!strstr(path, "tcp"), want_udp = !!strstr(path, "udp"); bool want_v6 = (path[strlen(path) - 1] == '6'); - int want_af = want_v6 ? AF_INET6 : AF_INET; - int want_stype = want_tcp ? SOCK_STREAM - : want_udp ? SOCK_DGRAM - : SOCK_RAW; - const char *header_fmt = + struct proc_net_inet_ctx ctx = { + .buf = NULL, /* set below */ + .bufsz = 16384, + .off = 0, + .sl = 0, + .want_af = want_v6 ? AF_INET6 : AF_INET, + .want_stype = want_tcp ? SOCK_STREAM + : want_udp ? SOCK_DGRAM + : SOCK_RAW, + .want_tcp = want_tcp, + .want_v6 = want_v6, + }; + char buf[16384]; + ctx.buf = buf; + ctx.off = snprintf( + buf, sizeof(buf), "%s", want_tcp ? 
" sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout inode\n" : " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout inode" - " ref pointer drops\n"; - char buf[16384]; - int off = snprintf(buf, sizeof(buf), "%s", header_fmt); - - /* Collect PIDs to scan: self + active children */ - pid_t pids[PROC_TABLE_SIZE + 1]; - pids[0] = getpid(); - int npids = 1 + proc_get_child_pids(pids + 1, PROC_TABLE_SIZE); - - int sl = 0; - for (int p = 0; p < npids && off < (int) sizeof(buf) - 256; p++) { - struct proc_fdinfo fdinfo[512]; - int fdsz = proc_pidinfo(pids[p], PROC_PIDLISTFDS, 0, fdinfo, - sizeof(fdinfo)); - if (fdsz <= 0) - continue; - int nfds = fdsz / (int) PROC_PIDLISTFD_SIZE; - - for (int fi = 0; fi < nfds && off < (int) sizeof(buf) - 256; fi++) { - if (fdinfo[fi].proc_fdtype != PROX_FDTYPE_SOCKET) - continue; - - struct socket_fdinfo sinfo; - int sz = - proc_pidfdinfo(pids[p], fdinfo[fi].proc_fd, - PROC_PIDFDSOCKETINFO, &sinfo, sizeof(sinfo)); - if (sz < (int) sizeof(sinfo)) - continue; - - int saf = sinfo.psi.soi_family, stype = sinfo.psi.soi_type; - if (saf != want_af || stype != want_stype) - continue; - - uint16_t lport = 0, rport = 0; - char laddr[33], raddr[33]; - const struct in_sockinfo *ini = - want_tcp ? &sinfo.psi.soi_proto.pri_tcp.tcpsi_ini - : &sinfo.psi.soi_proto.pri_in; - - format_proc_net_addr(laddr, ini, 1, want_v6); - lport = ntohs(ini->insi_lport); - format_proc_net_addr(raddr, ini, 0, want_v6); - rport = ntohs(ini->insi_fport); - - /* TCP state from the kernel's tcp_connection_info */ - int st = 0x07; /* TCP_CLOSE default */ - if (want_tcp) { - int kstate = sinfo.psi.soi_proto.pri_tcp.tcpsi_state; - /* macOS TSI_S_* matches Linux TCP state encoding: - * 0=CLOSED, 1=LISTEN, 2=SYN_SENT, etc. 
But Linux - * /proc/net uses 1-based: 01=ESTABLISHED, 0A=LISTEN - */ - static const int state_map[] = { - 0x07, /* 0: CLOSED */ - 0x0A, /* 1: LISTEN */ - 0x02, /* 2: SYN_SENT */ - 0x03, /* 3: SYN_RECEIVED */ - 0x01, /* 4: ESTABLISHED */ - 0x08, /* 5: CLOSE_WAIT */ - 0x04, /* 6: FIN_WAIT_1 */ - 0x06, /* 7: CLOSING */ - 0x09, /* 8: LAST_ACK */ - 0x05, /* 9: FIN_WAIT_2 */ - 0x0B, /* 10: TIME_WAIT */ - }; - if (RANGE_CHECK(kstate, 0, 11)) - st = state_map[kstate]; - } - - off = append_proc_net_row(buf, sizeof(buf), off, want_tcp, sl, - laddr, lport, raddr, rport, st); - sl++; - } - } - return proc_synthetic_fd_str(buf, off, sizeof(buf)); + " ref pointer drops\n"); + proc_net_for_each_socket(proc_net_inet_visit, &ctx); + return proc_synthetic_fd_str(buf, ctx.off, sizeof(buf)); } if (!strcmp(path, "/proc/net/unix")) { char buf[8192]; - int off = snprintf(buf, sizeof(buf), - "Num RefCount Protocol Flags Type St " - "Inode Path\n"); - - pid_t pids[PROC_TABLE_SIZE + 1]; - pids[0] = getpid(); - int npids = 1 + proc_get_child_pids(pids + 1, PROC_TABLE_SIZE); - - for (int p = 0; p < npids && off < (int) sizeof(buf) - 128; p++) { - struct proc_fdinfo fdinfo[512]; - int fdsz = proc_pidinfo(pids[p], PROC_PIDLISTFDS, 0, fdinfo, - sizeof(fdinfo)); - if (fdsz <= 0) - continue; - int nfds = fdsz / (int) PROC_PIDLISTFD_SIZE; - - for (int fi = 0; fi < nfds && off < (int) sizeof(buf) - 128; fi++) { - if (fdinfo[fi].proc_fdtype != PROX_FDTYPE_SOCKET) - continue; - struct socket_fdinfo sinfo; - int sz = - proc_pidfdinfo(pids[p], fdinfo[fi].proc_fd, - PROC_PIDFDSOCKETINFO, &sinfo, sizeof(sinfo)); - if (sz < (int) sizeof(sinfo)) - continue; - if (sinfo.psi.soi_family != AF_UNIX) - continue; - int stype = sinfo.psi.soi_type; - int lt = (stype == SOCK_STREAM) ? 1 - : (stype == SOCK_DGRAM) ? 2 - : (stype == SOCK_SEQPACKET) ? 
5 - : 1; - /* Unix socket path from soi_proto.pri_un.unsi_addr */ - const char *spath = - sinfo.psi.soi_proto.pri_un.unsi_addr.ua_sun.sun_path; - off += - snprintf(buf + off, sizeof(buf) - off, - "%016X: %08X %08X %08X %04X %02X %5d %s\n", 0, 3, - 0, 0, lt, 3, 10000 + fi, spath[0] ? spath : ""); - } - } - return proc_synthetic_fd_str(buf, off, sizeof(buf)); + struct proc_net_unix_ctx ctx = { + .buf = buf, + .bufsz = sizeof(buf), + .off = snprintf(buf, sizeof(buf), + "Num RefCount Protocol Flags Type St " + "Inode Path\n"), + }; + proc_net_for_each_socket(proc_net_unix_visit, &ctx); + return proc_synthetic_fd_str(buf, ctx.off, sizeof(buf)); } /* /proc/sys/vm/mmap_min_addr -> synthetic mmap minimum address. */ - if (!strcmp(path, "/proc/sys/vm/mmap_min_addr")) { - const char *data = "32768\n"; - return proc_synthetic_fd(data, strlen(data)); - } + if (!strcmp(path, "/proc/sys/vm/mmap_min_addr")) + return proc_emit_literal("32768\n"); /* /proc/sys/kernel/randomize_va_space -> ASLR enabled (full). 
*/ - if (!strcmp(path, "/proc/sys/kernel/randomize_va_space")) { - const char *data = "2\n"; - return proc_synthetic_fd(data, strlen(data)); - } + if (!strcmp(path, "/proc/sys/kernel/randomize_va_space")) + return proc_emit_literal("2\n"); /* /proc/version -> synthetic kernel version string */ if (!strcmp(path, "/proc/version")) { - char buf[256]; - int len = snprintf(buf, sizeof(buf), - "Linux version 6.17.0-20-generic " - "(buildd@bos03-arm64-051) " - "(aarch64-linux-gnu-gcc (Ubuntu 15.2.0-4ubuntu4) " - "15.2.0, GNU ld (GNU Binutils for Ubuntu) 2.45) " - "#20-Ubuntu SMP PREEMPT_DYNAMIC\n"); - return proc_synthetic_fd_str(buf, len, sizeof(buf)); + return proc_emit_literal( + "Linux version 6.17.0-20-generic " + "(buildd@bos03-arm64-051) " + "(aarch64-linux-gnu-gcc (Ubuntu 15.2.0-4ubuntu4) " + "15.2.0, GNU ld (GNU Binutils for Ubuntu) 2.45) " + "#20-Ubuntu SMP PREEMPT_DYNAMIC\n"); } /* /proc/filesystems -> supported filesystem types */ if (!strcmp(path, "/proc/filesystems")) { - const char *data = + return proc_emit_literal( "\tmpfs\n" "\tproc\n" "\tsysfs\n" "\tdevtmpfs\n" "\text4\n" - "\tvfat\n"; - return proc_synthetic_fd(data, strlen(data)); + "\tvfat\n"); } /* /proc/self/mountinfo -> Linux mountinfo format (different from @@ -1269,121 +1672,133 @@ int proc_intercept_open(const guest_t *g, * - type source super_options */ if (!strcmp(path, "/proc/self/mountinfo")) { - char buf[1024]; - int len = - snprintf(buf, sizeof(buf), - "1 0 0:1 / / rw,relatime - ext4 /dev/root rw\n" - "2 1 0:2 / /proc rw,nosuid,nodev,noexec - proc proc rw\n" - "3 1 0:3 / /tmp rw,nosuid,nodev - tmpfs tmpfs rw\n" - "4 1 0:4 / /dev rw,nosuid - devtmpfs devtmpfs rw\n" - "5 4 0:5 / /dev/shm rw,nosuid,nodev - tmpfs tmpfs rw\n"); - return proc_synthetic_fd_str(buf, len, sizeof(buf)); + return proc_emit_literal( + "1 0 0:1 / / rw,relatime - ext4 /dev/root rw\n" + "2 1 0:2 / /proc rw,nosuid,nodev,noexec - proc proc rw\n" + "3 1 0:3 / /tmp rw,nosuid,nodev - tmpfs tmpfs rw\n" + "4 1 0:4 / /dev 
rw,nosuid - devtmpfs devtmpfs rw\n" + "5 4 0:5 / /dev/shm rw,nosuid,nodev - tmpfs tmpfs rw\n"); } /* /proc/mounts, /etc/mtab -> synthetic mount table */ if (!strcmp(path, "/proc/mounts") || !strcmp(path, "/proc/self/mounts") || !strcmp(path, "/etc/mtab")) { - char buf[512]; - int len = snprintf(buf, sizeof(buf), - "/ / ext4 rw,relatime 0 0\n" - "proc /proc proc rw,nosuid,nodev,noexec 0 0\n" - "tmpfs /tmp tmpfs rw,nosuid,nodev 0 0\n" - "devtmpfs /dev devtmpfs rw,nosuid 0 0\n" - "tmpfs /dev/shm tmpfs rw,nosuid,nodev 0 0\n"); - return proc_synthetic_fd_str(buf, len, sizeof(buf)); - } - - /* /proc/self/oom_score_adj -> writable stub. - * Containers and systemd write this; accept writes and return - * last-written value (default 0). + return proc_emit_literal( + "/ / ext4 rw,relatime 0 0\n" + "proc /proc proc rw,nosuid,nodev,noexec 0 0\n" + "tmpfs /tmp tmpfs rw,nosuid,nodev 0 0\n" + "devtmpfs /dev devtmpfs rw,nosuid 0 0\n" + "tmpfs /dev/shm tmpfs rw,nosuid,nodev 0 0\n"); + } + + /* OOM nodes share one stored adjustment. + * oom_score_adj: returns the raw adjustment in [-1000, 1000]. + * oom_adj: legacy view, scaled into [-17, 15] for compatibility. + * oom_score: stub computed score, currently a fixed 0. */ - if (proc_is_oom_path(path)) { - int val = atomic_load(&oom_score_adj_value); + if (oom_kind != OOM_PATH_NONE) { char buf[32]; - int len = snprintf(buf, sizeof(buf), "%d\n", val); - return proc_synthetic_fd_str(buf, len, sizeof(buf)); + int len = proc_oom_format_value(oom_kind, buf, sizeof(buf)); + return proc_synthetic_fd(buf, (size_t) len); } - /* /proc/self/fdinfo/ -> per-fd flags/pos/mnt_id */ + /* /proc/self/fdinfo/ -> per-fd flags/pos/mnt_id plus type-specific + * fields for fds where Linux exposes additional state (eventfd counter, + * signalfd mask, timerfd settings). 
+ */ if (!strncmp(path, "/proc/self/fdinfo/", 18)) { - char *endptr; - long n = strtol(path + 18, &endptr, 10); - if (endptr == path + 18 || *endptr != '\0' || n < 0 || - n >= FD_TABLE_SIZE) { - errno = ENOENT; + int n = proc_parse_fd_index(path, 18, ENOENT); + if (n < 0) return -1; - } fd_entry_t snap; - if (!fd_snapshot((int) n, &snap)) { + if (!fd_snapshot(n, &snap)) { errno = ENOENT; return -1; } - off_t pos = 0; - int host_fd = fd_to_host((int) n); - if (host_fd >= 0) - pos = lseek(host_fd, 0, SEEK_CUR); - if (pos < 0) - pos = 0; - int flags = snap.linux_flags; - char buf[256]; - int len = snprintf(buf, sizeof(buf), - "pos:\t%lld\n" - "flags:\t0%o\n" - "mnt_id:\t0\n", - (long long) pos, flags); - return proc_synthetic_fd_str(buf, len, sizeof(buf)); - } - - /* /proc/self/fdinfo -> directory listing via persistent temp dir (macOS - * getdents needs real directory entries). - */ - if (!strcmp(path, "/proc/self/fdinfo") || - !strcmp(path, "/proc/self/fdinfo/")) { - static char fdinfodir[128]; - static pthread_mutex_t fdinfodir_lock = PTHREAD_MUTEX_INITIALIZER; - pthread_mutex_lock(&fdinfodir_lock); - if (proc_lazy_mkdtemp(fdinfodir, sizeof(fdinfodir), - "/tmp/elfuse-fdinfo-XXXXXX") < 0) { - pthread_mutex_unlock(&fdinfodir_lock); - return -1; + /* fd_to_host_dup atomically duplicates under fd_lock so a concurrent + * close+reopen on another vCPU cannot redirect the lseek to an + * unrelated host fd that took the freed slot. The probe pollutes + * errno with ESPIPE on non-seekable fds (sockets, pipes), so save + * and restore around the call to keep the caller's view clean. 
+ */ + off_t pos = 0; + int dup_fd = fd_to_host_dup(n); + if (dup_fd >= 0) { + int saved_errno = errno; + off_t probe = lseek(dup_fd, 0, SEEK_CUR); + if (probe >= 0) + pos = probe; + errno = saved_errno; + close(dup_fd); } - for (int i = 0; i < FD_TABLE_SIZE; i++) { - char entry[192]; - snprintf(entry, sizeof(entry), "%s/%d", fdinfodir, i); - fd_entry_t snap; - if (fd_snapshot(i, &snap)) { - int tfd = open(entry, O_CREAT | O_WRONLY, 0444); - if (tfd >= 0) - close(tfd); - } else { - unlink(entry); + char extra[160]; + extra[0] = '\0'; + if (snap.type == FD_EVENTFD) { + uint64_t count; + /* fs/eventfd.c uses a single space after the colon, matching + * the timerfd convention (and unlike pos:/flags:/mnt_id: in + * fs/proc/fd.c which use tabs). */ + if (eventfd_fdinfo_snapshot(n, &count)) + snprintf(extra, sizeof(extra), "eventfd-count: %16llx\n", + (unsigned long long) count); + } else if (snap.type == FD_SIGNALFD) { + uint64_t mask; + /* fs/signalfd.c uses a tab after the colon (matching the + * pos:/flags:/mnt_id: convention in fs/proc/fd.c, not the + * single-space style of eventfd/timerfd). Verified against a + * real Linux 6.x /proc/self/fdinfo dump. */ + if (signalfd_fdinfo_snapshot(n, &mask)) + snprintf(extra, sizeof(extra), "sigmask:\t%016llx\n", + (unsigned long long) mask); + } else if (snap.type == FD_TIMERFD) { + int clockid; + uint64_t ticks; + int64_t value_ns, interval_ns; + if (timerfd_fdinfo_snapshot(n, &clockid, &ticks, &value_ns, + &interval_ns)) { + /* Linux fs/timerfd.c emits these fields with single + * spaces after the colon, not tabs (unlike pos:/flags:/ + * mnt_id: in fs/proc/fd.c, which do use tabs). Match the + * upstream format so guest readers parsing fdinfo via a + * "it_value: (" prefix find the field. 
*/ + snprintf(extra, sizeof(extra), + "clockid: %d\n" + "ticks: %llu\n" + "settime flags: 0\n" + "it_value: (%lld, %lld)\n" + "it_interval: (%lld, %lld)\n", + clockid, (unsigned long long) ticks, + (long long) (value_ns / 1000000000LL), + (long long) (value_ns % 1000000000LL), + (long long) (interval_ns / 1000000000LL), + (long long) (interval_ns % 1000000000LL)); } } - int fd = proc_open_dir_fd(fdinfodir, linux_flags); - pthread_mutex_unlock(&fdinfodir_lock); - return fd >= 0 ? fd : -1; + return proc_emit_fmt( + "pos:\t%lld\n" + "flags:\t0%o\n" + "mnt_id:\t0\n" + "%s", + (long long) pos, snap.linux_flags, extra); } - /* /proc/self/fd/N -> open the target of the fd (readlink-style) */ - if (!strncmp(path, "/proc/self/fd/", 14)) { - char *endptr; - long n = strtol(path + 14, &endptr, 10); - if (endptr == path + 14 || *endptr != '\0' || n < 0 || - n >= FD_TABLE_SIZE) { - errno = EBADF; - return -1; - } - int host_fd = fd_to_host((int) n); - if (host_fd < 0) { - errno = EBADF; - return -1; - } - return dup(host_fd); + /* /proc/self/fdinfo -> directory listing. Each open gets its own scratch + * dir so concurrent getdents on independent dirfds cannot interfere + * (the previous shared-dir design unlinked entries under a sibling + * enumerator). The dirs are tracked for atexit cleanup. 
+ */ + if (!strcmp(path, "/proc/self/fdinfo") || + !strcmp(path, "/proc/self/fdinfo/")) { + return proc_open_fd_scratch("elfuse-fdinfo", linux_flags); } + /* /proc/self/fd/N -> open the target of the fd (readlink-style) */ + if (!strncmp(path, "/proc/self/fd/", 14)) + return dev_fd_dup(path, 14); + /* /proc/meminfo -> synthetic memory info from host vm_statistics */ if (!strcmp(path, "/proc/meminfo")) { int64_t physmem = 0; @@ -1420,9 +1835,7 @@ int proc_intercept_open(const guest_t *g, buffers_kb = total_kb / 20; cached_kb = total_kb / 4; } - char buf[2048]; - int len = snprintf( - buf, sizeof(buf), + return proc_emit_fmt( "MemTotal: %llu kB\n" "MemFree: %llu kB\n" "MemAvailable: %llu kB\n" @@ -1456,7 +1869,6 @@ int proc_intercept_open(const guest_t *g, (unsigned long long) (total_kb - free_kb - cached_kb - buffers_kb), (unsigned long long) (cached_kb / 2), (unsigned long long) (total_kb / 2)); - return proc_synthetic_fd_str(buf, len, sizeof(buf)); } /* /proc/self/io -> synthetic I/O counters. @@ -1465,15 +1877,14 @@ int proc_intercept_open(const guest_t *g, * it does not track per-guest I/O. */ if (!strcmp(path, "/proc/self/io")) { - static const char data[] = + return proc_emit_literal( "rchar: 0\n" "wchar: 0\n" "syscr: 0\n" "syscw: 0\n" "read_bytes: 0\n" "write_bytes: 0\n" - "cancelled_write_bytes: 0\n"; - return proc_synthetic_fd(data, sizeof(data) - 1); + "cancelled_write_bytes: 0\n"); } /* /proc/self/stat -> single-line process stat (man 5 proc). @@ -1505,33 +1916,24 @@ int proc_intercept_open(const guest_t *g, rss_pages += sz / (uint64_t) page_size; } - const char *exe = proc_get_elf_path(); - const char *comm = "elfuse"; - if (exe) { - const char *slash = strrchr(exe, '/'); - comm = slash ? 
slash + 1 : exe; - } - - char buf[1024]; /* Fields: pid(1) (comm)(2) state(3) ppid(4) pgrp(5) session(6) * tty_nr(7) tpgid(8) flags(9) minflt(10) cminflt(11) majflt(12) * cmajflt(13) utime(14) stime(15) cutime(16) cstime(17) * priority(18) nice(19) num_threads(20) itrealvalue(21) * starttime(22) vsize(23) rss(24) rsslim(25) ... (52 fields total) */ - int len = snprintf( - buf, sizeof(buf), + return proc_emit_fmt( "%lld (%.15s) R %lld %lld %lld 0 -1 0 " /* 1-9 */ "0 0 0 0 %ld %ld 0 0 " /* 10-17 */ "20 0 %d 0 0 %llu %llu " /* 18-24 */ "18446744073709551615 0 0 0 0 0 0 " /* 25-31 */ "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", /* 32-52 */ - (long long) proc_get_pid(), comm, (long long) proc_get_ppid(), + (long long) proc_get_pid(), proc_comm_name(), + (long long) proc_get_ppid(), (long long) proc_get_pid(), /* pgrp = pid */ (long long) proc_get_pid(), /* session = pid */ utime_ticks, stime_ticks, thread_active_count(), (unsigned long long) vsize, (unsigned long long) rss_pages); - return proc_synthetic_fd_str(buf, len, sizeof(buf)); } /* /proc/stat -> synthetic CPU statistics */ @@ -1569,21 +1971,17 @@ int proc_intercept_open(const guest_t *g, /* /etc/passwd -> synthetic passwd with root + current user */ if (!strcmp(path, "/etc/passwd")) { - char buf[512]; - int len = snprintf(buf, sizeof(buf), - "root:x:0:0:root:/root:/bin/sh\n" - "user:x:1000:1000:user:/home/user:/bin/sh\n"); - return proc_synthetic_fd_str(buf, len, sizeof(buf)); + return proc_emit_literal( + "root:x:0:0:root:/root:/bin/sh\n" + "user:x:1000:1000:user:/home/user:/bin/sh\n"); } /* /etc/group -> synthetic group file */ if (!strcmp(path, "/etc/group")) { - char buf[512]; - int len = snprintf(buf, sizeof(buf), - "root:x:0:\n" - "staff:x:20:\n" - "user:x:1000:\n"); - return proc_synthetic_fd_str(buf, len, sizeof(buf)); + return proc_emit_literal( + "root:x:0:\n" + "staff:x:20:\n" + "user:x:1000:\n"); } return PROC_NOT_INTERCEPTED; @@ -1602,35 +2000,20 @@ int proc_intercept_stat(const char *path, struct 
stat *st) */ /* /dev/shm is a directory */ if (!strcmp(path, "/dev/shm") || !strcmp(path, "/dev/shm/")) { - memset(st, 0, sizeof(*st)); - st->st_mode = S_IFDIR | 01777; /* sticky bit, like real /dev/shm */ - st->st_nlink = 2; + stat_fill_proc_dir(st, 01777, 2); /* sticky bit, like real /dev/shm */ return 0; } /* /dev/shm/ files: check the host temp dir */ if (!strncmp(path, "/dev/shm/", 9)) { - const char *shm = shm_dir_path(); - if (!shm) - return -1; - const char *suffix = path + 9; - if (strstr(suffix, "..") || strchr(suffix, '/') || suffix[0] == '\0') { - errno = EACCES; - return -1; - } char host_path[512]; - int n = snprintf(host_path, sizeof(host_path), "%s/%s", shm, suffix); - if (n < 0 || (size_t) n >= sizeof(host_path)) { - errno = ENAMETOOLONG; + if (dev_shm_resolve_path(path + 9, host_path, sizeof(host_path)) < 0) return -1; - } return stat(host_path, st); } /* /proc and /proc/ are directories */ if (!strcmp(path, "/proc") || !strcmp(path, "/proc/")) { - memset(st, 0, sizeof(*st)); - st->st_mode = S_IFDIR | 0555; - st->st_nlink = 3; + stat_fill_proc_dir(st, 0555, 3); return 0; } { @@ -1641,29 +2024,28 @@ int proc_intercept_stat(const char *path, struct stat *st) (long long) proc_get_pid()); if (!strcmp(path, pidbuf) || !strcmp(path, pidslash) || !strcmp(path, "/proc/self") || !strcmp(path, "/proc/self/")) { - memset(st, 0, sizeof(*st)); - st->st_mode = S_IFDIR | 0555; - st->st_nlink = 3; + stat_fill_proc_dir(st, 0555, 3); return 0; } } + if (!strcmp(path, "/proc/net") || !strcmp(path, "/proc/net/")) { + stat_fill_proc_dir(st, 0555, 2); + return 0; + } - /* /proc// -> treat as /proc/self/ */ - if (!strncmp(path, "/proc/", 6)) { - char *endp; - long pid = strtol(path + 6, &endp, 10); - if (endp != path + 6 && pid == (long) proc_get_pid() && *endp == '/') { - char alias[LINUX_PATH_MAX]; - snprintf(alias, sizeof(alias), "/proc/self%s", endp); + /* /proc/[/...] -> /proc/self[...]. 
*/ + { + char alias[LINUX_PATH_MAX]; + int aliased = proc_alias_self(path, alias, sizeof(alias)); + if (aliased < 0) + return -1; + if (aliased > 0) return proc_intercept_stat(alias, st); - } } /* /proc/self/task and /proc/self/task/ are directories */ if (!strcmp(path, "/proc/self/task") || !strcmp(path, "/proc/self/task/")) { - memset(st, 0, sizeof(*st)); - st->st_mode = S_IFDIR | 0555; - st->st_nlink = 2 + thread_active_count(); + stat_fill_proc_dir(st, 0555, 2 + (nlink_t) thread_active_count()); return 0; } if (!strncmp(path, "/proc/self/task/", 16)) { @@ -1675,68 +2057,41 @@ int proc_intercept_stat(const char *path, struct stat *st) return -1; } if (*endp == '\0' || !strcmp(endp, "/")) { - /* /proc/self/task/ directory */ - memset(st, 0, sizeof(*st)); - st->st_mode = S_IFDIR | 0555; - st->st_nlink = 2; + stat_fill_proc_dir(st, 0555, 2); return 0; } if (!strcmp(endp, "/stat") || !strcmp(endp, "/status")) { - memset(st, 0, sizeof(*st)); - st->st_mode = S_IFREG | 0444; - st->st_nlink = 1; - st->st_size = 256; - st->st_blksize = 4096; - st->st_blocks = 1; + stat_fill_proc_file(st, 0444); return 0; } } } - if (proc_is_oom_path(path)) { - memset(st, 0, sizeof(*st)); - st->st_mode = S_IFREG | 0644; - st->st_nlink = 1; - st->st_size = 2; - st->st_blksize = 4096; - st->st_blocks = 1; - return 0; + { + int kind = proc_oom_path_kind(path); + if (kind != OOM_PATH_NONE) { + stat_fill_proc_file(st, (kind == OOM_PATH_SCORE) ? 
0444 : 0644); + return 0; + } } if (!strcmp(path, "/proc/self/fdinfo") || - !strcmp(path, "/proc/self/fdinfo/")) { - memset(st, 0, sizeof(*st)); - st->st_mode = S_IFDIR | 0555; - st->st_nlink = 2; - return 0; - } - - if (!strcmp(path, "/proc/self/fd") || !strcmp(path, "/proc/self/fd/")) { - memset(st, 0, sizeof(*st)); - st->st_mode = S_IFDIR | 0555; - st->st_nlink = 2; + !strcmp(path, "/proc/self/fdinfo/") || !strcmp(path, "/proc/self/fd") || + !strcmp(path, "/proc/self/fd/")) { + stat_fill_proc_dir(st, 0555, 2); return 0; } if (!strncmp(path, "/proc/self/fdinfo/", 18)) { - char *endp; - long fd = strtol(path + 18, &endp, 10); - if (endp == path + 18 || *endp != '\0' || fd < 0 || - fd >= FD_TABLE_SIZE) { - errno = ENOENT; + int fd = proc_parse_fd_index(path, 18, ENOENT); + if (fd < 0) return -1; - } fd_entry_t snap; - if (!fd_snapshot((int) fd, &snap)) { + if (!fd_snapshot(fd, &snap)) { errno = ENOENT; return -1; } - memset(st, 0, sizeof(*st)); - st->st_mode = S_IFREG | 0444; - st->st_nlink = 1; - st->st_size = 32; - st->st_blksize = 4096; - st->st_blocks = 1; + stat_fill_proc_file(st, 0444); return 0; } @@ -1772,26 +2127,17 @@ int proc_intercept_stat(const char *path, struct stat *st) for (const char **p = known_proc_files; *p; p++) { if (!strcmp(path, *p)) { - memset(st, 0, sizeof(*st)); - st->st_mode = S_IFREG | 0444; /* Regular file, read-only */ - st->st_nlink = 1; - st->st_size = 256; /* Approximate; exact value not critical */ - st->st_blksize = 4096; - st->st_blocks = 1; + stat_fill_proc_file(st, 0444); return 0; } } /* /proc/self/fd/N: stat the underlying host fd */ if (!strncmp(path, "/proc/self/fd/", 14)) { - char *endptr; - long n = strtol(path + 14, &endptr, 10); - if (endptr == path + 14 || *endptr != '\0' || n < 0 || - n >= FD_TABLE_SIZE) { - errno = EBADF; + int n = proc_parse_fd_index(path, 14, EBADF); + if (n < 0) return -1; - } - int host_fd = fd_to_host((int) n); + int host_fd = fd_to_host(n); if (host_fd < 0) { errno = EBADF; return -1; @@ 
-1806,6 +2152,15 @@ int proc_intercept_stat(const char *path, struct stat *st) int proc_intercept_readlink(const char *path, char *buf, size_t bufsiz) { + { + char alias[LINUX_PATH_MAX]; + int aliased = proc_alias_self(path, alias, sizeof(alias)); + if (aliased < 0) + return -1; + if (aliased > 0) + return proc_intercept_readlink(alias, buf, bufsiz); + } + /* /proc/self/exe -> path of current ELF binary */ if (!strcmp(path, "/proc/self/exe")) { const char *exe = proc_get_elf_path(); @@ -1863,6 +2218,72 @@ int proc_intercept_readlink(const char *path, char *buf, size_t bufsiz) return PROC_NOT_INTERCEPTED; } +int proc_intercept_read(int guest_fd, + void *buf, + size_t count, + int64_t offset, + ssize_t *read_out) +{ + fd_entry_t snap; + if (!fd_snapshot(guest_fd, &snap)) + return 0; + + int kind = proc_oom_path_kind(snap.proc_path); + if (kind == OOM_PATH_NONE) + return 0; + + /* Recompute from the shared atomic on every read so lseek(0)+read on an + * already-open fd sees updates written through oom_score_adj or oom_adj. 
+ */ + char text[32]; + int len = proc_oom_format_value(kind, text, sizeof(text)); + return proc_oom_copy_slice(buf, count, offset, text, (size_t) len, + read_out); +} + +int proc_intercept_readv(int guest_fd, + const struct iovec *iov, + int iovcnt, + int64_t offset, + ssize_t *read_out) +{ + fd_entry_t snap; + if (!fd_snapshot(guest_fd, &snap)) + return 0; + + int kind = proc_oom_path_kind(snap.proc_path); + if (kind == OOM_PATH_NONE) + return 0; + if (offset < 0) { + errno = EINVAL; + return -1; + } + + char text[32]; + int len = proc_oom_format_value(kind, text, sizeof(text)); + size_t src_len = (size_t) len; + if ((uint64_t) offset >= src_len) { + *read_out = 0; + return 1; + } + + size_t src_off = (size_t) offset; + ssize_t total = 0; + for (int i = 0; i < iovcnt && src_off < src_len; i++) { + size_t n = iov[i].iov_len; + if (n > src_len - src_off) + n = src_len - src_off; + if (n == 0) + continue; + memcpy(iov[i].iov_base, text + src_off, n); + src_off += n; + total += (ssize_t) n; + } + + *read_out = total; + return 1; +} + int proc_intercept_write(int guest_fd, int host_fd, const void *buf, @@ -1874,33 +2295,72 @@ int proc_intercept_write(int guest_fd, fd_entry_t snap; if (!fd_snapshot(guest_fd, &snap)) return 0; - if (!proc_is_oom_writable(snap.proc_path)) + int kind = proc_oom_path_kind(snap.proc_path); + if (kind == OOM_PATH_SCORE) { + /* Linux: oom_score has no write handler. proc_reg_write returns + * -EIO when the underlying proc_dir_entry exposes no write op, + * not -EINVAL. Match that so guests probing the error code see + * the same value as on a real kernel. */ + errno = EIO; + return -1; + } + if (kind != OOM_PATH_SCORE_ADJ && kind != OOM_PATH_ADJ) return 0; + /* Linux: zero-byte writes to proc nodes succeed without side effects. + * Without this short-circuit, sys_writev would funnel a zero-length + * vector through proc_parse_int_write and get -EINVAL. 
+ */ + if (count == 0) { + *written_out = 0; + return 1; + } + int val; if (proc_parse_int_write(buf, count, &val) < 0) return -1; - if (val < -1000 || val > 1000) { - errno = EINVAL; - return -1; - } - atomic_store(&oom_score_adj_value, val); + int score_adj; + if (kind == OOM_PATH_ADJ) { + if (val < LINUX_OOM_DISABLE || val > LINUX_OOM_ADJUST_MAX) { + errno = EINVAL; + return -1; + } + score_adj = oom_adj_to_score_adj(val); + } else { + if (val < LINUX_OOM_SCORE_ADJ_MIN || val > LINUX_OOM_SCORE_ADJ_MAX) { + errno = EINVAL; + return -1; + } + score_adj = val; + } + /* Both interfaces persist the value the writer supplied: oom_adj keeps the + * legacy [-17,15] number, oom_score_adj keeps the [-1000,1000] number. + * proc_oom_refresh_live_fds_locked re-renders each open fd's backing file + * through proc_oom_format_value, so the kind-specific view stays correct + * across reads. + */ char text[32]; int len = snprintf(text, sizeof(text), "%d\n", val); - if (len < 0) { - errno = EINVAL; - return -1; - } + /* Serialize the backing-fd rewrite so concurrent writers cannot race the + * truncate+pwrite sequence. Publish to the global atomic last so a + * partial-rewrite failure leaves the process-wide value unchanged. 
+ */ + pthread_mutex_lock(&oom_write_lock); + int rc = -1; if (ftruncate(host_fd, 0) < 0) - return -1; + goto unlock; if (pwrite(host_fd, text, (size_t) len, 0) != len) - return -1; + goto unlock; if (!use_pwrite && lseek(host_fd, offset + (int64_t) count, SEEK_SET) < 0) - return -1; - + goto unlock; + atomic_store(&oom_score_adj_value, score_adj); + proc_oom_refresh_live_fds_locked(); *written_out = (ssize_t) count; - return 1; + rc = 1; +unlock: + pthread_mutex_unlock(&oom_write_lock); + return rc; } diff --git a/src/runtime/procemu.h b/src/runtime/procemu.h index de5058a..ae52de2 100644 --- a/src/runtime/procemu.h +++ b/src/runtime/procemu.h @@ -12,6 +12,7 @@ #include #include +#include #include "core/guest.h" /* Sentinel return value: path was not intercepted, caller should fall through @@ -53,6 +54,24 @@ int proc_intercept_write(int guest_fd, int use_pwrite, ssize_t *written_out); +/* Intercept reads from synthetic proc files that must reflect shared state on + * every read rather than the per-open temp-file snapshot. + * Returns 1 if handled (with *read_out set), 0 if not intercepted, or -1 on + * error with errno set. + */ +int proc_intercept_read(int guest_fd, + void *buf, + size_t count, + int64_t offset, + ssize_t *read_out); + +/* Vector form of proc_intercept_read for readv/preadv. */ +int proc_intercept_readv(int guest_fd, + const struct iovec *iov, + int iovcnt, + int64_t offset, + ssize_t *read_out); + /* Get the /dev/shm emulation directory path (creating it on first call). * Used by sys_unlinkat to rewrite /dev/shm/ paths. */ diff --git a/src/syscall/fd.c b/src/syscall/fd.c index ebc2d95..3903e9b 100644 --- a/src/syscall/fd.c +++ b/src/syscall/fd.c @@ -116,6 +116,53 @@ static int timerfd_alloc(void) return sfd_alloc_slot(timerfd_state, TIMERFD_MAX, sizeof(timerfd_state[0])); } +/* Called with sfd_lock held. Drain any kevent expirations sitting on the + * timer's kqueue and fold them into the slot's accumulator. 
Used by + * timerfd_read before consuming the counter and by timerfd_fdinfo_snapshot + * before reporting it; without this drain, fdinfo would lag the actual + * fire count by however many ticks were pending in the kqueue. + */ +static void timerfd_drain_pending_locked(int slot) +{ + int kq = timerfd_state[slot].kq_fd; + struct kevent kev; + struct timespec ts_zero = {0, 0}; + int nev = kevent(kq, NULL, 0, &kev, 1, &ts_zero); + if (nev > 0) { + uint64_t fires = (uint64_t) kev.data; + if (fires == 0) + fires = 1; /* At least one expiration */ + timerfd_state[slot].expirations += fires; + } +} + +/* Called with sfd_lock held. Returns nanoseconds until the next expiration, + * or 0 when the timer is disarmed or a one-shot timer has already expired. + */ +static int64_t timerfd_remaining_ns_locked(int slot, int64_t now_ns) +{ + if (!timerfd_state[slot].armed) + return 0; + + int64_t elapsed = now_ns - timerfd_state[slot].arm_time_ns; + if (elapsed < 0) + elapsed = 0; + + if (timerfd_state[slot].interval_ns > 0) { + int64_t total = timerfd_state[slot].initial_ns; + if (elapsed >= total) { + int64_t since_first = elapsed - total; + int64_t interval = timerfd_state[slot].interval_ns; + int64_t remaining = interval - (since_first % interval); + return remaining == 0 ? interval : remaining; + } + return total - elapsed; + } + + int64_t remaining = timerfd_state[slot].initial_ns - elapsed; + return remaining > 0 ? 
remaining : 0; +} + int64_t sys_timerfd_create(int clockid, int flags) { if (clockid != LINUX_CLOCK_REALTIME && clockid != LINUX_CLOCK_MONOTONIC) @@ -203,8 +250,7 @@ int64_t sys_timerfd_settime(guest_t *g, struct timespec now; clock_gettime(CLOCK_MONOTONIC, &now); int64_t now_ns = now.tv_sec * NS_PER_SEC + now.tv_nsec; - int64_t elapsed = now_ns - timerfd_state[slot].arm_time_ns; - int64_t remaining = timerfd_state[slot].initial_ns - elapsed; + int64_t remaining = timerfd_remaining_ns_locked(slot, now_ns); if (remaining > 0) { old.it_value_sec = remaining / NS_PER_SEC; old.it_value_nsec = remaining % NS_PER_SEC; @@ -319,27 +365,10 @@ int64_t sys_timerfd_gettime(guest_t *g, int fd, uint64_t curr_value_gva) its.it_interval_sec = timerfd_state[slot].interval_ns / NS_PER_SEC; its.it_interval_nsec = timerfd_state[slot].interval_ns % NS_PER_SEC; - /* Compute actual remaining time from arm time + initial value */ struct timespec now; clock_gettime(CLOCK_MONOTONIC, &now); int64_t now_ns = now.tv_sec * NS_PER_SEC + now.tv_nsec; - int64_t elapsed = now_ns - timerfd_state[slot].arm_time_ns; - int64_t remaining; - - if (timerfd_state[slot].interval_ns > 0) { - /* Repeating timer: remaining = interval - (elapsed % interval) */ - int64_t total = timerfd_state[slot].initial_ns; - if (elapsed >= total) { - int64_t since_first = elapsed - total; - remaining = timerfd_state[slot].interval_ns - - (since_first % timerfd_state[slot].interval_ns); - } else { - remaining = total - elapsed; - } - } else { - /* One-shot: remaining = initial - elapsed */ - remaining = timerfd_state[slot].initial_ns - elapsed; - } + int64_t remaining = timerfd_remaining_ns_locked(slot, now_ns); if (remaining <= 0) { /* Timer already expired (one-shot) */ @@ -374,18 +403,8 @@ int64_t timerfd_read(int guest_fd, guest_t *g, uint64_t buf_gva, uint64_t count) int kq = timerfd_state[slot].kq_fd; - /* Collect pending timer events via kevent(). 
The data field contains - * the number of times the timer fired since the last kevent() call. - */ - struct kevent kev; - struct timespec ts_zero = {0, 0}; - int nev = kevent(kq, NULL, 0, &kev, 1, &ts_zero); - if (nev > 0) { - uint64_t fires = (uint64_t) kev.data; - if (fires == 0) - fires = 1; /* At least one expiration */ - timerfd_state[slot].expirations += fires; - } + /* Collect pending timer events into the slot's accumulator. */ + timerfd_drain_pending_locked(slot); if (timerfd_state[slot].expirations == 0) { /* No events yet; check if non-blocking */ @@ -408,8 +427,9 @@ int64_t timerfd_read(int guest_fd, guest_t *g, uint64_t buf_gva, uint64_t count) * kevent() returns EBADF in that case, and the code re-validates the * slot. */ + struct kevent kev; pthread_mutex_unlock(&sfd_lock); - nev = kevent(kq, NULL, 0, &kev, 1, NULL); + int nev = kevent(kq, NULL, 0, &kev, 1, NULL); pthread_mutex_lock(&sfd_lock); /* Re-validate: slot may have been freed by timerfd_close() */ if (timerfd_state[slot].guest_fd != guest_fd) { @@ -1073,3 +1093,67 @@ void signalfd_notify(int signum) } pthread_mutex_unlock(&sfd_lock); } + +/* /proc/self/fdinfo type-specific snapshots. Each takes sfd_lock to prevent + * tearing across concurrent read/write/settime; lock order is fd_lock(3) + * -> sfd_lock(5a), and these accessors take only sfd_lock so the procemu + * caller is free to drop fd_lock between fd_snapshot and the lookup here. 
+ */ + +bool eventfd_fdinfo_snapshot(int guest_fd, uint64_t *count_out) +{ + pthread_mutex_lock(&sfd_lock); + int slot = eventfd_find(guest_fd); + if (slot < 0) { + pthread_mutex_unlock(&sfd_lock); + return false; + } + *count_out = eventfd_state[slot].counter; + pthread_mutex_unlock(&sfd_lock); + return true; +} + +bool signalfd_fdinfo_snapshot(int guest_fd, uint64_t *mask_out) +{ + pthread_mutex_lock(&sfd_lock); + int slot = signalfd_find(guest_fd); + if (slot < 0) { + pthread_mutex_unlock(&sfd_lock); + return false; + } + *mask_out = signalfd_state[slot].mask; + pthread_mutex_unlock(&sfd_lock); + return true; +} + +bool timerfd_fdinfo_snapshot(int guest_fd, + int *clockid_out, + uint64_t *ticks_out, + int64_t *value_ns_out, + int64_t *interval_ns_out) +{ + pthread_mutex_lock(&sfd_lock); + int slot = timerfd_find(guest_fd); + if (slot < 0) { + pthread_mutex_unlock(&sfd_lock); + return false; + } + /* Fold any pending kqueue fires into expirations before exporting, + * matching what timerfd_read does. Without this, fdinfo lags by + * however many ticks were sitting on the kqueue. + */ + timerfd_drain_pending_locked(slot); + *clockid_out = timerfd_state[slot].clockid; + *ticks_out = timerfd_state[slot].expirations; + *interval_ns_out = timerfd_state[slot].interval_ns; + int64_t value_ns = 0; + if (timerfd_state[slot].armed) { + struct timespec now; + clock_gettime(CLOCK_MONOTONIC, &now); + int64_t now_ns = (int64_t) now.tv_sec * NS_PER_SEC + now.tv_nsec; + value_ns = timerfd_remaining_ns_locked(slot, now_ns); + } + *value_ns_out = value_ns; + pthread_mutex_unlock(&sfd_lock); + return true; +} diff --git a/src/syscall/fd.h b/src/syscall/fd.h index 60bad04..e087ed4 100644 --- a/src/syscall/fd.h +++ b/src/syscall/fd.h @@ -66,3 +66,15 @@ int64_t timerfd_read(int guest_fd, * writes a byte to make poll/epoll see readability. */ void signalfd_notify(int signum); + +/* Snapshot per-fd state for /proc/self/fdinfo. 
Each accessor returns true when + * the guest_fd refers to a live instance of that special-fd type. The values + * are read under sfd_lock so concurrent read/write/settime cannot tear them. + */ +bool eventfd_fdinfo_snapshot(int guest_fd, uint64_t *count_out); +bool signalfd_fdinfo_snapshot(int guest_fd, uint64_t *mask_out); +bool timerfd_fdinfo_snapshot(int guest_fd, + int *clockid_out, + uint64_t *ticks_out, + int64_t *value_ns_out, + int64_t *interval_ns_out); diff --git a/src/syscall/fs.c b/src/syscall/fs.c index f7860f5..90a3c97 100644 --- a/src/syscall/fs.c +++ b/src/syscall/fs.c @@ -66,17 +66,15 @@ static const char *proc_virtual_dir_path(const char *path, static const char *proc_stateful_file_path(const char *path) { - if (!path || strncmp(path, "/proc", 5) != 0) + if (!path || strncmp(path, "/proc/", 6) != 0) return NULL; if (!strcmp(path, "/proc/self/oom_score_adj") || - !strcmp(path, "/proc/self/oom_adj")) { + !strcmp(path, "/proc/self/oom_adj") || + !strcmp(path, "/proc/self/oom_score")) { return path; } - if (strncmp(path, "/proc/", 6) != 0) - return NULL; - char *endp; long pid = strtol(path + 6, &endp, 10); if (endp == path + 6 || pid != (long) proc_get_pid()) @@ -86,6 +84,8 @@ static const char *proc_stateful_file_path(const char *path) return "/proc/self/oom_score_adj"; if (!strcmp(endp, "/oom_adj")) return "/proc/self/oom_adj"; + if (!strcmp(endp, "/oom_score")) + return "/proc/self/oom_score"; return NULL; } @@ -117,9 +117,14 @@ static const char *proc_virtual_dir_path(const char *path, virt = "/proc"; } else if (!strcmp(path, "/proc/self") || !strcmp(path, "/proc/self/")) { virt = "/proc/self"; + } else if (!strcmp(path, "/proc/net") || !strcmp(path, "/proc/net/")) { + virt = "/proc/net"; } else if (!strcmp(path, "/proc/self/fd") || !strcmp(path, "/proc/self/fd/")) { virt = "/proc/self/fd"; + } else if (!strcmp(path, "/proc/self/fdinfo") || + !strcmp(path, "/proc/self/fdinfo/")) { + virt = "/proc/self/fdinfo"; } else if (!strcmp(path, 
"/proc/self/task") || !strcmp(path, "/proc/self/task/")) { virt = "/proc/self/task"; @@ -137,6 +142,9 @@ static const char *proc_virtual_dir_path(const char *path, if (endp != path + 6 && pid == (long) proc_get_pid() && (*endp == '\0' || !strcmp(endp, "/"))) { virt = "/proc/self"; + } else if (endp != path + 6 && pid == (long) proc_get_pid() && + (!strcmp(endp, "/fdinfo") || !strcmp(endp, "/fdinfo/"))) { + virt = "/proc/self/fdinfo"; } else if (endp != path + 6 && pid == (long) proc_get_pid() && !strcmp(endp, "/fd")) { virt = "/proc/self/fd"; diff --git a/src/syscall/io.c b/src/syscall/io.c index b938f42..3fa5b13 100644 --- a/src/syscall/io.c +++ b/src/syscall/io.c @@ -479,6 +479,66 @@ static int64_t host_fd_ref_open_regular_io(int guest_fd, host_fd_ref_t *ref) return host_fd_ref_open_io(guest_fd, ref); } +static int64_t proc_try_read_intercept(int fd, + int host_fd, + void *buf, + size_t count, + int64_t offset, + int use_pread) +{ + ssize_t intercepted = 0; + int handled = proc_intercept_read(fd, buf, count, offset, &intercepted); + if (handled < 0) + return linux_errno(); + if (handled > 0) { + if (!use_pread && + lseek(host_fd, offset + (int64_t) intercepted, SEEK_SET) < 0) + return linux_errno(); + return intercepted; + } + return INT64_MIN; +} + +static int64_t proc_try_readv_intercept(int fd, + int host_fd, + const struct iovec *iov, + int iovcnt, + int64_t offset, + int use_pread) +{ + ssize_t intercepted = 0; + int handled = proc_intercept_readv(fd, iov, iovcnt, offset, &intercepted); + if (handled < 0) + return linux_errno(); + if (handled > 0) { + if (!use_pread && + lseek(host_fd, offset + (int64_t) intercepted, SEEK_SET) < 0) + return linux_errno(); + return intercepted; + } + return INT64_MIN; +} + +/* Sendfile/copy_file_range chunk read: route the chunk through proc_intercept + * when the source fd is a synthetic /proc node, otherwise fall through + * (INT64_MIN). 
For the streaming (use_pread=0) variant the input offset is + * irrelevant; the helper queries the live host fd cursor. + */ +static int64_t proc_try_chunk_read_intercept(int fd, + int host_fd, + void *buf, + size_t count, + int64_t offset, + int use_pread) +{ + if (!use_pread) { + offset = lseek(host_fd, 0, SEEK_CUR); + if (offset < 0) + return INT64_MIN; + } + return proc_try_read_intercept(fd, host_fd, buf, count, offset, use_pread); +} + static int64_t proc_try_writev_intercept(int fd, int host_fd, const struct iovec *iov, @@ -613,6 +673,16 @@ int64_t sys_read(guest_t *g, int fd, uint64_t buf_gva, uint64_t count) if (count > avail) count = avail; + off_t offset = lseek(host_ref.fd, 0, SEEK_CUR); + if (offset >= 0) { + int64_t intercepted = + proc_try_read_intercept(fd, host_ref.fd, buf, count, offset, 0); + if (intercepted != INT64_MIN) { + host_fd_ref_close(&host_ref); + return intercepted; + } + } + ssize_t ret = read(host_ref.fd, buf, count); host_fd_ref_close(&host_ref); return ret < 0 ? linux_errno() : ret; @@ -642,6 +712,13 @@ int64_t sys_pread64(guest_t *g, if (count > avail) count = avail; + int64_t intercepted = + proc_try_read_intercept(fd, host_ref.fd, buf, count, offset, 1); + if (intercepted != INT64_MIN) { + host_fd_ref_close(&host_ref); + return intercepted; + } + ssize_t ret = pread(host_ref.fd, buf, count, offset); host_fd_ref_close(&host_ref); return ret < 0 ? linux_errno() : ret; @@ -832,6 +909,17 @@ int64_t sys_readv(guest_t *g, int fd, uint64_t iov_gva, int iovcnt) return err; } + off_t offset = lseek(host_ref.fd, 0, SEEK_CUR); + if (offset >= 0) { + int64_t intercepted = proc_try_readv_intercept( + fd, host_ref.fd, host_iov.iov, iovcnt, offset, 0); + if (intercepted != INT64_MIN) { + host_iov_free(&host_iov); + host_fd_ref_close(&host_ref); + return intercepted; + } + } + ssize_t ret = readv(host_ref.fd, host_iov.iov, iovcnt); int64_t result = ret < 0 ? 
linux_errno() : ret; host_iov_free(&host_iov); @@ -919,6 +1007,14 @@ int64_t sys_preadv(guest_t *g, return err; } + int64_t intercepted = proc_try_readv_intercept( + fd, host_ref.fd, host_iov.iov, iovcnt, offset, 1); + if (intercepted != INT64_MIN) { + host_iov_free(&host_iov); + host_fd_ref_close(&host_ref); + return intercepted; + } + ssize_t ret = preadv(host_ref.fd, host_iov.iov, iovcnt, offset); int64_t result = ret < 0 ? linux_errno() : ret; host_iov_free(&host_iov); @@ -1354,9 +1450,19 @@ int64_t sys_sendfile(guest_t *g, size_t chunk = remaining > sizeof(buf) ? sizeof(buf) : remaining; ssize_t nr; if (offset >= 0) { - nr = pread(in_ref.fd, buf, chunk, offset); + int64_t intercepted = proc_try_chunk_read_intercept( + in_fd, in_ref.fd, buf, chunk, offset, 1); + if (intercepted != INT64_MIN) + nr = intercepted; + else + nr = pread(in_ref.fd, buf, chunk, offset); } else { - nr = read(in_ref.fd, buf, chunk); + int64_t intercepted = proc_try_chunk_read_intercept( + in_fd, in_ref.fd, buf, chunk, 0, 0); + if (intercepted != INT64_MIN) + nr = intercepted; + else + nr = read(in_ref.fd, buf, chunk); } if (nr < 0) { if (total > 0) @@ -1443,9 +1549,19 @@ int64_t sys_copy_file_range(guest_t *g, size_t chunk = remaining > sizeof(buf) ? 
sizeof(buf) : remaining; ssize_t nr; if (off_in >= 0) { - nr = pread(in_ref.fd, buf, chunk, off_in); + int64_t intercepted = proc_try_chunk_read_intercept( + fd_in, in_ref.fd, buf, chunk, off_in, 1); + if (intercepted != INT64_MIN) + nr = intercepted; + else + nr = pread(in_ref.fd, buf, chunk, off_in); } else { - nr = read(in_ref.fd, buf, chunk); + int64_t intercepted = proc_try_chunk_read_intercept( + fd_in, in_ref.fd, buf, chunk, 0, 0); + if (intercepted != INT64_MIN) + nr = intercepted; + else + nr = read(in_ref.fd, buf, chunk); } if (nr < 0) { if (total > 0) diff --git a/src/syscall/syscall.c b/src/syscall/syscall.c index 11794eb..36f8c61 100644 --- a/src/syscall/syscall.c +++ b/src/syscall/syscall.c @@ -1583,7 +1583,12 @@ int syscall_dispatch(hv_vcpu_t vcpu, guest_t *g, int *exit_code, bool verbose) if (tp != FD_REGULAR && tp != FD_STDIO && tp != FD_PIPE && tp != FD_SOCKET) goto slow_path; - if (nr == SYS_write && fd_table[fd].proc_path[0] != '\0') + /* Proc-backed fds may need synthetic read/write handling (for + * example, oom_* rereads recompute content on each read and proc + * dirfds steer relative *at() resolution). Keep them on the slow + * path so the proc interceptors run. 
+ */ + if (fd_table[fd].proc_path[0] != '\0') goto slow_path; host_fd_ref_t host_ref; diff --git a/tests/test-io-opt.c b/tests/test-io-opt.c index 263c732..e1691c1 100644 --- a/tests/test-io-opt.c +++ b/tests/test-io-opt.c @@ -16,6 +16,15 @@ #include "test-harness.h" +static void reset_oom_score_adj(void) +{ + int fd = open("/proc/self/oom_score_adj", O_RDWR); + if (fd >= 0) { + write(fd, "0\n", 2); + close(fd); + } +} + int main(void) { int passes = 0, fails = 0; @@ -79,6 +88,52 @@ int main(void) } } + TEST("sendfile rereads synthetic oom proc source"); + { + const char *proc_dst = "/tmp/elfuse-test-proc-sendfile.txt"; + unlink(proc_dst); + reset_oom_score_adj(); + + int in_fd = open("/proc/self/oom_adj", O_RDONLY); + int score_fd = open("/proc/self/oom_score_adj", O_RDWR); + int out_fd = open(proc_dst, O_CREAT | O_WRONLY | O_TRUNC, 0644); + if (in_fd >= 0 && score_fd >= 0 && out_fd >= 0) { + char buf[32] = {0}; + off_t offset = 0; + ssize_t wrote = write(score_fd, "1000\n", 5); + ssize_t copied = + wrote == 5 ? 
sendfile(out_fd, in_fd, &offset, 32) : -1; + close(out_fd); + close(score_fd); + close(in_fd); + + int verify_fd = open(proc_dst, O_RDONLY); + if (copied >= 0 && verify_fd >= 0) { + ssize_t n = read(verify_fd, buf, sizeof(buf) - 1); + close(verify_fd); + if (copied == 3 && offset == 3 && n == 3 && + memcmp(buf, "15\n", 3) == 0) + PASS(); + else + FAIL("unexpected sendfile proc content"); + } else { + if (verify_fd >= 0) + close(verify_fd); + FAIL("proc sendfile setup failed"); + } + } else { + if (in_fd >= 0) + close(in_fd); + if (score_fd >= 0) + close(score_fd); + if (out_fd >= 0) + close(out_fd); + PASS(); + } + reset_oom_score_adj(); + unlink(proc_dst); + } + /* Test fsync */ TEST("fsync"); { @@ -151,6 +206,54 @@ int main(void) unlink(cfr_dst); } + TEST("copy_file_range rereads synthetic oom proc source"); + { + const char *proc_dst = "/tmp/elfuse-test-proc-cfr.txt"; + unlink(proc_dst); + reset_oom_score_adj(); + + int in_fd = open("/proc/self/oom_adj", O_RDONLY); + int score_fd = open("/proc/self/oom_score_adj", O_RDWR); + int out_fd = open(proc_dst, O_CREAT | O_WRONLY | O_TRUNC, 0644); + if (in_fd >= 0 && score_fd >= 0 && out_fd >= 0) { + char buf[32] = {0}; + off_t off_in = 0, off_out = 0; + ssize_t wrote = write(score_fd, "1000\n", 5); + ssize_t copied = + wrote == 5 + ? 
copy_file_range(in_fd, &off_in, out_fd, &off_out, 32, 0) + : -1; + close(out_fd); + close(score_fd); + close(in_fd); + + int verify_fd = open(proc_dst, O_RDONLY); + if (copied >= 0 && verify_fd >= 0) { + ssize_t n = read(verify_fd, buf, sizeof(buf) - 1); + close(verify_fd); + if (copied == 3 && off_in == 3 && off_out == 3 && n == 3 && + memcmp(buf, "15\n", 3) == 0) + PASS(); + else + FAIL("unexpected copy_file_range proc content"); + } else { + if (verify_fd >= 0) + close(verify_fd); + FAIL("proc copy_file_range setup failed"); + } + } else { + if (in_fd >= 0) + close(in_fd); + if (score_fd >= 0) + close(score_fd); + if (out_fd >= 0) + close(out_fd); + PASS(); + } + reset_oom_score_adj(); + unlink(proc_dst); + } + /* Cleanup */ unlink(src_path); unlink(dst_path); diff --git a/tests/test-netstat.c b/tests/test-netstat.c index f275a65..822159c 100644 --- a/tests/test-netstat.c +++ b/tests/test-netstat.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include static int read_proc_file(const char *path, char *buf, size_t bufsz) { @@ -41,6 +43,35 @@ int main(void) int pass = 0, fail = 0; char buf[8192]; + /* 0. Verify /proc/net exists as a directory with expected children. */ + struct stat st; + if (stat("/proc/net", &st) == 0 && S_ISDIR(st.st_mode)) { + DIR *dir = opendir("/proc/net"); + if (dir) { + int found_tcp = 0, found_udp = 0, found_unix = 0; + struct dirent *de; + while ((de = readdir(dir))) { + found_tcp |= !strcmp(de->d_name, "tcp"); + found_udp |= !strcmp(de->d_name, "udp"); + found_unix |= !strcmp(de->d_name, "unix"); + } + closedir(dir); + if (found_tcp && found_udp && found_unix) { + printf("PASS: /proc/net enumerates synthetic socket tables\n"); + pass++; + } else { + printf("FAIL: /proc/net missing expected entries\n"); + fail++; + } + } else { + printf("FAIL: cannot open /proc/net: %s\n", strerror(errno)); + fail++; + } + } else { + printf("FAIL: /proc/net is not a directory: %s\n", strerror(errno)); + fail++; + } + /* 1. 
TCP listener on 127.0.0.1:7777 */ int tcp_fd = socket(AF_INET, SOCK_STREAM, 0); if (tcp_fd < 0) { diff --git a/tests/test-proc.c b/tests/test-proc.c index 754b0bc..c2735a5 100644 --- a/tests/test-proc.c +++ b/tests/test-proc.c @@ -144,6 +144,22 @@ int main(void) FAIL("readlink failed"); } + TEST("readlink /proc//exe aliases /proc/self/exe"); + { + char path[64]; + char self_buf[4096], pid_buf[4096]; + snprintf(path, sizeof(path), "/proc/%d/exe", getpid()); + ssize_t self_n = + readlink("/proc/self/exe", self_buf, sizeof(self_buf) - 1); + ssize_t pid_n = readlink(path, pid_buf, sizeof(pid_buf) - 1); + if (self_n > 0 && pid_n > 0) { + self_buf[self_n] = '\0'; + pid_buf[pid_n] = '\0'; + EXPECT_TRUE(!strcmp(self_buf, pid_buf), "exe targets differ"); + } else + FAIL("readlink failed"); + } + /* openat(procfd, "/stat"): proc walkers keep /proc as a dirfd. */ TEST("openat /proc//stat"); { diff --git a/tests/test-tier-b.c b/tests/test-tier-b.c index 0ffee12..8594524 100644 --- a/tests/test-tier-b.c +++ b/tests/test-tier-b.c @@ -7,6 +7,7 @@ * parity, /proc/self/oom_score_adj, /proc/self/fdinfo, cpuinfo scaling. */ +#include #include #include #include @@ -14,11 +15,18 @@ #include #include #include +#include +#include +#include +#include +#include #include #include #include #include +#include #include +#include #include #include "test-harness.h" @@ -604,6 +612,738 @@ static void test_proc_fdinfo(void) } } +static void test_proc_oom_score_adj_rejects_out_of_range(void) +{ + TEST("/proc/self/oom_score_adj rejects out-of-range writes"); + int fd = open("/proc/self/oom_score_adj", O_RDWR); + if (fd < 0) { + FAIL("open"); + return; + } + /* Linux validates the input domain on the writer side; the kernel + * returns EINVAL for any value outside [-1000, 1000]. 
*/ + const char too_high[] = "1001\n"; + ssize_t rc = write(fd, too_high, sizeof(too_high) - 1); + int saved = errno; + close(fd); + if (rc < 0 && saved == EINVAL) + PASS(); + else + FAIL("expected -EINVAL"); +} + +static void test_proc_oom_adj_scaling(void) +{ + TEST("/proc/self/oom_adj scales to oom_score_adj"); + /* Reset to a known starting value so test ordering does not matter. */ + int z = open("/proc/self/oom_score_adj", O_RDWR); + if (z >= 0) { + write(z, "0\n", 2); + close(z); + } + + int fd = open("/proc/self/oom_adj", O_RDWR); + if (fd < 0) { + /* Some Linux configs deprecate oom_adj; treat absence as OK. */ + PASS(); + return; + } + /* Linux fs/proc/base.c oom_adj_write special-cases OOM_ADJUST_MAX so + * 15 maps directly to OOM_SCORE_ADJ_MAX (1000), not 15*1000/17 = 882. */ + if (write(fd, "15\n", 3) != 3) { + close(fd); + FAIL("write"); + return; + } + close(fd); + + int sa = open("/proc/self/oom_score_adj", O_RDONLY); + if (sa < 0) { + FAIL("reopen oom_score_adj"); + return; + } + char buf[32] = {0}; + ssize_t n = read(sa, buf, sizeof(buf) - 1); + close(sa); + if (n <= 0) { + FAIL("read"); + return; + } + int score = atoi(buf); + EXPECT_TRUE(score == 1000, "oom_adj=15 should map to oom_score_adj=1000"); +} + +static void test_proc_oom_adj_same_fd_roundtrip(void) +{ + TEST("/proc/self/oom_adj same-fd readback stays legacy"); + + int reset = open("/proc/self/oom_score_adj", O_RDWR); + if (reset >= 0) { + write(reset, "0\n", 2); + close(reset); + } + + int fd = open("/proc/self/oom_adj", O_RDWR); + if (fd < 0) { + PASS(); + return; + } + if (write(fd, "15\n", 3) != 3) { + close(fd); + FAIL("write"); + return; + } + if (lseek(fd, 0, SEEK_SET) < 0) { + close(fd); + FAIL("lseek"); + return; + } + + char buf[32] = {0}; + ssize_t n = read(fd, buf, sizeof(buf) - 1); + close(fd); + reset = open("/proc/self/oom_score_adj", O_RDWR); + if (reset >= 0) { + write(reset, "0\n", 2); + close(reset); + } + if (n <= 0) { + FAIL("read"); + return; + } + 
EXPECT_TRUE(atoi(buf) == 15, "same-fd readback should preserve oom_adj"); +} + +static void test_proc_oom_score_no_write(void) +{ + TEST("/proc/self/oom_score writes are rejected"); + /* Linux: open succeeds (root bypasses the 0444 check, non-root sees + * EACCES from the permission gate); writes always fail because there + * is no write handler. The test focuses on the write side, which is + * uniform across uids. + */ + int fd = open("/proc/self/oom_score", O_RDONLY); + if (fd < 0) { + FAIL("open RDONLY"); + return; + } + char buf[32] = {0}; + ssize_t n = read(fd, buf, sizeof(buf) - 1); + close(fd); + if (n <= 0) { + FAIL("read"); + return; + } + /* Stub returns 0; real Linux computes a small positive score, but for + * a userspace bridge a constant zero is acceptable. + */ + EXPECT_TRUE(atoi(buf) >= 0, "score must be non-negative"); +} + +static void test_proc_oom_score_write_fails(void) +{ + TEST("/proc/self/oom_score write is rejected"); + int fd = open("/proc/self/oom_score", O_WRONLY); + if (fd < 0) { + /* Non-root environments cannot open read-only file for write; + * that is also acceptable proof the file is not writable. + */ + if (errno == EACCES) { + PASS(); + return; + } + FAIL("open WRONLY"); + return; + } + ssize_t w = write(fd, "0\n", 2); + int saved = errno; + close(fd); + /* Linux's proc_reg_write returns -EIO when the proc node has no + * write op. Older or stripped kernels may return other errno; the + * load-bearing assertion is that the write fails, not the exact + * errno value. 
 */
    if (w < 0)
        PASS();
    else
        printf("FAIL: write succeeded rc=%zd errno=%d\n", w, saved), fails++;
}

/* oom_score is read-only: a writable open must be rejected up front with
 * EACCES rather than deferring the failure to a later write(2). */
static void test_proc_oom_score_open_enforces_read_only(void)
{
    TEST("/proc/self/oom_score rejects writable open");
    errno = 0;
    int fd = open("/proc/self/oom_score", O_WRONLY);
    if (fd >= 0) {
        close(fd);
        FAIL("open should fail");
        return;
    }
    EXPECT_TRUE(errno == EACCES, "expected EACCES from open");
}

/* An already-open legacy oom_adj fd, rewound and reread, must observe a
 * later oom_score_adj write: 1000 back-clamps to the legacy maximum 15.
 * NOTE(review): a failed open of oom_adj is treated as PASS — presumably
 * the legacy node is optional on some configurations; confirm intent. */
static void test_proc_oom_adj_reread_tracks_score_adj_updates(void)
{
    TEST("/proc/self/oom_adj reread reflects later score_adj writes");

    /* Start from a known baseline of 0 so the initial read is predictable. */
    int reset = open("/proc/self/oom_score_adj", O_RDWR);
    if (reset < 0) {
        FAIL("reset open");
        return;
    }
    if (write(reset, "0\n", 2) != 2) {
        close(reset);
        FAIL("reset write");
        return;
    }
    close(reset);

    int fd = open("/proc/self/oom_adj", O_RDONLY);
    if (fd < 0) {
        PASS();
        return;
    }

    char buf[32] = {0};
    ssize_t n = read(fd, buf, sizeof(buf) - 1);
    if (n <= 0 || atoi(buf) != 0) {
        close(fd);
        FAIL("initial read");
        return;
    }

    /* Bump the process-wide value through the modern node while the legacy
     * fd stays open. */
    int score = open("/proc/self/oom_score_adj", O_RDWR);
    if (score < 0) {
        close(fd);
        FAIL("score_adj open");
        return;
    }
    if (write(score, "1000\n", 5) != 5) {
        close(score);
        close(fd);
        FAIL("score_adj write");
        return;
    }
    close(score);

    if (lseek(fd, 0, SEEK_SET) < 0) {
        close(fd);
        FAIL("lseek");
        return;
    }

    memset(buf, 0, sizeof(buf));
    n = read(fd, buf, sizeof(buf) - 1);
    close(fd);

    /* Best-effort restore so later tests see the baseline again. */
    reset = open("/proc/self/oom_score_adj", O_RDWR);
    if (reset >= 0) {
        write(reset, "0\n", 2);
        close(reset);
    }

    if (n <= 0) {
        FAIL("reread");
        return;
    }
    EXPECT_TRUE(atoi(buf) == 15, "oom_adj fd should reflect current score_adj");
}

/* Same shape as the legacy-scaling test above, but on oom_score_adj itself:
 * a rewound fd must read back the value written through a second fd (1000),
 * proving reads hit live process state, not a per-open snapshot. */
static void test_proc_oom_score_adj_reread_tracks_updates(void)
{
    TEST("/proc/self/oom_score_adj reread reflects later writes");

    /* Start from a known baseline of 0. */
    int reset = open("/proc/self/oom_score_adj", O_RDWR);
    if (reset < 0) {
        FAIL("reset open");
        return;
    }
    if (write(reset, "0\n", 2) != 2) {
        close(reset);
        FAIL("reset write");
        return;
    }
    close(reset);

    int fd = open("/proc/self/oom_score_adj", O_RDONLY);
    if (fd < 0) {
        FAIL("open");
        return;
    }

    char buf[32] = {0};
    ssize_t n = read(fd, buf, sizeof(buf) - 1);
    if (n <= 0 || atoi(buf) != 0) {
        close(fd);
        FAIL("initial read");
        return;
    }

    /* Update through a second fd while the first stays open. */
    int update = open("/proc/self/oom_score_adj", O_RDWR);
    if (update < 0) {
        close(fd);
        FAIL("update open");
        return;
    }
    if (write(update, "1000\n", 5) != 5) {
        close(update);
        close(fd);
        FAIL("update write");
        return;
    }
    close(update);

    if (lseek(fd, 0, SEEK_SET) < 0) {
        close(fd);
        FAIL("lseek");
        return;
    }

    memset(buf, 0, sizeof(buf));
    n = read(fd, buf, sizeof(buf) - 1);
    close(fd);

    /* Best-effort restore of the baseline. */
    reset = open("/proc/self/oom_score_adj", O_RDWR);
    if (reset >= 0) {
        write(reset, "0\n", 2);
        close(reset);
    }

    if (n <= 0) {
        FAIL("reread");
        return;
    }
    EXPECT_TRUE(atoi(buf) == 1000,
                "oom_score_adj fd should reflect current value");
}

/* A writev whose iovecs sum to zero bytes must succeed with 0, matching
 * Linux semantics for proc nodes. */
static void test_proc_oom_zero_length_writev(void)
{
    TEST("/proc/self/oom_score_adj zero-length writev returns 0");
    int fd = open("/proc/self/oom_score_adj", O_WRONLY);
    if (fd < 0) {
        FAIL("open");
        return;
    }
    /* Two empty iovecs: total length zero. Linux returns 0; the previous
     * implementation returned EINVAL via proc_parse_int_write. */
    char dummy = 0;
    struct iovec iov[2] = {{&dummy, 0}, {&dummy, 0}};
    ssize_t n = writev(fd, iov, 2);
    int saved = errno; /* capture before close() can clobber errno */
    close(fd);
    if (n == 0)
        PASS();
    else
        printf("FAIL: writev returned %zd errno=%d\n", n, saved), fails++;
}

/* Synthetic proc files must stat as size 0 so callers that size buffers
 * from st_size cannot truncate the content. */
static void test_proc_oom_stat_size_zero(void)
{
    TEST("/proc/self/oom_score_adj stat reports size 0");
    struct stat st;
    if (stat("/proc/self/oom_score_adj", &st) < 0) {
        FAIL("stat");
        return;
    }
    /* A non-zero st_size would cap stat-sized read buffers, truncating
     * "-1000\n" (6 bytes) to whatever size was hardcoded.
*/ + EXPECT_TRUE(st.st_size == 0, "st_size should be 0"); +} + +static void test_proc_fdinfo_eventfd_count(void) +{ + TEST("/proc/self/fdinfo/ exposes eventfd-count"); + int efd = eventfd(42, 0); + if (efd < 0) { + FAIL("eventfd"); + return; + } + char path[64]; + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", efd); + int fd = open(path, O_RDONLY); + if (fd < 0) { + close(efd); + FAIL("open"); + return; + } + char buf[256] = {0}; + ssize_t n = read(fd, buf, sizeof(buf) - 1); + close(fd); + close(efd); + if (n <= 0) { + FAIL("read"); + return; + } + /* Linux fs/eventfd.c emits "eventfd-count: %16llx" with a single + * space separator (not a tab, unlike pos:/flags:/mnt_id:). Pin the + * exact prefix so a regression to a tab is caught. Decimal 42 is 0x2a. + */ + const char *p = strstr(buf, "eventfd-count: "); + EXPECT_TRUE(p && strstr(p, "2a") != NULL, + "eventfd-count missing space separator or wrong hex value"); +} + +static void test_proc_fdinfo_signalfd_mask(void) +{ + TEST("/proc/self/fdinfo/ exposes sigmask"); + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, SIGUSR1); + int sfd = signalfd(-1, &mask, 0); + if (sfd < 0) { + FAIL("signalfd"); + return; + } + char path[64]; + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", sfd); + int fd = open(path, O_RDONLY); + if (fd < 0) { + close(sfd); + FAIL("open"); + return; + } + char buf[256] = {0}; + ssize_t n = read(fd, buf, sizeof(buf) - 1); + close(fd); + close(sfd); + if (n <= 0) { + FAIL("read"); + return; + } + /* Linux fs/signalfd.c emits "sigmask:\t%016llx" with a tab separator + * (verified against a real /proc/self/fdinfo dump on Linux 6.x). + * Pin the exact prefix so a regression to a space is caught. 
+ */ + EXPECT_TRUE(strstr(buf, "sigmask:\t") != NULL, + "sigmask missing tab separator"); +} + +static void test_proc_fdinfo_timerfd_periodic_value(void) +{ + TEST("/proc/self/fdinfo/ reports periodic timerfd next expiry"); + int tfd = timerfd_create(CLOCK_MONOTONIC, 0); + if (tfd < 0) { + FAIL("timerfd_create"); + return; + } + + struct itimerspec its = {.it_value = {.tv_sec = 0, .tv_nsec = 50000000}, + .it_interval = {.tv_sec = 0, .tv_nsec = 50000000}}; + if (timerfd_settime(tfd, 0, &its, NULL) < 0) { + close(tfd); + FAIL("timerfd_settime"); + return; + } + + usleep(70000); + + char path[64]; + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", tfd); + int fd = open(path, O_RDONLY); + if (fd < 0) { + close(tfd); + FAIL("open"); + return; + } + + char buf[256] = {0}; + ssize_t n = read(fd, buf, sizeof(buf) - 1); + close(fd); + close(tfd); + if (n <= 0) { + FAIL("read"); + return; + } + + long long value_sec = -1, value_nsec = -1; + long long interval_sec = -1, interval_nsec = -1; + /* Linux fs/timerfd.c emits "it_value: (S, NS)" with a single space + * after the colon (unlike pos:/flags: which use tabs). */ + const char *value = strstr(buf, "it_value: ("); + const char *interval = strstr(buf, "it_interval: ("); + if (!value || !interval || + sscanf(value, "it_value: (%lld, %lld)", &value_sec, &value_nsec) != 2 || + sscanf(interval, "it_interval: (%lld, %lld)", &interval_sec, + &interval_nsec) != 2) { + FAIL("parse fdinfo"); + return; + } + + long long value_total_ns = value_sec * 1000000000LL + value_nsec; + long long interval_total_ns = interval_sec * 1000000000LL + interval_nsec; + /* it_interval is the static settime value and must round-trip; Linux's + * timerfd_get_remaining() reports 0 once the timer has fired, while + * elfuse computes time-until-next from the kqueue arm time. Both are + * non-negative and bounded by the interval, so accept either form. 
+ */ + EXPECT_TRUE(interval_total_ns == 50000000 && value_total_ns >= 0 && + value_total_ns <= interval_total_ns, + "interval should round-trip and value should be within bounds"); +} + +static void test_proc_fdinfo_timerfd_ticks_drains_kqueue(void) +{ + TEST("/proc/self/fdinfo/ ticks reflects pending kqueue fires"); + /* Arm a periodic timer, wait for several fires, then read fdinfo + * WITHOUT first reading the timerfd. The pre-fix snapshot exported + * a stale expirations counter (the kqueue events had not been folded + * in), so ticks would read 0 even after multiple fires. Linux's + * fs/timerfd.c snapshots ticks under the wait-queue lock, where the + * counter reflects every fire that hit the kernel state. */ + int tfd = timerfd_create(CLOCK_MONOTONIC, 0); + if (tfd < 0) { + FAIL("timerfd_create"); + return; + } + struct itimerspec its = {.it_value = {.tv_sec = 0, .tv_nsec = 20000000}, + .it_interval = {.tv_sec = 0, .tv_nsec = 20000000}}; + if (timerfd_settime(tfd, 0, &its, NULL) < 0) { + close(tfd); + FAIL("timerfd_settime"); + return; + } + /* Wait long enough for the timer to fire at least three times. */ + usleep(120000); + + char path[64]; + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", tfd); + int fd = open(path, O_RDONLY); + if (fd < 0) { + close(tfd); + FAIL("open"); + return; + } + char buf[256] = {0}; + ssize_t n = read(fd, buf, sizeof(buf) - 1); + close(fd); + close(tfd); + if (n <= 0) { + FAIL("read"); + return; + } + + /* Linux uses "ticks: %llu" with a single space; elfuse matches. */ + const char *p = strstr(buf, "ticks: "); + unsigned long long ticks = 0; + if (!p || sscanf(p, "ticks: %llu", &ticks) != 1) { + FAIL("parse ticks"); + return; + } + /* At minimum one fire should be visible; on a slow host more would + * be expected. Pre-fix elfuse would report 0 here. 
 */
    EXPECT_TRUE(ticks >= 1, "ticks should reflect at least one fire");
}

/* Two simultaneous opens of the fdinfo directory must each enumerate a
 * full, independent snapshot (at least stdin/stdout/stderr). */
static void test_proc_fdinfo_dir_concurrent_safe(void)
{
    TEST("/proc/self/fdinfo dir tolerates concurrent re-open");
    /* Open the directory twice and verify both enumerate independently.
     * The earlier shared-dir design could mutate one open's backing files
     * while another iterated. Both Linux and the per-open scratch fix
     * should at minimum surface stdin/out/err on each enumeration.
     */
    DIR *d1 = opendir("/proc/self/fdinfo");
    if (!d1) {
        FAIL("opendir 1");
        return;
    }
    DIR *d2 = opendir("/proc/self/fdinfo");
    if (!d2) {
        closedir(d1);
        FAIL("opendir 2");
        return;
    }

    /* Count non-dot entries in each snapshot. */
    int n1 = 0, n2 = 0;
    struct dirent *ent;
    while ((ent = readdir(d1)))
        if (ent->d_name[0] != '.')
            n1++;
    while ((ent = readdir(d2)))
        if (ent->d_name[0] != '.')
            n2++;
    closedir(d1);
    closedir(d2);
    EXPECT_TRUE(n1 >= 3 && n2 >= 3, "concurrent enumeration broken");
}

/* openat() relative to an open fdinfo dirfd must resolve to the synthetic
 * per-fd payload (pos:/flags: lines), not a stale on-disk file. */
static void test_proc_fdinfo_dirfd_openat_uses_virtual_entries(void)
{
    TEST("/proc/self/fdinfo dirfd openat resolves virtually");
    int dirfd = open("/proc/self/fdinfo", O_RDONLY | O_DIRECTORY);
    if (dirfd < 0) {
        FAIL("open dir");
        return;
    }

    int fd = openat(dirfd, "0", O_RDONLY);
    close(dirfd);
    if (fd < 0) {
        FAIL("openat");
        return;
    }

    char buf[256] = {0};
    ssize_t n = read(fd, buf, sizeof(buf) - 1);
    close(fd);
    if (n <= 0) {
        FAIL("read");
        return;
    }

    EXPECT_TRUE(strstr(buf, "pos:\t") && strstr(buf, "flags:\t"),
                "fdinfo openat should yield synthetic payload");
}

/* Helper: create an IPv4 TCP socket, bind it to an ephemeral loopback
 * port, and put it in the listen state. Returns the fd, or -1 on any
 * failure (the socket is closed on the error paths). The setsockopt
 * result is deliberately ignored: SO_REUSEADDR is best-effort here. */
static int bind_listen_loopback_tcp(void)
{
    int s = socket(AF_INET, SOCK_STREAM, 0);
    if (s < 0)
        return -1;
    int one = 1;
    setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
    struct sockaddr_in sa = {0};
    sa.sin_family = AF_INET;
    sa.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    sa.sin_port = 0; /* ephemeral port chosen by the stack */
    if (bind(s, (struct sockaddr *) &sa, sizeof(sa)) < 0 || listen(s, 1) < 0) {
        close(s);
        return -1;
    }

    return s;
}

/* Regression test: /proc/net/tcp row serials ("sl") must be dense even
 * when the fd table holds non-TCP sockets that the table iterator skips. */
static void test_proc_net_tcp_sl_dense(void)
{
    TEST("/proc/net/tcp sl column stays dense across mixed sockets");
    /* Interleave non-TCP sockets BEFORE the bound TCP listeners so the
     * proc_pidinfo iterator visits the rejected sockets first and the
     * pre-fix sparse-slot bug would assign nonzero sl to the first
     * emitted row. Two TCP listeners ensure the second row's sl exposes
     * any gap created by additional non-TCP visits between them.
     *
     * Pre-fix: udp1, udp2, sp[0], sp[1] all bump the iterator slot
     * counter to 4 before tcp1 emits. tcp1 row: sl=4. tcp2 row: sl=5.
     * The first-row check (sl == 0) would fail.
     * Post-fix: only emitted rows increment the visitor's row counter;
     * tcp1: sl=0, tcp2: sl=1. Dense.
     */
    int udp1 = socket(AF_INET, SOCK_DGRAM, 0);
    int udp2 = socket(AF_INET, SOCK_DGRAM, 0);
    int sp[2] = {-1, -1};
    int sp_rc = socketpair(AF_UNIX, SOCK_STREAM, 0, sp);
    int tcp1 = bind_listen_loopback_tcp();
    int tcp2 = bind_listen_loopback_tcp();
    if (udp1 < 0 || udp2 < 0 || sp_rc < 0 || tcp1 < 0 || tcp2 < 0) {
        /* Close whichever sockets did get created before bailing out. */
        if (udp1 >= 0)
            close(udp1);
        if (udp2 >= 0)
            close(udp2);
        if (sp[0] >= 0)
            close(sp[0]);
        if (sp[1] >= 0)
            close(sp[1]);
        if (tcp1 >= 0)
            close(tcp1);
        if (tcp2 >= 0)
            close(tcp2);
        FAIL("socket setup");
        return;
    }

    int fd = open("/proc/net/tcp", O_RDONLY);
    if (fd < 0) {
        close(udp1);
        close(udp2);
        close(sp[0]);
        close(sp[1]);
        close(tcp1);
        close(tcp2);
        FAIL("open");
        return;
    }
    /* Slurp the whole table; the read size shrinks as the buffer fills so
     * the final NUL always fits. */
    char buf[16384];
    ssize_t total = 0;
    for (;;) {
        ssize_t n = read(fd, buf + total, sizeof(buf) - total - 1);
        if (n <= 0)
            break;
        total += n;
    }
    close(fd);
    close(udp1);
    close(udp2);
    close(sp[0]);
    close(sp[1]);
    close(tcp1);
    close(tcp2);
    buf[total] = '\0';

    /* Skip the header line; collect each subsequent row's leading "sl"
     * field. /proc/net/tcp's row format is " N: ..." with N a decimal
     * serial. Verify the serials form 0,1,2,... with no gaps.
 */
    char *line = strchr(buf, '\n');
    if (!line) {
        FAIL("no rows");
        return;
    }
    line++;
    int expected = 0;
    while (*line) {
        char *colon = strchr(line, ':');
        char *eol = strchr(line, '\n');
        /* Stop at the first line that has no "N:" serial before its end. */
        if (!colon || (eol && colon > eol))
            break;
        int sl = atoi(line); /* atoi skips the leading spaces before N */
        if (sl != expected) {
            printf("FAIL: sl=%d expected=%d\n", sl, expected);
            fails++;
            return;
        }
        expected++;
        if (!eol)
            break;
        line = eol + 1;
    }
    if (expected == 0) {
        /* The bound listener should have produced a row. Treat absence
         * as failure since the regression coverage depends on it. */
        FAIL("no TCP rows after bind/listen");
        return;
    }
    PASS();
}

/* openat() relative to an open /proc/net dirfd must resolve "tcp" to the
 * synthetic table (identified by its "local_address" header). */
static void test_proc_net_dirfd_openat_uses_virtual_entries(void)
{
    TEST("/proc/net dirfd openat resolves virtually");
    int dirfd = open("/proc/net", O_RDONLY | O_DIRECTORY);
    if (dirfd < 0) {
        FAIL("open dir");
        return;
    }

    int fd = openat(dirfd, "tcp", O_RDONLY);
    close(dirfd);
    if (fd < 0) {
        FAIL("openat");
        return;
    }

    char buf[512] = {0};
    ssize_t n = read(fd, buf, sizeof(buf) - 1);
    close(fd);
    if (n <= 0) {
        FAIL("read");
        return;
    }

    EXPECT_TRUE(strstr(buf, "local_address"),
                "proc net dirfd should preserve synthetic tcp table");
}
 static void test_proc_cpuinfo_all_cpus(void)
 {
     TEST("/proc/cpuinfo lists all CPUs");
@@ -668,7 +1408,25 @@ int main(void)
     /* /proc */
     test_proc_oom_score_adj();
     test_proc_oom_score_adj_persists_write();
+    test_proc_oom_score_adj_rejects_out_of_range();
+    test_proc_oom_adj_scaling();
+    test_proc_oom_adj_same_fd_roundtrip();
+    test_proc_oom_adj_reread_tracks_score_adj_updates();
+    test_proc_oom_score_adj_reread_tracks_updates();
+    test_proc_oom_score_no_write();
+    test_proc_oom_score_write_fails();
+    test_proc_oom_score_open_enforces_read_only();
+    test_proc_oom_zero_length_writev();
+    test_proc_oom_stat_size_zero();
     test_proc_fdinfo();
+    test_proc_fdinfo_eventfd_count();
+    test_proc_fdinfo_signalfd_mask();
+    test_proc_fdinfo_timerfd_periodic_value();
+    test_proc_fdinfo_timerfd_ticks_drains_kqueue();
+    test_proc_fdinfo_dir_concurrent_safe();
+    test_proc_fdinfo_dirfd_openat_uses_virtual_entries();
+    test_proc_net_tcp_sl_dense();
+    test_proc_net_dirfd_openat_uses_virtual_entries();
     test_proc_cpuinfo_all_cpus();

     /* signalfd */