Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
363 changes: 363 additions & 0 deletions src/runtime/procemu.c

Large diffs are not rendered by default.

13 changes: 7 additions & 6 deletions src/syscall/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1410,15 +1410,16 @@ int64_t sys_faccessat(guest_t *g,
if (host_dirfd_ref_open(dirfd, &dir_ref) < 0)
return -LINUX_EBADF;

/* Check intercepted stat paths first: macOS has no /proc filesystem and the
 * sysfs CPU tree is synthetic, so access("/proc/self/stat", R_OK) etc. must
 * be intercepted. When proc_intercept_stat succeeds, the path is a known
 * emulated entry, and the result must reflect its synthetic mode bits, not
 * just path existence.
 */
struct stat dummy_st;
struct stat intercepted_st;
if (path_might_use_stat_intercept(access_path) &&
proc_intercept_stat(access_path, &dummy_st) == 0) {
proc_intercept_stat(access_path, &intercepted_st) == 0) {
host_fd_ref_close(&dir_ref);
if (path_check_intercept_access(&intercepted_st, mode, flags) < 0)
return linux_errno();
return 0;
}

Expand Down
102 changes: 84 additions & 18 deletions src/syscall/path.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,59 +26,125 @@

#define PROC_PATH_COMPONENTS_MAX (LINUX_PATH_MAX / 2)

int path_might_use_open_intercept(const char *path)
/* Component-aware prefix test: succeeds only when path is exactly prefix, or
 * continues past it with a '/' separator. A plain strncmp against
 * "/sys/devices/system/cpu" would also accept "/sys/devices/system/cpufoo"
 * and drag an unrelated path into the intercept layer; inspecting the byte
 * that follows the prefix rules that out.
 *
 * plen is the number of bytes of prefix to compare (callers pass the prefix
 * length without its terminating NUL).
 */
static bool path_prefix_match(const char *path, const char *prefix, size_t plen)
{
    if (strncmp(path, prefix, plen) == 0) {
        /* The first plen bytes agree, so path[plen] is within the string
         * (at worst its terminating NUL).
         */
        const char next = path[plen];
        return next == '\0' || next == '/';
    }
    return false;
}

#define SYSFS_CPU_PREFIX "/sys/devices/system/cpu"

/* Report whether path could be handled by the open intercept layer.
 *
 * Returns true for an absolute path that:
 *   - starts with the bytes "/proc" or "/dev" (plain byte-prefix tests, so
 *     e.g. "/procfoo" is also reported),
 *   - equals SYSFS_CPU_PREFIX or lies underneath it,
 *   - is exactly "/etc/mtab", "/etc/passwd", "/etc/group",
 *     "/var/run/utmp" or "/run/utmp".
 * Returns false for NULL, relative paths, and everything else.
 */
bool path_might_use_open_intercept(const char *path)
{
    /* Only absolute paths are candidates. */
    if (!path || path[0] != '/')
        return false;

    if (!strncmp(path, "/proc", 5))
        return true;
    if (!strncmp(path, "/dev", 4))
        return true;
    /* Component-aware match so siblings such as ".../cpufoo" are excluded. */
    if (path_prefix_match(path, SYSFS_CPU_PREFIX, sizeof(SYSFS_CPU_PREFIX) - 1))
        return true;
    if (!strcmp(path, "/etc/mtab") || !strcmp(path, "/etc/passwd") ||
        !strcmp(path, "/etc/group"))
        return true;
    if (!strcmp(path, "/var/run/utmp") || !strcmp(path, "/run/utmp"))
        return true;

    return false;
}

int path_might_use_stat_intercept(const char *path)
/* Report whether path could be handled by the stat intercept layer.
 *
 * Returns true for an absolute path that starts with the bytes "/proc" or
 * "/dev/shm" (plain byte-prefix tests), or that equals SYSFS_CPU_PREFIX or
 * lies underneath it. Returns false for NULL, relative paths, and
 * everything else.
 */
bool path_might_use_stat_intercept(const char *path)
{
    /* Only absolute paths are candidates. */
    if (!path || path[0] != '/')
        return false;

    if (!strncmp(path, "/proc", 5))
        return true;
    if (!strncmp(path, "/dev/shm", 8))
        return true;
    /* Component-aware match so siblings such as ".../cpufoo" are excluded. */
    if (path_prefix_match(path, SYSFS_CPU_PREFIX, sizeof(SYSFS_CPU_PREFIX) - 1))
        return true;

    return false;
}

static int path_next_component(const char **pathp,
const char **comp,
size_t *len)
/* Decide an access(2)-style request against the stat data of an intercepted
 * (synthetic) entry.
 *
 * st:    stat of the entry; st_mode, st_uid and st_gid are consulted.
 * mode:  F_OK, or a bitwise OR of R_OK / W_OK / X_OK.
 * flags: when LINUX_AT_EACCESS is set the effective uid/gid are checked,
 *        otherwise the real uid/gid.
 *
 * Returns 0 when every requested permission is granted. Returns -1 with
 * errno = EINVAL when mode carries unknown bits, or with errno = EACCES when
 * a requested permission is missing.
 *
 * Only the primary gid is compared with st_gid; supplementary groups are not
 * consulted.
 */
int path_check_intercept_access(const struct stat *st, int mode, int flags)
{
    if ((mode & ~(F_OK | R_OK | W_OK | X_OK)) != 0) {
        errno = EINVAL;
        return -1;
    }
    /* Existence was already established by the caller's successful stat. */
    if (mode == F_OK)
        return 0;

    mode_t granted = 0;
    uint32_t uid =
        (flags & LINUX_AT_EACCESS) ? proc_get_euid() : proc_get_uid();
    uint32_t gid =
        (flags & LINUX_AT_EACCESS) ? proc_get_egid() : proc_get_gid();

    if (uid == 0) {
        /* CAP_DAC_OVERRIDE: root reads and writes any file regardless of mode
         * bits. Execute on a non-directory still requires at least one x-bit
         * so non-executable files cannot be run as root; directories are
         * always searchable. Matches Linux generic_permission() in
         * fs/namei.c.
         */
        granted |= R_OK | W_OK;
        if (S_ISDIR(st->st_mode) ||
            (st->st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)))
            granted |= X_OK;
    } else {
        /* Pick exactly one rwx triplet: owner, else group, else other. */
        mode_t bits;
        if (uid == st->st_uid)
            bits = (st->st_mode >> 6) & 7;
        else if (gid == st->st_gid)
            bits = (st->st_mode >> 3) & 7;
        else
            bits = st->st_mode & 7;

        if (bits & 4)
            granted |= R_OK;
        if (bits & 2)
            granted |= W_OK;
        if (bits & 1)
            granted |= X_OK;
    }

    /* Every requested bit must be present in the granted set. */
    if ((mode & granted) == mode)
        return 0;

    errno = EACCES;
    return -1;
}

/* Pull the next '/'-separated component out of *pathp.
 *
 * Leading separators are skipped. When a component is found, *comp points at
 * its first byte (inside the original string, not NUL-terminated), *len is
 * its length, *pathp is advanced to the byte just past it, and true is
 * returned. When only separators (or nothing) remain, *pathp is left at the
 * terminating NUL, *comp and *len are untouched, and false is returned.
 */
static bool path_next_component(const char **pathp,
                                const char **comp,
                                size_t *len)
{
    const char *start = *pathp;
    const char *end;

    /* Step over any run of separators in front of the component. */
    for (; *start == '/'; start++)
        ;

    if (*start == '\0') {
        *pathp = start;
        return false;
    }

    /* Scan to the separator or NUL that ends this component. */
    for (end = start; *end != '\0' && *end != '/'; end++)
        ;

    *comp = start;
    *len = (size_t) (end - start);
    *pathp = end;
    return true;
}

static int path_component_is_dot(const char *comp, size_t len)
/* True when the component of length len starting at comp is exactly ".". */
static bool path_component_is_dot(const char *comp, size_t len)
{
    /* Check the length first so comp is only read for one-byte components. */
    if (len != 1)
        return false;
    return comp[0] == '.';
}
Expand Down
6 changes: 4 additions & 2 deletions src/syscall/path.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/stat.h>

#include "syscall/internal.h"

/* Report whether an absolute path could be handled by the open / stat
 * intercept layers respectively. NULL and relative paths yield false.
 */
bool path_might_use_open_intercept(const char *path);
bool path_might_use_stat_intercept(const char *path);

/* access(2)-style check of mode (F_OK or R_OK|W_OK|X_OK bits) against the
 * stat data of an intercepted entry. flags selects effective vs. real ids
 * via LINUX_AT_EACCESS. Returns 0 when access is granted, otherwise -1 with
 * errno set to EINVAL (bad mode bits) or EACCES (permission missing).
 */
int path_check_intercept_access(const struct stat *st, int mode, int flags);
int resolve_proc_at_path(guest_fd_t dirfd,
const char *path,
char *out,
Expand Down
6 changes: 5 additions & 1 deletion src/syscall/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,11 @@ SC_FORWARD(sc_fchmodat2, sys_fchmodat(g, (int) x0, x1, (uint32_t) x2, (int) x3
SC_FORWARD(sc_fchownat, sys_fchownat(g, (int) x0, x1, (uint32_t) x2, (uint32_t) x3, (int) x4))
SC_FORWARD(sc_fchown, sys_fchown((int) x0, (uint32_t) x1, (uint32_t) x2))
SC_FORWARD(sc_utimensat, sys_utimensat(g, (int) x0, x1, x2, (int) x3))
/* Linux faccessat (SYS 48) is 3-arg: dirfd, path, mode.
 * The flags parameter was added in faccessat2 (SYS 439), so for faccessat
 * x3 is not an argument and contains garbage from the caller's register
 * state. Forward flags = 0 instead of x3.
 */
SC_FORWARD(sc_faccessat, sys_faccessat(g, (int) x0, x1, (int) x2, 0))
SC_FORWARD(sc_faccessat2, sys_faccessat(g, (int) x0, x1, (int) x2, (int) x3))
SC_FORWARD(sc_ftruncate, sys_ftruncate((int) x0, (int64_t) x1))
SC_FORWARD(sc_truncate, sys_truncate(g, x0, (int64_t) x1))
Expand Down
1 change: 1 addition & 0 deletions tests/manifest.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ test-large-io-boundary

[section] /proc and /dev emulation tests
test-proc
test-sysfs-cpu

[section] Network tests
test-net
Expand Down
10 changes: 7 additions & 3 deletions tests/test-matrix.sh
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,12 @@ run_elfuse_sysroot()
# Tests that either hang under qemu-system-aarch64 on Apple Silicon
# (raw clone / PI futex / massive thread+mmap stress) or currently diverge
# from the Alpine linux-virt reference kernel on the deprecated oom_adj
# procfs compatibility path exercised by test-io-opt. test-sysfs-cpu asserts
# the elfuse stub contract (cache/topology subtree empty, possible == online,
# cpuN count == online count) which a real kernel does not honor. All listed
# tests still run in elfuse-aarch64 mode and in `make check`; the qemu
# reference run skips them.
QEMU_SKIP="test-thread test-stress test-futex-pi test-io-opt test-sysfs-cpu"

is_qemu_skipped()
{
Expand Down Expand Up @@ -355,6 +358,7 @@ run_unit_tests()

printf "\n/proc and /dev\n"
test_check "$runner" "test-proc" "0 failed" "$bindir/test-proc"
test_check "$runner" "test-sysfs-cpu" "0 failed" "$bindir/test-sysfs-cpu"

printf "\nNetwork\n"
test_check "$runner" "test-net" "0 failed" "$bindir/test-net"
Expand Down
Loading
Loading