Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Supported user-facing options:
| `-h`, `--help` | Print built-in usage help |
| `-V`, `--version` | Print the build version and exit |
| `-v`, `--verbose` | Enable syscall-level and loader diagnostics |
| `-t`, `--timeout N` | Per-iteration vCPU watchdog, in seconds (default `10`) |
| `-t`, `--timeout N` | Per-iteration vCPU watchdog, in seconds (default `10`, `0` disables) |
| `--sysroot PATH` | Resolve guest absolute paths under `PATH` first |
| `--gdb PORT` | Listen for a GDB RSP client on `PORT` |
| `--gdb-stop-on-entry` | Stop before the first guest instruction |
Expand All @@ -25,6 +25,7 @@ Supported user-facing options:
`--timeout` is a run-loop watchdog. It does not cap total process runtime. It
only bounds a single `hv_vcpu_run()` iteration before the host regains control,
which is what allows host-side timers and signals to be observed promptly.
Setting `--timeout 0` disables this watchdog entirely; this is intended for long-running CPU-bound guests.

## Common Launch Patterns

Expand Down
15 changes: 15 additions & 0 deletions mk/tests.mk
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
test-glibc-coreutils test-perf \
test-matrix test-matrix-elfuse-aarch64 test-matrix-qemu-aarch64 \
test-full test-multi-vcpu test-rwx test-sysroot-rename \
test-sysroot-procfs-exec test-timeout-disable \
test-sysroot-nofollow perf

## Build and run the assembly hello world test
Expand All @@ -18,6 +19,10 @@ check: $(ELFUSE_BIN) $(TEST_DEPS)
@bash tests/driver.sh -e $(ELFUSE_BIN) -d $(TEST_DIR) -v
@printf "\n$(BLUE)━━━ busybox applet validation ━━━$(RESET)\n"
@$(MAKE) --no-print-directory test-busybox
@printf "\n$(BLUE)━━━ sysroot procfs exec validation ━━━$(RESET)\n"
@$(MAKE) --no-print-directory test-sysroot-procfs-exec
@printf "\n$(BLUE)━━━ timeout=0 validation ━━━$(RESET)\n"
@$(MAKE) --no-print-directory test-timeout-disable

test-sysroot-rename: $(ELFUSE_BIN) $(BUILD_DIR)/test-sysroot-rename
@tmpdir=$$(mktemp -d); \
Expand All @@ -42,6 +47,16 @@ test-sysroot-nofollow: $(ELFUSE_BIN) $(BUILD_DIR)/test-sysroot-nofollow
ln -sf /outside-target "$$tmpdir/tmp/elfuse-sysroot-nofollow-link"; \
$(ELFUSE_BIN) --sysroot "$$tmpdir" $(BUILD_DIR)/test-sysroot-nofollow

## Run test-procfs-exec from inside a throwaway sysroot: the binary is copied
## to <tmpdir>/bin and launched with --sysroot pointing at <tmpdir>. The
## temporary directory is removed when the recipe shell exits.
test-sysroot-procfs-exec: $(ELFUSE_BIN) $(BUILD_DIR)/test-procfs-exec
@tmpdir=$$(mktemp -d); \
trap 'rm -rf "$$tmpdir"' EXIT; \
mkdir -p "$$tmpdir/bin"; \
cp $(BUILD_DIR)/test-procfs-exec "$$tmpdir/bin/test-procfs-exec"; \
$(ELFUSE_BIN) --sysroot "$$tmpdir" "$$tmpdir/bin/test-procfs-exec"

## Run the hello test with --timeout 0 and discard its stdout; the target
## passes when elfuse exits with status 0.
test-timeout-disable: $(ELFUSE_BIN) $(TEST_HELLO_DEP)
@$(ELFUSE_BIN) --timeout 0 $(TEST_DIR)/test-hello > /dev/null

## Run GDB stub integration tests (LLDB <-> elfuse gdbstub)
test-gdbstub: $(ELFUSE_BIN) $(TEST_DIR)/test-hello
@bash tests/test-gdbstub.sh -e $(ELFUSE_BIN) -v
Expand Down
17 changes: 13 additions & 4 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ int main(int argc, char **argv)
log_init();

bool verbose = false;
int timeout_sec = 10, fork_child_fd = -1;
int timeout_sec = 10, fork_child_fd = -1, vfork_notify_fd = -1;
const char *sysroot = NULL;
int gdb_port = 0;
bool gdb_stop_on_entry = false;
Expand All @@ -103,7 +103,7 @@ int main(int argc, char **argv)
" -V, --version Show version and exit\n"
" -v, --verbose Trace each guest syscall\n"
" --timeout N Per-iteration vCPU run timeout "
"(seconds, default 10)\n"
"(seconds, default 10; 0 disables)\n"
" --sysroot PATH Resolve absolute guest paths under "
"PATH first\n"
" --gdb PORT Listen for GDB Remote Serial "
Expand All @@ -124,7 +124,7 @@ int main(int argc, char **argv)
} else if ((!strcmp(argv[arg_start], "--timeout") ||
!strcmp(argv[arg_start], "-t")) &&
arg_start + 1 < argc) {
if (parse_int_arg(argv[arg_start + 1], 1, INT_MAX, &timeout_sec) <
if (parse_int_arg(argv[arg_start + 1], 0, INT_MAX, &timeout_sec) <
0)
timeout_sec = 10;
arg_start += 2;
Expand All @@ -136,6 +136,14 @@ int main(int argc, char **argv)
return 1;
}
arg_start += 2;
} else if (!strcmp(argv[arg_start], "--vfork-notify-fd") &&
arg_start + 1 < argc) {
if (parse_int_arg(argv[arg_start + 1], 0, INT_MAX,
&vfork_notify_fd) < 0) {
log_error("invalid vfork notify fd: %s", argv[arg_start + 1]);
return 1;
}
arg_start += 2;
} else if (!strcmp(argv[arg_start], "--sysroot") &&
arg_start + 1 < argc) {
sysroot = argv[arg_start + 1];
Expand Down Expand Up @@ -166,7 +174,8 @@ int main(int argc, char **argv)

/* Fork-child mode: receive VM state over IPC and run */
if (fork_child_fd >= 0)
return fork_child_main(fork_child_fd, verbose, timeout_sec);
return fork_child_main(fork_child_fd, vfork_notify_fd, verbose,
timeout_sec);

if (arg_start >= argc) {
log_error(
Expand Down
201 changes: 94 additions & 107 deletions src/runtime/fork-state.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,72 +56,95 @@ int fork_ipc_read_all(int fd, void *buf, size_t len)
return 0;
}

/* macOS rejects overly large SCM_RIGHTS payloads with EINVAL. Keep each control
 * message comfortably below that limit and stream large fd sets in multiple
 * chunks.
 */
#define FORK_IPC_FD_CHUNK 120

/* Close the first @count descriptors in @fds without disturbing errno, so a
 * caller can still report the error that triggered the cleanup.
 */
static void fork_ipc_discard_fds(const int *fds, int count)
{
    int saved_errno = errno;
    for (int i = 0; i < count; i++)
        close(fds[i]);
    errno = saved_errno;
}

/* Send @count descriptors from @fds over the UNIX socket @sock.
 *
 * The set is split into SCM_RIGHTS messages of at most FORK_IPC_FD_CHUNK
 * descriptors, each carrying a single dummy data byte. The peer must read
 * them back with fork_ipc_recv_fds() using the same total count.
 *
 * Returns 0 on success (including count <= 0, where nothing is sent) and -1
 * on allocation or sendmsg() failure. On sendmsg() failure errno is left as
 * set by sendmsg(); chunks sent before the failure are not recalled.
 */
int fork_ipc_send_fds(int sock, const int *fds, int count)
{
    if (count <= 0)
        return 0;

    /* One control buffer, sized for the largest chunk, serves every message. */
    int max_chunk = count < FORK_IPC_FD_CHUNK ? count : FORK_IPC_FD_CHUNK;
    size_t buf_size = CMSG_SPACE((size_t) max_chunk * sizeof(int));
    uint8_t *cmsg_buf = calloc(1, buf_size);
    if (!cmsg_buf)
        return -1;

    int rc = 0;
    int sent = 0;
    while (sent < count) {
        int chunk = count - sent;
        if (chunk > FORK_IPC_FD_CHUNK)
            chunk = FORK_IPC_FD_CHUNK;
        size_t payload = (size_t) chunk * sizeof(int);

        char dummy = 'F';
        struct iovec iov = {.iov_base = &dummy, .iov_len = 1};

        memset(cmsg_buf, 0, buf_size);
        struct msghdr msg = {0};
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cmsg_buf;
        msg.msg_controllen = CMSG_SPACE(payload);

        struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = SCM_RIGHTS;
        cmsg->cmsg_len = CMSG_LEN(payload);
        memcpy(CMSG_DATA(cmsg), fds + sent, payload);

        ssize_t ret;
        do {
            ret = sendmsg(sock, &msg, 0);
        } while (ret < 0 && errno == EINTR);
        if (ret < 0) {
            rc = -1;
            break;
        }
        sent += chunk;
    }

    /* free() may clobber errno; keep the sendmsg() value for the caller. */
    int saved_errno = errno;
    free(cmsg_buf);
    errno = saved_errno;
    return rc;
}

/* Receive exactly @max_count descriptors from @sock into @fds.
 *
 * Counterpart of fork_ipc_send_fds(): reads SCM_RIGHTS messages of at most
 * FORK_IPC_FD_CHUNK descriptors until @max_count have arrived.
 *
 * Returns 0 with *out_count == max_count on success (or *out_count == 0 when
 * max_count <= 0). Returns -1 on allocation failure, recvmsg() failure or
 * EOF, a missing/non-SCM_RIGHTS control message, a truncated control message
 * (MSG_CTRUNC), or a message carrying zero or more descriptors than expected.
 * On failure every descriptor received so far is closed and *out_count is 0,
 * so the caller owns nothing and has nothing to clean up.
 */
int fork_ipc_recv_fds(int sock, int *fds, int max_count, int *out_count)
{
    *out_count = 0;
    if (max_count <= 0)
        return 0;

    int max_chunk = max_count < FORK_IPC_FD_CHUNK ? max_count
                                                  : FORK_IPC_FD_CHUNK;
    size_t buf_size = CMSG_SPACE((size_t) max_chunk * sizeof(int));
    uint8_t *cmsg_buf = calloc(1, buf_size);
    if (!cmsg_buf)
        return -1;

    int rc = 0;
    int received = 0;
    while (received < max_count) {
        int chunk_max = max_count - received;
        if (chunk_max > FORK_IPC_FD_CHUNK)
            chunk_max = FORK_IPC_FD_CHUNK;

        char dummy;
        struct iovec iov = {.iov_base = &dummy, .iov_len = 1};

        memset(cmsg_buf, 0, buf_size);
        struct msghdr msg = {0};
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cmsg_buf;
        msg.msg_controllen = CMSG_SPACE((size_t) chunk_max * sizeof(int));

        ssize_t ret;
        do {
            ret = recvmsg(sock, &msg, 0);
        } while (ret < 0 && errno == EINTR);
        if (ret <= 0) {
            /* Error, or the peer closed before sending every chunk. */
            rc = -1;
            break;
        }

        struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
        if (!cmsg || cmsg->cmsg_level != SOL_SOCKET ||
            cmsg->cmsg_type != SCM_RIGHTS || cmsg->cmsg_len < CMSG_LEN(0)) {
            rc = -1;
            break;
        }

        /* Never trust cmsg_len beyond the buffer we actually handed over. */
        size_t n = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
        size_t cap = (buf_size - CMSG_LEN(0)) / sizeof(int);
        if (n > cap)
            n = cap;

        if ((msg.msg_flags & MSG_CTRUNC) || n == 0 || n > (size_t) chunk_max) {
            /* Descriptors in a rejected message are already installed in this
             * process; close them so they are not leaked.
             */
            for (size_t i = 0; i < n; i++) {
                int stray;
                memcpy(&stray, CMSG_DATA(cmsg) + i * sizeof(int),
                       sizeof(stray));
                close(stray);
            }
            rc = -1;
            break;
        }

        memcpy(fds + received, CMSG_DATA(cmsg), n * sizeof(int));
        received += (int) n;
    }

    if (rc < 0) {
        /* Drop descriptors accepted from earlier chunks as well. */
        fork_ipc_discard_fds(fds, received);
        received = 0;
    }

    int saved_errno = errno;
    free(cmsg_buf);
    errno = saved_errno;

    *out_count = received;
    return rc;
}

Expand Down Expand Up @@ -379,36 +402,28 @@ static int fork_ipc_send_backing_fds(int ipc_sock,
uint32_t nbacking = 0;

for (uint32_t i = 0; i < num_guest_regions; i++) {
if (regions_snapshot[i].backing_fd >= 0)
if (regions_snapshot[i].backing_fd >= 0) {
if (fcntl(regions_snapshot[i].backing_fd, F_GETFD) < 0) {
log_error("clone: region %u carries stale backing_fd=%d: %s", i,
regions_snapshot[i].backing_fd, strerror(errno));
return -1;
}
backing_fds[nbacking++] = regions_snapshot[i].backing_fd;
}
}

if (fork_ipc_write_all(ipc_sock, &nbacking, sizeof(nbacking)) < 0)
return -1;
if (nbacking == 0)
return 0;

char dummy = 'B';
struct iovec iov = {.iov_base = &dummy, .iov_len = 1};
size_t cmsg_sz = CMSG_SPACE(nbacking * sizeof(int));
uint8_t *cmsg_buf = calloc(1, cmsg_sz);
if (!cmsg_buf)
return -1;

struct msghdr msg = {
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_control = cmsg_buf,
.msg_controllen = cmsg_sz,
};
struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(nbacking * sizeof(int));
memcpy(CMSG_DATA(cmsg), backing_fds, nbacking * sizeof(int));
int ret = sendmsg(ipc_sock, &msg, 0);
free(cmsg_buf);
return ret < 0 ? -1 : 0;
log_debug("clone: sending %u backing fds for %u regions", nbacking,
num_guest_regions);
if (fork_ipc_send_fds(ipc_sock, backing_fds, (int) nbacking) < 0) {
log_error("clone: send backing fds failed: %s", strerror(errno));
return -1;
}
return 0;
}

int fork_ipc_send_process_state(int ipc_sock,
Expand Down Expand Up @@ -507,45 +522,17 @@ static int fork_ipc_recv_backing_fds(int ipc_fd,
if (nbacking == 0 || nbacking > GUEST_MAX_REGIONS)
return 0;

char dummy;
struct iovec iov = {.iov_base = &dummy, .iov_len = 1};
size_t cmsg_sz = CMSG_SPACE(nbacking * sizeof(int));
uint8_t *cmsg_buf = calloc(1, cmsg_sz);
if (!cmsg_buf)
return -1;

struct msghdr msg = {
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_control = cmsg_buf,
.msg_controllen = cmsg_sz,
};
ssize_t nr = recvmsg(ipc_fd, &msg, 0);
if (nr <= 0) {
free(cmsg_buf);
int *region_fds = calloc(nbacking, sizeof(int));
if (!region_fds)
return -1;
}

struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
if (msg.msg_flags & MSG_CTRUNC) {
log_error("fork-child: backing fd SCM_RIGHTS payload truncated");
free(cmsg_buf);
return -1;
}
if (!cmsg || cmsg->cmsg_level != SOL_SOCKET ||
cmsg->cmsg_type != SCM_RIGHTS) {
log_error("fork-child: missing backing fd SCM_RIGHTS payload");
free(cmsg_buf);
return -1;
}
if (cmsg->cmsg_len < CMSG_LEN(0)) {
free(cmsg_buf);
int received_count = 0;
if (fork_ipc_recv_fds(ipc_fd, region_fds, (int) nbacking, &received_count) <
0) {
log_error("fork-child: failed to receive backing fds");
free(region_fds);
return -1;
}

int *region_fds = (int *) CMSG_DATA(cmsg);
uint32_t nreceived =
(uint32_t) ((cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
uint32_t nreceived = (uint32_t) received_count;
uint32_t fi = 0;

/* Sender (fork_ipc_send_backing_fds) iterates regions and sends one fd per
Expand All @@ -572,10 +559,10 @@ static int fork_ipc_recv_backing_fds(int ipc_fd,
if (nreceived != nbacking) {
log_error("fork-child: expected %u backing fds but received %u",
nbacking, nreceived);
free(cmsg_buf);
free(region_fds);
return -1;
}
free(cmsg_buf);
free(region_fds);
return 0;
}

Expand Down
Loading
Loading