diff --git a/src/arch/arm64/desc.h b/src/arch/arm64/desc.h index 49d2619..8e80c0c 100644 --- a/src/arch/arm64/desc.h +++ b/src/arch/arm64/desc.h @@ -1,7 +1,17 @@ #pragma once #define RAM_BASE (1UL << 31) + +/* GIC SPIs (offset by ARM_GIC_SPI_BASE inside vm_irq_line). Distinct lines per + * device so a level-triggered ARM GIC can de-assert per source rather than + * sharing a vector across virtio paths. + */ #define SERIAL_IRQ 0 #define VIRTIO_BLK_IRQ 1 #define VIRTIO_NET_IRQ 2 -#define KERNEL_OPTS "console=ttyS0" + +/* panic=-1 reboots immediately on guest panic. arm64 has no keyboard reset + * path; the kernel issues a PSCI SYSTEM_RESET / SYSTEM_OFF, which KVM + * surfaces as KVM_EXIT_SYSTEM_EVENT and vm_run() handles as a clean exit. + */ +#define KERNEL_OPTS "console=ttyS0 panic=-1" diff --git a/src/arch/arm64/vm.c b/src/arch/arm64/vm.c index f3c8a0d..67cca1f 100644 --- a/src/arch/arm64/vm.c +++ b/src/arch/arm64/vm.c @@ -92,6 +92,13 @@ int vm_arch_cpu_init(vm_t *v) if (ioctl(v->vm_fd, KVM_ARM_PREFERRED_TARGET, &vcpu_init) < 0) return throw_err("Failed to find perferred CPU type\n"); + /* Enable in-kernel PSCI 0.2 emulation. Without this, a guest panic with + * panic=-1 issues SYSTEM_RESET and KVM either ignores the SMC/HVC (so the + * guest spins) or signals an undefined-instruction trap. With PSCI on, + * the call surfaces as KVM_EXIT_SYSTEM_EVENT to the host loop. + */ + vcpu_init.features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2; + if (ioctl(v->vcpu_fd, KVM_ARM_VCPU_INIT, &vcpu_init)) return throw_err("Failed to initialize vCPU\n"); @@ -142,9 +149,9 @@ int vm_arch_init_platform_device(vm_t *v) if (serial_init(&v->serial, &v->io_bus)) return throw_err("Failed to init UART device"); - /* Zero virtio_blk_dev so pci_dev_is_registered() observes a clean - * state when the user boots without -d. virtio_net_init memsets - * inside vm_enable_net, so virtio_net_dev is covered by that path. 
+ /* Zero virtio_blk_dev so pci_dev_is_registered() observes a clean state + * when the user boots without -d. virtio_net_init memsets inside + * vm_enable_net, so virtio_net_dev is covered by that path. * x86 already does the same call in its vm_arch_init_platform_device. */ virtio_blk_init(&v->virtio_blk_dev); @@ -347,6 +354,17 @@ static int generate_fdt(vm_t *v) __FDT(property_cell, "phandle", FDT_PHANDLE_GIC); __FDT(end_node); + /* /psci node: lets the guest discover the in-kernel PSCI 0.2 emulator + * we requested in vm_arch_cpu_init via KVM_ARM_VCPU_PSCI_0_2. KVM uses + * HVC as the conduit on the virtual CPU. Without this node the kernel + * doesn't know the firmware interface exists and falls back to a + * spinloop on panic. + */ + __FDT(begin_node, "psci"); + __FDT(property_string, "compatible", "arm,psci-0.2"); + __FDT(property_string, "method", "hvc"); + __FDT(end_node); + /* /uart node: serial device */ /* The node name of the serial device is different from kvmtool. */ __FDT(begin_node, "uart"); diff --git a/src/arch/x86/desc.h b/src/arch/x86/desc.h index 2960c51..9f2efde 100644 --- a/src/arch/x86/desc.h +++ b/src/arch/x86/desc.h @@ -1,7 +1,17 @@ #pragma once #define RAM_BASE 0 + +/* IO-APIC GSIs. Each device gets its own line so we never share a vector + * between virtio devices, which keeps level-triggered ISA legacy IRQs (the + * 16550 on IRQ4) out of the way of edge-triggered virtio MSI-less paths. + */ #define SERIAL_IRQ 4 #define VIRTIO_NET_IRQ 14 #define VIRTIO_BLK_IRQ 15 -#define KERNEL_OPTS "console=ttyS0 pci=conf1" + +/* panic=-1 reboots immediately on guest panic; reboot=k uses the keyboard + * controller path which on KVM ends in a triple-fault, surfacing cleanly as + * KVM_EXIT_SHUTDOWN to the host loop in vm_run(). 
+ */ +#define KERNEL_OPTS "console=ttyS0 pci=conf1 panic=-1 reboot=k" diff --git a/src/diskimg.c b/src/diskimg.c index 89456a9..c81b845 100644 --- a/src/diskimg.c +++ b/src/diskimg.c @@ -9,8 +9,10 @@ ssize_t diskimg_read(struct diskimg *diskimg, off_t offset, size_t size) { - lseek(diskimg->fd, offset, SEEK_SET); - return read(diskimg->fd, data, size); + /* pread/pwrite carry the offset in the syscall, so concurrent virtq + * workers cannot race on a shared file pointer the way lseek+read does. + */ + return pread(diskimg->fd, data, size, offset); } ssize_t diskimg_write(struct diskimg *diskimg, @@ -18,8 +20,7 @@ ssize_t diskimg_write(struct diskimg *diskimg, off_t offset, size_t size) { - lseek(diskimg->fd, offset, SEEK_SET); - return write(diskimg->fd, data, size); + return pwrite(diskimg->fd, data, size, offset); } int diskimg_flush(struct diskimg *diskimg) @@ -33,7 +34,11 @@ int diskimg_init(struct diskimg *diskimg, const char *file_path) if (diskimg->fd < 0) return -1; struct stat st; - fstat(diskimg->fd, &st); + if (fstat(diskimg->fd, &st) < 0) { + close(diskimg->fd); + diskimg->fd = -1; + return -1; + } diskimg->size = st.st_size; return 0; } diff --git a/src/main.c b/src/main.c index a379acb..a4e0245 100644 --- a/src/main.c +++ b/src/main.c @@ -77,8 +77,6 @@ int main(int argc, char *argv[]) } } - set_input_mode(); - vm_t vm; if (vm_init(&vm) < 0) return throw_err("Failed to initialize guest vm"); @@ -99,6 +97,11 @@ int main(int argc, char *argv[]) if (vm_late_init(&vm) < 0) return -1; + /* Switch the terminal to raw mode only once setup has succeeded so that + * any error from the load/init paths above is rendered on a normal tty. 
+ */ + set_input_mode(); + vm_run(&vm); vm_exit(&vm); diff --git a/src/virtio-blk.c b/src/virtio-blk.c index 3b1452c..304f210 100644 --- a/src/virtio-blk.c +++ b/src/virtio-blk.c @@ -275,7 +275,6 @@ static int virtio_blk_setup(struct virtio_blk_dev *dev, struct diskimg *diskimg) } dev->enable = true; - /* FIXME: irq_num should be different to other devs */ dev->irq_num = VIRTIO_BLK_IRQ; dev->diskimg = diskimg; dev->config.capacity = diskimg->size >> 9; diff --git a/src/virtio-pci.c b/src/virtio-pci.c index 908d7fe..3be2867 100644 --- a/src/virtio-pci.c +++ b/src/virtio-pci.c @@ -46,7 +46,39 @@ static void virtio_pci_write_guest_feature(struct virtio_pci_dev *dev) static void virtio_pci_reset(struct virtio_pci_dev *dev) { - /* TODO: virtio pci reset */ + /* Virtio 1.x §2.4: writing 0 to device_status resets the device to its + * initial post-power-on state. We clear the negotiated bits the guest + * is about to re-write: acked features, ISR, the per-virtq packed-ring + * indices, and the common-cfg selector bytes. This is sufficient for a + * driver re-probe (reload, kexec) where the guest hasn't yet enabled + * any virtqueue. + * + * We deliberately leave info.enable, desc_ring, and the device/driver + * event pointers alone, because the device-emulator workers in + * virtio-blk / virtio-net poll those without locking. A full reset that + * tears down enabled queues would have to write the per-device stopfd + * and pthread_join the workers, which the generic virtio-pci layer has + * no handle on — leaving that for a follow-up that adds a + * virtio_pci_ops::reset hook. 
+ */ + dev->guest_feature = 0; + dev->config.common_cfg.device_feature_select = 0; + dev->config.common_cfg.guest_feature_select = 0; + dev->config.common_cfg.guest_feature = 0; + dev->config.common_cfg.queue_select = 0; + __atomic_store_n(&dev->config.isr_cap.isr_status, 0, __ATOMIC_RELEASE); + + for (uint16_t i = 0; i < dev->num_queues; i++) { + struct virtq *vq = &dev->vq[i]; + if (vq->info.enable) + continue; + vq->info.size = VIRTQ_SIZE; + vq->info.desc_addr = 0; + vq->info.device_addr = 0; + vq->info.driver_addr = 0; + vq->next_avail_idx = 0; + vq->used_wrap_count = 1; + } } static void virtio_pci_write_status(struct virtio_pci_dev *dev) @@ -62,7 +94,7 @@ static void virtio_pci_select_virtq(struct virtio_pci_dev *dev) uint16_t select = dev->config.common_cfg.queue_select; struct virtio_pci_common_cfg *config = &dev->config.common_cfg; - if (select < config->num_queues) { + if (select < dev->num_queues) { uint64_t offset = offsetof(struct virtio_pci_common_cfg, queue_size); memcpy((void *) ((uintptr_t) config + offset), &dev->vq[select].info, sizeof(struct virtq_info)); @@ -114,7 +146,7 @@ static void virtio_pci_space_write(struct virtio_pci_dev *dev, offset <= VIRTIO_PCI_COMMON_Q_USEDHI) { uint16_t select = dev->config.common_cfg.queue_select; uint64_t info_offset = offset - VIRTIO_PCI_COMMON_Q_SIZE; - if (select < dev->config.common_cfg.num_queues) { + if (select < dev->num_queues) { memcpy((void *) ((uintptr_t) &dev->vq[select].info + info_offset), data, size); @@ -251,6 +283,7 @@ void virtio_pci_set_virtq(struct virtio_pci_dev *dev, struct virtq *vq, uint16_t num_queues) { + dev->num_queues = num_queues; dev->config.common_cfg.num_queues = num_queues; dev->vq = vq; } diff --git a/src/virtio-pci.h b/src/virtio-pci.h index d9a1fd5..ed2e764 100644 --- a/src/virtio-pci.h +++ b/src/virtio-pci.h @@ -35,6 +35,13 @@ struct virtio_pci_dev { struct virtio_pci_notify_cap *notify_cap; struct virtio_pci_cap *dev_cfg_cap; struct virtq *vq; + /* Host-side mirror of the 
queue count. config.common_cfg.num_queues + * lives in guest-writable BAR memory (the unconditional memcpy in + * virtio_pci_space_write lets a guest overwrite it), so trusting it + * to bound vq[] indexing is an OOB-write primitive. Bounds checks + * use this field instead. + */ + uint16_t num_queues; }; uint64_t virtio_pci_get_notify_addr(struct virtio_pci_dev *dev, diff --git a/src/vm.c b/src/vm.c index 344670f..511169e 100644 --- a/src/vm.c +++ b/src/vm.c @@ -161,6 +161,20 @@ int vm_run(vm_t *v) printf("shutdown\n"); munmap(run, run_size); return 0; + case KVM_EXIT_SYSTEM_EVENT: { + /* arm64 PSCI SYSTEM_OFF / SYSTEM_RESET land here. SHUTDOWN and + * RESET are clean exits from our POV — kvm-host has no reboot + * loop, and a guest panic with panic=-1 reaches us as RESET + * (indistinguishable from a userspace `reboot`), matching the + * x86 reboot=k path that comes back as KVM_EXIT_SHUTDOWN. + * CRASH is the one type that signals host-relevant failure + * (NMI watchdog, kdump trigger), so propagate it as -1. + */ + uint32_t type = run->system_event.type; + printf("system event %u\n", type); + munmap(run, run_size); + return type == KVM_SYSTEM_EVENT_CRASH ? -1 : 0; + } default: printf("reason: %d\n", run->exit_reason); munmap(run, run_size);