From 4b26e80d621361fda57b5894894dd42a9aaedf5b Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Wed, 29 Apr 2026 12:26:21 +0800 Subject: [PATCH] Fix packed-ring completion ordering and buffer ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Virtio 1.x packed virtqueue, the device must publish the buffer ID and used.len before flipping the USED flag, with a release barrier separating the body writes from the flag publish. Today virtio-blk and virtio-net XOR the USED bit in desc->flags with a plain store, never write desc->id, and on virtio-blk the desc->len write is also unfenced. A guest CPU can therefore observe USED set with stale len/id, and drivers that demux completions by buffer ID misbehave. Each completion site now writes id (taken from the chain's last descriptor — Virtio 1.x packed places the buffer ID on the descriptor without F_NEXT, matching QEMU's virtqueue_packed_pop) and len, then publishes the new flags via __atomic_store_n with __ATOMIC_RELEASE on a value precomputed from a plain load. The slot is single-writer between AVAIL and USED, so the load is safe non-atomic, and the release store compiles to a plain mov on x86: no lock prefix. virtio-net tx explicitly sets desc->len = 0 since transmit chains have no device-writable bytes. virtq_get_avail now reads desc->flags with __ATOMIC_ACQUIRE so addr/len/id reads after the AVAIL check observe the driver's release write. virtq_handle_avail does the same for guest_event->flags. isr_status writes were a non-atomic read-modify-write shared between virtio-net rx and tx workers; concurrent updates could lose the QUEUE bit. Switch the writers to __atomic_fetch_or with __ATOMIC_RELEASE and the guest-side read-clear in virtio_pci_space_read to __atomic_exchange_n to zero with __ATOMIC_ACQUIRE so a worker's fetch_or that races the guest read is captured rather than dropped. --- src/virtio-blk.c | 28 +++++++++++++++++++++------- src/virtio-net.c | 22 +++++++++++++++++----- src/virtio-pci.c | 13 ++++++++++--- src/virtq.c | 16 ++++++++++++---- 4 files changed, 60 insertions(+), 19 deletions(-) diff --git a/src/virtio-blk.c b/src/virtio-blk.c index d7ec168..d94f321 100644 --- a/src/virtio-blk.c +++ b/src/virtio-blk.c @@ -141,23 +141,37 @@ static void virtio_blk_complete_request(struct virtq *vq) return; desc = virtq_get_avail(vq); /* The status descriptor must advertise at least one device-writable - * byte; otherwise we'd clobber memory the guest did not offer. */ + * byte; otherwise we'd clobber memory the guest did not offer. + */ if (desc->len < 1) return; req.status = vm_guest_buf(v, desc->addr, 1); if (!req.status) return; *req.status = status; - /* used.len is total bytes the device wrote into device-writable - * buffers across the chain: the 1-byte status is always written, plus - * the data buffer on a successful IN. On any error we report only the - * status byte so the guest does not consume stale data. */ + /* used.len is total bytes the device wrote into device-writable buffers + * across the chain: the 1-byte status is always written, plus the data + * buffer on a successful IN. On any error we report only the status + * byte so the guest does not consume stale data. + */ size_t written = 1; if (status == VIRTIO_BLK_S_OK && req.type == VIRTIO_BLK_T_IN) written += (size_t) io_bytes; used_desc->len = (uint32_t) written; - used_desc->flags ^= (1ULL << VRING_PACKED_DESC_F_USED); - dev->virtio_pci_dev.config.isr_cap.isr_status |= VIRTIO_PCI_ISR_QUEUE; + /* Buffer ID lives on the chain's last descriptor in packed virtqueues; + * echo it back into the head/used slot so the driver can match the + * completion to its in-flight request. + */ + used_desc->id = desc->id; + /* Single-writer slot until USED publishes, so a plain load of the + * current flags is safe. Release-store the new value so id and len are + * visible to the guest before the USED flag flip. + */ + uint16_t new_flags = + used_desc->flags ^ (uint16_t) (1U << VRING_PACKED_DESC_F_USED); + __atomic_store_n(&used_desc->flags, new_flags, __ATOMIC_RELEASE); + __atomic_fetch_or(&dev->virtio_pci_dev.config.isr_cap.isr_status, + VIRTIO_PCI_ISR_QUEUE, __ATOMIC_RELEASE); } } diff --git a/src/virtio-net.c b/src/virtio-net.c index 1fafc23..e8338c9 100644 --- a/src/virtio-net.c +++ b/src/virtio-net.c @@ -176,9 +176,15 @@ void virtio_net_complete_request_rx(struct virtq *vq) return; } desc->len = virtio_header_len + read_bytes; - - desc->flags ^= (1ULL << VRING_PACKED_DESC_F_USED); - dev->virtio_pci_dev.config.isr_cap.isr_status |= VIRTIO_PCI_ISR_QUEUE; + /* Single-descriptor chain: head and last alias the same slot, so + * the buffer ID the driver wrote in desc->id is already correct. + * Release-store flags so the len update lands before the guest + * observes the USED flag flip. */ + uint16_t new_flags = + desc->flags ^ (uint16_t) (1U << VRING_PACKED_DESC_F_USED); + __atomic_store_n(&desc->flags, new_flags, __ATOMIC_RELEASE); + __atomic_fetch_or(&dev->virtio_pci_dev.config.isr_cap.isr_status, + VIRTIO_PCI_ISR_QUEUE, __ATOMIC_RELEASE); return; } vq->guest_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE; @@ -213,8 +219,14 @@ void virtio_net_complete_request_tx(struct virtq *vq) vq->guest_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE; return; } - desc->flags ^= (1ULL << VRING_PACKED_DESC_F_USED); - dev->virtio_pci_dev.config.isr_cap.isr_status |= VIRTIO_PCI_ISR_QUEUE; + /* TX buffers are device-readable only, so zero bytes were written + * to device-writable parts. */ + desc->len = 0; + uint16_t new_flags = + desc->flags ^ (uint16_t) (1U << VRING_PACKED_DESC_F_USED); + __atomic_store_n(&desc->flags, new_flags, __ATOMIC_RELEASE); + __atomic_fetch_or(&dev->virtio_pci_dev.config.isr_cap.isr_status, + VIRTIO_PCI_ISR_QUEUE, __ATOMIC_RELEASE); return; } vq->guest_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE; diff --git a/src/virtio-pci.c b/src/virtio-pci.c index ea25027..a2de270 100644 --- a/src/virtio-pci.c +++ b/src/virtio-pci.c @@ -139,9 +139,16 @@ static void virtio_pci_space_read(struct virtio_pci_dev *dev, uint8_t size) { if (offset < offsetof(struct virtio_pci_config, dev_cfg)) { - memcpy(data, (void *) ((uintptr_t) &dev->config + offset), size); - if (offset == offsetof(struct virtio_pci_config, isr_cap)) { - dev->config.isr_cap.isr_status = 0; + if (offset == offsetof(struct virtio_pci_config, isr_cap) && + size <= sizeof(dev->config.isr_cap.isr_status)) { + /* Read-and-clear in one atomic step so a worker thread's + * concurrent fetch_or on isr_status cannot be lost between the + * load and the zero-back. Acquire pairs with workers' release. */ + uint32_t status = __atomic_exchange_n( + &dev->config.isr_cap.isr_status, 0, __ATOMIC_ACQUIRE); + memcpy(data, &status, size); + } else { + memcpy(data, (void *) ((uintptr_t) &dev->config + offset), size); } } else { /* dev config read */ diff --git a/src/virtq.c b/src/virtq.c index 3550624..10cd131 100644 --- a/src/virtq.c +++ b/src/virtq.c @@ -40,13 +40,16 @@ bool virtq_check_next(struct vring_packed_desc *desc) struct vring_packed_desc *virtq_get_avail(struct virtq *vq) { struct vring_packed_desc *desc = &vq->desc_ring[vq->next_avail_idx]; - uint16_t flags = desc->flags; + /* Acquire pairs with the driver's release when it published the descriptor. + * After we observe AVAIL set, every other field of the descriptor + * (addr/len/id/...) is guaranteed to be visible. + */ + uint16_t flags = __atomic_load_n(&desc->flags, __ATOMIC_ACQUIRE); bool avail = flags & (1ULL << VRING_PACKED_DESC_F_AVAIL); bool used = flags & (1ULL << VRING_PACKED_DESC_F_USED); - if (avail != vq->used_wrap_count || used == vq->used_wrap_count) { + if (avail != vq->used_wrap_count || used == vq->used_wrap_count) return NULL; - } vq->next_avail_idx++; if (vq->next_avail_idx >= vq->info.size) { vq->next_avail_idx -= vq->info.size; @@ -60,6 +63,11 @@ void virtq_handle_avail(struct virtq *vq) if (!vq->info.enable) return; virtq_complete_request(vq); - if (vq->guest_event->flags == VRING_PACKED_EVENT_FLAG_ENABLE) + /* Driver-side suppression flag is shared memory; acquire so we don't + * re-order the read past the completion writes above. + */ + uint16_t evt_flags = + __atomic_load_n(&vq->guest_event->flags, __ATOMIC_ACQUIRE); + if (evt_flags == VRING_PACKED_EVENT_FLAG_ENABLE) virtq_notify_used(vq); }