diff --git a/Cargo.lock b/Cargo.lock index 04bce118bb..982d1722c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2689,6 +2689,7 @@ version = "0.1.0" dependencies = [ "arch", "libc", + "thiserror 2.0.17", "vm-memory", ] diff --git a/vm-allocator/Cargo.toml b/vm-allocator/Cargo.toml index a4996d6dc3..512ad23fbb 100644 --- a/vm-allocator/Cargo.toml +++ b/vm-allocator/Cargo.toml @@ -10,6 +10,7 @@ kvm = ["arch/kvm"] [dependencies] libc = { workspace = true } +thiserror = { workspace = true } vm-memory = { workspace = true } [target.'cfg(any(target_arch = "aarch64", target_arch = "riscv64"))'.dependencies] diff --git a/vm-allocator/src/gsi.rs b/vm-allocator/src/gsi.rs index d58670a43c..e20bd2acb7 100644 --- a/vm-allocator/src/gsi.rs +++ b/vm-allocator/src/gsi.rs @@ -4,14 +4,10 @@ #[cfg(target_arch = "x86_64")] use std::collections::btree_map::BTreeMap; -use std::result; +use std::result::Result; +use std::sync::Mutex; -#[derive(Debug)] -pub enum Error { - Overflow, -} - -pub type Result = result::Result; +use thiserror::Error; /// GsiApic #[cfg(target_arch = "x86_64")] @@ -29,80 +25,191 @@ impl GsiApic { } } +#[derive(Debug)] +struct InterruptAllocator { + words: Vec, + size: u32, + offset: u32, +} + +/// Errors that may happen while allocating or freeing an interrupt. +#[derive(Error, Debug)] +pub enum InterruptAllocError { + /// Interrupt allocator is exhausted, i.e. out of interrupt vectors. + #[error("Interrupt allocator is exhausted")] + ExhaustedError(), + + /// Tried to free an interrupt that wasn't allocated. + #[error("Interrupt was not allocated: {0}")] + AlreadyFree(u32), + + /// Tried to free an interrupt that is not in range of the interrupt allocator. + #[error("Interrupt vector is out of range: {0}")] + OutOfRange(u32), +} + +// Maximum number of IRQ routes according to the kernel code. +const KVM_MAX_IRQ_ROUTES: u32 = 4096; + +impl InterruptAllocator { + fn new(size: u32, offset: u32) -> Self { + assert_ne!(size, 0); + let bits_per_word = usize::BITS; + let num_words = (size + bits_per_word - 1).div_ceil(bits_per_word); + + let mut words = vec![0usize; num_words as usize]; + words[(num_words - 1) as usize] = Self::last_word(size); + + Self { + words, + size, + offset, + } + } + + fn last_word(size: u32) -> usize { + let rem = size % usize::BITS; + if rem == 0 { + 0usize + } else { + !((1usize << rem) - 1) + } + } + + fn free(&mut self, vector: u32) -> Result<(), InterruptAllocError> { + // At first we make sure that the vector is not out of range. + if !(self.offset..self.offset + self.size).contains(&vector) { + return Err(InterruptAllocError::OutOfRange(vector)); + } + + let idx = vector.abs_diff(self.offset); + let (w, b) = Self::word_and_bit(idx); + + let mask = 1usize << b; + + // Let's first check whether the bit is set. + if self.words[w] & mask == 0usize { + return Err(InterruptAllocError::AlreadyFree(vector)); + } + // Clear the bit and we are done! + self.words[w] &= !mask; + Ok(()) + } + + fn word_and_bit(vector: u32) -> (usize, usize) { + let idx = vector as usize; + let bits = usize::BITS as usize; + (idx / bits, idx & bits) + } + + fn alloc(&mut self) -> Result { + let bits = usize::BITS as usize; + for idx in 0..self.words.len() { + let word = self.words[idx]; + + // If every bit is set, we can continue. + if word == !0usize { + continue; + } + + // Find lowest free bit. + let bit = (!word).trailing_zeros() as usize; + // Set the bit. + self.words[idx] |= 1usize << bit; + // Calculate index, add offset and return. + let vector = idx * bits + bit; + return Ok(vector as u32 + self.offset); + } + + Err(InterruptAllocError::ExhaustedError()) + } +} + /// GsiAllocator pub struct GsiAllocator { #[cfg(target_arch = "x86_64")] apics: BTreeMap, - next_irq: u32, - next_gsi: u32, + irqs: Mutex, + gsis: Mutex, } impl GsiAllocator { #[cfg(target_arch = "x86_64")] /// New GSI allocator pub fn new(apics: &[GsiApic]) -> Self { - let mut allocator = GsiAllocator { - apics: BTreeMap::new(), - next_irq: 0xffff_ffff, - next_gsi: 0, - }; + let mut allocator_apics = BTreeMap::new(); + let mut next_irq = 0xffff_ffffu32; + let mut next_gsi = 0u32; for apic in apics { - if apic.base < allocator.next_irq { - allocator.next_irq = apic.base; + if apic.base < next_irq { + next_irq = apic.base; } - if apic.base + apic.irqs > allocator.next_gsi { - allocator.next_gsi = apic.base + apic.irqs; + if apic.base + apic.irqs > next_gsi { + next_gsi = apic.base + apic.irqs; } - allocator.apics.insert(apic.base, apic.irqs); + allocator_apics.insert(apic.base, apic.irqs); } - allocator + Self { + apics: allocator_apics, + irqs: Mutex::new(InterruptAllocator::new( + KVM_MAX_IRQ_ROUTES - next_irq, + next_irq, + )), + gsis: Mutex::new(InterruptAllocator::new( + KVM_MAX_IRQ_ROUTES - next_gsi, + next_gsi, + )), + } } #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] /// New GSI allocator pub fn new() -> Self { GsiAllocator { - next_irq: arch::IRQ_BASE, - next_gsi: arch::IRQ_BASE, + irqs: Mutex::new(InterruptAllocator::new( + KVM_MAX_IRQ_ROUTES - arch::IRQ_BASE, + arch::IRQ_BASE, + )), + gsis: Mutex::new(InterruptAllocator::new( + KVM_MAX_IRQ_ROUTES - arch::IRQ_BASE, + arch::IRQ_BASE, + )), } } /// Allocate a GSI - pub fn allocate_gsi(&mut self) -> Result { - let gsi = self.next_gsi; - self.next_gsi = self.next_gsi.checked_add(1).ok_or(Error::Overflow)?; - Ok(gsi) + pub fn allocate_gsi(&mut self) -> Result { + self.gsis.lock().unwrap().alloc() + } + + /// Frees a GSI + pub fn free_gsi(&mut self, vector: u32) -> Result<(), InterruptAllocError> { + self.gsis.lock().unwrap().free(vector) } #[cfg(target_arch = "x86_64")] /// Allocate an IRQ - pub fn allocate_irq(&mut self) -> Result { - let mut irq: u32 = 0; + pub fn allocate_irq(&mut self) -> Result { + let next_irq = self.irqs.lock().unwrap().alloc()?; for (base, irqs) in self.apics.iter() { // HACKHACK - This only works with 1 single IOAPIC... - if self.next_irq >= *base && self.next_irq < *base + *irqs { - irq = self.next_irq; - self.next_irq += 1; + if next_irq >= *base && next_irq < *base + *irqs { + return Ok(next_irq); } } - if irq == 0 { - return Err(Error::Overflow); - } - - Ok(irq) + self.irqs.lock().unwrap().free(next_irq)?; + Err(InterruptAllocError::ExhaustedError()) } #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] /// Allocate an IRQ - pub fn allocate_irq(&mut self) -> Result { - let irq = self.next_irq; - self.next_irq = self.next_irq.checked_add(1).ok_or(Error::Overflow)?; - Ok(irq) + pub fn allocate_irq(&mut self) -> Result { + self.irqs.lock().unwrap().alloc() } } diff --git a/vm-allocator/src/lib.rs b/vm-allocator/src/lib.rs index d560482889..ecc59ec16b 100644 --- a/vm-allocator/src/lib.rs +++ b/vm-allocator/src/lib.rs @@ -17,9 +17,9 @@ pub mod page_size; mod system; pub use crate::address::AddressAllocator; -pub use crate::gsi::GsiAllocator; #[cfg(target_arch = "x86_64")] pub use crate::gsi::GsiApic; +pub use crate::gsi::{GsiAllocator, InterruptAllocError}; pub use crate::system::SystemAllocator; mod memory_slot; pub use memory_slot::MemorySlotAllocator; diff --git a/vm-allocator/src/system.rs b/vm-allocator/src/system.rs index 02ea86c6a1..22d261725d 100644 --- a/vm-allocator/src/system.rs +++ b/vm-allocator/src/system.rs @@ -10,9 +10,9 @@ use vm_memory::{GuestAddress, GuestUsize}; use crate::address::AddressAllocator; -use crate::gsi::GsiAllocator; #[cfg(target_arch = "x86_64")] use crate::gsi::GsiApic; +use crate::gsi::{GsiAllocator, InterruptAllocError}; use crate::page_size::get_page_size; /// Manages allocating system resources such as address space and interrupt numbers. @@ -31,17 +31,17 @@ use crate::page_size::get_page_size; /// GuestAddress(0x10000000), 0x10000000, /// #[cfg(target_arch = "x86_64")] &[GsiApic::new(5, 19)]).unwrap(); /// #[cfg(target_arch = "x86_64")] -/// assert_eq!(allocator.allocate_irq(), Some(5)); +/// assert_eq!(allocator.allocate_irq().unwrap(), 5); /// #[cfg(target_arch = "aarch64")] -/// assert_eq!(allocator.allocate_irq(), Some(32)); +/// assert_eq!(allocator.allocate_irq().unwrap(), 32); /// #[cfg(target_arch = "riscv64")] -/// assert_eq!(allocator.allocate_irq(), Some(0)); +/// assert_eq!(allocator.allocate_irq().unwrap(), 0); /// #[cfg(target_arch = "x86_64")] -/// assert_eq!(allocator.allocate_irq(), Some(6)); +/// assert_eq!(allocator.allocate_irq().unwrap(), 6); /// #[cfg(target_arch = "aarch64")] -/// assert_eq!(allocator.allocate_irq(), Some(33)); +/// assert_eq!(allocator.allocate_irq().unwrap(), 33); /// #[cfg(target_arch = "riscv64")] -/// assert_eq!(allocator.allocate_irq(), Some(1)); +/// assert_eq!(allocator.allocate_irq().unwrap(), 1); /// assert_eq!(allocator.allocate_platform_mmio_addresses(None, 0x1000, Some(0x1000)), Some(GuestAddress(0x1fff_f000))); /// /// ``` @@ -82,13 +82,18 @@ impl SystemAllocator { } /// Reserves the next available system irq number. - pub fn allocate_irq(&mut self) -> Option { - self.gsi_allocator.allocate_irq().ok() + pub fn allocate_irq(&mut self) -> Result { + self.gsi_allocator.allocate_irq() } /// Reserves the next available GSI. - pub fn allocate_gsi(&mut self) -> Option { - self.gsi_allocator.allocate_gsi().ok() + pub fn allocate_gsi(&mut self) -> Result { + self.gsi_allocator.allocate_gsi() + } + + /// Frees the given GSI. + pub fn free_gsi(&mut self, vector: u32) -> Result<(), InterruptAllocError> { + self.gsi_allocator.free_gsi(vector) } /// Reserves a section of `size` bytes of IO address space. diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index c536a67bb4..41421d3ed5 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -94,7 +94,7 @@ use virtio_devices::{ AccessPlatformMapping, ActivateError, Block, Endpoint, IommuMapping, PostMigrationAnnouncer, VdpaDmaMapping, VirtioMemMappingSource, }; -use vm_allocator::{AddressAllocator, SystemAllocator}; +use vm_allocator::{AddressAllocator, InterruptAllocError, SystemAllocator}; use vm_device::dma_mapping::ExternalDmaMapping; use vm_device::interrupt::{ InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, @@ -274,7 +274,7 @@ pub enum DeviceManagerError { /// Cannot allocate IRQ. #[error("Cannot allocate IRQ")] - AllocateIrq, + AllocateIrq(#[from] InterruptAllocError), /// Cannot configure the IRQ. #[error("Cannot configure the IRQ")] diff --git a/vmm/src/interrupt.rs b/vmm/src/interrupt.rs index e42ba2f76b..9edf8d249c 100644 --- a/vmm/src/interrupt.rs +++ b/vmm/src/interrupt.rs @@ -24,19 +24,23 @@ struct InterruptRoute { gsi: u32, irq_fd: EventFd, registered: AtomicBool, + allocator: Arc>, } impl InterruptRoute { - pub fn new(allocator: &mut SystemAllocator) -> Result { + pub fn new(allocator: Arc>) -> Result { let irq_fd = EventFd::new(libc::EFD_NONBLOCK)?; let gsi = allocator + .lock() + .unwrap() .allocate_gsi() - .ok_or_else(|| io::Error::other("Failed allocating new GSI"))?; + .map_err(|e| io::Error::other(format!("Failed allocating new GSI: {e}")))?; Ok(InterruptRoute { gsi, irq_fd, registered: AtomicBool::new(false), + allocator, }) } @@ -77,6 +81,12 @@ impl InterruptRoute { } } +impl Drop for InterruptRoute { + fn drop(&mut self) { + let _ = self.allocator.lock().unwrap().free_gsi(self.gsi); + } +} + pub struct RoutingEntry { route: IrqRoutingEntry, masked: bool, @@ -292,11 +302,10 @@ impl InterruptManager for MsiInterruptManager { type GroupConfig = MsiIrqGroupConfig; fn create_group(&self, config: Self::GroupConfig) -> Result> { - let mut allocator = self.allocator.lock().unwrap(); let mut irq_routes: HashMap = HashMap::with_capacity(config.count as usize); for i in config.base..config.base + config.count { - irq_routes.insert(i, InterruptRoute::new(&mut allocator)?); + irq_routes.insert(i, InterruptRoute::new(self.allocator.clone())?); } Ok(Arc::new(MsiInterruptGroup::new( diff --git a/vmm/src/pci_segment.rs b/vmm/src/pci_segment.rs index 462d6b7111..9b0d88e141 100644 --- a/vmm/src/pci_segment.rs +++ b/vmm/src/pci_segment.rs @@ -198,7 +198,7 @@ impl PciSegment { .lock() .unwrap() .allocate_irq() - .ok_or(DeviceManagerError::AllocateIrq)? as u8, + .map_err(DeviceManagerError::AllocateIrq)? as u8, ); }