diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index e3b1a9e7f7..062e404a56 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -64,6 +64,9 @@ jobs: - name: Build (sev_snp) run: cargo build --locked --bin cloud-hypervisor --no-default-features --features "sev_snp" + - name: Build (kvm + sev_snp) + run: cargo build --locked --bin cloud-hypervisor --no-default-features --features "kvm,igvm,sev_snp,fw_cfg" + - name: Build (igvm) run: cargo build --locked --bin cloud-hypervisor --no-default-features --features "igvm" diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index de6391186a..a7edfd12d7 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -157,6 +157,36 @@ jobs: target: ${{ matrix.target }} args: --locked --all --all-targets --no-default-features --tests --examples --features "tdx,kvm" -- -D warnings + - name: Clippy (kvm + sev_snp + igvm + fw_cfg) + if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} + uses: houseabsolute/actions-rust-cross@v1 + with: + command: clippy + cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 + toolchain: ${{ matrix.rust }} + target: ${{ matrix.target }} + args: --locked --all --all-targets --no-default-features --tests --examples --features "kvm,sev_snp,igvm,fw_cfg" -- -D warnings + + - name: Clippy (mshv + kvm + igvm) + if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} + uses: houseabsolute/actions-rust-cross@v1 + with: + command: clippy + cross-version: 3e0957637b49b1bbced23ad909170650c5b70635 + toolchain: ${{ matrix.rust }} + target: ${{ matrix.target }} + args: --locked --all --all-targets --no-default-features --tests --examples --features "mshv,kvm,igvm" -- -D warnings + + - name: Clippy (default features + sev_snp + igvm + fw_cfg) + if: ${{ matrix.target == 'x86_64-unknown-linux-gnu' }} + uses: houseabsolute/actions-rust-cross@v1 + with: + command: clippy + cross-version: 
3e0957637b49b1bbced23ad909170650c5b70635 + toolchain: ${{ matrix.rust }} + target: ${{ matrix.target }} + args: --locked --all --all-targets --tests --examples --features "sev_snp,igvm,fw_cfg" -- -D warnings + - name: Check build did not modify any files run: test -z "$(git status --porcelain)" diff --git a/Cargo.lock b/Cargo.lock index f60d47d317..e4e4e414bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -285,6 +285,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "bitfield" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5acf59e2452f0c4b968b15ce4b9468f57b45f7733b919d68b19fcc39264bfb8" + [[package]] name = "bitfield-struct" version = "0.10.1" @@ -998,6 +1004,7 @@ version = "0.1.0" dependencies = [ "anyhow", "arc-swap", + "bitfield", "bitfield-struct 0.12.1", "byteorder", "cfg-if", diff --git a/arch/src/x86_64/mod.rs b/arch/src/x86_64/mod.rs index d35a878e61..9f2ddf6dc4 100644 --- a/arch/src/x86_64/mod.rs +++ b/arch/src/x86_64/mod.rs @@ -819,6 +819,7 @@ pub fn configure_vcpu( cpu_vendor: CpuVendor, topology: (u16, u16, u16, u16), nested: bool, + setup_registers: bool, ) -> super::Result<()> { let x2apic_id = get_x2apic_id(id, Some(topology)); @@ -891,7 +892,9 @@ pub fn configure_vcpu( } regs::setup_msrs(vcpu).map_err(Error::MsrsConfiguration)?; - if let Some((kernel_entry_point, guest_memory)) = boot_setup { + if let Some((kernel_entry_point, guest_memory)) = boot_setup + && setup_registers + { regs::setup_regs(vcpu, kernel_entry_point).map_err(Error::RegsConfiguration)?; regs::setup_fpu(vcpu).map_err(Error::FpuConfiguration)?; diff --git a/devices/src/legacy/fw_cfg.rs b/devices/src/legacy/fw_cfg.rs index f33179d831..d19705609a 100644 --- a/devices/src/legacy/fw_cfg.rs +++ b/devices/src/legacy/fw_cfg.rs @@ -649,6 +649,16 @@ impl FwCfg { let kernel_start = bp.text_offset; #[cfg(target_arch = "x86_64")] let kernel_start = (bp.hdr.setup_sects as usize + 1) * 512; + if kernel_start <= buffer.len() { + 
buffer.truncate(kernel_start); + } else { + buffer.resize(kernel_start, 0); + file.read_exact_at( + &mut buffer[size_of::()..], + size_of::() as u64, + )?; + } + self.known_items[FW_CFG_SETUP_SIZE as usize] = FwCfgContent::U32(buffer.len() as u32); self.known_items[FW_CFG_SETUP_DATA as usize] = FwCfgContent::Bytes(buffer); self.known_items[FW_CFG_KERNEL_SIZE as usize] = diff --git a/hypervisor/Cargo.toml b/hypervisor/Cargo.toml index 1ffaa46b78..f0bc6e5fe5 100644 --- a/hypervisor/Cargo.toml +++ b/hypervisor/Cargo.toml @@ -16,6 +16,7 @@ tdx = [] [dependencies] anyhow = { workspace = true } arc-swap = "1.9.0" +bitfield = "0.16.1" bitfield-struct = "0.12.0" byteorder = { workspace = true } cfg-if = { workspace = true } diff --git a/hypervisor/src/cpu.rs b/hypervisor/src/cpu.rs index 4bc348a98d..044c81a2e8 100644 --- a/hypervisor/src/cpu.rs +++ b/hypervisor/src/cpu.rs @@ -587,10 +587,15 @@ pub trait Vcpu: Send + Sync { ) -> Result<[u32; 4]> { unimplemented!() } - #[cfg(feature = "mshv")] - fn set_sev_control_register(&self, _reg: u64) -> Result<()> { + #[cfg(feature = "sev_snp")] + fn set_sev_control_register(&self, _vmsa_pfn: u64) -> Result<()> { + unimplemented!() + } + #[cfg(feature = "sev_snp")] + fn setup_sev_snp_regs(&self, _vmsa: igvm::snp_defs::SevVmsa) -> Result<()> { unimplemented!() } + /// /// Sets the value of GIC redistributor address /// diff --git a/hypervisor/src/hypervisor.rs b/hypervisor/src/hypervisor.rs index a25f8a9bf7..05852a230f 100644 --- a/hypervisor/src/hypervisor.rs +++ b/hypervisor/src/hypervisor.rs @@ -96,6 +96,11 @@ pub enum HypervisorError { #[cfg(target_arch = "x86_64")] #[error("Failed to enable AMX tile state components")] CouldNotEnableAmxStateComponents(#[source] crate::arch::x86::AmxGuestSupportError), + /// + /// Failed to retrieve SEV-SNP capabilities + /// + #[error("Failed to retrieve SEV-SNP capabilities:{0}")] + SevSnpCapabilities(#[source] anyhow::Error), } /// diff --git a/hypervisor/src/kvm/mod.rs 
b/hypervisor/src/kvm/mod.rs index 8b21002d1d..c9e4c2fa85 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -14,7 +14,9 @@ use std::any::Any; use std::collections::HashMap; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use std::mem::offset_of; -#[cfg(feature = "tdx")] +#[cfg(target_arch = "x86_64")] +use std::os::fd::{FromRawFd, OwnedFd}; +#[cfg(target_arch = "x86_64")] use std::os::unix::io::AsRawFd; #[cfg(feature = "tdx")] use std::os::unix::io::RawFd; @@ -26,9 +28,14 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, RwLock}; use anyhow::anyhow; +#[cfg(target_arch = "x86_64")] +use kvm_bindings::kvm_create_guest_memfd; use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd}; +#[cfg(feature = "sev_snp")] +use log::info; #[cfg(target_arch = "x86_64")] use log::warn; +use vmm_sys_util::errno; use vmm_sys_util::eventfd::EventFd; #[cfg(target_arch = "aarch64")] @@ -50,8 +57,6 @@ pub use crate::riscv64::{ }; #[cfg(target_arch = "riscv64")] use crate::riscv64_reg_id; -use crate::vm::{self, InterruptSourceConfig, VmOps}; -use crate::{HypervisorType, HypervisorVmConfig, cpu, hypervisor}; // x86_64 dependencies #[cfg(target_arch = "x86_64")] pub mod x86_64; @@ -73,7 +78,12 @@ use crate::arch::x86::{ CpuIdEntry, FpuState, LapicState, MTRR_MSR_INDICES, MsrEntry, NUM_IOAPIC_PINS, SpecialRegisters, XsaveState, }; -use crate::{CpuState, IoEventAddress, IrqRoutingEntry, MpState, StandardRegisters}; +use crate::{ + CpuState, HypervisorType, HypervisorVmConfig, InterruptSourceConfig, IoEventAddress, + IrqRoutingEntry, MpState, StandardRegisters, USER_MEMORY_REGION_GUEST_MEMFD, + USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE, + UserMemoryRegion, VmOps, cpu, hypervisor, vm, +}; // aarch64 dependencies #[cfg(target_arch = "aarch64")] pub mod aarch64; @@ -83,6 +93,8 @@ pub mod riscv64; #[cfg(target_arch = "aarch64")] use std::mem; +#[cfg(target_arch = "x86_64")] +use kvm_bindings::KVM_X86_DEFAULT_VM; /// 
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms /// @@ -92,10 +104,11 @@ pub use kvm_bindings::kvm_vcpu_events as VcpuEvents; use kvm_bindings::nested::KvmNestedStateBuffer; pub use kvm_bindings::{ self, KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, - KVM_IRQ_ROUTING_MSI, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID, - kvm_clock_data, kvm_create_device, kvm_create_device as CreateDevice, + KVM_IRQ_ROUTING_MSI, KVM_MEM_GUEST_MEMFD, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, + KVM_MSI_VALID_DEVID, kvm_clock_data, kvm_create_device, kvm_create_device as CreateDevice, kvm_device_attr as DeviceAttr, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug, kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_run, kvm_userspace_memory_region, + kvm_userspace_memory_region2, }; #[cfg(target_arch = "aarch64")] use kvm_bindings::{ @@ -107,7 +120,7 @@ use kvm_bindings::{ #[cfg(target_arch = "riscv64")] use kvm_bindings::{KVM_REG_RISCV_CORE, kvm_riscv_core}; #[cfg(feature = "tdx")] -use kvm_bindings::{KVM_X86_DEFAULT_VM, KVM_X86_SW_PROTECTED_VM, KVMIO, kvm_run__bindgen_ty_1}; +use kvm_bindings::{KVM_X86_SW_PROTECTED_VM, KVMIO, kvm_run__bindgen_ty_1}; #[cfg(target_arch = "x86_64")] use kvm_bindings::{Xsave as xsave2, kvm_xsave2}; pub use kvm_ioctls::{self, Cap, Kvm, VcpuExit}; @@ -128,6 +141,64 @@ use crate::kvm::x86_64::XsaveStateError; #[cfg(target_arch = "x86_64")] ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a); +// Maximum virtual address space. 
+#[cfg(target_arch = "x86_64")] +const VIRTUAL_ADDRESS_SIZE: u64 = 1 << 48; + +#[cfg(feature = "sev_snp")] +use igvm_defs::PAGE_SIZE_4K; +#[cfg(feature = "sev_snp")] +use kvm_bindings::{ + KVM_MEMORY_ATTRIBUTE_PRIVATE, KVM_X86_SNP_VM, kvm_memory_attributes, kvm_segment as Segment, +}; +use vm_memory::GuestAddress; +#[cfg(feature = "sev_snp")] +use x86_64::sev; + +// Hardcoded GPA of a bootloader and VMSA page for KVM +// TODO: Derive these from the IGVM file's PageData/SnpVpContext directives +// instead of using fixed constants, to support arbitrary bootloader layouts. +pub const BOOTLOADER_START: GuestAddress = GuestAddress(0xffc0_0000); +pub const BOOTLOADER_SIZE: usize = 0x40_0000; // 4 MiB +pub const KVM_VMSA_PAGE_ADDRESS: GuestAddress = GuestAddress(0xffff_ffff_f000); +pub const KVM_VMSA_PAGE_SIZE: usize = 0x1000; // 4 KiB + +#[cfg(feature = "sev_snp")] +bitfield::bitfield! { + /// AMD VMCB segment attributes + /// linux/arch/x86/include/asm/svm.h + pub struct SegAccess(u32); + impl Debug; + pub seg_type, _ : 3, 0; + pub s_code_data, _ : 4; + pub priv_level, _ : 6, 5; + pub present, _ : 7; + pub available, _ : 8; + pub l_64bit, _ : 9; + pub db_size_32, _: 10; + pub granularity, _: 11; +} + +#[cfg(feature = "sev_snp")] +fn make_segment(sev_selector: igvm::snp_defs::SevSelector) -> Segment { + let flags = SegAccess(sev_selector.attrib.into()); + Segment { + base: sev_selector.base, + limit: sev_selector.limit, + selector: sev_selector.selector, + type_: flags.seg_type() as u8, + s: flags.s_code_data() as u8, + dpl: flags.priv_level() as u8, + present: flags.present() as u8, + avl: flags.available() as u8, + db: flags.db_size_32() as u8, + g: flags.granularity() as u8, + l: flags.l_64bit() as u8, + unusable: 0, + ..Default::default() + } +} + #[cfg(feature = "tdx")] const KVM_EXIT_TDX: u32 = 50; #[cfg(feature = "tdx")] @@ -238,6 +309,61 @@ pub struct KvmTdxExitVmcall { pub out_rdx: u64, } +impl From for UserMemoryRegion { + fn from(region: 
kvm_userspace_memory_region2) -> Self { + let mut flags = USER_MEMORY_REGION_READ; + if region.flags & KVM_MEM_READONLY == 0 { + flags |= USER_MEMORY_REGION_WRITE; + } + if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 { + flags |= USER_MEMORY_REGION_LOG_DIRTY; + } + if region.flags & KVM_MEM_GUEST_MEMFD != 0 { + flags |= USER_MEMORY_REGION_GUEST_MEMFD; + } + + UserMemoryRegion { + slot: region.slot, + guest_phys_addr: region.guest_phys_addr, + memory_size: region.memory_size, + userspace_addr: region.userspace_addr, + flags, + guest_memfd: region.guest_memfd, + guest_memfd_offset: region.guest_memfd_offset, + } + } +} + +impl From for kvm_userspace_memory_region2 { + fn from(region: UserMemoryRegion) -> Self { + assert!( + region.flags & USER_MEMORY_REGION_READ != 0, + "KVM mapped memory is always readable" + ); + + let mut flags = 0; + if region.flags & USER_MEMORY_REGION_WRITE == 0 { + flags |= KVM_MEM_READONLY; + } + if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 { + flags |= KVM_MEM_LOG_DIRTY_PAGES; + } + if region.flags & USER_MEMORY_REGION_GUEST_MEMFD != 0 { + flags |= KVM_MEM_GUEST_MEMFD; + } + + kvm_userspace_memory_region2 { + slot: region.slot, + guest_phys_addr: region.guest_phys_addr, + memory_size: region.memory_size, + userspace_addr: region.userspace_addr, + flags, + guest_memfd: region.guest_memfd, + guest_memfd_offset: region.guest_memfd_offset, + ..Default::default() + } + } +} impl From for MpState { fn from(s: kvm_mp_state) -> Self { MpState::Kvm(s) @@ -424,14 +550,22 @@ struct KvmDirtyLogSlot { guest_phys_addr: u64, memory_size: u64, userspace_addr: u64, + // Following fields are used by kvm_userspace_memory_region2. + guest_memfd_offset: u64, + guest_memfd: u32, } /// Wrapper over KVM VM ioctls. 
pub struct KvmVm { - fd: VmFd, + fd: Arc, #[cfg(target_arch = "x86_64")] msrs: Vec, + #[cfg(all(feature = "sev_snp", target_arch = "x86_64"))] + sev_fd: Option, dirty_log_slots: RwLock>, + #[cfg(target_arch = "x86_64")] + memfd: Option, + kvm_guest_memfd_supported: bool, } impl KvmVm { @@ -494,6 +628,45 @@ impl KvmVm { fn translate_msi_ext_dest_id(address_lo: u32, address_hi: u32) -> (u32, u32) { (address_lo, address_hi) } + /// Set user memory region to use guest_memfd when available. + /// guest_memfd is available on host linux kernel v6.8+ + /// + /// # Safety + /// + /// `region.userspace_addr` must point to `region.memory_size` bytes of + /// memory that will stay mapped until the slot is removed via + /// `remove_user_memory_region`. The memory region must + /// be uniquely owned by the caller, as mapping it into the guest + /// effectively creates a long-lived mutable reference. + unsafe fn set_user_memory_region( + &self, + region: kvm_userspace_memory_region2, + ) -> Result<(), errno::Error> { + if self.kvm_guest_memfd_supported { + // SAFETY: Safe as the caller guarantees that region is safe to map + // the guest and is non-overlapping. + unsafe { self.fd.set_user_memory_region2(region) } + } else { + // SAFETY: Safe because guest regions are guaranteed not to overlap. + unsafe { + self.fd.set_user_memory_region(kvm_userspace_memory_region { + slot: region.slot, + guest_phys_addr: region.guest_phys_addr, + userspace_addr: region.userspace_addr, + flags: region.flags, + memory_size: region.memory_size, + }) + } + } + } + /// Get flag for kvm_userspace_memory_region based on memfd support. 
+ fn get_kvm_userspace_memory_region_flag(&self, flag: u32) -> u32 { + flag | if self.kvm_guest_memfd_supported { + KVM_MEM_GUEST_MEMFD + } else { + 0 + } + } } /// Implementation of Vm trait for KVM @@ -509,6 +682,72 @@ impl KvmVm { /// let vm = hypervisor.create_vm(HypervisorVmConfig::default()).expect("new VM fd creation failed"); /// ``` impl vm::Vm for KvmVm { + #[cfg(all(feature = "sev_snp", target_arch = "x86_64"))] + fn sev_snp_init(&self, guest_policy: igvm_defs::SnpPolicy) -> vm::Result<()> { + self.sev_fd + .as_ref() + .unwrap() + .launch_start(&self.fd, guest_policy) + .map_err(|e| vm::HypervisorVmError::InitializeSevSnp(e.into())) + } + + #[cfg(all(feature = "sev_snp", target_arch = "x86_64"))] + fn import_isolated_pages( + &self, + page_type: u32, + page_size: u32, + // host page frame numbers + pfns: &[u64], + uaddrs: &[u64], + ) -> vm::Result<()> { + if pfns.is_empty() { + return Ok(()); + } + assert_eq!(pfns.len(), uaddrs.len()); + // VMSA pages are not supported by launch_update + // https://elixir.bootlin.com/linux/v6.11/source/arch/x86/kvm/svm/sev.c#L2377 + if page_type == sev::SNP_PAGE_TYPE_VMSA { + return Ok(()); + } + for i in 0..pfns.len() { + self.fd + .set_memory_attributes(kvm_memory_attributes { + address: pfns[i] << sev::GPA_METADATA_PADDING, + size: page_size as u64, + attributes: kvm_bindings::KVM_MEMORY_ATTRIBUTE_PRIVATE as u64, + // Flags must be zero o/w error (flags aren't being used here yet) + flags: 0, + }) + .map_err(|e| vm::HypervisorVmError::ImportIsolatedPages(e.into()))?; + self.sev_fd + .as_ref() + .unwrap() + .launch_update(&self.fd, uaddrs[i], page_size as u64, pfns[i], page_type) + .map_err(|e| vm::HypervisorVmError::ImportIsolatedPages(e.into()))?; + } + + Ok(()) + } + + #[cfg(all(feature = "sev_snp", target_arch = "x86_64"))] + fn complete_isolated_import( + &self, + snp_id_block: igvm_defs::IGVM_VHS_SNP_ID_BLOCK, + host_data: [u8; 32], + id_block_enabled: u8, + ) -> vm::Result<()> { + self.sev_fd + .as_ref() + 
.unwrap() + .launch_finish( + &self.fd, + host_data, + id_block_enabled, + snp_id_block.author_key_enabled, + ) + .map_err(|e| vm::HypervisorVmError::CompleteIsolatedImport(e.into())) + } + #[cfg(target_arch = "x86_64")] /// /// Sets the address of the one-page region in the VM's address space. @@ -589,6 +828,8 @@ impl vm::Vm for KvmVm { hyperv_synic: AtomicBool::new(false), #[cfg(target_arch = "x86_64")] xsave_size, + #[cfg(feature = "sev_snp")] + vm_fd: self.fd.clone(), }; Ok(Box::new(vcpu)) } @@ -759,14 +1000,27 @@ impl vm::Vm for KvmVm { const _: () = assert!(core::mem::size_of::() <= core::mem::size_of::()); - let mut region = kvm_userspace_memory_region { + // guest_memfd is only used on x86_64 (SEV-SNP, TDX) for now + #[cfg(target_arch = "x86_64")] + let guest_memfd = if let Some(memfd) = &self.memfd { + memfd.as_raw_fd() as u32 + } else { + 0 + }; + #[cfg(not(target_arch = "x86_64"))] + let guest_memfd = 0; + + let mut region = kvm_userspace_memory_region2 { slot, + flags: self.get_kvm_userspace_memory_region_flag(flags), guest_phys_addr, memory_size: memory_size as u64, userspace_addr: userspace_addr as usize as u64, - flags, + #[cfg(not(target_arch = "riscv64"))] + guest_memfd, + guest_memfd_offset: guest_phys_addr, + ..Default::default() }; - if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 { if (region.flags & KVM_MEM_READONLY) != 0 { return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!( @@ -782,20 +1036,34 @@ impl vm::Vm for KvmVm { guest_phys_addr: region.guest_phys_addr, memory_size: region.memory_size, userspace_addr: region.userspace_addr, + guest_memfd_offset: region.guest_memfd_offset, + guest_memfd: region.guest_memfd, }, ); // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`. // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`. - region.flags = 0; + region.flags = self.get_kvm_userspace_memory_region_flag(0); } // SAFETY: Safe because caller promised this is safe. 
unsafe { + self.set_user_memory_region(region) + .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?; + } + + #[cfg(feature = "sev_snp")] + if self.kvm_guest_memfd_supported { self.fd - .set_user_memory_region(region) - .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into())) + .set_memory_attributes(kvm_memory_attributes { + address: region.guest_phys_addr, + size: region.memory_size, + attributes: KVM_MEMORY_ATTRIBUTE_PRIVATE as u64, + flags: 0, + }) + .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?; } + Ok(()) } /// Removes a guest physical memory region. @@ -823,12 +1091,13 @@ impl vm::Vm for KvmVm { const _: () = assert!(core::mem::size_of::() <= core::mem::size_of::()); - let mut region = kvm_userspace_memory_region { + let mut region = kvm_userspace_memory_region2 { slot, guest_phys_addr, memory_size: memory_size as u64, userspace_addr: userspace_addr as usize as u64, flags, + ..Default::default() }; // Remove the corresponding entry from "self.dirty_log_slots" if needed @@ -838,8 +1107,7 @@ impl vm::Vm for KvmVm { region.memory_size = 0; // SAFETY: Safe because caller promised this is safe. unsafe { - self.fd - .set_user_memory_region(region) + self.set_user_memory_region(region) .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into())) } } @@ -932,17 +1200,19 @@ impl vm::Vm for KvmVm { fn start_dirty_log(&self) -> vm::Result<()> { let dirty_log_slots = self.dirty_log_slots.read().unwrap(); for (_, s) in dirty_log_slots.iter() { - let region = kvm_userspace_memory_region { + let region = kvm_userspace_memory_region2 { slot: s.slot, guest_phys_addr: s.guest_phys_addr, memory_size: s.memory_size, userspace_addr: s.userspace_addr, - flags: KVM_MEM_LOG_DIRTY_PAGES, + flags: self.get_kvm_userspace_memory_region_flag(KVM_MEM_LOG_DIRTY_PAGES), + guest_memfd: s.guest_memfd, + guest_memfd_offset: s.guest_memfd_offset, + ..Default::default() }; // SAFETY: Safe because guest regions are guaranteed not to overlap. 
unsafe { - self.fd - .set_user_memory_region(region) + self.set_user_memory_region(region) .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?; } } @@ -956,17 +1226,19 @@ impl vm::Vm for KvmVm { fn stop_dirty_log(&self) -> vm::Result<()> { let dirty_log_slots = self.dirty_log_slots.read().unwrap(); for (_, s) in dirty_log_slots.iter() { - let region = kvm_userspace_memory_region { + let region = kvm_userspace_memory_region2 { slot: s.slot, guest_phys_addr: s.guest_phys_addr, memory_size: s.memory_size, userspace_addr: s.userspace_addr, - flags: 0, + flags: self.get_kvm_userspace_memory_region_flag(0), + guest_memfd: s.guest_memfd, + guest_memfd_offset: s.guest_memfd_offset, + ..Default::default() }; // SAFETY: Safe because guest regions are guaranteed not to overlap. unsafe { - self.fd - .set_user_memory_region(region) + self.set_user_memory_region(region) .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?; } } @@ -1228,11 +1500,19 @@ impl hypervisor::Hypervisor for KvmHypervisor { vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap(); } - #[cfg(feature = "tdx")] - if _config.tdx_enabled { - vm_type = KVM_X86_SW_PROTECTED_VM.into(); - } else { + #[cfg(target_arch = "x86_64")] + { vm_type = KVM_X86_DEFAULT_VM.into(); + + #[cfg(feature = "sev_snp")] + if _config.sev_snp_enabled { + vm_type = KVM_X86_SNP_VM.into(); + } + + #[cfg(feature = "tdx")] + if _config.tdx_enabled { + vm_type = KVM_X86_SW_PROTECTED_VM.into(); + } } loop { @@ -1255,7 +1535,7 @@ impl hypervisor::Hypervisor for KvmHypervisor { { let msr_list = self.get_msr_list()?; let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize; - let mut msrs: Vec = vec![ + let mut msrs = vec![ MsrEntry { ..Default::default() }; @@ -1266,18 +1546,63 @@ impl hypervisor::Hypervisor for KvmHypervisor { msrs[pos].index = *index; } + let kvm_guest_memfd_supported = fd.check_extension(Cap::GuestMemfd); + let mut memfd = None; + if kvm_guest_memfd_supported { + // TODO: Refactor to create memfd 
when the memory region is created so
+                // that the size is appropriate.
+                // SAFETY: create_guest_memfd returns a new fd that nothing else owns.
+                memfd = unsafe {
+                    Some(OwnedFd::from_raw_fd(
+                        fd.create_guest_memfd(kvm_create_guest_memfd {
+                            size: VIRTUAL_ADDRESS_SIZE,
+                            ..Default::default()
+                        })
+                        .map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?,
+                    ))
+                };
+            }
+
+            #[cfg(feature = "sev_snp")]
+            let sev_fd = {
+                let sev_snp_enabled = vm_type == KVM_X86_SNP_VM as u64;
+                if sev_snp_enabled {
+                    let mask = self.kvm.check_extension_int(crate::kvm::Cap::ExitHypercall);
+                    let cap = kvm_bindings::kvm_enable_cap {
+                        cap: kvm_bindings::KVM_CAP_EXIT_HYPERCALL,
+                        args: [mask as _, 0, 0, 0],
+                        ..Default::default()
+                    };
+                    fd.enable_cap(&cap)
+                        .map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
+                    let sev_dev = x86_64::sev::SevFd::new("/dev/sev")
+                        .map_err(|e| hypervisor::HypervisorError::SevSnpCapabilities(e.into()))?;
+                    sev_dev
+                        .init2(&fd, _config.vmsa_features)
+                        .map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
+                    Some(sev_dev)
+                } else {
+                    None
+                }
+            };
+
             Ok(Arc::new(KvmVm {
-                fd,
+                fd: Arc::new(fd),
                 msrs,
                 dirty_log_slots: RwLock::new(HashMap::new()),
+                #[cfg(feature = "sev_snp")]
+                sev_fd,
+                memfd,
+                kvm_guest_memfd_supported,
             }))
         }
 
         #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
         {
             Ok(Arc::new(KvmVm {
-                fd,
+                fd: Arc::new(fd),
                 dirty_log_slots: RwLock::new(HashMap::new()),
+                kvm_guest_memfd_supported: false,
             }))
         }
     }
@@ -1362,6 +1687,8 @@ pub struct KvmVcpu {
     hyperv_synic: AtomicBool,
     #[cfg(target_arch = "x86_64")]
     xsave_size: i32,
+    #[cfg(feature = "sev_snp")]
+    vm_fd: Arc<VmFd>,
 }
 
 /// Implementation of Vcpu trait for KVM
@@ -2021,6 +2348,81 @@ impl cpu::Vcpu for KvmVcpu {
                 #[cfg(feature = "tdx")]
                 VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx),
                 VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug),
+                #[cfg(feature = "sev_snp")]
+                VcpuExit::Hypercall(hypercall) => {
+                    // https://docs.kernel.org/virt/kvm/x86/hypercalls.html#kvm-hc-map-gpa-range
+                    info!("VcpuExit::Hypercall");
+                    const KVM_HC_MAP_GPA_RANGE: u64 = 12;
+                    // 4th bit of attributes argument is encrypted page bit
+                    match hypercall.nr {
+                        KVM_HC_MAP_GPA_RANGE => {
+                            info!("Handling KVM_HC_MAP_GPA_RANGE hypercall");
+                            // guest physical address of start page
+                            let address = hypercall.args[0];
+                            // num pages to map from start address
+                            let num_pages = hypercall.args[1];
+                            // bits[0-3] = page size encoding
+                            // bits[4] = 1 if private, 0 if shared
+                            // bits[5-63] = zero
+                            let attributes = hypercall.args[2];
+                            // TODO: Add 2mb page support
+                            // `num_pages` is guest-controlled: fail the exit instead of
+                            // overflowing when the byte size does not fit in a u64.
+                            let size = num_pages.checked_mul(PAGE_SIZE_4K).ok_or_else(|| {
+                                cpu::HypervisorCpuError::RunVcpu(anyhow!(
+                                    "KVM_HC_MAP_GPA_RANGE: page count {num_pages:#x} overflows"
+                                ))
+                            })?;
+                            // bit 4 = private attribute encoding
+                            const PRIVATE_ENCODING_BITMASK: u64 = 0b10000;
+                            info!("hypercall attributes: {attributes:#b}");
+                            let set_private_attr = if attributes & PRIVATE_ENCODING_BITMASK > 0 {
+                                KVM_MEMORY_ATTRIBUTE_PRIVATE as u64
+                            } else {
+                                // the only attribute available is private, o/w 0
+                                // https://docs.kernel.org/virt/kvm/api.html#kvm-set-memory-attributes
+                                0u64
+                            };
+                            let mem_attributes = kvm_memory_attributes {
+                                address,
+                                size,
+                                attributes: set_private_attr,
+                                ..Default::default()
+                            };
+                            self.vm_fd
+                                .set_memory_attributes(mem_attributes)
+                                .map(|_| cpu::VmExit::Ignore)
+                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))
+                        }
+                        _ => Ok(cpu::VmExit::Ignore),
+                    }
+                }
+
+                #[cfg(feature = "sev_snp")]
+                VcpuExit::MemoryFault { flags, gpa, size } => {
+                    info!("VcpuExit::MemoryFault: flags={flags:#x}, gpa={gpa:#x}, size={size:#x}");
+
+                    const KVM_MEMORY_EXIT_FLAG_PRIVATE: u64 =
+                        kvm_bindings::KVM_MEMORY_EXIT_FLAG_PRIVATE as u64;
+
+                    const KVM_MEMORY_ATTRIBUTE_SHARED: u64 = 0;
+
+                    let attributes = if flags & KVM_MEMORY_EXIT_FLAG_PRIVATE != 0 {
+                        KVM_MEMORY_ATTRIBUTE_PRIVATE as u64
+                    } else {
+                        KVM_MEMORY_ATTRIBUTE_SHARED
+                    };
+
+                    self.vm_fd
+                        .set_memory_attributes(kvm_memory_attributes {
+                            address: gpa,
+                            size,
+                            attributes,
+                            flags: 0,
+                        })
+                        .map(|_| cpu::VmExit::Ignore)
+                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))
+                }
 
                 r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                     "Unexpected exit reason on vcpu run: {r:?}"
@@ -2874,6 +3276,83 @@ impl cpu::Vcpu for KvmVcpu {
             Ok(_) => Ok(()),
         }
     }
+
+    #[cfg(feature = "sev_snp")]
+    /// No-op on KVM: `_vmsa_pfn` is ignored and `Ok(())` is always returned.
+    fn set_sev_control_register(&self, _vmsa_pfn: u64) -> cpu::Result<()> {
+        Ok(())
+    }
+
+    #[cfg(feature = "sev_snp")]
+    /// Copies the boot register state described by `vmsa` into this vCPU.
+    ///
+    /// Special registers (segments, CR0/CR3/CR4, EFER, IDT and GDT) are written
+    /// first, followed by RIP, RFLAGS and the general-purpose registers.
+    ///
+    /// # Errors
+    ///
+    /// Fails when a KVM register ioctl fails, or when an IDT/GDT limit in
+    /// `vmsa` does not fit the KVM descriptor-table limit field.
+    fn setup_sev_snp_regs(&self, vmsa: igvm::snp_defs::SevVmsa) -> cpu::Result<()> {
+        let mut sregs = self
+            .fd
+            .get_sregs()
+            .map_err(|e: kvm_ioctls::Error| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?;
+        sregs.cs = make_segment(vmsa.cs);
+        sregs.ds = make_segment(vmsa.ds);
+        sregs.es = make_segment(vmsa.es);
+        sregs.fs = make_segment(vmsa.fs);
+        sregs.gs = make_segment(vmsa.gs);
+        sregs.ss = make_segment(vmsa.ss);
+        sregs.tr = make_segment(vmsa.tr);
+        sregs.ldt = make_segment(vmsa.ldtr);
+
+        sregs.cr0 = vmsa.cr0;
+        sregs.cr4 = vmsa.cr4;
+        sregs.cr3 = vmsa.cr3;
+        sregs.efer = vmsa.efer;
+
+        sregs.idt.base = vmsa.idtr.base;
+        sregs.idt.limit = vmsa.idtr.limit.try_into().map_err(|_| {
+            cpu::HypervisorCpuError::SetSpecialRegs(anyhow!("VMSA IDTR limit out of range"))
+        })?;
+        sregs.gdt.base = vmsa.gdtr.base;
+        sregs.gdt.limit = vmsa.gdtr.limit.try_into().map_err(|_| {
+            cpu::HypervisorCpuError::SetSpecialRegs(anyhow!("VMSA GDTR limit out of range"))
+        })?;
+        self.fd
+            .set_sregs(&sregs)
+            .map_err(|e: kvm_ioctls::Error| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))?;
+
+        let mut regs = self
+            .fd
+            .get_regs()
+            .map_err(|e: kvm_ioctls::Error| cpu::HypervisorCpuError::GetRegister(e.into()))?;
+        regs.rip = vmsa.rip;
+        regs.rdx = vmsa.rdx;
+        regs.rflags = vmsa.rflags;
+        regs.rsp = vmsa.rsp;
+        regs.rax = vmsa.rax;
+        regs.rbx = vmsa.rbx;
+        regs.rcx = vmsa.rcx;
+        regs.rbp = vmsa.rbp;
+        regs.rsi = vmsa.rsi;
+        regs.rdi = vmsa.rdi;
+        regs.r8 = vmsa.r8;
+        regs.r9 = vmsa.r9;
+        regs.r10 = vmsa.r10;
+        regs.r11 = vmsa.r11;
+        regs.r12 = vmsa.r12;
+        regs.r13 = vmsa.r13;
+        regs.r14 = vmsa.r14;
+        regs.r15 = vmsa.r15;
+
+        self.fd
+            .set_regs(&regs)
+            .map_err(|e: kvm_ioctls::Error| cpu::HypervisorCpuError::SetRegister(e.into()))?;
+
+        Ok(())
+    }
 }
 
 impl KvmVcpu {
diff --git
a/hypervisor/src/kvm/x86_64/mod.rs b/hypervisor/src/kvm/x86_64/mod.rs
index e338346c3f..62185fd84e 100644
--- a/hypervisor/src/kvm/x86_64/mod.rs
+++ b/hypervisor/src/kvm/x86_64/mod.rs
@@ -31,6 +31,9 @@ use crate::arch::x86::{
 };
 use crate::kvm::{Cap, Kvm, KvmError, KvmResult};
 
+#[cfg(feature = "sev_snp")]
+pub(crate) mod sev;
+
 ///
 /// Check KVM extension for Linux
 ///
diff --git a/hypervisor/src/kvm/x86_64/sev.rs b/hypervisor/src/kvm/x86_64/sev.rs
new file mode 100644
index 0000000000..0ed4dabf74
--- /dev/null
+++ b/hypervisor/src/kvm/x86_64/sev.rs
@@ -0,0 +1,231 @@
+// Copyright 2025 Google LLC.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::fs::OpenOptions;
+use std::os::fd::{AsRawFd, OwnedFd};
+use std::os::unix::fs::OpenOptionsExt;
+use std::path::Path;
+
+use igvm_defs::SnpPolicy;
+use kvm_bindings::kvm_sev_cmd;
+use kvm_ioctls::VmFd;
+use log::{debug, error, info};
+use vmm_sys_util::errno;
+
+pub(crate) type Result<T> = std::result::Result<T, errno::Error>;
+
+// KVM SEV command IDs — linux/include/uapi/linux/kvm.h
+const KVM_SEV_INIT2: u32 = 22;
+const KVM_SEV_SNP_LAUNCH_START: u32 = 100;
+const KVM_SEV_SNP_LAUNCH_UPDATE: u32 = 101;
+const KVM_SEV_SNP_LAUNCH_FINISH: u32 = 102;
+// SNP_LAUNCH_UPDATE page types — linux/arch/x86/include/uapi/asm/sev-guest.h
+pub const SNP_PAGE_TYPE_VMSA: u32 = 2;
+
+// See AMD Spec Section 8.17 — SNP_LAUNCH_UPDATE
+// The last 12 bits are metadata about the guest context
+// https://docs.amd.com/v/u/en-US/56860_PUB_1.58_SEV_SNP
+pub const GPA_METADATA_PADDING: u32 = 12;
+
+// SNP in VMSA - linux/arch/x86/include/asm/svm.h
+const SVM_SEV_FEAT_SNP_ACTIVE: u64 = 1 << 0;
+
+/// Issues `sev_cmd` on `vm` and, when the ioctl fails, logs the `error` field of
+/// `sev_cmd` under `name`. The ioctl result is returned unchanged.
+fn sev_op(vm: &VmFd, sev_cmd: &mut kvm_sev_cmd, name: &str) -> Result<()> {
+    let ret = vm.encrypt_op_sev(sev_cmd);
+    if ret.is_err() {
+        error!("{name} op failed. error code: 0x{:x}", sev_cmd.error);
+    }
+    ret
+}
+
+/// Owned handle to the SEV device node, passed as `sev_fd` with every SEV command.
+#[derive(Debug)]
+pub struct SevFd {
+    pub fd: OwnedFd,
+}
+
+// These ioctl structs must match the kernel layout exactly.
+// Layouts from linux/arch/x86/include/uapi/asm/kvm.h
+
+#[repr(C, packed)]
+#[derive(Debug, Copy, Clone, Default)]
+pub(crate) struct KvmSevInit {
+    pub vmsa_features: u64,
+    pub flags: u32,
+    pub ghcb_version: u16,
+    pub pad1: u16,
+    pub pad2: [u32; 8],
+}
+
+#[repr(C, packed)]
+#[derive(Debug, Copy, Clone, Default)]
+pub(crate) struct KvmSevSnpLaunchStart {
+    pub policy: u64,
+    pub gosvw: [u8; 16],
+    pub flags: u16,
+    pub pad0: [u8; 6],
+    pub pad1: [u64; 4],
+}
+
+#[repr(C, packed)]
+#[derive(Debug, Copy, Clone, Default)]
+pub(crate) struct KvmSevSnpLaunchUpdate {
+    pub gfn_start: u64,
+    pub uaddr: u64,
+    pub len: u64,
+    pub type_: u8,
+    pub pad0: u8,
+    pub flags: u16,
+    pub pad1: u32,
+    pub pad2: [u64; 4],
+}
+
+#[repr(C, packed)]
+#[derive(Debug, Copy, Clone, Default)]
+pub(crate) struct KvmSevSnpLaunchFinish {
+    pub id_block_uaddr: u64,
+    pub id_auth_uaddr: u64,
+    pub id_block_en: u8,
+    pub auth_key_en: u8,
+    pub vcek_disabled: u8,
+    pub host_data: [u8; 32],
+    pub pad0: [u8; 3],
+    // must be zero https://elixir.bootlin.com/linux/v6.11/source/arch/x86/kvm/svm/sev.c#L2506
+    pub flags: u16,
+    pub pad1: [u64; 4],
+}
+
+impl SevFd {
+    /// Opens the SEV device at `sev_path` read-write with `O_CLOEXEC`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the OS error reported by the failed open, or `EIO` when the
+    /// I/O error carries no OS error code.
+    pub(crate) fn new(sev_path: impl AsRef<Path>) -> Result<Self> {
+        OpenOptions::new()
+            .read(true)
+            .write(true)
+            .custom_flags(libc::O_CLOEXEC)
+            .open(sev_path.as_ref())
+            .map(|file| SevFd {
+                fd: OwnedFd::from(file),
+            })
+            // Take the errno from the io::Error itself instead of re-reading the
+            // thread-local errno after the fact.
+            .map_err(|e| errno::Error::new(e.raw_os_error().unwrap_or(libc::EIO)))
+    }
+
+    /// Runs `KVM_SEV_INIT2` on `vm`.
+    ///
+    /// The SNP-active bit is cleared from `vmsa_features` before the call.
+    pub(crate) fn init2(&self, vm: &VmFd, vmsa_features: u64) -> Result<()> {
+        // Clear the SNP bit, KVM sets it directly
+        let vmsa_features = vmsa_features & !SVM_SEV_FEAT_SNP_ACTIVE;
+
+        // TODO: Query KVM for supported VMSA features before calling init2
+        if vmsa_features != 0 {
+            info!("SEV-SNP: requesting vmsa_features: {vmsa_features:#x}");
+        }
+
+        let mut init = KvmSevInit {
+            vmsa_features,
+            ..Default::default()
+        };
+        let mut sev_cmd = kvm_sev_cmd {
+            id: KVM_SEV_INIT2,
+            data: &mut init as *mut KvmSevInit as _,
+            sev_fd: self.fd.as_raw_fd() as _,
+            ..Default::default()
+        };
+        sev_op(vm, &mut sev_cmd, "KVM_SEV_INIT2")
+    }
+
+    /// Runs `KVM_SEV_SNP_LAUNCH_START` on `vm` with `guest_policy`.
+    pub(crate) fn launch_start(&self, vm: &VmFd, guest_policy: SnpPolicy) -> Result<()> {
+        // See AMD Spec Section 4.3 - Guest Policy
+        // Bit 17 is reserved and has to be one. The policy is passed through
+        // unmodified, so the caller has to supply it with that bit set.
+        // https://docs.amd.com/v/u/en-US/56860_PUB_1.58_SEV_SNP
+        let mut start: KvmSevSnpLaunchStart = KvmSevSnpLaunchStart {
+            policy: guest_policy.into_bits(),
+            ..Default::default()
+        };
+        let mut sev_cmd = kvm_sev_cmd {
+            id: KVM_SEV_SNP_LAUNCH_START,
+            data: &mut start as *mut KvmSevSnpLaunchStart as _,
+            sev_fd: self.fd.as_raw_fd() as _,
+            ..Default::default()
+        };
+        sev_op(vm, &mut sev_cmd, "KVM_SEV_SNP_LAUNCH_START")
+    }
+
+    /// Runs `KVM_SEV_SNP_LAUNCH_UPDATE` for `size` bytes at host address `hva`,
+    /// starting at guest frame `gfn_start`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `EINVAL` when `page_type` does not fit the command's 8-bit
+    /// `type_` field; otherwise returns the ioctl result.
+    pub(crate) fn launch_update(
+        &self,
+        vm: &VmFd,
+        // host virtual address
+        hva: u64,
+        size: u64,
+        // guest frame number
+        gfn_start: u64,
+        page_type: u32,
+    ) -> Result<()> {
+        // Reject out-of-range page types instead of silently truncating them.
+        let type_ = u8::try_from(page_type).map_err(|_| errno::Error::new(libc::EINVAL))?;
+        let mut update = KvmSevSnpLaunchUpdate {
+            gfn_start,
+            uaddr: hva,
+            len: size,
+            type_,
+            ..Default::default()
+        };
+        let mut sev_cmd = kvm_sev_cmd {
+            id: KVM_SEV_SNP_LAUNCH_UPDATE,
+            data: &mut update as *mut KvmSevSnpLaunchUpdate as _,
+            sev_fd: self.fd.as_raw_fd() as _,
+            ..Default::default()
+        };
+        sev_op(vm, &mut sev_cmd, "KVM_SEV_SNP_LAUNCH_UPDATE")
+    }
+
+    /// Runs `KVM_SEV_SNP_LAUNCH_FINISH` on `vm`.
+    ///
+    /// Only `host_data`, `id_block_en` and `auth_key_en` are filled in; all other
+    /// command fields, including `id_block_uaddr` and `id_auth_uaddr`, stay zero.
+    // NOTE(review): confirm a zero `id_block_uaddr` is intended when `id_block_en` is set.
+    pub(crate) fn launch_finish(
+        &self,
+        vm: &VmFd,
+        host_data: [u8; 32],
+        id_block_en: u8,
+        auth_key_en: u8,
+    ) -> Result<()> {
+        let mut finish = KvmSevSnpLaunchFinish {
+            host_data,
+            id_block_en,
+            auth_key_en,
+            ..Default::default()
+        };
+        let mut sev_cmd = kvm_sev_cmd {
+            id: KVM_SEV_SNP_LAUNCH_FINISH,
+            data: &mut finish as *mut KvmSevSnpLaunchFinish as _,
+            sev_fd: self.fd.as_raw_fd() as _,
+            ..Default::default()
+        };
+        // Copy the field out first: fields of a packed struct cannot be referenced.
+        let flags = finish.flags;
+        debug!("Calling KVM_SEV_SNP_LAUNCH_FINISH, flags: {flags}");
+        sev_op(vm, &mut sev_cmd, "KVM_SEV_SNP_LAUNCH_FINISH")
+    }
+}
diff --git a/hypervisor/src/lib.rs b/hypervisor/src/lib.rs
index 3d919e45ce..cb106a131c 100644
--- a/hypervisor/src/lib.rs +++ b/hypervisor/src/lib.rs @@ -64,7 +64,7 @@ pub use vm::{ pub use crate::hypervisor::{Hypervisor, HypervisorError}; -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, PartialEq)] pub enum HypervisorType { #[cfg(feature = "kvm")] Kvm, @@ -118,6 +118,32 @@ pub fn vec_with_array_field(count: usize) -> Vec { vec_with_size_in_bytes(vec_size_bytes) } +/// +/// User memory region structure +/// +#[derive(Debug, Default, Eq, PartialEq)] +pub struct UserMemoryRegion { + pub slot: u32, + pub guest_phys_addr: u64, + pub memory_size: u64, + pub userspace_addr: u64, + pub flags: u32, + #[cfg(feature = "kvm")] + pub guest_memfd: u32, + #[cfg(feature = "kvm")] + pub guest_memfd_offset: u64, +} + +/// +/// Flags for user memory region +/// +pub const USER_MEMORY_REGION_READ: u32 = 1; +pub const USER_MEMORY_REGION_WRITE: u32 = 1 << 1; +pub const USER_MEMORY_REGION_EXECUTE: u32 = 1 << 2; +pub const USER_MEMORY_REGION_LOG_DIRTY: u32 = 1 << 3; +pub const USER_MEMORY_REGION_ADJUSTABLE: u32 = 1 << 4; +pub const USER_MEMORY_REGION_GUEST_MEMFD: u32 = 1 << 5; + #[derive(Debug)] pub enum MpState { #[cfg(feature = "kvm")] @@ -170,6 +196,8 @@ pub struct HypervisorVmConfig { pub sev_snp_enabled: bool, #[cfg(feature = "sev_snp")] pub mem_size: u64, + #[cfg(feature = "sev_snp")] + pub vmsa_features: u64, pub nested: bool, pub smt_enabled: bool, } diff --git a/hypervisor/src/mshv/mod.rs b/hypervisor/src/mshv/mod.rs index 8623531c5b..a61f2e44ef 100644 --- a/hypervisor/src/mshv/mod.rs +++ b/hypervisor/src/mshv/mod.rs @@ -58,7 +58,7 @@ pub use aarch64::VcpuMshvState; #[cfg(target_arch = "aarch64")] use aarch64::gic::{BASE_SPI_IRQ, MshvGicV2M}; #[cfg(feature = "sev_snp")] -use igvm_defs::IGVM_VHS_SNP_ID_BLOCK; +use igvm_defs::{IGVM_VHS_SNP_ID_BLOCK, SnpPolicy}; #[cfg(feature = "sev_snp")] use snp_constants::*; use vmm_sys_util::eventfd::EventFd; @@ -2254,7 +2254,7 @@ impl vm::Vm for MshvVm { /// Initialize the SEV-SNP VM #[cfg(feature = "sev_snp")] - fn 
sev_snp_init(&self) -> vm::Result<()> { + fn sev_snp_init(&self, _guest_policy: SnpPolicy) -> vm::Result<()> { self.fd .set_partition_property( hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_STATE, @@ -2272,6 +2272,7 @@ impl vm::Vm for MshvVm { page_type: u32, page_size: u32, pages: &[u64], + _uaddrs: &[u64], ) -> vm::Result<()> { debug_assert!(page_size == hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB); if pages.is_empty() { diff --git a/hypervisor/src/vm.rs b/hypervisor/src/vm.rs index 9d7e60a8be..6d3a4a4ae5 100644 --- a/hypervisor/src/vm.rs +++ b/hypervisor/src/vm.rs @@ -17,6 +17,8 @@ use std::sync::Mutex; #[cfg(feature = "sev_snp")] use igvm_defs::IGVM_VHS_SNP_ID_BLOCK; +#[cfg(feature = "sev_snp")] +use igvm_defs::SnpPolicy; use thiserror::Error; use vmm_sys_util::eventfd::EventFd; @@ -392,9 +394,7 @@ pub trait Vm: Send + Sync + Any { fn get_dirty_log(&self, slot: u32, base_gpa: u64, memory_size: u64) -> Result>; #[cfg(feature = "sev_snp")] /// Initialize SEV-SNP on this VM - fn sev_snp_init(&self) -> Result<()> { - unimplemented!() - } + fn sev_snp_init(&self, guest_policy: SnpPolicy) -> Result<()>; #[cfg(feature = "tdx")] /// Initialize TDX on this VM fn tdx_init(&self, _cpuid: &[CpuIdEntry], _max_vcpus: u32) -> Result<()> { @@ -429,6 +429,7 @@ pub trait Vm: Send + Sync + Any { _page_type: u32, _page_size: u32, _pages: &[u64], + _uaddrs: &[u64], ) -> Result<()> { unimplemented!() } diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index ab0278e6d1..7088db3219 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -30,7 +30,12 @@ mshv = [ "vm-device/mshv", ] pvmemcontrol = ["devices/pvmemcontrol"] -sev_snp = ["arch/sev_snp", "hypervisor/sev_snp", "virtio-devices/sev_snp"] +sev_snp = [ + "arch/sev_snp", + "hypervisor/sev_snp", + "igvm_defs", + "virtio-devices/sev_snp", +] tdx = ["arch/tdx", "hypervisor/tdx"] tracing = ["tracer/tracing"] diff --git a/vmm/src/config.rs b/vmm/src/config.rs index fc7eb1b8d1..5b72fe993d 100644 --- a/vmm/src/config.rs +++ 
b/vmm/src/config.rs @@ -350,6 +350,9 @@ pub enum ValidationError { #[cfg(feature = "sev_snp")] #[error("Invalid host data format")] InvalidHostData, + #[cfg(all(feature = "sev_snp", feature = "igvm"))] + #[error("SEV-SNP requires an IGVM payload (--payload igvm=)")] + SevSnpRequiresIgvm, /// Restore expects all net ids that have fds #[error("Net id {0} is associated with FDs and is required")] RestoreMissingRequiredNetId(String), @@ -2880,12 +2883,25 @@ impl VmConfig { #[cfg(feature = "sev_snp")] { - let host_data_opt = &self.payload.as_ref().unwrap().host_data; - - if let Some(host_data) = host_data_opt - && host_data.len() != 64 - { - return Err(ValidationError::InvalidHostData); + let sev_snp_enabled = self.platform.as_ref().is_some_and(|p| p.sev_snp); + if sev_snp_enabled { + let host_data_opt = &self.payload.as_ref().unwrap().host_data; + if let Some(host_data) = host_data_opt + && host_data.len() != 64 + { + return Err(ValidationError::InvalidHostData); + } + // KVM SEV-SNP requires an IGVM payload to initialise the VMSA. + // Without IGVM the vCPU register state is undefined and VM entry fails. 
+ #[cfg(feature = "igvm")] + if self + .payload + .as_ref() + .and_then(|p| p.igvm.as_ref()) + .is_none() + { + return Err(ValidationError::SevSnpRequiresIgvm); + } } } // The 'conflict' check is introduced in commit 24438e0390d3 diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index 4b15cffc31..520f2a3128 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -212,6 +212,9 @@ pub enum Error { #[cfg(feature = "sev_snp")] #[error("Failed to set sev control register")] SetSevControlRegister(#[source] hypervisor::HypervisorCpuError), + #[cfg(feature = "sev_snp")] + #[error("Failed to set up SEV-SNP vCPU registers")] + SetupSevSnpRegs(#[source] hypervisor::HypervisorCpuError), #[cfg(target_arch = "x86_64")] #[error("Failed to inject NMI")] @@ -546,6 +549,7 @@ impl Vcpu { #[cfg(target_arch = "x86_64")] kvm_hyperv: bool, #[cfg(target_arch = "x86_64")] topology: (u16, u16, u16, u16), #[cfg(target_arch = "x86_64")] nested: bool, + #[cfg(feature = "igvm")] igvm_enabled: bool, ) -> Result<()> { #[cfg(target_arch = "aarch64")] { @@ -558,17 +562,32 @@ impl Vcpu { .map_err(Error::VcpuConfiguration)?; info!("Configuring vCPU: cpu_id = {}", self.id); #[cfg(target_arch = "x86_64")] - arch::configure_vcpu( - self.vcpu.as_ref(), - self.id, - boot_setup, - cpuid, - kvm_hyperv, - self.vendor, - topology, - nested, - ) - .map_err(Error::VcpuConfiguration)?; + { + // When IGVM is enabled, skip standard register setup here — the IGVM + // loader populates vCPU registers from the VMSA via set_sev_control_register + // (currently KVM-specific; MSHV handles this through its own import path). + // igvm_enabled is kept as an explicit flag rather than derived from sev_snp + // state because IGVM could theoretically be used independently of SEV-SNP. + cfg_if::cfg_if! 
{ + if #[cfg(feature = "igvm")] { + let setup_registers = !igvm_enabled; + } else { + let setup_registers = true; + } + } + arch::configure_vcpu( + self.vcpu.as_ref(), + self.id, + boot_setup, + cpuid, + kvm_hyperv, + self.vendor, + topology, + nested, + setup_registers, + ) + .map_err(Error::VcpuConfiguration)?; + } Ok(()) } @@ -628,6 +647,13 @@ impl Vcpu { .map_err(Error::SetSevControlRegister) } + #[cfg(feature = "sev_snp")] + pub fn setup_sev_snp_regs(&self, vmsa: igvm::snp_defs::SevVmsa) -> Result<()> { + self.vcpu + .setup_sev_snp_regs(vmsa) + .map_err(Error::SetupSevSnpRegs) + } + /// /// Sets the vCPU's GIC redistributor base address. /// @@ -697,6 +723,8 @@ pub struct CpuManager { sev_snp_enabled: bool, // State of the core scheduling group leader election (VM mode). core_scheduling_group_leader: Arc, + #[cfg(feature = "igvm")] + igvm_enabled: bool, } const CPU_ENABLE_FLAG: usize = 0; @@ -896,6 +924,7 @@ impl CpuManager { #[cfg(feature = "tdx")] tdx_enabled: bool, numa_nodes: &NumaNodes, #[cfg(feature = "sev_snp")] sev_snp_enabled: bool, + #[cfg(feature = "igvm")] igvm_enabled: bool, ) -> Result>> { if config.max_vcpus > hypervisor.get_max_vcpus() { return Err(Error::MaximumVcpusExceeded( @@ -972,6 +1001,8 @@ impl CpuManager { core_scheduling_group_leader: Arc::new(AtomicI32::new( CoreSchedulingLeader::Initial as i32, )), + #[cfg(feature = "igvm")] + igvm_enabled, }))) } @@ -1050,8 +1081,10 @@ impl CpuManager { vcpu: &mut Vcpu, boot_setup: Option<(EntryPoint, &GuestMemoryAtomic)>, ) -> Result<()> { - #[cfg(feature = "sev_snp")] - if self.sev_snp_enabled { + #[cfg(all(feature = "sev_snp", feature = "mshv"))] + if self.sev_snp_enabled + && self.hypervisor.hypervisor_type() == hypervisor::HypervisorType::Mshv + { if let Some((kernel_entry_point, _)) = boot_setup { vcpu.set_sev_control_register( kernel_entry_point.entry_addr.0 / crate::igvm::HV_PAGE_SIZE, @@ -1092,6 +1125,8 @@ impl CpuManager { self.config.kvm_hyperv, topology, self.config.nested, + 
#[cfg(feature = "igvm")] + self.igvm_enabled, )?; #[cfg(target_arch = "aarch64")] @@ -2247,7 +2282,7 @@ impl CpuManager { &self.vcpus_kill_signalled } - #[cfg(feature = "igvm")] + #[cfg(all(feature = "igvm", feature = "mshv"))] pub(crate) fn get_cpuid_leaf( &self, cpu_id: u8, @@ -2270,6 +2305,11 @@ impl CpuManager { self.sev_snp_enabled } + #[cfg(feature = "igvm")] + pub(crate) fn hypervisor_type(&self) -> hypervisor::HypervisorType { + self.hypervisor.hypervisor_type() + } + pub(crate) fn nmi(&mut self) -> Result<()> { self.vcpus_kick_signalled.store(true, Ordering::SeqCst); self.signal_vcpus()?; diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index edb8b76af0..c75a080901 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -927,26 +927,26 @@ pub struct AcpiPlatformAddresses { pub sleep_status_reg_address: Option, } -#[cfg(all(feature = "mshv", feature = "sev_snp"))] +#[cfg(feature = "sev_snp")] struct SevSnpPageAccessProxy { vm: Arc, } -#[cfg(all(feature = "mshv", feature = "sev_snp"))] +#[cfg(feature = "sev_snp")] impl std::fmt::Debug for SevSnpPageAccessProxy { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "SNP Page access proxy") } } -#[cfg(all(feature = "mshv", feature = "sev_snp"))] +#[cfg(feature = "sev_snp")] impl SevSnpPageAccessProxy { fn new(vm: Arc) -> SevSnpPageAccessProxy { SevSnpPageAccessProxy { vm } } } -#[cfg(all(feature = "mshv", feature = "sev_snp"))] +#[cfg(feature = "sev_snp")] impl AccessPlatform for SevSnpPageAccessProxy { fn translate_gpa(&self, base: u64, _size: u64) -> std::result::Result { Ok(base) @@ -1155,6 +1155,14 @@ fn create_mmio_allocators( mmio_allocators } +fn use_64bit_bar_for_virtio_device( + device_type: u32, + pci_segment_id: u16, + is_hotplug: bool, +) -> bool { + pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32 || is_hotplug +} + impl DeviceManager { #[allow(clippy::too_many_arguments)] pub fn new( @@ -1656,6 +1664,7 @@ impl 
DeviceManager { &mapping, &handle.id, handle.pci_segment, + false, handle.dma_handler, )?; @@ -1688,7 +1697,8 @@ impl DeviceManager { } if let Some(iommu_device) = iommu_device { - let dev_id = self.add_virtio_pci_device(iommu_device, &None, &iommu_id, 0, None)?; + let dev_id = + self.add_virtio_pci_device(iommu_device, &None, &iommu_id, 0, false, None)?; self.iommu_attached_devices = Some((dev_id, iommu_attached_devices)); } } @@ -4189,6 +4199,7 @@ impl DeviceManager { iommu_mapping: &Option>, virtio_device_id: &str, pci_segment_id: u16, + is_hotplug: bool, dma_handler: Option>, ) -> DeviceManagerResult { let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}"); @@ -4287,11 +4298,10 @@ impl DeviceManager { self.activate_evt .try_clone() .map_err(DeviceManagerError::EventFd)?, - // All device types *except* virtio block devices should be allocated a 64-bit bar - // The block devices should be given a 32-bit BAR so that they are easily accessible - // to firmware without requiring excessive identity mapping. - // The exception being if not on the default PCI segment. - pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32, + // Boot-time block devices stay in 32-bit BAR space so early firmware can access + // them without additional identity mapping. Hot-plugged block devices do not have + // that constraint and should use 64-bit BARs like the rest of the virtio devices. 
+ use_64bit_bar_for_virtio_device(device_type, pci_segment_id, is_hotplug), dma_handler, self.pending_activations.clone(), vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()), @@ -4947,6 +4957,7 @@ impl DeviceManager { &mapping, &handle.id, handle.pci_segment, + true, handle.dma_handler, )?; @@ -5693,6 +5704,30 @@ impl Drop for DeviceManager { mod unit_tests { use super::*; + #[test] + fn test_hotplugged_block_devices_use_64bit_bars() { + assert!(!use_64bit_bar_for_virtio_device( + VirtioDeviceType::Block as u32, + 0, + false, + )); + assert!(use_64bit_bar_for_virtio_device( + VirtioDeviceType::Block as u32, + 0, + true, + )); + assert!(use_64bit_bar_for_virtio_device( + VirtioDeviceType::Net as u32, + 0, + false, + )); + assert!(use_64bit_bar_for_virtio_device( + VirtioDeviceType::Block as u32, + 1, + false, + )); + } + #[test] fn test_create_mmio_allocators() { let res = create_mmio_allocators(0x100000, 0x400000, 1, &[1], 4 << 10); diff --git a/vmm/src/igvm/igvm_loader.rs b/vmm/src/igvm/igvm_loader.rs index 6e256c1ecb..2a15ee0461 100644 --- a/vmm/src/igvm/igvm_loader.rs +++ b/vmm/src/igvm/igvm_loader.rs @@ -4,23 +4,32 @@ // use std::collections::HashMap; use std::ffi::CString; -use std::io::{Read, Seek, SeekFrom}; use std::mem::size_of; use std::sync::{Arc, Mutex}; +use hypervisor::HypervisorType; use igvm::snp_defs::SevVmsa; -use igvm::{IgvmDirectiveHeader, IgvmFile, IgvmPlatformHeader, IsolationType}; +use igvm::{IgvmDirectiveHeader, IgvmFile, IgvmPlatformHeader}; #[cfg(feature = "sev_snp")] use igvm_defs::{IGVM_VHS_MEMORY_MAP_ENTRY, MemoryMapEntryType}; use igvm_defs::{ IGVM_VHS_PARAMETER, IGVM_VHS_PARAMETER_INSERT, IgvmPageDataType, IgvmPlatformType, }; use log::debug; +#[cfg(all(feature = "kvm", feature = "sev_snp"))] +use log::error; #[cfg(feature = "sev_snp")] use log::info; +#[cfg(feature = "mshv")] use mshv_bindings::*; use thiserror::Error; +#[cfg(feature = "sev_snp")] +use vm_memory::{Bytes, GuestAddress, GuestAddressSpace, 
GuestMemory}; +#[cfg(all(feature = "kvm", feature = "sev_snp"))] +use vm_migration::Snapshottable; use zerocopy::IntoBytes; +#[cfg(feature = "sev_snp")] +use zerocopy::{FromBytes, FromZeros}; #[cfg(feature = "sev_snp")] use crate::GuestMemoryMmap; @@ -29,6 +38,36 @@ use crate::igvm::loader::Loader; use crate::igvm::{BootPageAcceptance, HV_PAGE_SIZE, IgvmLoadedInfo, StartupMemoryType}; use crate::memory_manager::{Error as MemoryManagerError, MemoryManager}; +#[cfg(feature = "sev_snp")] +const ISOLATED_PAGE_SHIFT: u32 = 12; +#[cfg(feature = "sev_snp")] +const SNP_CPUID_LIMIT: u32 = 64; +// see section 7.1 +// https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56860.pdf +#[cfg(feature = "sev_snp")] +#[repr(C)] +#[derive(Debug, Clone, PartialEq, Eq, IntoBytes, FromBytes)] +pub struct SnpCpuidFunc { + pub eax_in: u32, + pub ecx_in: u32, + pub xcr0_in: u64, + pub xss_in: u64, + pub eax: u32, + pub ebx: u32, + pub ecx: u32, + pub edx: u32, + pub reserved: u64, +} + +#[cfg(feature = "sev_snp")] +#[repr(C)] +#[derive(Debug, Clone, FromBytes, IntoBytes)] +pub struct SnpCpuidInfo { + pub count: u32, + pub _reserved1: u32, + pub _reserved2: u64, + pub entries: [SnpCpuidFunc; SNP_CPUID_LIMIT as usize], +} #[derive(Debug, Error)] pub enum Error { #[error("command line is not a valid C string")] @@ -51,6 +90,32 @@ pub enum Error { FailedToDecodeHostData(#[source] hex::FromHexError), #[error("Error allocating address space")] MemoryManager(MemoryManagerError), + #[error("Error reading the IGVM file")] + MissingIgvm, + #[error("Error applying VMSA to vCPU registers: {0}")] + SetVmsa(#[source] crate::cpu::Error), +} + +// KVM SNP page types — linux/arch/x86/include/uapi/asm/sev-guest.h +#[cfg(feature = "kvm")] +const KVM_SNP_PAGE_TYPE_NORMAL: u32 = 1; +#[cfg(feature = "kvm")] +const KVM_SNP_PAGE_TYPE_VMSA: u32 = 2; +#[cfg(feature = "kvm")] +const KVM_SNP_PAGE_TYPE_UNMEASURED: u32 = 4; +#[cfg(feature = "kvm")] +const KVM_SNP_PAGE_TYPE_SECRETS: u32 = 
5; +#[cfg(feature = "kvm")] +const KVM_SNP_PAGE_TYPE_CPUID: u32 = 6; + +// Consolidated page type/size configuration per hypervisor. +struct PageTypeConfig { + isolated_page_size_4kb: u32, + normal: u32, + unmeasured: u32, + cpuid: u32, + secrets: u32, + vmsa: u32, } #[allow(dead_code)] @@ -128,22 +193,63 @@ fn import_parameter( Ok(()) } +/// +/// Extract sev_features from the boot CPU (vp_index 0) VMSA. +/// +#[cfg(feature = "sev_snp")] +pub fn extract_sev_features(igvm_file: &IgvmFile) -> u64 { + for header in igvm_file.directives() { + if let IgvmDirectiveHeader::SnpVpContext { vp_index, vmsa, .. } = header + && *vp_index == 0 + { + return vmsa.sev_features.into(); + } + } + 0 +} + /// /// Load the given IGVM file to guest memory. /// Right now it only supports SNP based isolation. /// We can boot legacy VM with an igvm file without /// any isolation. +/// +/// NOTE: KVM and MSHV have different page type values and CPUID/VMSA handling. +/// Hypervisor-specific code paths are gated by runtime type checks. A future +/// refactor could split these into separate KVM/MSHV loader implementations. #[allow(clippy::needless_pass_by_value)] pub fn load_igvm( - mut file: &std::fs::File, + igvm_file: IgvmFile, memory_manager: Arc>, cpu_manager: Arc>, cmdline: &str, #[cfg(feature = "sev_snp")] host_data: &Option, ) -> Result, Error> { + let hypervisor_type = cpu_manager.lock().unwrap().hypervisor_type(); + // HypervisorType variants are all #[cfg]-gated, so this match is exhaustive at compile time. 
+ let page_types = match hypervisor_type { + #[cfg(feature = "mshv")] + HypervisorType::Mshv => PageTypeConfig { + isolated_page_size_4kb: mshv_bindings::hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB, + normal: mshv_bindings::hv_isolated_page_type_HV_ISOLATED_PAGE_TYPE_NORMAL, + unmeasured: mshv_bindings::hv_isolated_page_type_HV_ISOLATED_PAGE_TYPE_UNMEASURED, + cpuid: mshv_bindings::hv_isolated_page_type_HV_ISOLATED_PAGE_TYPE_NORMAL, + secrets: mshv_bindings::hv_isolated_page_type_HV_ISOLATED_PAGE_TYPE_UNMEASURED, + vmsa: mshv_bindings::hv_isolated_page_type_HV_ISOLATED_PAGE_TYPE_VMSA, + }, + #[cfg(feature = "kvm")] + HypervisorType::Kvm => PageTypeConfig { + isolated_page_size_4kb: HV_PAGE_SIZE as u32, + normal: KVM_SNP_PAGE_TYPE_NORMAL, + unmeasured: KVM_SNP_PAGE_TYPE_UNMEASURED, + cpuid: KVM_SNP_PAGE_TYPE_CPUID, + secrets: KVM_SNP_PAGE_TYPE_SECRETS, + vmsa: KVM_SNP_PAGE_TYPE_VMSA, + }, + }; + let mut loaded_info: Box = Box::default(); let command_line = CString::new(cmdline).map_err(Error::InvalidCommandLine)?; - let mut file_contents = Vec::new(); let memory = memory_manager.lock().as_ref().unwrap().guest_memory(); let mut gpas: Vec = Vec::new(); let proc_count = cpu_manager.lock().unwrap().vcpus().len() as u32; @@ -156,12 +262,8 @@ pub fn load_igvm( .map_err(Error::FailedToDecodeHostData)?; } - file.seek(SeekFrom::Start(0)).map_err(Error::Igvm)?; - file.read_to_end(&mut file_contents).map_err(Error::Igvm)?; - - let igvm_file = IgvmFile::new_from_binary(&file_contents, Some(IsolationType::Snp)) - .map_err(Error::InvalidIgvmFile)?; - + #[cfg(feature = "sev_snp")] + let sev_snp_enabled = cpu_manager.lock().unwrap().sev_snp_enabled(); let mask = match &igvm_file.platforms()[0] { IgvmPlatformHeader::SupportedPlatform(info) => { debug_assert!(info.platform_type == IgvmPlatformType::SEV_SNP); @@ -194,15 +296,15 @@ pub fn load_igvm( if flags.unmeasured() { gpas.push(GpaPages { gpa: *gpa, - page_type: hv_isolated_page_type_HV_ISOLATED_PAGE_TYPE_UNMEASURED, - 
page_size: hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB, + page_type: page_types.unmeasured, + page_size: page_types.isolated_page_size_4kb, }); BootPageAcceptance::ExclusiveUnmeasured } else { gpas.push(GpaPages { gpa: *gpa, - page_type: hv_isolated_page_type_HV_ISOLATED_PAGE_TYPE_NORMAL, - page_size: hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB, + page_type: page_types.normal, + page_size: page_types.isolated_page_size_4kb, }); BootPageAcceptance::Exclusive } @@ -210,43 +312,46 @@ pub fn load_igvm( IgvmPageDataType::SECRETS => { gpas.push(GpaPages { gpa: *gpa, - page_type: hv_isolated_page_type_HV_ISOLATED_PAGE_TYPE_SECRETS, - page_size: hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB, + page_type: page_types.secrets, + page_size: page_types.isolated_page_size_4kb, }); BootPageAcceptance::SecretsPage } IgvmPageDataType::CPUID_DATA => { - // SAFETY: CPUID is readonly - unsafe { - let cpuid_page_p: *mut hv_psp_cpuid_page = - data.as_ptr() as *mut hv_psp_cpuid_page; // as *mut hv_psp_cpuid_page; - let cpuid_page: &mut hv_psp_cpuid_page = &mut *cpuid_page_p; - for i in 0..cpuid_page.count { - let leaf = cpuid_page.cpuid_leaf_info[i as usize]; - let mut in_leaf = cpu_manager - .lock() - .unwrap() - .get_cpuid_leaf( - 0, - leaf.eax_in, - leaf.ecx_in, - leaf.xfem_in, - leaf.xss_in, - ) - .unwrap(); - if leaf.eax_in == 1 { - in_leaf[2] &= 0x7FFFFFFF; + #[cfg(feature = "mshv")] + if hypervisor_type == HypervisorType::Mshv { + // SAFETY: CPUID is readonly + unsafe { + let cpuid_page_p: *mut hv_psp_cpuid_page = + data.as_ptr() as *mut hv_psp_cpuid_page; // as *mut hv_psp_cpuid_page; + let cpuid_page: &mut hv_psp_cpuid_page = &mut *cpuid_page_p; + for i in 0..cpuid_page.count { + let leaf = cpuid_page.cpuid_leaf_info[i as usize]; + let mut in_leaf = cpu_manager + .lock() + .unwrap() + .get_cpuid_leaf( + 0, + leaf.eax_in, + leaf.ecx_in, + leaf.xfem_in, + leaf.xss_in, + ) + .unwrap(); + if leaf.eax_in == 1 { + in_leaf[2] &= 0x7FFFFFFF; + } + cpuid_page.cpuid_leaf_info[i 
as usize].eax_out = in_leaf[0]; + cpuid_page.cpuid_leaf_info[i as usize].ebx_out = in_leaf[1]; + cpuid_page.cpuid_leaf_info[i as usize].ecx_out = in_leaf[2]; + cpuid_page.cpuid_leaf_info[i as usize].edx_out = in_leaf[3]; } - cpuid_page.cpuid_leaf_info[i as usize].eax_out = in_leaf[0]; - cpuid_page.cpuid_leaf_info[i as usize].ebx_out = in_leaf[1]; - cpuid_page.cpuid_leaf_info[i as usize].ecx_out = in_leaf[2]; - cpuid_page.cpuid_leaf_info[i as usize].edx_out = in_leaf[3]; } } gpas.push(GpaPages { gpa: *gpa, - page_type: hv_isolated_page_type_HV_ISOLATED_PAGE_TYPE_CPUID, - page_size: hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB, + page_type: page_types.cpuid, + page_size: page_types.isolated_page_size_4kb, }); BootPageAcceptance::CpuidPage } @@ -254,9 +359,73 @@ pub fn load_igvm( _ => todo!("unsupported IgvmPageDataType"), }; - loader - .import_pages(gpa / HV_PAGE_SIZE, 1, acceptance, data) - .map_err(Error::Loader)?; + #[allow(unused_mut)] + let mut imported_page = false; + #[cfg(all(feature = "kvm", feature = "sev_snp"))] + if hypervisor_type == HypervisorType::Kvm + && *data_type == IgvmPageDataType::CPUID_DATA + { + let mut new_cp = SnpCpuidInfo::new_zeroed(); + + let entries = cpu_manager.lock().unwrap().common_cpuid(); + // TODO: Filter cpuid rather than truncate + for (i, entry) in entries + .iter() + .enumerate() + .take(std::cmp::min(SNP_CPUID_LIMIT as usize, entries.len())) + { + new_cp.entries[i].eax_in = entry.function; + new_cp.entries[i].ecx_in = entry.index; + new_cp.entries[i].eax = entry.eax; + new_cp.entries[i].ebx = entry.ebx; + new_cp.entries[i].ecx = entry.ecx; + new_cp.entries[i].edx = entry.edx; + /* + * Guest kernels will calculate EBX themselves using the 0xD + * subfunctions corresponding to the individual XSAVE areas, so only + * encode the base XSAVE size in the initial leaves, corresponding + * to the initial XCR0=1 state. 
(https://tinyurl.com/qemu-cpuid) + */ + if new_cp.entries[i].eax_in == 0xd + && (new_cp.entries[i].ecx_in == 0x0 || new_cp.entries[i].ecx_in == 0x1) + { + new_cp.entries[i].ebx = 0x240; + new_cp.entries[i].xcr0_in = 1; + new_cp.entries[i].xss_in = 0; + } + + // KVM SNP launch may reject a CPUID page with bits it intends + // to sanitize internally. Pre-clearing the known unsafe bits keeps + // the CPUID page stable across launch updates. + match (new_cp.entries[i].eax_in, new_cp.entries[i].ecx_in) { + (0x1, 0x0) => { + new_cp.entries[i].ecx &= !(1 << 24); + } + (0x7, 0x0) => { + new_cp.entries[i].ebx &= !0x2; + new_cp.entries[i].edx = 0; + } + (0x80000008, 0x0) => { + new_cp.entries[i].ebx &= !0x0200_0000; + } + (0x80000021, 0x0) => { + new_cp.entries[i].ecx = 0; + } + _ => {} + } + } + new_cp.count = new_cp.entries.len() as u32; + info!("gpa: {:#x}", *gpa); + loader + .import_pages(gpa / HV_PAGE_SIZE, 1, acceptance, new_cp.as_mut_bytes()) + .map_err(Error::Loader)?; + imported_page = true; + } + if !imported_page { + loader + .import_pages(gpa / HV_PAGE_SIZE, 1, acceptance, data) + .map_err(Error::Loader)?; + } } IgvmDirectiveHeader::ParameterArea { number_of_bytes, @@ -288,16 +457,16 @@ pub fn load_igvm( IgvmDirectiveHeader::MmioRanges(_info) => { todo!("unsupported IgvmPageDataType"); } - IgvmDirectiveHeader::MemoryMap(_info) => { + IgvmDirectiveHeader::MemoryMap(_info) => + { #[cfg(feature = "sev_snp")] - { + if sev_snp_enabled { let guest_mem = memory_manager.lock().unwrap().boot_guest_memory(); let memory_map = generate_memory_map(&guest_mem)?; import_parameter(&mut parameter_areas, _info, memory_map.as_bytes())?; + } else { + todo!("Not implemented"); } - - #[cfg(not(feature = "sev_snp"))] - todo!("Not implemented"); } IgvmDirectiveHeader::CommandLine(info) => { import_parameter(&mut parameter_areas, info, command_line.as_bytes_with_nul())?; @@ -325,7 +494,7 @@ pub fn load_igvm( vmsa, } => { assert_eq!(gpa % HV_PAGE_SIZE, 0); - let mut data: [u8; 4096] = [0; 
4096]; + let mut data: [u8; HV_PAGE_SIZE as usize] = [0; HV_PAGE_SIZE as usize]; let len = size_of::(); loaded_info.vmsa_gpa = *gpa; loaded_info.vmsa = **vmsa; @@ -337,10 +506,28 @@ pub fn load_igvm( .map_err(Error::Loader)?; } + // Set vCPU initial register state from VMSA before SNP_LAUNCH_FINISH + #[cfg(all(feature = "kvm", feature = "sev_snp"))] + if hypervisor_type == HypervisorType::Kvm { + let vcpus = cpu_manager.lock().unwrap().vcpus(); + for vcpu in vcpus { + let vcpu_locked = vcpu.lock().unwrap(); + let vcpu_id: u16 = vcpu_locked.id().parse().unwrap(); + if vcpu_id == *vp_index { + vcpu_locked + .setup_sev_snp_regs(loaded_info.vmsa) + .map_err(Error::SetVmsa)?; + vcpu_locked + .set_sev_control_register(0) + .map_err(Error::SetVmsa)?; + } + } + } + gpas.push(GpaPages { gpa: *gpa, - page_type: hv_isolated_page_type_HV_ISOLATED_PAGE_TYPE_VMSA, - page_size: hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB, + page_type: page_types.vmsa, + page_size: page_types.isolated_page_size_4kb, }); } IgvmDirectiveHeader::SnpIdBlock { @@ -408,8 +595,8 @@ pub fn load_igvm( *area = ParameterAreaState::Inserted; gpas.push(GpaPages { gpa: *gpa, - page_type: hv_isolated_page_type_HV_ISOLATED_PAGE_TYPE_UNMEASURED, - page_size: hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB, + page_type: page_types.unmeasured, + page_size: page_types.isolated_page_size_4kb, }); } IgvmDirectiveHeader::ErrorRange { .. 
} => { @@ -422,7 +609,7 @@ pub fn load_igvm( } #[cfg(feature = "sev_snp")] - { + if sev_snp_enabled { memory_manager .lock() .unwrap() @@ -460,18 +647,60 @@ pub fn load_igvm( // of PFN for importing the isolated pages let pfns: Vec = group .iter() - .map(|gpa| gpa.gpa >> HV_HYP_PAGE_SHIFT) + .map(|gpa| gpa.gpa >> ISOLATED_PAGE_SHIFT) .collect(); - memory_manager + let guest_memory = memory_manager.lock().unwrap().guest_memory().memory(); + let uaddrs: Vec<_> = group + .iter() + .map(|gpa| { + let guest_region_mmap = guest_memory.to_region_addr(GuestAddress(gpa.gpa)); + let uaddr_base = guest_region_mmap.unwrap().0.as_ptr() as u64; + let uaddr_offset: u64 = guest_region_mmap.unwrap().1.0; + uaddr_base + uaddr_offset + }) + .collect(); + #[cfg(feature = "kvm")] + let page_type = group[0].page_type; + let mut new_cp = SnpCpuidInfo::new_zeroed(); + let _ = guest_memory.read(new_cp.as_mut_bytes(), GuestAddress(group[0].gpa)); + let _import = memory_manager .lock() .unwrap() .vm .import_isolated_pages( group[0].page_type, - hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB, + page_types.isolated_page_size_4kb, &pfns, + &uaddrs, ) - .map_err(Error::ImportIsolatedPages)?; + .map_err(Error::ImportIsolatedPages); + #[cfg(feature = "kvm")] + if hypervisor_type == HypervisorType::Kvm + && _import.is_err() + && page_type == page_types.cpuid + { + // When we import the CPUID page, the firmware will change any cpuid fns that + // could lead to an insecure guest, we must then make sure to import the updated cpuid + // https://elixir.bootlin.com/linux/v6.11/source/arch/x86/kvm/svm/sev.c#L2322 + let mut updated_cp = SnpCpuidInfo::new_zeroed(); + let _ = guest_memory.read(updated_cp.as_mut_bytes(), GuestAddress(group[0].gpa)); + for (set, got) in std::iter::zip(new_cp.entries.iter(), updated_cp.entries.iter()) { + if set != got { + error!("Set cpuid fn: {set:#x?}, but firmware expects: {got:#x?}"); + } + } + memory_manager + .lock() + .unwrap() + .vm + .import_isolated_pages( + 
group[0].page_type, + page_types.isolated_page_size_4kb, + &pfns, + &uaddrs, + ) + .map_err(Error::ImportIsolatedPages)?; + } } info!( @@ -480,13 +709,23 @@ pub fn load_igvm( gpas.len() ); + let id_block_enabled = if hypervisor_type == HypervisorType::Mshv { + 1 + } else { + 0 + }; + now = Instant::now(); // Call Complete Isolated Import since we are done importing isolated pages memory_manager .lock() .unwrap() .vm - .complete_isolated_import(loaded_info.snp_id_block, host_data_contents, 1) + .complete_isolated_import( + loaded_info.snp_id_block, + host_data_contents, + id_block_enabled, + ) .map_err(Error::CompleteIsolatedImport)?; info!( diff --git a/vmm/src/igvm/mod.rs b/vmm/src/igvm/mod.rs index 62c32d4e89..b41487cfef 100644 --- a/vmm/src/igvm/mod.rs +++ b/vmm/src/igvm/mod.rs @@ -28,9 +28,18 @@ pub mod igvm_loader; mod loader; use igvm::snp_defs::SevVmsa; +use igvm::{IgvmFile, IsolationType}; use igvm_defs::IGVM_VHS_SNP_ID_BLOCK; +use std::path::Path; use zerocopy::FromZeros; +// TODO: IsolationType is hardcoded to Snp. Parameterize when TDX IGVM support lands. 
+pub fn parse_igvm(igvm_path: &Path) -> Result { + let file_contents = std::fs::read(igvm_path).map_err(igvm_loader::Error::Igvm)?; + IgvmFile::new_from_binary(&file_contents, Some(IsolationType::Snp)) + .map_err(igvm_loader::Error::InvalidIgvmFile) +} + #[derive(Debug, Clone)] pub struct IgvmLoadedInfo { pub gpas: Vec, diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 92bf4c6b70..6d559dac1d 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -230,6 +230,8 @@ impl From<&VmConfig> for hypervisor::HypervisorVmConfig { sev_snp_enabled: _value.is_sev_snp_enabled(), #[cfg(feature = "sev_snp")] mem_size: _value.memory.total_size(), + #[cfg(feature = "sev_snp")] + vmsa_features: 0, nested: _value.cpus.nested, smt_enabled: _value .cpus @@ -1088,6 +1090,8 @@ impl Vmm { self.console_resize_pipe.clone(), Arc::clone(&self.original_termios_opt), Some(&snapshot), + #[cfg(feature = "igvm")] + None, ) .map_err(|e| { MigratableError::MigrateReceive(anyhow!("Error creating VM from snapshot: {e:?}")) diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index 97f020e650..c6c597261a 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -90,6 +90,9 @@ mod kvm { pub const KVM_HAS_DEVICE_ATTR: u64 = 0x4018_aee3; pub const KVM_SET_ONE_REG: u64 = 0x4010_aeac; pub const KVM_SET_USER_MEMORY_REGION: u64 = 0x4020_ae46; + pub const KVM_SET_USER_MEMORY_REGION2: u64 = 0x40a0_ae49; + pub const KVM_SET_MEMORY_ATTRIBUTES: u64 = 0x4020_aed2; + pub const KVM_CREATE_GUEST_MEMFD: u64 = 0xc040_aed4; pub const KVM_IRQFD: u64 = 0x4020_ae76; pub const KVM_IOEVENTFD: u64 = 0x4040_ae79; pub const KVM_SET_VCPU_EVENTS: u64 = 0x4040_aea0; @@ -108,6 +111,9 @@ mod kvm { pub const KVM_NMI: u64 = 0xae9a; pub const KVM_GET_NESTED_STATE: u64 = 3229658814; pub const KVM_SET_NESTED_STATE: u64 = 1082175167; + pub const KVM_SEV_SNP_LAUNCH_START: u64 = 0x4018_aeb4; + pub const KVM_SEV_SNP_LAUNCH_UPDATE: u64 = 0x8018_aeb5; + pub const KVM_SEV_SNP_LAUNCH_FINISH: u64 = 0x4008_aeb7; } // 
Block device ioctls (not exported by libc) @@ -240,10 +246,21 @@ fn create_vmm_ioctl_seccomp_rule_common_kvm() -> Result, Backen and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_ONE_REG)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_REGS)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_USER_MEMORY_REGION,)?], + and![Cond::new( + 1, + ArgLen::Dword, + Eq, + KVM_SET_USER_MEMORY_REGION2, + )?], + and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_MEMORY_ATTRIBUTES,)?], + and![Cond::new(1, ArgLen::Dword, Eq, KVM_CREATE_GUEST_MEMFD,)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_VCPU_EVENTS,)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_NMI)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_GET_NESTED_STATE)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_NESTED_STATE)?], + and![Cond::new(1, ArgLen::Dword, Eq, KVM_SEV_SNP_LAUNCH_START)?], + and![Cond::new(1, ArgLen::Dword, Eq, KVM_SEV_SNP_LAUNCH_UPDATE)?], + and![Cond::new(1, ArgLen::Dword, Eq, KVM_SEV_SNP_LAUNCH_FINISH)?], ]) } @@ -714,6 +731,14 @@ fn create_vcpu_ioctl_seccomp_rule_kvm() -> Result, BackendError and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_DEVICE_ATTR,)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_GSI_ROUTING,)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_USER_MEMORY_REGION,)?], + and![Cond::new( + 1, + ArgLen::Dword, + Eq, + KVM_SET_USER_MEMORY_REGION2, + )?], + and![Cond::new(1, ArgLen::Dword, Eq, KVM_CREATE_GUEST_MEMFD,)?], + and![Cond::new(1, ArgLen::Dword, Eq, KVM_SET_MEMORY_ATTRIBUTES,)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_RUN,)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_NMI)?], and![Cond::new(1, ArgLen::Dword, Eq, KVM_GET_NESTED_STATE)?], @@ -840,6 +865,7 @@ fn vcpu_thread_rules( (libc::SYS_sendto, vec![]), (libc::SYS_shutdown, vec![]), (libc::SYS_sigaltstack, vec![]), + (libc::SYS_statx, vec![]), (libc::SYS_tgkill, vec![]), (libc::SYS_tkill, vec![]), #[cfg(target_arch = "x86_64")] diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 04bd2d595a..dc16b0fc89 100644 --- a/vmm/src/vm.rs +++ 
b/vmm/src/vm.rs @@ -46,7 +46,13 @@ use gdbstub_arch::aarch64::reg::AArch64CoreRegs as CoreRegs; use gdbstub_arch::x86::reg::X86_64CoreRegs as CoreRegs; #[cfg(target_arch = "aarch64")] use hypervisor::arch::aarch64::regs::AARCH64_PMU_IRQ; +#[cfg(all(feature = "kvm", feature = "sev_snp"))] +use hypervisor::kvm::{ + BOOTLOADER_SIZE, BOOTLOADER_START, KVM_VMSA_PAGE_ADDRESS, KVM_VMSA_PAGE_SIZE, +}; use hypervisor::{HypervisorVmConfig, HypervisorVmError, VmOps}; +#[cfg(feature = "sev_snp")] +use igvm_defs::SnpPolicy; use libc::{SIGWINCH, termios}; use linux_loader::cmdline::Cmdline; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] @@ -107,6 +113,8 @@ use crate::{ CPU_MANAGER_SNAPSHOT_ID, DEVICE_MANAGER_SNAPSHOT_ID, GuestMemoryMmap, MEMORY_MANAGER_SNAPSHOT_ID, PciDeviceInfo, cpu, }; +#[cfg(feature = "igvm")] +use igvm::IgvmFile; /// Errors associated with VM management #[derive(Debug, Error)] @@ -325,10 +333,6 @@ pub enum Error { #[error("Error coredumping VM")] Coredump(#[source] GuestDebuggableError), - #[cfg(feature = "igvm")] - #[error("Cannot open igvm file")] - IgvmFile(#[source] io::Error), - #[cfg(feature = "igvm")] #[error("Cannot load the igvm into memory")] IgvmLoad(#[source] igvm_loader::Error), @@ -528,6 +532,19 @@ pub struct Vm { impl Vm { pub const HANDLED_SIGNALS: [i32; 1] = [SIGWINCH]; + #[cfg(feature = "sev_snp")] + pub fn get_default_sev_snp_guest_policy() -> SnpPolicy { + SnpPolicy::new() + .with_abi_minor(0) + .with_abi_major(0) + // SMT permitted: allows the guest to run on an SMT-enabled host. + // This is the permissive default; future work can expose this as a + // configurable platform option. 
+ .with_smt(1) + .with_reserved_must_be_one(1) + .with_migrate_ma(0) + } + #[allow(clippy::needless_pass_by_value)] #[allow(clippy::too_many_arguments)] pub fn new_from_memory_manager( @@ -545,6 +562,7 @@ impl Vm { console_resize_pipe: Option>, original_termios: Arc>>, snapshot: Option<&Snapshot>, + #[cfg(feature = "igvm")] igvm_file: Option, ) -> Result { trace_scoped!("Vm::new_from_memory_manager"); @@ -626,6 +644,8 @@ impl Vm { console_resize_pipe.as_ref(), &original_termios, snapshot, + #[cfg(feature = "igvm")] + igvm_file, )?; // Load kernel and initramfs files @@ -729,6 +749,14 @@ impl Vm { let tdx_enabled = config.lock().unwrap().is_tdx_enabled(); #[cfg(feature = "sev_snp")] let sev_snp_enabled = config.lock().unwrap().is_sev_snp_enabled(); + #[cfg(feature = "igvm")] + let igvm_enabled = config + .lock() + .unwrap() + .payload + .as_ref() + .and_then(|p| p.igvm.as_ref()) + .is_some(); let cpus_config = config.lock().unwrap().cpus.clone(); let cpu_manager = cpu::CpuManager::new( @@ -746,6 +774,8 @@ impl Vm { numa_nodes, #[cfg(feature = "sev_snp")] sev_snp_enabled, + #[cfg(feature = "igvm")] + igvm_enabled, ) .map_err(Error::CpuManager)?; @@ -843,6 +873,7 @@ impl Vm { console_resize_pipe: Option<&Arc>, original_termios: &Arc>>, snapshot: Option<&Snapshot>, + #[cfg(feature = "igvm")] igvm_file: Option, ) -> Result>>> { #[cfg(feature = "mshv")] let is_mshv = matches!( @@ -877,6 +908,8 @@ impl Vm { console_resize_pipe, original_termios, snapshot, + #[cfg(feature = "igvm")] + igvm_file, ); } @@ -906,8 +939,8 @@ impl Vm { config, #[cfg(feature = "igvm")] cpu_manager, - #[cfg(feature = "sev_snp")] - false, + #[cfg(feature = "igvm")] + igvm_file, )? 
} else { None @@ -952,6 +985,7 @@ impl Vm { console_resize_pipe: Option<&Arc>, original_termios: &Arc>>, snapshot: Option<&Snapshot>, + #[cfg(feature = "igvm")] igvm_file: Option, ) -> Result>>> { // Create boot vCPUs before SEV-SNP initialization cpu_manager @@ -961,7 +995,8 @@ impl Vm { .map_err(Error::CpuManager)?; // Initialize SEV-SNP - transitions guest into secure state - vm.sev_snp_init().map_err(Error::InitializeSevSnpVm)?; + vm.sev_snp_init(Self::get_default_sev_snp_guest_policy()) + .map_err(Error::InitializeSevSnpVm)?; // Load payload for SEV-SNP (IGVM parser needs cpu_manager for cpuid) let load_payload_handle = if snapshot.is_none() { @@ -970,7 +1005,8 @@ impl Vm { config, #[cfg(feature = "igvm")] cpu_manager, - true, + #[cfg(feature = "igvm")] + igvm_file, )? } else { None @@ -997,6 +1033,9 @@ impl Vm { ) .map_err(Error::DeviceManager)?; + #[cfg(feature = "fw_cfg")] + Self::create_fw_cfg_if_enabled(config, device_manager)?; + Ok(load_payload_handle) } @@ -1271,10 +1310,28 @@ impl Vm { vm_config.lock().unwrap().is_tdx_enabled() }; - let vm = Self::create_hypervisor_vm( - hypervisor.as_ref(), - vm_config.as_ref().lock().unwrap().deref().into(), - )?; + #[cfg(feature = "igvm")] + let igvm_file = { + let config = vm_config.lock().unwrap(); + config + .payload + .as_ref() + .and_then(|p| p.igvm.as_ref()) + .map(|igvm_path| crate::igvm::parse_igvm(igvm_path)) + .transpose() + .map_err(Error::IgvmLoad)? + }; + + let vm = { + #[allow(unused_mut)] + let mut hv_config: hypervisor::HypervisorVmConfig = + vm_config.as_ref().lock().unwrap().deref().into(); + #[cfg(all(feature = "igvm", feature = "sev_snp"))] + if let Some(ref igvm) = igvm_file { + hv_config.vmsa_features = igvm_loader::extract_sev_features(igvm); + } + Self::create_hypervisor_vm(hypervisor.as_ref(), hv_config)? 
+ }; #[cfg(all(feature = "kvm", target_arch = "x86_64"))] if vm_config.lock().unwrap().max_apic_id() > MAX_SUPPORTED_CPUS_LEGACY { @@ -1330,6 +1387,8 @@ impl Vm { console_resize_pipe, original_termios, snapshot, + #[cfg(feature = "igvm")] + igvm_file, ) } @@ -1445,16 +1504,33 @@ impl Vm { Ok(EntryPoint { entry_addr }) } + #[cfg(all(feature = "kvm", feature = "sev_snp"))] + fn reserve_bootloader_regions(memory_manager: &Arc>) -> Result<()> { + let mut mm = memory_manager.lock().unwrap(); + mm.add_ram_region(BOOTLOADER_START, BOOTLOADER_SIZE) + .map_err(Error::MemoryManager)?; + mm.add_ram_region(KVM_VMSA_PAGE_ADDRESS, KVM_VMSA_PAGE_SIZE) + .map_err(Error::MemoryManager)?; + Ok(()) + } + #[cfg(feature = "igvm")] #[allow(clippy::needless_pass_by_value)] fn load_igvm( - igvm: File, + igvm_file: IgvmFile, memory_manager: Arc>, cpu_manager: Arc>, #[cfg(feature = "sev_snp")] host_data: &Option, ) -> Result { + // Only reserve bootloader/VMSA regions for KVM + SEV-SNP; other hypervisors + // (e.g. MSHV) handle this through their own import path. 
+ #[cfg(all(feature = "kvm", feature = "sev_snp"))] + if cpu_manager.lock().unwrap().sev_snp_enabled() { + Self::reserve_bootloader_regions(&memory_manager)?; + } + let res = igvm_loader::load_igvm( - &igvm, + igvm_file, memory_manager, cpu_manager.clone(), "", @@ -1544,19 +1620,21 @@ impl Vm { payload: &PayloadConfig, memory_manager: Arc>, #[cfg(feature = "igvm")] cpu_manager: Arc>, - #[cfg(feature = "sev_snp")] sev_snp_enabled: bool, + #[cfg(feature = "igvm")] igvm_file: Option, ) -> Result { trace_scoped!("load_payload"); #[cfg(feature = "igvm")] { - if let Some(_igvm_file) = &payload.igvm { - let igvm = File::open(_igvm_file).map_err(Error::IgvmFile)?; - #[cfg(feature = "sev_snp")] - if sev_snp_enabled { - return Self::load_igvm(igvm, memory_manager, cpu_manager, &payload.host_data); - } - #[cfg(not(feature = "sev_snp"))] - return Self::load_igvm(igvm, memory_manager, cpu_manager); + if payload.igvm.is_some() { + let igvm_file = + igvm_file.ok_or(Error::IgvmLoad(igvm_loader::Error::MissingIgvm))?; + return Self::load_igvm( + igvm_file, + memory_manager, + cpu_manager, + #[cfg(feature = "sev_snp")] + &payload.host_data, + ); } } match (&payload.firmware, &payload.kernel) { @@ -1600,7 +1678,7 @@ impl Vm { memory_manager: &Arc>, config: &Arc>, #[cfg(feature = "igvm")] cpu_manager: &Arc>, - #[cfg(feature = "sev_snp")] sev_snp_enabled: bool, + #[cfg(feature = "igvm")] igvm_file: Option, ) -> Result>>> { // Kernel with TDX is loaded in a different manner #[cfg(feature = "tdx")] @@ -1627,8 +1705,8 @@ impl Vm { memory_manager, #[cfg(feature = "igvm")] cpu_manager, - #[cfg(feature = "sev_snp")] - sev_snp_enabled, + #[cfg(feature = "igvm")] + igvm_file, ) }) .map_err(Error::KernelLoadThreadSpawn) @@ -1637,7 +1715,11 @@ impl Vm { } #[cfg(target_arch = "x86_64")] - fn configure_system(&mut self, rsdp_addr: GuestAddress, entry_addr: EntryPoint) -> Result<()> { + fn configure_system( + &mut self, + rsdp_addr: Option, + entry_addr: EntryPoint, + ) -> Result<()> { 
trace_scoped!("configure_system"); info!("Configuring system"); let mem = self.memory_manager.lock().unwrap().boot_guest_memory(); @@ -1648,7 +1730,6 @@ impl Vm { }; let boot_vcpus = self.cpu_manager.lock().unwrap().boot_vcpus(); - let rsdp_addr = Some(rsdp_addr); let serial_number = self .config @@ -1700,7 +1781,7 @@ impl Vm { #[cfg(target_arch = "aarch64")] fn configure_system( &mut self, - _rsdp_addr: GuestAddress, + _rsdp_addr: Option, _entry_addr: EntryPoint, ) -> Result<()> { let cmdline = Self::generate_cmdline( @@ -2737,16 +2818,11 @@ impl Vm { let rsdp_addr = self.create_acpi_tables(); #[cfg(not(target_arch = "riscv64"))] - { - #[cfg(not(any(feature = "sev_snp", feature = "tdx")))] - assert!(rsdp_addr.is_some()); - // Configure shared state based on loaded kernel - if let Some(rsdp_adr) = rsdp_addr { - entry_point - .map(|entry_point| self.configure_system(rsdp_adr, entry_point)) - .transpose()?; - } - } + // Configure shared state based on loaded kernel + entry_point + .map(|entry_point| self.configure_system(rsdp_addr, entry_point)) + .transpose()?; + #[cfg(target_arch = "riscv64")] self.configure_system().unwrap(); diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index 88f8af4acf..8d1378b640 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -857,7 +857,7 @@ impl PayloadConfig { #[cfg(feature = "igvm")] { if self.igvm.is_some() { - if self.firmware.is_some() || self.kernel.is_some() { + if self.firmware.is_some() { return Err(PayloadConfigError::IgvmPlusOtherPayloads); } return Ok(());