diff --git a/Cargo.lock b/Cargo.lock index eacc487..e0e49df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -181,15 +181,6 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" -[[package]] -name = "bincode" -version = "1.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" -dependencies = [ - "serde", -] - [[package]] name = "bitflags" version = "2.10.0" @@ -225,6 +216,29 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +[[package]] +name = "bytecheck" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0caa33a2c0edca0419d15ac723dff03f1956f7978329b1e3b5fdaaaed9d3ca8b" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "rancor", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89385e82b5d1821d2219e0b095efa2cc1f246cbf99080f3be46a1a85c0d392d9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "byteorder" version = "1.5.0" @@ -1689,6 +1703,26 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "munge" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e17401f259eba956ca16491461b6e8f72913a0a114e39736ce404410f915a0c" +dependencies = [ + "munge_macro", +] + +[[package]] +name = "munge_macro" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "native-tls" version = "0.2.14" @@ -2272,6 +2306,7 @@ dependencies = [ "parking_lot", "rand", "regex", + "rkyv", "rlimit", "serde", "serde_json", @@ -2313,7 +2348,6 @@ name = "prtip-scanner" version = "0.5.9" dependencies = [ "anyhow", - "bincode", "chrono", "criterion", "crossbeam", @@ -2335,6 +2369,7 @@ dependencies = [ "prtip-network", "rand", "regex", + "rkyv", "rustls", "serde", "serde_json", @@ -2372,6 +2407,26 @@ dependencies = [ "uuid", ] +[[package]] +name = "ptr_meta" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b9a0cf95a1196af61d4f1cbdab967179516d9a4a4312af1f31948f8f6224a79" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "quanta" version = "0.12.6" @@ -2412,6 +2467,15 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rancor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a063ea72381527c2a0561da9c80000ef822bdd7c3241b1cc1b12100e3df081ee" +dependencies = [ + "ptr_meta", +] + [[package]] name = "rand" version = "0.8.5" @@ -2562,6 +2626,15 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "rend" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cadadef317c2f20755a64d7fdc48f9e7178ee6b0e1f7fce33fa60f1d68a276e6" +dependencies = [ + "bytecheck", +] + [[package]] name = "ring" version = "0.17.14" @@ -2576,6 +2649,36 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rkyv" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "360b333c61ae24e5af3ae7c8660bd6b21ccd8200dbbc5d33c2454421e85b9c69" +dependencies = [ + "bytecheck", + "bytes", + "hashbrown 0.16.1", + "indexmap", + "munge", + "ptr_meta", + "rancor", + "rend", + "rkyv_derive", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02f8cdd12b307ab69fe0acf4cd2249c7460eb89dce64a0febadf934ebb6a9e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "rlimit" version = "0.10.2" @@ -2884,6 +2987,12 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "slab" version = "0.4.11" diff --git a/Cargo.toml b/Cargo.toml index ce9fa9a..9f697a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,7 +43,9 @@ rusqlite = { version = "0.31", features = ["bundled"] } # Memory-mapped I/O (Sprint 6.6) memmap2 = "0.9" -bincode = "1.3" + +# Zero-copy serialization (rkyv migration) +rkyv = { version = "0.8.14", features = ["std", "alloc"] } # CSV export (promote to workspace) csv = "1.3" diff --git a/crates/prtip-core/Cargo.toml b/crates/prtip-core/Cargo.toml index 60486ee..56f3ef4 100644 --- a/crates/prtip-core/Cargo.toml +++ b/crates/prtip-core/Cargo.toml @@ -30,6 +30,7 @@ rand = { workspace = true } sysinfo = { workspace = true } flate2 = "1.0" dirs = "5.0" +rkyv = { workspace = true } [dev-dependencies] tokio = { workspace = true } diff --git a/crates/prtip-core/src/lib.rs b/crates/prtip-core/src/lib.rs index d702ea6..0b042a1 100644 --- a/crates/prtip-core/src/lib.rs +++ b/crates/prtip-core/src/lib.rs @@ -136,4 +136,7 @@ pub use resource_monitor::{ pub use retry::{retry_with_backoff, RetryConfig}; pub use service_db::{ServiceMatch, ServiceProbe, ServiceProbeDb}; pub use templates::{ScanTemplate, TemplateManager}; -pub use types::{PortRange, PortState, Protocol, ScanResult, ScanTarget, ScanType, TimingTemplate}; +pub use types::{ + PortRange, PortState, Protocol, ScanResult, ScanResultRkyv, ScanTarget, ScanType, + TimingTemplate, +}; diff --git a/crates/prtip-core/src/types.rs b/crates/prtip-core/src/types.rs index 7084f49..2b60591 100644 --- a/crates/prtip-core/src/types.rs +++ b/crates/prtip-core/src/types.rs @@ -592,6 +592,128 @@ impl fmt::Display for ScanResult { } } +/// rkyv-compatible serialization format for ScanResult +/// +/// This type is optimized for zero-copy deserialization using rkyv. +/// It stores all data in a format that can be directly interpreted from +/// memory-mapped files without allocation. +/// +/// # Alignment Requirements +/// +/// This structure must maintain proper alignment for rkyv's zero-copy +/// deserialization. The fixed-size entry buffer (512 bytes) provides +/// adequate alignment for typical rkyv requirements. +#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] +#[rkyv(derive(Debug))] +pub struct ScanResultRkyv { + /// Target IP address (16 bytes for IPv6 compatibility) + pub target_ip_bytes: [u8; 16], + /// Whether the IP is IPv4 (true) or IPv6 (false) + pub is_ipv4: bool, + /// Port number + pub port: u16, + /// Port state as u8 (Open=0, Closed=1, Filtered=2, Unknown=3) + pub state: u8, + /// Response time in nanoseconds (u64 to avoid truncation) + pub response_time_nanos: u64, + /// Timestamp in nanoseconds since Unix epoch + pub timestamp_nanos: i64, + /// Optional banner (max 128 bytes) + pub banner: Option, + /// Optional service name (max 32 bytes) + pub service: Option, + /// Optional service version (max 64 bytes) + pub version: Option, + /// Optional raw response (limited to 256 bytes to fit in entry) + pub raw_response: Option>, +} + +impl From<&ScanResult> for ScanResultRkyv { + fn from(result: &ScanResult) -> Self { + // Convert IpAddr to bytes + let (target_ip_bytes, is_ipv4) = match result.target_ip { + IpAddr::V4(ipv4) => { + let mut bytes = [0u8; 16]; + bytes[..4].copy_from_slice(&ipv4.octets()); + (bytes, true) + } + IpAddr::V6(ipv6) => (ipv6.octets(), false), + }; + + // Convert PortState to u8 + let state = match result.state { + PortState::Open => 0, + PortState::Closed => 1, + PortState::Filtered => 2, + PortState::Unknown => 3, + }; + + // Convert response time to u64 nanoseconds (avoid truncation issues) + // Note: u64 can represent up to ~584 years, which is more than sufficient + // for network response times. We clamp to u64::MAX to avoid overflow. + let response_time_nanos = result.response_time.as_nanos().min(u64::MAX as u128) as u64; + + // Convert timestamp with proper error handling + let timestamp_nanos = result + .timestamp + .timestamp_nanos_opt() + .expect("timestamp out of range for nanosecond representation"); + + Self { + target_ip_bytes, + is_ipv4, + port: result.port, + state, + response_time_nanos, + timestamp_nanos, + banner: result.banner.clone(), + service: result.service.clone(), + version: result.version.clone(), + raw_response: result.raw_response.clone(), + } + } +} + +impl From for ScanResult { + fn from(rkyv: ScanResultRkyv) -> Self { + // Convert bytes back to IpAddr + let target_ip = if rkyv.is_ipv4 { + let mut octets = [0u8; 4]; + octets.copy_from_slice(&rkyv.target_ip_bytes[..4]); + IpAddr::V4(std::net::Ipv4Addr::from(octets)) + } else { + IpAddr::V6(std::net::Ipv6Addr::from(rkyv.target_ip_bytes)) + }; + + // Convert u8 back to PortState + let state = match rkyv.state { + 0 => PortState::Open, + 1 => PortState::Closed, + 2 => PortState::Filtered, + _ => PortState::Unknown, + }; + + // Convert u64 nanoseconds back to Duration + // Safe: u64::MAX nanoseconds fits within Duration's range + let response_time = Duration::from_nanos(rkyv.response_time_nanos); + + // Convert i64 nanoseconds back to DateTime + let timestamp = DateTime::from_timestamp_nanos(rkyv.timestamp_nanos); + + Self { + target_ip, + port: rkyv.port, + state, + response_time, + timestamp, + banner: rkyv.banner, + service: rkyv.service, + version: rkyv.version, + raw_response: rkyv.raw_response, + } + } +} + /// Port filtering for exclusion/inclusion lists /// /// Provides efficient port filtering using hash sets for O(1) lookups. diff --git a/crates/prtip-scanner/Cargo.toml b/crates/prtip-scanner/Cargo.toml index c42f5e5..6d99d00 100644 --- a/crates/prtip-scanner/Cargo.toml +++ b/crates/prtip-scanner/Cargo.toml @@ -87,7 +87,7 @@ pcap-file = "2.0" # Memory-mapped I/O memmap2 = { workspace = true } -bincode = { workspace = true } +rkyv = { workspace = true } [dev-dependencies] tokio = { workspace = true } diff --git a/crates/prtip-scanner/src/output/mmap_reader.rs b/crates/prtip-scanner/src/output/mmap_reader.rs index 84003e6..ce0a661 100644 --- a/crates/prtip-scanner/src/output/mmap_reader.rs +++ b/crates/prtip-scanner/src/output/mmap_reader.rs @@ -1,13 +1,14 @@ //! Memory-mapped result reader for zero-copy access to scan results use memmap2::Mmap; -use prtip_core::ScanResult; +use prtip_core::{ScanResult, ScanResultRkyv}; use std::fs::File; use std::io; use std::path::Path; const HEADER_SIZE: usize = 64; const ENTRY_SIZE: usize = 512; +const LENGTH_PREFIX_SIZE: usize = 8; // u64 length prefix for each entry /// Memory-mapped result reader pub struct MmapResultReader { @@ -33,10 +34,21 @@ impl MmapResultReader { // Parse header let version = u64::from_le_bytes(mmap[0..8].try_into().unwrap()); - if version != 1 { + if version == 1 { return Err(io::Error::new( io::ErrorKind::InvalidData, - format!("Unsupported version: {}", version), + "Incompatible file format: version 1 (bincode) is no longer supported. \ + This file was created with an older version of the scanner. \ + Please regenerate scan results with the current version (rkyv format, version 2).", + )); + } + if version != 2 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "Unsupported version: {}. Expected version 2 (rkyv format).", + version + ), )); } @@ -77,8 +89,34 @@ impl MmapResultReader { let offset = HEADER_SIZE + (index * self.entry_size); let entry_bytes = &self.mmap[offset..offset + self.entry_size]; - // Deserialize the entry (bincode handles trailing zeros) - bincode::deserialize(entry_bytes).ok() + // Read length prefix (u64 in little-endian) + let len = u64::from_le_bytes( + entry_bytes[..LENGTH_PREFIX_SIZE] + .try_into() + .expect("LENGTH_PREFIX_SIZE is 8 bytes"), + ) as usize; + + // Validate length + if len == 0 || len + LENGTH_PREFIX_SIZE > self.entry_size { + eprintln!( + "MmapResultReader: invalid entry length {} at index {}", + len, index + ); + return None; + } + + // Use zero-copy deserialization without unnecessary allocation + let data_bytes = &entry_bytes[LENGTH_PREFIX_SIZE..LENGTH_PREFIX_SIZE + len]; + match rkyv::from_bytes::(data_bytes) { + Ok(rkyv_result) => Some(ScanResult::from(rkyv_result)), + Err(e) => { + eprintln!( + "MmapResultReader: failed to deserialize entry at index {} with length {}: {}", + index, len, e + ); + None + } + } } /// Create an iterator over all entries @@ -215,4 +253,43 @@ mod tests { assert!(reader.get_entry(1).is_none()); assert!(reader.get_entry(100).is_none()); } + + #[test] + fn test_mmap_version_1_rejected() { + use std::io::Write; + + let temp = NamedTempFile::new().unwrap(); + let path = temp.path().to_owned(); + + // Create a file with version 1 header (old bincode format) + { + let mut file = std::fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(&path) + .unwrap(); + + // Write a version 1 header + file.write_all(&1u64.to_le_bytes()).unwrap(); // version = 1 + file.write_all(&0u64.to_le_bytes()).unwrap(); // entry_count = 0 + file.write_all(&(ENTRY_SIZE as u64).to_le_bytes()).unwrap(); // entry_size + file.write_all(&0u64.to_le_bytes()).unwrap(); // checksum + // Pad to HEADER_SIZE + file.write_all(&[0u8; HEADER_SIZE - 32]).unwrap(); + } + + // Attempt to open should fail with clear error message + let result = MmapResultReader::open(&path); + assert!(result.is_err()); + if let Err(err) = result { + assert_eq!(err.kind(), std::io::ErrorKind::InvalidData); + let err_msg = err.to_string(); + assert!( + err_msg.contains("version 1") && err_msg.contains("bincode"), + "Error message should mention version 1 and bincode format: {}", + err_msg + ); + } + } } diff --git a/crates/prtip-scanner/src/output/mmap_writer.rs b/crates/prtip-scanner/src/output/mmap_writer.rs index 7b9ee77..990d9cc 100644 --- a/crates/prtip-scanner/src/output/mmap_writer.rs +++ b/crates/prtip-scanner/src/output/mmap_writer.rs @@ -3,15 +3,35 @@ //! Uses memory-mapped files to reduce RAM usage by 20-50% compared to //! in-memory buffering. Results are written to a binary format with //! fixed-size entries for zero-copy random access. +//! +//! # Alignment Requirements +//! +//! The ENTRY_SIZE (512 bytes) is carefully chosen to provide adequate alignment +//! for rkyv's zero-copy deserialization. rkyv typically requires 8-byte alignment, +//! and 512 is a multiple of 16 bytes, ensuring proper alignment for all common +//! data types. use memmap2::{MmapMut, MmapOptions}; -use prtip_core::ScanResult; +use prtip_core::{ScanResult, ScanResultRkyv}; use std::fs::{File, OpenOptions}; use std::io; use std::path::Path; const HEADER_SIZE: usize = 64; // Version, entry_count, entry_size, checksum const ENTRY_SIZE: usize = 512; // Fixed-size entries (padded if needed) +const LENGTH_PREFIX_SIZE: usize = 8; // u64 length prefix for each entry + +// Compile-time assertion to verify ENTRY_SIZE alignment +const _: () = assert!( + ENTRY_SIZE % 16 == 0, + "ENTRY_SIZE must be a multiple of 16 bytes for rkyv alignment" +); + +// Compile-time assertion to verify LENGTH_PREFIX_SIZE alignment +const _: () = assert!( + LENGTH_PREFIX_SIZE == 8, + "LENGTH_PREFIX_SIZE must be 8 bytes for proper alignment" +); /// Memory-mapped result writer pub struct MmapResultWriter { @@ -56,25 +76,39 @@ impl MmapResultWriter { } let offset = HEADER_SIZE + (self.entry_count * ENTRY_SIZE); - let entry_bytes = bincode::serialize(result) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; - if entry_bytes.len() > ENTRY_SIZE { + // Convert to rkyv-compatible format + let rkyv_result = ScanResultRkyv::from(result); + + // Serialize using rkyv with improved error handling + let entry_bytes = rkyv::to_bytes::(&rkyv_result).map_err(|e| { + let msg = format!("rkyv serialization error (rkyv::rancor::Error): {e:?}"); + io::Error::new(io::ErrorKind::InvalidData, msg) + })?; + + // Check if entry fits (accounting for length prefix) + if entry_bytes.len() + LENGTH_PREFIX_SIZE > ENTRY_SIZE { return Err(io::Error::new( io::ErrorKind::InvalidData, format!( - "Entry size {} exceeds maximum {}", + "Entry size {} (+ {} length prefix) exceeds maximum {}", entry_bytes.len(), + LENGTH_PREFIX_SIZE, ENTRY_SIZE ), )); } - // Write serialized data - self.mmap[offset..offset + entry_bytes.len()].copy_from_slice(&entry_bytes); + // Write length prefix (u64 in little-endian) + let len_bytes = (entry_bytes.len() as u64).to_le_bytes(); + self.mmap[offset..offset + LENGTH_PREFIX_SIZE].copy_from_slice(&len_bytes); + + // Write serialized data after length prefix + let data_offset = offset + LENGTH_PREFIX_SIZE; + self.mmap[data_offset..data_offset + entry_bytes.len()].copy_from_slice(&entry_bytes); // Zero-fill remaining space - for i in entry_bytes.len()..ENTRY_SIZE { + for i in (LENGTH_PREFIX_SIZE + entry_bytes.len())..ENTRY_SIZE { self.mmap[offset + i] = 0; } @@ -103,8 +137,9 @@ impl MmapResultWriter { } fn write_header(&mut self) -> io::Result<()> { - // Version: 1 - self.mmap[0..8].copy_from_slice(&1u64.to_le_bytes()); + // Version: 2 (rkyv format with length prefix) + // Version 1 was bincode format (deprecated) + self.mmap[0..8].copy_from_slice(&2u64.to_le_bytes()); // Entry count: 0 self.mmap[8..16].copy_from_slice(&0u64.to_le_bytes()); // Entry size: ENTRY_SIZE