From 8e137e1e804152ab79b759c29c050738012efc80 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 5 Feb 2026 21:28:26 +0200 Subject: [PATCH 01/33] MOD-13577 support Homogenues array floating point forcing(deserialization path only) --- Cargo.toml | 1 + src/array.rs | 176 +++++++++++++++++++++++++++++------- src/de.rs | 177 ++++++++++++++++++++++++++++++++++--- src/{alloc.rs => error.rs} | 14 +++ src/lib.rs | 6 +- 5 files changed, 325 insertions(+), 49 deletions(-) rename src/{alloc.rs => error.rs} (56%) diff --git a/Cargo.toml b/Cargo.toml index 9332c82..ed0021a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ serde_json = { workspace = true } ctor = { version = "0.1.16", optional = true } paste = "1.0.15" half = "2.0.0" +thiserror = "2.0.18" [dev-dependencies] mockalloc = "0.1.2" diff --git a/src/array.rs b/src/array.rs index 2cc0d70..4360d6d 100644 --- a/src/array.rs +++ b/src/array.rs @@ -9,8 +9,9 @@ use std::iter::FromIterator; use std::ops::{Index, IndexMut}; use std::slice::{from_raw_parts, from_raw_parts_mut, SliceIndex}; +use crate::error::IJsonError; use crate::{ - alloc::AllocError, + error::AllocError, thin::{ThinMut, ThinMutExt, ThinRef, ThinRefExt}, value::TypeTag, Defrag, DefragAllocator, IValue, @@ -54,6 +55,41 @@ impl Default for ArrayTag { } } +/// Enum representing different types of floating-point types +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum FloatType { + /// F16 + F16, + /// BF16 + BF16, + /// F32 + F32, + /// F64 + F64, +} + +impl fmt::Display for FloatType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + FloatType::F16 => write!(f, "F16"), + FloatType::BF16 => write!(f, "BF16"), + FloatType::F32 => write!(f, "F32"), + FloatType::F64 => write!(f, "F64"), + } + } +} + +impl From for ArrayTag { + fn from(fp_type: FloatType) -> Self { + match fp_type { + FloatType::F16 => ArrayTag::F16, + FloatType::BF16 => ArrayTag::BF16, + FloatType::F32 => ArrayTag::F32, + FloatType::F64 => 
ArrayTag::F64, + } + } +} + impl ArrayTag { fn from_type() -> Self { use ArrayTag::*; @@ -182,14 +218,25 @@ impl ArrayTag { /// Determines the ArrayTag for an IValue if it represents a primitive type /// Prefers signed types over unsigned types for positive values to be more conservative fn from_ivalue(value: &IValue) -> ArrayTag { + Self::from_ivalue_with_hint(value, None) + } + + /// Determines the ArrayTag for an IValue, using the provided fp_type for floating-point types. + /// + /// When `fp_type` is `Some`, uses the hinted type directly for floating-point values. + fn from_ivalue_with_hint(value: &IValue, fp_type: Option) -> ArrayTag { use ArrayTag::*; if let Some(num) = value.as_number() { if num.has_decimal_point() { - num.to_f16() - .map(|_| F16) - .or_else(|| num.to_bf16().map(|_| BF16)) - .or_else(|| num.to_f32().map(|_| F32)) - .or_else(|| num.to_f64().map(|_| F64)) + fp_type.map(ArrayTag::from).unwrap_or_else(|| { + num.to_f16() + .map(|_| F16) + .or_else(|| num.to_bf16().map(|_| BF16)) + .or_else(|| num.to_f32().map(|_| F32)) + .or_else(|| num.to_f64().map(|_| F64)) + // Safety: We know the value is a decimal number, and f64 can represent any JSON number + .unwrap_or_else(|| unsafe { std::hint::unreachable_unchecked() }) + }) } else { num.to_i8() .map(|_| I8) @@ -200,9 +247,9 @@ impl ArrayTag { .or_else(|| num.to_u32().map(|_| U32)) .or_else(|| num.to_i64().map(|_| I64)) .or_else(|| num.to_u64().map(|_| U64)) + // Safety: We know the value is a number, and we've checked all possible number types + .unwrap_or_else(|| unsafe { std::hint::unreachable_unchecked() }) } - // Safety: We know the value is a number, and we've checked all possible number types - .unwrap_or_else(|| unsafe { std::hint::unreachable_unchecked() }) } else { Heterogeneous } @@ -401,11 +448,11 @@ impl Header { const TAG_MASK: u64 = 0xF; const TAG_SHIFT: u64 = 60; - const fn new(len: usize, cap: usize, tag: ArrayTag) -> Result { + const fn new(len: usize, cap: usize, tag: ArrayTag) -> 
Result { // assert!(len <= Self::LEN_MASK as usize, "Length exceeds 30-bit limit"); // assert!(cap <= Self::CAP_MASK as usize, "Capacity exceeds 30-bit limit"); if len > Self::LEN_MASK as usize || cap > Self::CAP_MASK as usize { - return Err(AllocError); + return Err(IJsonError::Alloc(AllocError)); } let packed = ((len as u64) & Self::LEN_MASK) << Self::LEN_SHIFT @@ -670,7 +717,7 @@ impl IArray { .pad_to_align()) } - fn alloc(cap: usize, tag: ArrayTag) -> Result<*mut Header, AllocError> { + fn alloc(cap: usize, tag: ArrayTag) -> Result<*mut Header, IJsonError> { unsafe { let ptr = alloc(Self::layout(cap, tag).map_err(|_| AllocError)?).cast::
(); ptr.write(Header::new(0, cap, tag)?); @@ -678,7 +725,7 @@ impl IArray { } } - fn realloc(ptr: *mut Header, new_cap: usize) -> Result<*mut Header, AllocError> { + fn realloc(ptr: *mut Header, new_cap: usize) -> Result<*mut Header, IJsonError> { unsafe { let tag = (*ptr).type_tag(); let old_layout = Self::layout((*ptr).cap(), tag).map_err(|_| AllocError)?; @@ -706,13 +753,13 @@ impl IArray { /// Constructs a new `IArray` with the specified capacity. At least that many items /// can be added to the array without reallocating. #[must_use] - pub fn with_capacity(cap: usize) -> Result { + pub fn with_capacity(cap: usize) -> Result { Self::with_capacity_and_tag(cap, ArrayTag::Heterogeneous) } /// Constructs a new `IArray` with the specified capacity and array type. #[must_use] - fn with_capacity_and_tag(cap: usize, tag: ArrayTag) -> Result { + fn with_capacity_and_tag(cap: usize, tag: ArrayTag) -> Result { if cap == 0 { Ok(Self::new()) } else { @@ -743,7 +790,7 @@ impl IArray { /// Converts this array to a new type, promoting all existing elements. /// This is used for automatic type promotion when incompatible types are added. - fn promote_to_type(&mut self, new_tag: ArrayTag) -> Result<(), AllocError> { + fn promote_to_type(&mut self, new_tag: ArrayTag) -> Result<(), IJsonError> { if self.is_static() || self.header().type_tag() == new_tag { return Ok(()); } @@ -898,7 +945,7 @@ impl IArray { self.header_mut().as_mut_slice_unchecked::() } - fn resize_internal(&mut self, cap: usize) -> Result<(), AllocError> { + fn resize_internal(&mut self, cap: usize) -> Result<(), IJsonError> { if self.is_static() || cap == 0 { let tag = if self.is_static() { ArrayTag::Heterogeneous @@ -916,7 +963,7 @@ impl IArray { } /// Reserves space for at least this many additional items. 
- pub fn reserve(&mut self, additional: usize) -> Result<(), AllocError> { + pub fn reserve(&mut self, additional: usize) -> Result<(), IJsonError> { let hd = self.header(); let current_capacity = hd.cap(); let desired_capacity = hd.len().checked_add(additional).ok_or(AllocError)?; @@ -956,7 +1003,7 @@ impl IArray { /// on or after this index will be shifted down to accomodate this. For large /// arrays, insertions near the front will be slow as it will require shifting /// a large number of items. - pub fn insert(&mut self, index: usize, item: impl Into) -> Result<(), AllocError> { + pub fn insert(&mut self, index: usize, item: impl Into) -> Result<(), IJsonError> { let item = item.into(); let current_tag = self.header().type_tag(); let len = self.len(); @@ -1080,8 +1127,49 @@ impl IArray { } } + /// Pushes a new item onto the back of the array with a specific floating-point type. + /// + /// If the item cannot be represented in the specified floating-point type, + /// returns an error. + pub(crate) fn push_with_fp_type( + &mut self, + item: impl Into, + fp_type: FloatType, + ) -> Result<(), IJsonError> { + let desired_tag = fp_type.into(); + let current_tag = self.header().type_tag(); + let len = self.len(); + let item = item.into(); + let can_fit = || match fp_type { + FloatType::F16 => item.to_f16().is_some(), + FloatType::BF16 => item.to_bf16().is_some(), + FloatType::F32 => item.to_f32().is_some(), + FloatType::F64 => item.to_f64().is_some(), + }; + + if (desired_tag != current_tag && len > 0) || !can_fit() { + return Err(IJsonError::OutOfRange(fp_type)); + } + + // We can fit the item into the array, so we can push it directly + + if len == 0 { + if self.is_static() { + *self = IArray::with_capacity_and_tag(4, desired_tag)?; + } else { + self.promote_to_type(desired_tag)?; + } + } + + self.reserve(1)?; + unsafe { + self.header_mut().push(item); + } + Ok(()) + } + /// Pushes a new item onto the back of the array. 
- pub fn push(&mut self, item: impl Into) -> Result<(), AllocError> { + pub fn push(&mut self, item: impl Into) -> Result<(), IJsonError> { let item = item.into(); let current_tag = self.header().type_tag(); let len = self.len(); @@ -1425,11 +1513,11 @@ pub trait TryExtend { /// Returns an `AllocError` if allocation fails. /// # Errors /// Returns an `AllocError` if memory allocation fails during the extension. - fn try_extend(&mut self, iter: impl IntoIterator) -> Result<(), AllocError>; + fn try_extend(&mut self, iter: impl IntoIterator) -> Result<(), IJsonError>; } impl + private::Sealed> TryExtend for IArray { - fn try_extend(&mut self, iter: impl IntoIterator) -> Result<(), AllocError> { + fn try_extend(&mut self, iter: impl IntoIterator) -> Result<(), IJsonError> { let iter = iter.into_iter(); self.reserve(iter.size_hint().0)?; for v in iter { @@ -1442,7 +1530,7 @@ impl + private::Sealed> TryExtend for IArray { macro_rules! extend_impl_int { ($($ty:ty),*) => { $(impl TryExtend<$ty> for IArray { - fn try_extend(&mut self, iter: impl IntoIterator) -> Result<(), AllocError> { + fn try_extend(&mut self, iter: impl IntoIterator) -> Result<(), IJsonError> { let expected_tag = ArrayTag::from_type::<$ty>(); let iter = iter.into_iter(); let size_hint = iter.size_hint().0; @@ -1494,7 +1582,7 @@ macro_rules! extend_impl_int { macro_rules! extend_impl_float { ($($ty:ty),*) => { $(impl TryExtend<$ty> for IArray { - fn try_extend(&mut self, iter: impl IntoIterator) -> Result<(), AllocError> { + fn try_extend(&mut self, iter: impl IntoIterator) -> Result<(), IJsonError> { let expected_tag = ArrayTag::from_type::<$ty>(); let iter = iter.into_iter(); let size_hint = iter.size_hint().0; @@ -1564,13 +1652,13 @@ pub trait TryFromIterator { /// Returns an `AllocError` if allocation fails. /// # Errors /// Returns `AllocError` if memory allocation fails during the construction. 
- fn try_from_iter>(iter: U) -> Result + fn try_from_iter>(iter: U) -> Result where Self: Sized; } impl + private::Sealed> TryFromIterator for IArray { - fn try_from_iter>(iter: T) -> Result { + fn try_from_iter>(iter: T) -> Result { let mut res = IArray::new(); res.try_extend(iter)?; Ok(res) @@ -1580,7 +1668,7 @@ impl + private::Sealed> TryFromIterator for IArray { macro_rules! from_iter_impl { ($($ty:ty),*) => { $(impl TryFromIterator<$ty> for IArray { - fn try_from_iter>(iter: T) -> Result { + fn try_from_iter>(iter: T) -> Result { let iter = iter.into_iter(); let mut res = IArray::with_capacity_and_tag(iter.size_hint().0, ArrayTag::from_type::<$ty>())?; res.try_extend(iter)?; @@ -1599,13 +1687,13 @@ pub trait TryCollect: Iterator + Sized { /// Returns an `AllocError` if allocation fails. /// # Errors /// Returns `AllocError` if memory allocation fails during the collection. - fn try_collect(self) -> Result + fn try_collect(self) -> Result where B: TryFromIterator; } impl> TryCollect for I { - fn try_collect(self) -> Result + fn try_collect(self) -> Result where B: TryFromIterator, { @@ -1614,7 +1702,7 @@ impl> TryCollect for I { } impl + private::Sealed> TryFrom> for IArray { - type Error = AllocError; + type Error = IJsonError; fn try_from(other: Vec) -> Result { let mut res = IArray::with_capacity(other.len())?; res.try_extend(other.into_iter().map(Into::into))?; @@ -1623,7 +1711,7 @@ impl + private::Sealed> TryFrom> for IArray { } impl + Clone + private::Sealed> TryFrom<&[T]> for IArray { - type Error = AllocError; + type Error = IJsonError; fn try_from(other: &[T]) -> Result { let mut res = IArray::with_capacity(other.len())?; res.try_extend(other.iter().cloned().map(Into::into))?; @@ -1634,7 +1722,7 @@ impl + Clone + private::Sealed> TryFrom<&[T]> for IArray { macro_rules! 
from_slice_impl { ($($ty:ty),*) => {$( impl TryFrom> for IArray { - type Error = AllocError; + type Error = IJsonError; fn try_from(other: Vec<$ty>) -> Result { let mut res = IArray::with_capacity_and_tag(other.len(), ArrayTag::from_type::<$ty>())?; TryExtend::<$ty>::try_extend(&mut res, other.into_iter().map(Into::into))?; @@ -1642,7 +1730,7 @@ macro_rules! from_slice_impl { } } impl TryFrom<&[$ty]> for IArray { - type Error = AllocError; + type Error = IJsonError; fn try_from(other: &[$ty]) -> Result { let mut res = IArray::with_capacity_and_tag(other.len(), ArrayTag::from_type::<$ty>())?; TryExtend::<$ty>::try_extend(&mut res, other.iter().cloned().map(Into::into))?; @@ -3207,4 +3295,28 @@ mod tests { } } } + + #[test] + fn test_push_with_fp_type_creates_typed_array() { + let mut arr = IArray::new(); + arr.push_with_fp_type(IValue::from(1.5), FloatType::F16) + .unwrap(); + arr.push_with_fp_type(IValue::from(2.5), FloatType::F16) + .unwrap(); + + assert_eq!(arr.len(), 2); + assert!(matches!(arr.as_slice(), ArraySliceRef::F16(_))); + } + + #[test] + fn test_push_with_fp_type_overflow() { + let mut arr = IArray::new(); + arr.push_with_fp_type(IValue::from(1.5), FloatType::F16) + .unwrap(); + assert!(matches!(arr.as_slice(), ArraySliceRef::F16(_))); + arr.push_with_fp_type(IValue::from(100000.0), FloatType::F16) + .unwrap_err(); + assert_eq!(arr.len(), 1); + assert!(matches!(arr.as_slice(), ArraySliceRef::F16(_))); + } } diff --git a/src/de.rs b/src/de.rs index 4eabe26..1f1faa9 100644 --- a/src/de.rs +++ b/src/de.rs @@ -8,14 +8,40 @@ use serde::de::{ use serde::{forward_to_deserialize_any, Deserialize, Deserializer}; use serde_json::error::Error; -use crate::{DestructuredRef, IArray, INumber, IObject, IString, IValue}; +use crate::{DestructuredRef, FloatType, IArray, INumber, IObject, IString, IValue}; + +/// Seed for deserializing an [`IValue`]. 
+#[derive(Debug)] +pub struct IValueDeserSeed { + /// Optional floating point type enforcment type for homogeneous arrays. + pub fpha_type: Option, +} + +impl IValueDeserSeed { + /// Creates a new [`IValueDeserSeed`] with the given floating point type enforcment type for homogeneous arrays. + pub fn new(fpha_type: Option) -> Self { + IValueDeserSeed { fpha_type } + } +} + +impl<'de> DeserializeSeed<'de> for IValueDeserSeed { + type Value = IValue; + + fn deserialize(self, deserializer: D) -> Result + where + D: Deserializer<'de>, + { + // Pass hint to a custom visitor + deserializer.deserialize_any(ValueVisitor::new(self.fpha_type)) + } +} impl<'de> Deserialize<'de> for IValue { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { - deserializer.deserialize_any(ValueVisitor) + deserializer.deserialize_any(ValueVisitor::new(None)) } } @@ -42,7 +68,7 @@ impl<'de> Deserialize<'de> for IArray { where D: Deserializer<'de>, { - deserializer.deserialize_seq(ArrayVisitor) + deserializer.deserialize_seq(ArrayVisitor { fpha_type: None }) } } @@ -51,11 +77,19 @@ impl<'de> Deserialize<'de> for IObject { where D: Deserializer<'de>, { - deserializer.deserialize_map(ObjectVisitor) + deserializer.deserialize_map(ObjectVisitor { fpha_type: None }) } } -struct ValueVisitor; +struct ValueVisitor { + fpha_type: Option, +} + +impl ValueVisitor { + fn new(fpha_type: Option) -> Self { + ValueVisitor { fpha_type } + } +} impl<'de> Visitor<'de> for ValueVisitor { type Value = IValue; @@ -104,7 +138,7 @@ impl<'de> Visitor<'de> for ValueVisitor { where D: Deserializer<'de>, { - Deserialize::deserialize(deserializer) + IValueDeserSeed::new(self.fpha_type).deserialize(deserializer) } #[inline] @@ -117,14 +151,22 @@ impl<'de> Visitor<'de> for ValueVisitor { where V: SeqAccess<'de>, { - ArrayVisitor.visit_seq(visitor).map(Into::into) + ArrayVisitor { + fpha_type: self.fpha_type, + } + .visit_seq(visitor) + .map(Into::into) } fn visit_map(self, visitor: V) -> Result where V: 
MapAccess<'de>, { - ObjectVisitor.visit_map(visitor).map(Into::into) + ObjectVisitor { + fpha_type: self.fpha_type, + } + .visit_map(visitor) + .map(Into::into) } } @@ -192,7 +234,9 @@ impl<'de> Visitor<'de> for StringVisitor { } } -struct ArrayVisitor; +struct ArrayVisitor { + fpha_type: Option, +} impl<'de> Visitor<'de> for ArrayVisitor { type Value = IArray; @@ -208,15 +252,20 @@ impl<'de> Visitor<'de> for ArrayVisitor { { let mut arr = IArray::with_capacity(visitor.size_hint().unwrap_or(0)) .map_err(|_| SError::custom("Failed to allocate array"))?; - while let Some(v) = visitor.next_element::()? { - arr.push(v) - .map_err(|_| SError::custom("Failed to push to array"))?; + while let Some(v) = visitor.next_element_seed(IValueDeserSeed::new(self.fpha_type))? { + match self.fpha_type { + Some(fp_type) => arr.push_with_fp_type(v, fp_type), + None => arr.push(v), + } + .map_err(|e| SError::custom(e.to_string()))?; } Ok(arr) } } -struct ObjectVisitor; +struct ObjectVisitor { + fpha_type: Option, +} impl<'de> Visitor<'de> for ObjectVisitor { type Value = IObject; @@ -230,7 +279,8 @@ impl<'de> Visitor<'de> for ObjectVisitor { V: MapAccess<'de>, { let mut obj = IObject::with_capacity(visitor.size_hint().unwrap_or(0)); - while let Some((k, v)) = visitor.next_entry::()? { + while let Some(k) = visitor.next_key::()? 
{ + let v = visitor.next_value_seed(IValueDeserSeed::new(self.fpha_type))?; obj.insert(k, v); } Ok(obj) @@ -999,3 +1049,102 @@ where { T::deserialize(value) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::array::ArraySliceRef; + use serde::de::DeserializeSeed; + + #[test] + fn test_deserialize_with_f64_fp() { + let json = r#"[1.5, 2.5, 3.5]"#; + let seed = IValueDeserSeed::new(Some(FloatType::F64)); + let mut deserializer = serde_json::Deserializer::from_str(json); + let value = seed.deserialize(&mut deserializer).unwrap(); + + let arr = value.as_array().unwrap(); + assert!(matches!(arr.as_slice(), ArraySliceRef::F64(_))); + assert_eq!(arr.len(), 3); + } + + #[test] + fn test_deserialize_with_f32_fp() { + let json = r#"[1.5, 2.5, 3.5]"#; + let seed = IValueDeserSeed::new(Some(FloatType::F32)); + let mut deserializer = serde_json::Deserializer::from_str(json); + let value = seed.deserialize(&mut deserializer).unwrap(); + + let arr = value.as_array().unwrap(); + assert!(matches!(arr.as_slice(), ArraySliceRef::F32(_))); + assert_eq!(arr.len(), 3); + } + + #[test] + fn test_deserialize_with_f16_fp() { + let json = r#"[0.5, 1.0, 1.5]"#; + let seed = IValueDeserSeed::new(Some(FloatType::F16)); + let mut deserializer = serde_json::Deserializer::from_str(json); + let value = seed.deserialize(&mut deserializer).unwrap(); + + let arr = value.as_array().unwrap(); + assert!(matches!(arr.as_slice(), ArraySliceRef::F16(_))); + assert_eq!(arr.len(), 3); + } + + #[test] + fn test_deserialize_with_bf16_fp() { + let json = r#"[0.5, 1.0, 2.0]"#; + let seed = IValueDeserSeed::new(Some(FloatType::BF16)); + let mut deserializer = serde_json::Deserializer::from_str(json); + let value = seed.deserialize(&mut deserializer).unwrap(); + + let arr = value.as_array().unwrap(); + assert!(matches!(arr.as_slice(), ArraySliceRef::BF16(_))); + assert_eq!(arr.len(), 3); + } + + #[test] + fn test_deserialize_mixed_array_with_fp() { + let json = r#"[1, "string", 3.5]"#; + let seed = 
IValueDeserSeed::new(Some(FloatType::F32)); + let mut deserializer = serde_json::Deserializer::from_str(json); + let _error = seed.deserialize(&mut deserializer).unwrap_err(); + } + + #[test] + fn test_deserialize_integer_array_with_fp() { + let json = r#"[1, 2, 3]"#; + let seed = IValueDeserSeed::new(Some(FloatType::F32)); + let mut deserializer = serde_json::Deserializer::from_str(json); + let value = seed.deserialize(&mut deserializer).unwrap(); + + let arr = value.as_array().unwrap(); + assert!(matches!(arr.as_slice(), ArraySliceRef::F32(_))); + assert_eq!(arr.len(), 3); + } + + #[test] + fn test_deserialize_f16_value_no_fit() { + let json = r#"[0.5, 100000.0, 1.5]"#; + let seed = IValueDeserSeed::new(Some(FloatType::F16)); + let mut deserializer = serde_json::Deserializer::from_str(json); + let _error = seed.deserialize(&mut deserializer).unwrap_err(); + } + + #[test] + fn test_deserialize_bf16_value_too_large() { + let json = r#"[1e39, 2e39]"#; + let seed = IValueDeserSeed::new(Some(FloatType::BF16)); + let mut deserializer = serde_json::Deserializer::from_str(json); + let _error = seed.deserialize(&mut deserializer).unwrap_err(); + } + + #[test] + fn test_deserialize_f32_value_too_large() { + let json = r#"[1e39, 2e39]"#; + let seed = IValueDeserSeed::new(Some(FloatType::F32)); + let mut deserializer = serde_json::Deserializer::from_str(json); + let _error = seed.deserialize(&mut deserializer).unwrap_err(); + } +} diff --git a/src/alloc.rs b/src/error.rs similarity index 56% rename from src/alloc.rs rename to src/error.rs index af0c87d..d5b9f36 100644 --- a/src/alloc.rs +++ b/src/error.rs @@ -2,6 +2,9 @@ use std::error::Error; use std::fmt; +use thiserror::Error; + +use crate::FloatType; /// Error type for fallible allocation /// This error is returned when an allocation fails. 
@@ -16,3 +19,14 @@ impl fmt::Display for AllocError { f.write_str("memory allocation failed") } } + +/// Error type for ijson +#[derive(Error, Debug)] +pub enum IJsonError { + /// Memory allocation failed + #[error("memory allocation failed")] + Alloc(#[from] AllocError), + /// Value out of range for the specified floating-point type + #[error("value out of range for {0}")] + OutOfRange(FloatType), +} diff --git a/src/lib.rs b/src/lib.rs index afcff7b..12850fe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,11 +36,11 @@ pub mod unsafe_string; #[cfg(not(feature = "thread_safe"))] pub use unsafe_string::IString; -pub mod alloc; +pub mod error; mod thin; mod value; -pub use array::IArray; +pub use array::{FloatType, IArray}; pub use number::INumber; pub use object::IObject; use std::alloc::Layout; @@ -51,7 +51,7 @@ pub use value::{ mod de; mod ser; -pub use de::from_value; +pub use de::{from_value, IValueDeserSeed}; pub use ser::to_value; /// Trait to implement defrag allocator From bfd678b22cb3f597c06bc03fff5403519a1673b6 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Mon, 9 Feb 2026 11:42:30 +0200 Subject: [PATCH 02/33] add fallback option --- src/de.rs | 125 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 30 deletions(-) diff --git a/src/de.rs b/src/de.rs index 1f1faa9..1eb3681 100644 --- a/src/de.rs +++ b/src/de.rs @@ -8,19 +8,51 @@ use serde::de::{ use serde::{forward_to_deserialize_any, Deserialize, Deserializer}; use serde_json::error::Error; +use crate::error::IJsonError; use crate::{DestructuredRef, FloatType, IArray, INumber, IObject, IString, IValue}; +#[derive(Debug, Clone, Copy)] +pub struct FPHAConfig { + /// Floating point type for homogeneous arrays. + pub fpha_type: FloatType, + /// If `fallback` is true, arrays that don't fit the fpha_type will fall back to regular push. + pub fpha_fallback: bool, +} + +impl FPHAConfig { + /// Creates a new [`FPHAConfig`] with the given floating point type. 
+ pub fn new(fpha_type: FloatType, fpha_fallback: bool) -> Self { + Self { + fpha_type, + fpha_fallback, + } + } + + pub fn new_with_type(fpha_type: FloatType) -> Self { + Self { + fpha_type, + fpha_fallback: false, + } + } + + /// Sets the fallback behavior. + pub fn with_fallback(mut self, fallback: bool) -> Self { + self.fpha_fallback = fallback; + self + } +} + /// Seed for deserializing an [`IValue`]. -#[derive(Debug)] +#[derive(Debug, Default)] pub struct IValueDeserSeed { - /// Optional floating point type enforcment type for homogeneous arrays. - pub fpha_type: Option, + /// Optional FPHA configuration for homogeneous arrays. + pub fpha_config: Option, } impl IValueDeserSeed { /// Creates a new [`IValueDeserSeed`] with the given floating point type enforcment type for homogeneous arrays. - pub fn new(fpha_type: Option) -> Self { - IValueDeserSeed { fpha_type } + pub fn new(fpha_config: Option) -> Self { + IValueDeserSeed { fpha_config } } } @@ -32,7 +64,7 @@ impl<'de> DeserializeSeed<'de> for IValueDeserSeed { D: Deserializer<'de>, { // Pass hint to a custom visitor - deserializer.deserialize_any(ValueVisitor::new(self.fpha_type)) + deserializer.deserialize_any(ValueVisitor::new(self.fpha_config)) } } @@ -68,7 +100,7 @@ impl<'de> Deserialize<'de> for IArray { where D: Deserializer<'de>, { - deserializer.deserialize_seq(ArrayVisitor { fpha_type: None }) + deserializer.deserialize_seq(ArrayVisitor { fpha_config: None }) } } @@ -77,17 +109,17 @@ impl<'de> Deserialize<'de> for IObject { where D: Deserializer<'de>, { - deserializer.deserialize_map(ObjectVisitor { fpha_type: None }) + deserializer.deserialize_map(ObjectVisitor { fpha_config: None }) } } struct ValueVisitor { - fpha_type: Option, + fpha_config: Option, } impl ValueVisitor { - fn new(fpha_type: Option) -> Self { - ValueVisitor { fpha_type } + fn new(fpha_config: Option) -> Self { + ValueVisitor { fpha_config } } } @@ -138,7 +170,7 @@ impl<'de> Visitor<'de> for ValueVisitor { where D: 
Deserializer<'de>, { - IValueDeserSeed::new(self.fpha_type).deserialize(deserializer) + IValueDeserSeed::new(self.fpha_config).deserialize(deserializer) } #[inline] @@ -152,7 +184,7 @@ impl<'de> Visitor<'de> for ValueVisitor { V: SeqAccess<'de>, { ArrayVisitor { - fpha_type: self.fpha_type, + fpha_config: self.fpha_config, } .visit_seq(visitor) .map(Into::into) @@ -163,7 +195,7 @@ impl<'de> Visitor<'de> for ValueVisitor { V: MapAccess<'de>, { ObjectVisitor { - fpha_type: self.fpha_type, + fpha_config: self.fpha_config, } .visit_map(visitor) .map(Into::into) @@ -235,7 +267,7 @@ impl<'de> Visitor<'de> for StringVisitor { } struct ArrayVisitor { - fpha_type: Option, + fpha_config: Option, } impl<'de> Visitor<'de> for ArrayVisitor { @@ -252,10 +284,16 @@ impl<'de> Visitor<'de> for ArrayVisitor { { let mut arr = IArray::with_capacity(visitor.size_hint().unwrap_or(0)) .map_err(|_| SError::custom("Failed to allocate array"))?; - while let Some(v) = visitor.next_element_seed(IValueDeserSeed::new(self.fpha_type))? { - match self.fpha_type { - Some(fp_type) => arr.push_with_fp_type(v, fp_type), - None => arr.push(v), + while let Some(v) = visitor.next_element_seed(IValueDeserSeed::new(self.fpha_config))? { + match self.fpha_config.map(|c| (c.fpha_type, c.fpha_fallback)) { + Some((fp_type, fallback)) => { + arr.push_with_fp_type(v.clone(), fp_type) + .or_else(|_| match self.fpha_config { + Some(c) if fallback => arr.push(v), + _ => Err(IJsonError::OutOfRange(fp_type)), + }) + } + None => arr.push(v).map_err(Into::into), } .map_err(|e| SError::custom(e.to_string()))?; } @@ -264,7 +302,7 @@ impl<'de> Visitor<'de> for ArrayVisitor { } struct ObjectVisitor { - fpha_type: Option, + fpha_config: Option, } impl<'de> Visitor<'de> for ObjectVisitor { @@ -280,7 +318,7 @@ impl<'de> Visitor<'de> for ObjectVisitor { { let mut obj = IObject::with_capacity(visitor.size_hint().unwrap_or(0)); while let Some(k) = visitor.next_key::()? 
{ - let v = visitor.next_value_seed(IValueDeserSeed::new(self.fpha_type))?; + let v = visitor.next_value_seed(IValueDeserSeed::new(self.fpha_config))?; obj.insert(k, v); } Ok(obj) @@ -1059,7 +1097,7 @@ mod tests { #[test] fn test_deserialize_with_f64_fp() { let json = r#"[1.5, 2.5, 3.5]"#; - let seed = IValueDeserSeed::new(Some(FloatType::F64)); + let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::F64))); let mut deserializer = serde_json::Deserializer::from_str(json); let value = seed.deserialize(&mut deserializer).unwrap(); @@ -1071,7 +1109,7 @@ mod tests { #[test] fn test_deserialize_with_f32_fp() { let json = r#"[1.5, 2.5, 3.5]"#; - let seed = IValueDeserSeed::new(Some(FloatType::F32)); + let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::F32))); let mut deserializer = serde_json::Deserializer::from_str(json); let value = seed.deserialize(&mut deserializer).unwrap(); @@ -1083,7 +1121,7 @@ mod tests { #[test] fn test_deserialize_with_f16_fp() { let json = r#"[0.5, 1.0, 1.5]"#; - let seed = IValueDeserSeed::new(Some(FloatType::F16)); + let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::F16))); let mut deserializer = serde_json::Deserializer::from_str(json); let value = seed.deserialize(&mut deserializer).unwrap(); @@ -1095,7 +1133,7 @@ mod tests { #[test] fn test_deserialize_with_bf16_fp() { let json = r#"[0.5, 1.0, 2.0]"#; - let seed = IValueDeserSeed::new(Some(FloatType::BF16)); + let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::BF16))); let mut deserializer = serde_json::Deserializer::from_str(json); let value = seed.deserialize(&mut deserializer).unwrap(); @@ -1107,7 +1145,7 @@ mod tests { #[test] fn test_deserialize_mixed_array_with_fp() { let json = r#"[1, "string", 3.5]"#; - let seed = IValueDeserSeed::new(Some(FloatType::F32)); + let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::F32))); let mut deserializer = 
serde_json::Deserializer::from_str(json); let _error = seed.deserialize(&mut deserializer).unwrap_err(); } @@ -1115,7 +1153,7 @@ mod tests { #[test] fn test_deserialize_integer_array_with_fp() { let json = r#"[1, 2, 3]"#; - let seed = IValueDeserSeed::new(Some(FloatType::F32)); + let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::F32))); let mut deserializer = serde_json::Deserializer::from_str(json); let value = seed.deserialize(&mut deserializer).unwrap(); @@ -1127,24 +1165,51 @@ mod tests { #[test] fn test_deserialize_f16_value_no_fit() { let json = r#"[0.5, 100000.0, 1.5]"#; - let seed = IValueDeserSeed::new(Some(FloatType::F16)); + let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::F16))); let mut deserializer = serde_json::Deserializer::from_str(json); let _error = seed.deserialize(&mut deserializer).unwrap_err(); + + let seed = IValueDeserSeed::new(Some( + FPHAConfig::new_with_type(FloatType::F16).with_fallback(true), + )); + let mut deserializer = serde_json::Deserializer::from_str(json); + let value = seed.deserialize(&mut deserializer).unwrap(); + let arr = value.as_array().unwrap(); + assert!(matches!(arr.as_slice(), ArraySliceRef::F32(_))); + assert_eq!(arr.len(), 3); } #[test] fn test_deserialize_bf16_value_too_large() { let json = r#"[1e39, 2e39]"#; - let seed = IValueDeserSeed::new(Some(FloatType::BF16)); + let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::BF16))); let mut deserializer = serde_json::Deserializer::from_str(json); let _error = seed.deserialize(&mut deserializer).unwrap_err(); + + let seed = IValueDeserSeed::new(Some( + FPHAConfig::new_with_type(FloatType::BF16).with_fallback(true), + )); + let mut deserializer = serde_json::Deserializer::from_str(json); + let value = seed.deserialize(&mut deserializer).unwrap(); + let arr = value.as_array().unwrap(); + assert!(matches!(arr.as_slice(), ArraySliceRef::F64(_))); + assert_eq!(arr.len(), 2); } #[test] fn 
test_deserialize_f32_value_too_large() { let json = r#"[1e39, 2e39]"#; - let seed = IValueDeserSeed::new(Some(FloatType::F32)); + let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::F32))); let mut deserializer = serde_json::Deserializer::from_str(json); let _error = seed.deserialize(&mut deserializer).unwrap_err(); + + let seed = IValueDeserSeed::new(Some( + FPHAConfig::new_with_type(FloatType::F32).with_fallback(true), + )); + let mut deserializer = serde_json::Deserializer::from_str(json); + let value = seed.deserialize(&mut deserializer).unwrap(); + let arr = value.as_array().unwrap(); + assert!(matches!(arr.as_slice(), ArraySliceRef::F64(_))); + assert_eq!(arr.len(), 2); } } From 2079a6851e8783ddc66ad7fbbe418df07f591b4b Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Mon, 9 Feb 2026 11:43:53 +0200 Subject: [PATCH 03/33] export FPHAConfig --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 12850fe..d4fabe6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -51,7 +51,7 @@ pub use value::{ mod de; mod ser; -pub use de::{from_value, IValueDeserSeed}; +pub use de::{from_value, IValueDeserSeed, FPHAConfig}; pub use ser::to_value; /// Trait to implement defrag allocator From cec89cfe4d7dd235679731af50cdfb5a91fe0349 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Mon, 9 Feb 2026 11:47:29 +0200 Subject: [PATCH 04/33] docs --- src/de.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/de.rs b/src/de.rs index 1eb3681..f6e510b 100644 --- a/src/de.rs +++ b/src/de.rs @@ -12,6 +12,7 @@ use crate::error::IJsonError; use crate::{DestructuredRef, FloatType, IArray, INumber, IObject, IString, IValue}; #[derive(Debug, Clone, Copy)] +/// Configuration for floating point homogeneous arrays. pub struct FPHAConfig { /// Floating point type for homogeneous arrays. 
pub fpha_type: FloatType, @@ -28,6 +29,7 @@ impl FPHAConfig { } } + /// Creates a new [`FPHAConfig`] with the given floating point type and fallback behavior. pub fn new_with_type(fpha_type: FloatType) -> Self { Self { fpha_type, From bc60e68506c7ca3aa87d0d3c9f50b5881c6e82aa Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Mon, 9 Feb 2026 11:53:38 +0200 Subject: [PATCH 05/33] fmt --- src/array.rs | 2 +- src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/array.rs b/src/array.rs index 4360d6d..6db4d30 100644 --- a/src/array.rs +++ b/src/array.rs @@ -59,7 +59,7 @@ impl Default for ArrayTag { #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum FloatType { /// F16 - F16, + F16 = 1, /// BF16 BF16, /// F32 diff --git a/src/lib.rs b/src/lib.rs index d4fabe6..86e8255 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -51,7 +51,7 @@ pub use value::{ mod de; mod ser; -pub use de::{from_value, IValueDeserSeed, FPHAConfig}; +pub use de::{from_value, FPHAConfig, IValueDeserSeed}; pub use ser::to_value; /// Trait to implement defrag allocator From 931d520d5d32f19288f09263136293ee5c7a75dd Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Mon, 9 Feb 2026 12:18:18 +0200 Subject: [PATCH 06/33] comments --- src/array.rs | 14 ++++++++++++++ src/de.rs | 18 +++++++++--------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/array.rs b/src/array.rs index 6db4d30..8768418 100644 --- a/src/array.rs +++ b/src/array.rs @@ -79,6 +79,20 @@ impl fmt::Display for FloatType { } } +impl TryFrom for FloatType { + type Error = (); + + fn try_from(value: u8) -> Result { + match value { + 1 => Ok(FloatType::F16), + 2 => Ok(FloatType::BF16), + 3 => Ok(FloatType::F32), + 4 => Ok(FloatType::F64), + _ => Err(()), + } + } +} + impl From for ArrayTag { fn from(fp_type: FloatType) -> Self { match fp_type { diff --git a/src/de.rs b/src/de.rs index f6e510b..9b3cf37 100644 --- a/src/de.rs +++ b/src/de.rs @@ -8,7 +8,6 @@ use serde::de::{ use 
serde::{forward_to_deserialize_any, Deserialize, Deserializer}; use serde_json::error::Error; -use crate::error::IJsonError; use crate::{DestructuredRef, FloatType, IArray, INumber, IObject, IString, IValue}; #[derive(Debug, Clone, Copy)] @@ -287,14 +286,15 @@ impl<'de> Visitor<'de> for ArrayVisitor { let mut arr = IArray::with_capacity(visitor.size_hint().unwrap_or(0)) .map_err(|_| SError::custom("Failed to allocate array"))?; while let Some(v) = visitor.next_element_seed(IValueDeserSeed::new(self.fpha_config))? { - match self.fpha_config.map(|c| (c.fpha_type, c.fpha_fallback)) { - Some((fp_type, fallback)) => { - arr.push_with_fp_type(v.clone(), fp_type) - .or_else(|_| match self.fpha_config { - Some(c) if fallback => arr.push(v), - _ => Err(IJsonError::OutOfRange(fp_type)), - }) - } + // Matching Some(..) twice, to avoind cloning the value :/ + match self.fpha_config { + Some(FPHAConfig { + fpha_type, + fpha_fallback: true, + }) => arr + .push_with_fp_type(v.clone(), fpha_type) + .or_else(|_| arr.push(v).map_err(Into::into)), + Some(FPHAConfig { fpha_type, .. 
}) => arr.push_with_fp_type(v, fpha_type), None => arr.push(v).map_err(Into::into), } .map_err(|e| SError::custom(e.to_string()))?; From 3ba050f4ff630dae593b3f6aa83f6bd6e04132c3 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Mon, 9 Feb 2026 12:27:01 +0200 Subject: [PATCH 07/33] lower fuzz time --- .github/actions/fuzz_tests/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/fuzz_tests/action.yml b/.github/actions/fuzz_tests/action.yml index 95c7b09..16774d5 100644 --- a/.github/actions/fuzz_tests/action.yml +++ b/.github/actions/fuzz_tests/action.yml @@ -8,7 +8,7 @@ inputs: fuzz_time: description: 'Maximum time in seconds to run fuzzing' required: false - default: '180' + default: '120' cargo_fuzz_version: description: 'Version of cargo-fuzz to install' required: false From 8cd9ddffa9b87463dcc8d3977a77db6d4a1f58c3 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Mon, 9 Feb 2026 12:43:47 +0200 Subject: [PATCH 08/33] bring back old code --- src/array.rs | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/src/array.rs b/src/array.rs index 8768418..07f422a 100644 --- a/src/array.rs +++ b/src/array.rs @@ -232,25 +232,14 @@ impl ArrayTag { /// Determines the ArrayTag for an IValue if it represents a primitive type /// Prefers signed types over unsigned types for positive values to be more conservative fn from_ivalue(value: &IValue) -> ArrayTag { - Self::from_ivalue_with_hint(value, None) - } - - /// Determines the ArrayTag for an IValue, using the provided fp_type for floating-point types. - /// - /// When `fp_type` is `Some`, uses the hinted type directly for floating-point values. 
- fn from_ivalue_with_hint(value: &IValue, fp_type: Option) -> ArrayTag { use ArrayTag::*; if let Some(num) = value.as_number() { if num.has_decimal_point() { - fp_type.map(ArrayTag::from).unwrap_or_else(|| { - num.to_f16() - .map(|_| F16) - .or_else(|| num.to_bf16().map(|_| BF16)) - .or_else(|| num.to_f32().map(|_| F32)) - .or_else(|| num.to_f64().map(|_| F64)) - // Safety: We know the value is a decimal number, and f64 can represent any JSON number - .unwrap_or_else(|| unsafe { std::hint::unreachable_unchecked() }) - }) + num.to_f16() + .map(|_| F16) + .or_else(|| num.to_bf16().map(|_| BF16)) + .or_else(|| num.to_f32().map(|_| F32)) + .or_else(|| num.to_f64().map(|_| F64)) } else { num.to_i8() .map(|_| I8) @@ -261,9 +250,9 @@ impl ArrayTag { .or_else(|| num.to_u32().map(|_| U32)) .or_else(|| num.to_i64().map(|_| I64)) .or_else(|| num.to_u64().map(|_| U64)) - // Safety: We know the value is a number, and we've checked all possible number types - .unwrap_or_else(|| unsafe { std::hint::unreachable_unchecked() }) } + // Safety: We know the value is a number, and we've checked all possible number types + .unwrap_or_else(|| unsafe { std::hint::unreachable_unchecked() }) } else { Heterogeneous } From 3cfee7d014a39a3b71d5817ab82dfebc07ad8710 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Mon, 9 Feb 2026 15:10:47 +0200 Subject: [PATCH 09/33] change to lossy push --- src/array.rs | 37 ++++++++++++++++++++++++--------- src/de.rs | 58 ++++++++++++++++++++++++++-------------------------- 2 files changed, 56 insertions(+), 39 deletions(-) diff --git a/src/array.rs b/src/array.rs index 07f422a..8d725c8 100644 --- a/src/array.rs +++ b/src/array.rs @@ -611,6 +611,26 @@ trait HeaderMut<'a>: ThinMutExt<'a, Header> { self.set_len(index + 1); } + // Safety: Space must already be allocated for the item, + // and the item must be a number. The array type must be a floating-point type. 
+ unsafe fn push_lossy(&mut self, item: IValue) { + use ArrayTag::*; + let index = self.len(); + + macro_rules! push_lossy_impl { + ($(($tag:ident, $ty:ty)),*) => { + match self.type_tag() { + $($tag => self.reborrow().raw_array_ptr_mut().cast::<$ty>().add(index).write( + paste::paste!(item.[]()).unwrap()),)* + _ => unreachable!(), + } + } + } + + push_lossy_impl!((F16, f16), (BF16, bf16), (F32, f32), (F64, f64)); + self.set_len(index + 1); + } + fn pop(&mut self) -> Option { if self.len() == 0 { None @@ -1130,10 +1150,7 @@ impl IArray { } } - /// Pushes a new item onto the back of the array with a specific floating-point type. - /// - /// If the item cannot be represented in the specified floating-point type, - /// returns an error. + /// Pushes a new item onto the back of the array with a specific floating-point type, potentially losing precision. pub(crate) fn push_with_fp_type( &mut self, item: impl Into, @@ -1144,10 +1161,10 @@ impl IArray { let len = self.len(); let item = item.into(); let can_fit = || match fp_type { - FloatType::F16 => item.to_f16().is_some(), - FloatType::BF16 => item.to_bf16().is_some(), - FloatType::F32 => item.to_f32().is_some(), - FloatType::F64 => item.to_f64().is_some(), + FloatType::F16 => item.to_f16_lossy().map_or(false, |v| v.is_finite()), + FloatType::BF16 => item.to_bf16_lossy().map_or(false, |v| v.is_finite()), + FloatType::F32 => item.to_f32_lossy().map_or(false, |v| v.is_finite()), + FloatType::F64 => item.to_f64_lossy().map_or(false, |v| v.is_finite()), }; if (desired_tag != current_tag && len > 0) || !can_fit() { @@ -1166,7 +1183,7 @@ impl IArray { self.reserve(1)?; unsafe { - self.header_mut().push(item); + self.header_mut().push_lossy(item); } Ok(()) } @@ -3312,7 +3329,7 @@ mod tests { } #[test] - fn test_push_with_fp_type_overflow() { + fn test_push_with_fp_type_overflow_rejected() { let mut arr = IArray::new(); arr.push_with_fp_type(IValue::from(1.5), FloatType::F16) .unwrap(); diff --git a/src/de.rs b/src/de.rs index 
9b3cf37..c7d2926 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1165,53 +1165,53 @@ mod tests { } #[test] - fn test_deserialize_f16_value_no_fit() { + fn test_deserialize_f16_value_overflow_rejected() { let json = r#"[0.5, 100000.0, 1.5]"#; let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::F16))); let mut deserializer = serde_json::Deserializer::from_str(json); let _error = seed.deserialize(&mut deserializer).unwrap_err(); - - let seed = IValueDeserSeed::new(Some( - FPHAConfig::new_with_type(FloatType::F16).with_fallback(true), - )); - let mut deserializer = serde_json::Deserializer::from_str(json); - let value = seed.deserialize(&mut deserializer).unwrap(); - let arr = value.as_array().unwrap(); - assert!(matches!(arr.as_slice(), ArraySliceRef::F32(_))); - assert_eq!(arr.len(), 3); } #[test] - fn test_deserialize_bf16_value_too_large() { + fn test_deserialize_bf16_value_overflow_rejected() { let json = r#"[1e39, 2e39]"#; let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::BF16))); let mut deserializer = serde_json::Deserializer::from_str(json); let _error = seed.deserialize(&mut deserializer).unwrap_err(); - - let seed = IValueDeserSeed::new(Some( - FPHAConfig::new_with_type(FloatType::BF16).with_fallback(true), - )); - let mut deserializer = serde_json::Deserializer::from_str(json); - let value = seed.deserialize(&mut deserializer).unwrap(); - let arr = value.as_array().unwrap(); - assert!(matches!(arr.as_slice(), ArraySliceRef::F64(_))); - assert_eq!(arr.len(), 2); } #[test] - fn test_deserialize_f32_value_too_large() { + fn test_deserialize_f32_value_overflow_rejected() { let json = r#"[1e39, 2e39]"#; let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::F32))); let mut deserializer = serde_json::Deserializer::from_str(json); let _error = seed.deserialize(&mut deserializer).unwrap_err(); + } - let seed = IValueDeserSeed::new(Some( - FPHAConfig::new_with_type(FloatType::F32).with_fallback(true), - )); 
- let mut deserializer = serde_json::Deserializer::from_str(json); - let value = seed.deserialize(&mut deserializer).unwrap(); - let arr = value.as_array().unwrap(); - assert!(matches!(arr.as_slice(), ArraySliceRef::F64(_))); - assert_eq!(arr.len(), 2); + #[test] + fn test_ser_deser_roundtrip_preserves_type() { + let json = r#"[0.2, 1.0, 1.2]"#; + + for fp_type in [FloatType::F16, FloatType::BF16, FloatType::F32] { + let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(fp_type))); + let mut de = serde_json::Deserializer::from_str(json); + let original = seed.deserialize(&mut de).unwrap(); + + let serialized = serde_json::to_string(&original).unwrap(); + + let reload_seed = + IValueDeserSeed::new(Some(FPHAConfig::new_with_type(fp_type).with_fallback(true))); + let mut de = serde_json::Deserializer::from_str(&serialized); + let roundtripped = reload_seed.deserialize(&mut de).unwrap(); + + let arr = roundtripped.as_array().unwrap(); + assert_eq!(arr.len(), 3); + let roundtrip_tag = arr.as_slice().type_tag(); + assert_eq!( + roundtrip_tag, + fp_type.into(), + "roundtrip should preserve {fp_type}" + ); + } } } From f16631079b5b45399e56e93f9d9a8753cc2dc5fb Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 19 Feb 2026 09:50:36 +0200 Subject: [PATCH 10/33] Binary encoder/decoder --- fuzz/src/lib.rs | 50 +++++ src/binary.rs | 546 ++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 3 + 3 files changed, 599 insertions(+) create mode 100644 fuzz/src/lib.rs create mode 100644 src/binary.rs diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs new file mode 100644 index 0000000..e68f013 --- /dev/null +++ b/fuzz/src/lib.rs @@ -0,0 +1,50 @@ +use arbitrary::Arbitrary; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Arbitrary, Serialize, Deserialize)] +pub enum JsonValue { + Null, + Bool(bool), + Integer(u64), + Float(f64), + Str(String), + Array(Vec), + Object(Vec<(String, JsonValue)>), +} + +impl JsonValue { + pub fn to_json_string(&self) -> String { + 
match self { + JsonValue::Null => "null".to_string(), + JsonValue::Bool(b) => b.to_string(), + JsonValue::Integer(n) => n.to_string(), + JsonValue::Float(n) => { + if n.is_finite() { + n.to_string() + } else { + "0".to_string() + } + } + JsonValue::Str(s) => { + format!("\"{}\"", s.replace('\\', "\\\\").replace('"', "\\\"")) + } + JsonValue::Array(arr) => { + let items: Vec = arr.iter().map(|v| v.to_json_string()).collect(); + format!("[{}]", items.join(",")) + } + JsonValue::Object(obj) => { + let items: Vec = obj + .iter() + .map(|(k, v)| { + format!( + "\"{}\":{}", + k.replace('\\', "\\\\").replace('"', "\\\""), + v.to_json_string() + ) + }) + .collect(); + format!("{{{}}}", items.join(",")) + } + } + } +} diff --git a/src/binary.rs b/src/binary.rs new file mode 100644 index 0000000..32e803c --- /dev/null +++ b/src/binary.rs @@ -0,0 +1,546 @@ +use std::fmt; + +use half::{bf16, f16}; + +use crate::array::ArraySliceRef; +use crate::{DestructuredRef, IArray, INumber, IObject, IString, IValue}; + +const TAG_NULL: u8 = 0x00; +const TAG_FALSE: u8 = 0x01; +const TAG_TRUE: u8 = 0x02; +const TAG_I64: u8 = 0x03; +const TAG_U64: u8 = 0x04; +const TAG_F64: u8 = 0x05; +const TAG_STRING: u8 = 0x06; +const TAG_OBJECT: u8 = 0x07; +const TAG_ARRAY_HETERO: u8 = 0x08; +const TAG_ARRAY_I8: u8 = 0x10; +const TAG_ARRAY_U8: u8 = 0x11; +const TAG_ARRAY_I16: u8 = 0x12; +const TAG_ARRAY_U16: u8 = 0x13; +const TAG_ARRAY_F16: u8 = 0x14; +const TAG_ARRAY_BF16: u8 = 0x15; +const TAG_ARRAY_I32: u8 = 0x16; +const TAG_ARRAY_U32: u8 = 0x17; +const TAG_ARRAY_F32: u8 = 0x18; +const TAG_ARRAY_I64: u8 = 0x19; +const TAG_ARRAY_U64: u8 = 0x1A; +const TAG_ARRAY_F64: u8 = 0x1B; + +/// Error returned when decoding fails. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum BinaryDecodeError { + /// The input was too short to decode the next value. + UnexpectedEof, + /// An unknown type tag was encountered. + UnknownTag(u8), + /// A string was not valid UTF-8. + InvalidUtf8, + /// An array allocation failed. 
+ AllocError, + /// Nesting depth exceeded the limit. + DepthLimitExceeded, +} + +const MAX_DEPTH: u32 = 128; + +impl fmt::Display for BinaryDecodeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + BinaryDecodeError::UnexpectedEof => write!(f, "unexpected end of input"), + BinaryDecodeError::UnknownTag(t) => write!(f, "unknown type tag: 0x{:02X}", t), + BinaryDecodeError::InvalidUtf8 => write!(f, "invalid UTF-8 in string"), + BinaryDecodeError::AllocError => write!(f, "memory allocation failed"), + BinaryDecodeError::DepthLimitExceeded => write!(f, "nesting depth limit exceeded"), + } + } +} + +/// Encodes an [`IValue`] tree into a compact binary representation that +/// preserves the [`ArrayTag`](crate::array::ArrayTag) of every array. +pub fn encode(value: &IValue) -> Vec { + let mut out = Vec::new(); + encode_into(value, &mut out); + out +} + +fn encode_into(value: &IValue, out: &mut Vec) { + match value.destructure_ref() { + DestructuredRef::Null => out.push(TAG_NULL), + DestructuredRef::Bool(false) => out.push(TAG_FALSE), + DestructuredRef::Bool(true) => out.push(TAG_TRUE), + DestructuredRef::Number(n) => encode_number(n, out), + DestructuredRef::String(s) => encode_string(s, out), + DestructuredRef::Array(a) => encode_array(a, out), + DestructuredRef::Object(o) => encode_object(o, out), + } +} + +fn encode_number(n: &INumber, out: &mut Vec) { + if n.has_decimal_point() { + out.push(TAG_F64); + out.extend_from_slice(&n.to_f64().unwrap().to_le_bytes()); + } else if let Some(v) = n.to_i64() { + out.push(TAG_I64); + out.extend_from_slice(&v.to_le_bytes()); + } else { + out.push(TAG_U64); + out.extend_from_slice(&n.to_u64().unwrap().to_le_bytes()); + } +} + +fn encode_string(s: &IString, out: &mut Vec) { + let bytes = s.as_str().as_bytes(); + out.push(TAG_STRING); + out.extend_from_slice(&(bytes.len() as u32).to_le_bytes()); + out.extend_from_slice(bytes); +} + +fn encode_array(a: &IArray, out: &mut Vec) { + let len = a.len() as 
u32; + match a.as_slice() { + ArraySliceRef::Heterogeneous(s) => { + out.push(TAG_ARRAY_HETERO); + out.extend_from_slice(&len.to_le_bytes()); + for v in s { + encode_into(v, out); + } + } + ArraySliceRef::I8(s) => { + out.push(TAG_ARRAY_I8); + out.extend_from_slice(&len.to_le_bytes()); + let bytes = unsafe { std::slice::from_raw_parts(s.as_ptr() as *const u8, s.len()) }; + out.extend_from_slice(bytes); + } + ArraySliceRef::U8(s) => { + out.push(TAG_ARRAY_U8); + out.extend_from_slice(&len.to_le_bytes()); + out.extend_from_slice(s); + } + ArraySliceRef::I16(s) => encode_typed_array(TAG_ARRAY_I16, s, out), + ArraySliceRef::U16(s) => encode_typed_array(TAG_ARRAY_U16, s, out), + ArraySliceRef::F16(s) => encode_typed_array(TAG_ARRAY_F16, s, out), + ArraySliceRef::BF16(s) => encode_typed_array(TAG_ARRAY_BF16, s, out), + ArraySliceRef::I32(s) => encode_typed_array(TAG_ARRAY_I32, s, out), + ArraySliceRef::U32(s) => encode_typed_array(TAG_ARRAY_U32, s, out), + ArraySliceRef::F32(s) => encode_typed_array(TAG_ARRAY_F32, s, out), + ArraySliceRef::I64(s) => encode_typed_array(TAG_ARRAY_I64, s, out), + ArraySliceRef::U64(s) => encode_typed_array(TAG_ARRAY_U64, s, out), + ArraySliceRef::F64(s) => encode_typed_array(TAG_ARRAY_F64, s, out), + } +} + +trait ToLeBytes { + fn to_le_bytes_vec(&self) -> impl AsRef<[u8]>; +} + +macro_rules! 
impl_to_le_bytes { + ($ty:ty) => { + impl ToLeBytes for $ty { + fn to_le_bytes_vec(&self) -> impl AsRef<[u8]> { + self.to_le_bytes() + } + } + }; +} + +impl_to_le_bytes!(i16); +impl_to_le_bytes!(u16); +impl_to_le_bytes!(f16); +impl_to_le_bytes!(bf16); +impl_to_le_bytes!(i32); +impl_to_le_bytes!(u32); +impl_to_le_bytes!(f32); +impl_to_le_bytes!(i64); +impl_to_le_bytes!(u64); +impl_to_le_bytes!(f64); + +fn encode_typed_array(tag: u8, s: &[T], out: &mut Vec) { + out.push(tag); + out.extend_from_slice(&(s.len() as u32).to_le_bytes()); + for v in s { + out.extend_from_slice(v.to_le_bytes_vec().as_ref()); + } +} + +fn encode_object(o: &IObject, out: &mut Vec) { + out.push(TAG_OBJECT); + out.extend_from_slice(&(o.len() as u32).to_le_bytes()); + for (k, v) in o { + let key_bytes = k.as_str().as_bytes(); + out.extend_from_slice(&(key_bytes.len() as u32).to_le_bytes()); + out.extend_from_slice(key_bytes); + encode_into(v, out); + } +} + +/// Decodes an [`IValue`] tree from bytes produced by [`encode`]. +/// +/// # Errors +/// +/// Returns [`BinaryDecodeError`] if the bytes are malformed. 
+pub fn decode(bytes: &[u8]) -> Result { + let mut cur = 0usize; + decode_value(bytes, &mut cur, 0) +} + +fn read_u8(bytes: &[u8], cur: &mut usize) -> Result { + bytes + .get(*cur) + .copied() + .map(|b| { + *cur += 1; + b + }) + .ok_or(BinaryDecodeError::UnexpectedEof) +} + +fn read_u32(bytes: &[u8], cur: &mut usize) -> Result { + let end = cur.checked_add(4).ok_or(BinaryDecodeError::UnexpectedEof)?; + let slice = bytes.get(*cur..end).ok_or(BinaryDecodeError::UnexpectedEof)?; + *cur = end; + Ok(u32::from_le_bytes(slice.try_into().unwrap())) +} + +fn read_i64(bytes: &[u8], cur: &mut usize) -> Result { + let end = cur.checked_add(8).ok_or(BinaryDecodeError::UnexpectedEof)?; + let slice = bytes.get(*cur..end).ok_or(BinaryDecodeError::UnexpectedEof)?; + *cur = end; + Ok(i64::from_le_bytes(slice.try_into().unwrap())) +} + +fn read_u64(bytes: &[u8], cur: &mut usize) -> Result { + let end = cur.checked_add(8).ok_or(BinaryDecodeError::UnexpectedEof)?; + let slice = bytes.get(*cur..end).ok_or(BinaryDecodeError::UnexpectedEof)?; + *cur = end; + Ok(u64::from_le_bytes(slice.try_into().unwrap())) +} + +fn read_f64(bytes: &[u8], cur: &mut usize) -> Result { + let end = cur.checked_add(8).ok_or(BinaryDecodeError::UnexpectedEof)?; + let slice = bytes.get(*cur..end).ok_or(BinaryDecodeError::UnexpectedEof)?; + *cur = end; + Ok(f64::from_le_bytes(slice.try_into().unwrap())) +} + +fn read_bytes<'a>(bytes: &'a [u8], cur: &mut usize, n: usize) -> Result<&'a [u8], BinaryDecodeError> { + let end = cur.checked_add(n).ok_or(BinaryDecodeError::UnexpectedEof)?; + let slice = bytes.get(*cur..end).ok_or(BinaryDecodeError::UnexpectedEof)?; + *cur = end; + Ok(slice) +} + +fn decode_value(bytes: &[u8], cur: &mut usize, depth: u32) -> Result { + if depth >= MAX_DEPTH { + return Err(BinaryDecodeError::DepthLimitExceeded); + } + match read_u8(bytes, cur)? 
{ + TAG_NULL => Ok(IValue::NULL), + TAG_FALSE => Ok(false.into()), + TAG_TRUE => Ok(true.into()), + TAG_I64 => Ok(read_i64(bytes, cur)?.into()), + TAG_U64 => Ok(read_u64(bytes, cur)?.into()), + TAG_F64 => { + let v = read_f64(bytes, cur)?; + Ok(INumber::try_from(v) + .map(Into::into) + .unwrap_or(IValue::NULL)) + } + TAG_STRING => { + let len = read_u32(bytes, cur)? as usize; + let raw = read_bytes(bytes, cur, len)?; + let s = std::str::from_utf8(raw).map_err(|_| BinaryDecodeError::InvalidUtf8)?; + Ok(IString::from(s).into()) + } + TAG_OBJECT => { + let count = read_u32(bytes, cur)? as usize; + // Each entry needs at least 5 bytes: 4-byte key-len + 1-byte value tag. + let hint = count.min((bytes.len() - *cur) / 5); + let mut obj = IObject::with_capacity(hint); + for _ in 0..count { + let key_len = read_u32(bytes, cur)? as usize; + let key_raw = read_bytes(bytes, cur, key_len)?; + let key = std::str::from_utf8(key_raw).map_err(|_| BinaryDecodeError::InvalidUtf8)?; + let val = decode_value(bytes, cur, depth + 1)?; + obj.insert(key, val); + } + Ok(obj.into()) + } + TAG_ARRAY_HETERO => { + let count = read_u32(bytes, cur)? as usize; + // Each element needs at least 1 byte (tag). + let hint = count.min(bytes.len() - *cur); + let mut arr = IArray::with_capacity(hint).map_err(|_| BinaryDecodeError::AllocError)?; + for _ in 0..count { + let v = decode_value(bytes, cur, depth + 1)?; + arr.push(v).map_err(|_| BinaryDecodeError::AllocError)?; + } + Ok(arr.into()) + } + TAG_ARRAY_I8 => { + let count = read_u32(bytes, cur)? as usize; + let raw = read_bytes(bytes, cur, count)?; + let typed: &[i8] = unsafe { std::slice::from_raw_parts(raw.as_ptr() as *const i8, count) }; + IArray::try_from(typed).map(Into::into).map_err(|_| BinaryDecodeError::AllocError) + } + TAG_ARRAY_U8 => { + let count = read_u32(bytes, cur)? 
as usize; + let raw = read_bytes(bytes, cur, count)?; + IArray::try_from(raw).map(Into::into).map_err(|_| BinaryDecodeError::AllocError) + } + TAG_ARRAY_I16 => decode_primitive_array::(bytes, cur, 2), + TAG_ARRAY_U16 => decode_primitive_array::(bytes, cur, 2), + TAG_ARRAY_F16 => { + let count = read_u32(bytes, cur)? as usize; + let byte_len = count.checked_mul(2).ok_or(BinaryDecodeError::UnexpectedEof)?; + let raw = read_bytes(bytes, cur, byte_len)?; + let vec: Vec = raw + .chunks_exact(2) + .map(|c| f16::from_le_bytes(c.try_into().unwrap())) + .collect(); + IArray::try_from(vec).map(Into::into).map_err(|_| BinaryDecodeError::AllocError) + } + TAG_ARRAY_BF16 => { + let count = read_u32(bytes, cur)? as usize; + let byte_len = count.checked_mul(2).ok_or(BinaryDecodeError::UnexpectedEof)?; + let raw = read_bytes(bytes, cur, byte_len)?; + let vec: Vec = raw + .chunks_exact(2) + .map(|c| bf16::from_le_bytes(c.try_into().unwrap())) + .collect(); + IArray::try_from(vec).map(Into::into).map_err(|_| BinaryDecodeError::AllocError) + } + TAG_ARRAY_I32 => decode_primitive_array::(bytes, cur, 4), + TAG_ARRAY_U32 => decode_primitive_array::(bytes, cur, 4), + TAG_ARRAY_F32 => decode_primitive_array::(bytes, cur, 4), + TAG_ARRAY_I64 => decode_primitive_array::(bytes, cur, 8), + TAG_ARRAY_U64 => decode_primitive_array::(bytes, cur, 8), + TAG_ARRAY_F64 => decode_primitive_array::(bytes, cur, 8), + tag => Err(BinaryDecodeError::UnknownTag(tag)), + } +} + +trait FromLeBytes: Copy + Sized + 'static { + fn from_le_bytes_slice(s: &[u8]) -> Self; +} + +macro_rules! 
impl_from_le_bytes { + ($ty:ty, $size:expr) => { + impl FromLeBytes for $ty { + fn from_le_bytes_slice(s: &[u8]) -> Self { + Self::from_le_bytes(s.try_into().unwrap()) + } + } + }; +} + +impl_from_le_bytes!(i16, 2); +impl_from_le_bytes!(u16, 2); +impl_from_le_bytes!(i32, 4); +impl_from_le_bytes!(u32, 4); +impl_from_le_bytes!(f32, 4); +impl_from_le_bytes!(i64, 8); +impl_from_le_bytes!(u64, 8); +impl_from_le_bytes!(f64, 8); + +fn decode_primitive_array(bytes: &[u8], cur: &mut usize, elem_size: usize) -> Result +where + T: FromLeBytes, + IArray: TryFrom>, +{ + let count = read_u32(bytes, cur)? as usize; + let byte_len = count.checked_mul(elem_size).ok_or(BinaryDecodeError::UnexpectedEof)?; + let raw = read_bytes(bytes, cur, byte_len)?; + let mut vec: Vec = Vec::with_capacity(count); + for chunk in raw.chunks_exact(elem_size) { + vec.push(T::from_le_bytes_slice(chunk)); + } + IArray::try_from(vec) + .map(Into::into) + .map_err(|_| BinaryDecodeError::AllocError) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::array::ArraySliceRef; + use crate::IValueDeserSeed; + use serde::de::DeserializeSeed; + + fn round_trip(value: &IValue) -> IValue { + let bytes = encode(value); + decode(&bytes).expect("decode should succeed") + } + + #[test] + fn test_null() { + let v: IValue = IValue::NULL; + assert_eq!(round_trip(&v), v); + } + + #[test] + fn test_bool() { + let t: IValue = true.into(); + let f: IValue = false.into(); + assert_eq!(round_trip(&t), t); + assert_eq!(round_trip(&f), f); + } + + #[test] + fn test_numbers() { + let cases: Vec = vec![ + 0i64.into(), + 42i64.into(), + (-1i64).into(), + i64::MAX.into(), + u64::MAX.into(), + 1.5f64.into(), + (-3.14f64).into(), + ]; + for v in &cases { + assert_eq!(round_trip(v), *v); + } + } + + #[test] + fn test_string() { + let v: IValue = IString::from("hello world").into(); + assert_eq!(round_trip(&v), v); + } + + #[test] + fn test_heterogeneous_array() { + let mut arr = IArray::new(); + arr.push(IValue::NULL).unwrap(); + 
arr.push(IValue::from(true)).unwrap(); + arr.push(IValue::from(42i64)).unwrap(); + arr.push(IValue::from(IString::from("hi"))).unwrap(); + let v: IValue = arr.into(); + let result = round_trip(&v); + let result_arr = result.as_array().unwrap(); + assert!(matches!(result_arr.as_slice(), ArraySliceRef::Heterogeneous(_))); + assert_eq!(result_arr.len(), 4); + } + + #[test] + fn test_f32_array_preserves_tag() { + let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type(crate::FloatType::F32))); + let json = r#"[1.5, 2.5, 3.5]"#; + let mut de = serde_json::Deserializer::from_str(json); + let v = seed.deserialize(&mut de).unwrap(); + assert!(matches!(v.as_array().unwrap().as_slice(), ArraySliceRef::F32(_))); + + let result = round_trip(&v); + let arr = result.as_array().unwrap(); + assert!(matches!(arr.as_slice(), ArraySliceRef::F32(_)), "F32 tag should survive encode/decode"); + assert_eq!(arr.len(), 3); + } + + #[test] + fn test_f16_array_preserves_tag() { + let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type(crate::FloatType::F16))); + let json = r#"[0.5, 1.0, 1.5]"#; + let mut de = serde_json::Deserializer::from_str(json); + let v = seed.deserialize(&mut de).unwrap(); + assert!(matches!(v.as_array().unwrap().as_slice(), ArraySliceRef::F16(_))); + + let result = round_trip(&v); + let arr = result.as_array().unwrap(); + assert!(matches!(arr.as_slice(), ArraySliceRef::F16(_)), "F16 tag should survive encode/decode"); + } + + #[test] + fn test_bf16_array_preserves_tag() { + let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type(crate::FloatType::BF16))); + let json = r#"[1.0, 2.0, 3.0]"#; + let mut de = serde_json::Deserializer::from_str(json); + let v = seed.deserialize(&mut de).unwrap(); + + let result = round_trip(&v); + let arr = result.as_array().unwrap(); + assert!(matches!(arr.as_slice(), ArraySliceRef::BF16(_)), "BF16 tag should survive encode/decode"); + } + + #[test] + fn test_f64_array_preserves_tag() { + let seed 
= IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type(crate::FloatType::F64))); + let json = r#"[1.0, 2.0, 3.0]"#; + let mut de = serde_json::Deserializer::from_str(json); + let v = seed.deserialize(&mut de).unwrap(); + + let result = round_trip(&v); + let arr = result.as_array().unwrap(); + assert!(matches!(arr.as_slice(), ArraySliceRef::F64(_)), "F64 tag should survive encode/decode"); + } + + #[test] + fn test_nested_object_with_typed_arrays() { + let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type(crate::FloatType::F32))); + let json = r#"{"a": [1.0, 2.0], "b": "text", "c": [3.0, 4.0]}"#; + let mut de = serde_json::Deserializer::from_str(json); + let v = seed.deserialize(&mut de).unwrap(); + + let result = round_trip(&v); + let obj = result.as_object().unwrap(); + let a = obj.get("a").unwrap().as_array().unwrap(); + let c = obj.get("c").unwrap().as_array().unwrap(); + assert!(matches!(a.as_slice(), ArraySliceRef::F32(_)), "nested F32 array 'a' should survive"); + assert!(matches!(c.as_slice(), ArraySliceRef::F32(_)), "nested F32 array 'c' should survive"); + assert_eq!(obj.get("b").unwrap().as_string().unwrap().as_str(), "text"); + } + + #[test] + fn test_truncated_input_returns_error() { + let v: IValue = 42i64.into(); + let bytes = encode(&v); + for len in 0..bytes.len() { + assert!(decode(&bytes[..len]).is_err(), "truncated at {len} should fail"); + } + } + + #[test] + fn test_unknown_tag_returns_error() { + let bytes = [0xFF]; + assert_eq!(decode(&bytes), Err(BinaryDecodeError::UnknownTag(0xFF))); + } + + #[test] + fn test_object_huge_count_does_not_oom() { + // TAG_OBJECT with count=0x94940606 (~2.5 billion) followed by no actual data. + // Must return an error, not OOM. + let bytes = [0x07, 0x06, 0x06, 0x94, 0x94]; + assert!(decode(&bytes).is_err()); + } + + #[test] + fn test_hetero_array_huge_count_does_not_oom() { + // TAG_ARRAY_HETERO with count=0xFFFFFFFF followed by no data. 
+ let bytes = [0x08, 0xFF, 0xFF, 0xFF, 0xFF]; + assert!(decode(&bytes).is_err()); + } + + #[test] + fn test_depth_limit() { + // Build MAX_DEPTH+1 levels of nested single-element hetero arrays. + // Each level: TAG_ARRAY_HETERO (1) + count=1 (4) = 5 bytes, then recurse. + let mut bytes: Vec = Vec::new(); + for _ in 0..=super::MAX_DEPTH { + bytes.push(TAG_ARRAY_HETERO); + bytes.extend_from_slice(&1u32.to_le_bytes()); + } + bytes.push(TAG_NULL); + assert_eq!(decode(&bytes), Err(BinaryDecodeError::DepthLimitExceeded)); + } + + #[test] + fn test_depth_limit_exact() { + // MAX_DEPTH-1 array wrappers: the leaf is decoded at depth=MAX_DEPTH-1, which is allowed. + let mut bytes: Vec = Vec::new(); + for _ in 0..super::MAX_DEPTH - 1 { + bytes.push(TAG_ARRAY_HETERO); + bytes.extend_from_slice(&1u32.to_le_bytes()); + } + bytes.push(TAG_NULL); + assert!(decode(&bytes).is_ok()); + } +} diff --git a/src/lib.rs b/src/lib.rs index 86e8255..5634589 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -51,6 +51,9 @@ pub use value::{ mod de; mod ser; +/// Binary encode/decode for [`IValue`], preserving typed array tags. +pub mod binary; +pub use binary::{decode, encode, BinaryDecodeError}; pub use de::{from_value, FPHAConfig, IValueDeserSeed}; pub use ser::to_value; From fa80dbed00ac8060327c916de40cb9582cdca5b3 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 19 Feb 2026 09:50:54 +0200 Subject: [PATCH 11/33] remove fallback from FPHAConfig --- src/de.rs | 32 +++----------------------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/src/de.rs b/src/de.rs index c7d2926..837fe5f 100644 --- a/src/de.rs +++ b/src/de.rs @@ -15,31 +15,12 @@ use crate::{DestructuredRef, FloatType, IArray, INumber, IObject, IString, IValu pub struct FPHAConfig { /// Floating point type for homogeneous arrays. pub fpha_type: FloatType, - /// If `fallback` is true, arrays that don't fit the fpha_type will fall back to regular push. 
- pub fpha_fallback: bool, } impl FPHAConfig { /// Creates a new [`FPHAConfig`] with the given floating point type. - pub fn new(fpha_type: FloatType, fpha_fallback: bool) -> Self { - Self { - fpha_type, - fpha_fallback, - } - } - - /// Creates a new [`FPHAConfig`] with the given floating point type and fallback behavior. pub fn new_with_type(fpha_type: FloatType) -> Self { - Self { - fpha_type, - fpha_fallback: false, - } - } - - /// Sets the fallback behavior. - pub fn with_fallback(mut self, fallback: bool) -> Self { - self.fpha_fallback = fallback; - self + Self { fpha_type } } } @@ -286,15 +267,8 @@ impl<'de> Visitor<'de> for ArrayVisitor { let mut arr = IArray::with_capacity(visitor.size_hint().unwrap_or(0)) .map_err(|_| SError::custom("Failed to allocate array"))?; while let Some(v) = visitor.next_element_seed(IValueDeserSeed::new(self.fpha_config))? { - // Matching Some(..) twice, to avoind cloning the value :/ match self.fpha_config { - Some(FPHAConfig { - fpha_type, - fpha_fallback: true, - }) => arr - .push_with_fp_type(v.clone(), fpha_type) - .or_else(|_| arr.push(v).map_err(Into::into)), - Some(FPHAConfig { fpha_type, .. 
}) => arr.push_with_fp_type(v, fpha_type), + Some(FPHAConfig { fpha_type }) => arr.push_with_fp_type(v, fpha_type), None => arr.push(v).map_err(Into::into), } .map_err(|e| SError::custom(e.to_string()))?; @@ -1200,7 +1174,7 @@ mod tests { let serialized = serde_json::to_string(&original).unwrap(); let reload_seed = - IValueDeserSeed::new(Some(FPHAConfig::new_with_type(fp_type).with_fallback(true))); + IValueDeserSeed::new(Some(FPHAConfig::new_with_type(fp_type))); let mut de = serde_json::Deserializer::from_str(&serialized); let roundtripped = reload_seed.deserialize(&mut de).unwrap(); From 6780eac33dbe4f7ab541250f68ca0e3ce8e8f882 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 19 Feb 2026 09:51:01 +0200 Subject: [PATCH 12/33] more fuzz tests --- fuzz/Cargo.toml | 20 +++++++++- fuzz/fuzz_targets/fuzz_binary_decode.rs | 9 +++++ fuzz/fuzz_targets/fuzz_binary_roundtrip.rs | 22 +++++++++++ fuzz/fuzz_targets/fuzz_json_de.rs | 44 +--------------------- 4 files changed, 51 insertions(+), 44 deletions(-) create mode 100644 fuzz/fuzz_targets/fuzz_binary_decode.rs create mode 100644 fuzz/fuzz_targets/fuzz_binary_roundtrip.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 09cd0b9..c804eb8 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -7,10 +7,14 @@ edition = "2021" [package.metadata] cargo-fuzz = true +[lib] +name = "ijson_fuzz" +path = "src/lib.rs" + [dependencies] libfuzzer-sys = "0.4" arbitrary = { version = "1.3", features = ["derive"] } -serde = { workspace = true } +serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } [dependencies.ijson] @@ -22,3 +26,17 @@ path = "fuzz_targets/fuzz_json_de.rs" test = false doc = false bench = false + +[[bin]] +name = "fuzz_binary_decode" +path = "fuzz_targets/fuzz_binary_decode.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_binary_roundtrip" +path = "fuzz_targets/fuzz_binary_roundtrip.rs" +test = false +doc = false +bench = false diff --git 
a/fuzz/fuzz_targets/fuzz_binary_decode.rs b/fuzz/fuzz_targets/fuzz_binary_decode.rs new file mode 100644 index 0000000..5dc49a2 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_binary_decode.rs @@ -0,0 +1,9 @@ +#![no_main] + +use ijson::binary::decode; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + println!("data: {:?}", data); + let _ = decode(data); +}); diff --git a/fuzz/fuzz_targets/fuzz_binary_roundtrip.rs b/fuzz/fuzz_targets/fuzz_binary_roundtrip.rs new file mode 100644 index 0000000..a3001fc --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_binary_roundtrip.rs @@ -0,0 +1,22 @@ +#![no_main] + +use ijson::{binary, IValue}; +use ijson_fuzz::JsonValue; +use libfuzzer_sys::fuzz_target; +use serde::Deserialize; + +fuzz_target!(|value: JsonValue| { + let json_string = value.to_json_string(); + let mut deserializer = serde_json::Deserializer::from_str(&json_string); + let Ok(original) = IValue::deserialize(&mut deserializer) else { + return; + }; + + let encoded = binary::encode(&original); + let decoded = binary::decode(&encoded).expect("encode->decode round-trip must not fail"); + + assert_eq!( + original, decoded, + "round-trip mismatch for input: {json_string}" + ); +}); diff --git a/fuzz/fuzz_targets/fuzz_json_de.rs b/fuzz/fuzz_targets/fuzz_json_de.rs index 1592e59..9193a40 100644 --- a/fuzz/fuzz_targets/fuzz_json_de.rs +++ b/fuzz/fuzz_targets/fuzz_json_de.rs @@ -1,51 +1,9 @@ #![no_main] -use arbitrary::Arbitrary; use ijson::IValue; +use ijson_fuzz::JsonValue; use libfuzzer_sys::fuzz_target; use serde::Deserialize; -use std::collections::HashMap; - -#[derive(Arbitrary, Debug)] -enum JsonValue { - Null, - Bool(bool), - Number(f64), - String(String), - Array(Vec), - Object(HashMap), -} - -impl JsonValue { - fn to_json_string(&self) -> String { - match self { - JsonValue::Null => "null".to_string(), - JsonValue::Bool(b) => b.to_string(), - JsonValue::Number(n) => { - if n.is_finite() { - n.to_string() - } else { - "0".to_string() - } - } - 
JsonValue::String(s) => format!("\"{}\"", s), - JsonValue::Array(arr) => { - let items: Vec = arr.iter().map(|v| v.to_json_string()).collect(); - format!("[{}]", items.join(",")) - } - JsonValue::Object(obj) => { - let items: Vec = obj - .iter() - .map(|(k, v)| { - let key = k.clone(); - format!("\"{}\":{}", key, v.to_json_string()) - }) - .collect(); - format!("{{{}}}", items.join(",")) - } - } - } -} fuzz_target!(|value: JsonValue| { let json_string = value.to_json_string(); From 86e63be0cab3091821f77d6bb4f750847d271064 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 19 Feb 2026 09:54:08 +0200 Subject: [PATCH 13/33] tag to enum --- src/binary.rs | 323 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 217 insertions(+), 106 deletions(-) diff --git a/src/binary.rs b/src/binary.rs index 32e803c..98062d9 100644 --- a/src/binary.rs +++ b/src/binary.rs @@ -5,27 +5,61 @@ use half::{bf16, f16}; use crate::array::ArraySliceRef; use crate::{DestructuredRef, IArray, INumber, IObject, IString, IValue}; -const TAG_NULL: u8 = 0x00; -const TAG_FALSE: u8 = 0x01; -const TAG_TRUE: u8 = 0x02; -const TAG_I64: u8 = 0x03; -const TAG_U64: u8 = 0x04; -const TAG_F64: u8 = 0x05; -const TAG_STRING: u8 = 0x06; -const TAG_OBJECT: u8 = 0x07; -const TAG_ARRAY_HETERO: u8 = 0x08; -const TAG_ARRAY_I8: u8 = 0x10; -const TAG_ARRAY_U8: u8 = 0x11; -const TAG_ARRAY_I16: u8 = 0x12; -const TAG_ARRAY_U16: u8 = 0x13; -const TAG_ARRAY_F16: u8 = 0x14; -const TAG_ARRAY_BF16: u8 = 0x15; -const TAG_ARRAY_I32: u8 = 0x16; -const TAG_ARRAY_U32: u8 = 0x17; -const TAG_ARRAY_F32: u8 = 0x18; -const TAG_ARRAY_I64: u8 = 0x19; -const TAG_ARRAY_U64: u8 = 0x1A; -const TAG_ARRAY_F64: u8 = 0x1B; +#[repr(u8)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Tag { + Null = 0x00, + False = 0x01, + True = 0x02, + I64 = 0x03, + U64 = 0x04, + F64 = 0x05, + String = 0x06, + Object = 0x07, + ArrayHetero = 0x08, + ArrayI8 = 0x10, + ArrayU8 = 0x11, + ArrayI16 = 0x12, + ArrayU16 = 0x13, + ArrayF16 = 0x14, + 
ArrayBF16 = 0x15, + ArrayI32 = 0x16, + ArrayU32 = 0x17, + ArrayF32 = 0x18, + ArrayI64 = 0x19, + ArrayU64 = 0x1A, + ArrayF64 = 0x1B, +} + +impl TryFrom for Tag { + type Error = u8; + fn try_from(v: u8) -> Result { + match v { + 0x00 => Ok(Tag::Null), + 0x01 => Ok(Tag::False), + 0x02 => Ok(Tag::True), + 0x03 => Ok(Tag::I64), + 0x04 => Ok(Tag::U64), + 0x05 => Ok(Tag::F64), + 0x06 => Ok(Tag::String), + 0x07 => Ok(Tag::Object), + 0x08 => Ok(Tag::ArrayHetero), + 0x10 => Ok(Tag::ArrayI8), + 0x11 => Ok(Tag::ArrayU8), + 0x12 => Ok(Tag::ArrayI16), + 0x13 => Ok(Tag::ArrayU16), + 0x14 => Ok(Tag::ArrayF16), + 0x15 => Ok(Tag::ArrayBF16), + 0x16 => Ok(Tag::ArrayI32), + 0x17 => Ok(Tag::ArrayU32), + 0x18 => Ok(Tag::ArrayF32), + 0x19 => Ok(Tag::ArrayI64), + 0x1A => Ok(Tag::ArrayU64), + 0x1B => Ok(Tag::ArrayF64), + other => Err(other), + } + } +} /// Error returned when decoding fails. #[derive(Debug, Clone, PartialEq, Eq)] @@ -64,11 +98,15 @@ pub fn encode(value: &IValue) -> Vec { out } +fn push_tag(tag: Tag, out: &mut Vec) { + out.push(tag as u8); +} + fn encode_into(value: &IValue, out: &mut Vec) { match value.destructure_ref() { - DestructuredRef::Null => out.push(TAG_NULL), - DestructuredRef::Bool(false) => out.push(TAG_FALSE), - DestructuredRef::Bool(true) => out.push(TAG_TRUE), + DestructuredRef::Null => push_tag(Tag::Null, out), + DestructuredRef::Bool(false) => push_tag(Tag::False, out), + DestructuredRef::Bool(true) => push_tag(Tag::True, out), DestructuredRef::Number(n) => encode_number(n, out), DestructuredRef::String(s) => encode_string(s, out), DestructuredRef::Array(a) => encode_array(a, out), @@ -78,20 +116,20 @@ fn encode_into(value: &IValue, out: &mut Vec) { fn encode_number(n: &INumber, out: &mut Vec) { if n.has_decimal_point() { - out.push(TAG_F64); + push_tag(Tag::F64, out); out.extend_from_slice(&n.to_f64().unwrap().to_le_bytes()); } else if let Some(v) = n.to_i64() { - out.push(TAG_I64); + push_tag(Tag::I64, out); out.extend_from_slice(&v.to_le_bytes()); } else 
{ - out.push(TAG_U64); + push_tag(Tag::U64, out); out.extend_from_slice(&n.to_u64().unwrap().to_le_bytes()); } } fn encode_string(s: &IString, out: &mut Vec) { let bytes = s.as_str().as_bytes(); - out.push(TAG_STRING); + push_tag(Tag::String, out); out.extend_from_slice(&(bytes.len() as u32).to_le_bytes()); out.extend_from_slice(bytes); } @@ -100,33 +138,33 @@ fn encode_array(a: &IArray, out: &mut Vec) { let len = a.len() as u32; match a.as_slice() { ArraySliceRef::Heterogeneous(s) => { - out.push(TAG_ARRAY_HETERO); + push_tag(Tag::ArrayHetero, out); out.extend_from_slice(&len.to_le_bytes()); for v in s { encode_into(v, out); } } ArraySliceRef::I8(s) => { - out.push(TAG_ARRAY_I8); + push_tag(Tag::ArrayI8, out); out.extend_from_slice(&len.to_le_bytes()); let bytes = unsafe { std::slice::from_raw_parts(s.as_ptr() as *const u8, s.len()) }; out.extend_from_slice(bytes); } ArraySliceRef::U8(s) => { - out.push(TAG_ARRAY_U8); + push_tag(Tag::ArrayU8, out); out.extend_from_slice(&len.to_le_bytes()); out.extend_from_slice(s); } - ArraySliceRef::I16(s) => encode_typed_array(TAG_ARRAY_I16, s, out), - ArraySliceRef::U16(s) => encode_typed_array(TAG_ARRAY_U16, s, out), - ArraySliceRef::F16(s) => encode_typed_array(TAG_ARRAY_F16, s, out), - ArraySliceRef::BF16(s) => encode_typed_array(TAG_ARRAY_BF16, s, out), - ArraySliceRef::I32(s) => encode_typed_array(TAG_ARRAY_I32, s, out), - ArraySliceRef::U32(s) => encode_typed_array(TAG_ARRAY_U32, s, out), - ArraySliceRef::F32(s) => encode_typed_array(TAG_ARRAY_F32, s, out), - ArraySliceRef::I64(s) => encode_typed_array(TAG_ARRAY_I64, s, out), - ArraySliceRef::U64(s) => encode_typed_array(TAG_ARRAY_U64, s, out), - ArraySliceRef::F64(s) => encode_typed_array(TAG_ARRAY_F64, s, out), + ArraySliceRef::I16(s) => encode_typed_array(Tag::ArrayI16, s, out), + ArraySliceRef::U16(s) => encode_typed_array(Tag::ArrayU16, s, out), + ArraySliceRef::F16(s) => encode_typed_array(Tag::ArrayF16, s, out), + ArraySliceRef::BF16(s) => 
encode_typed_array(Tag::ArrayBF16, s, out), + ArraySliceRef::I32(s) => encode_typed_array(Tag::ArrayI32, s, out), + ArraySliceRef::U32(s) => encode_typed_array(Tag::ArrayU32, s, out), + ArraySliceRef::F32(s) => encode_typed_array(Tag::ArrayF32, s, out), + ArraySliceRef::I64(s) => encode_typed_array(Tag::ArrayI64, s, out), + ArraySliceRef::U64(s) => encode_typed_array(Tag::ArrayU64, s, out), + ArraySliceRef::F64(s) => encode_typed_array(Tag::ArrayF64, s, out), } } @@ -155,8 +193,8 @@ impl_to_le_bytes!(i64); impl_to_le_bytes!(u64); impl_to_le_bytes!(f64); -fn encode_typed_array(tag: u8, s: &[T], out: &mut Vec) { - out.push(tag); +fn encode_typed_array(tag: Tag, s: &[T], out: &mut Vec) { + push_tag(tag, out); out.extend_from_slice(&(s.len() as u32).to_le_bytes()); for v in s { out.extend_from_slice(v.to_le_bytes_vec().as_ref()); @@ -164,7 +202,7 @@ fn encode_typed_array(tag: u8, s: &[T], out: &mut Vec) { } fn encode_object(o: &IObject, out: &mut Vec) { - out.push(TAG_OBJECT); + push_tag(Tag::Object, out); out.extend_from_slice(&(o.len() as u32).to_le_bytes()); for (k, v) in o { let key_bytes = k.as_str().as_bytes(); @@ -197,35 +235,49 @@ fn read_u8(bytes: &[u8], cur: &mut usize) -> Result { fn read_u32(bytes: &[u8], cur: &mut usize) -> Result { let end = cur.checked_add(4).ok_or(BinaryDecodeError::UnexpectedEof)?; - let slice = bytes.get(*cur..end).ok_or(BinaryDecodeError::UnexpectedEof)?; + let slice = bytes + .get(*cur..end) + .ok_or(BinaryDecodeError::UnexpectedEof)?; *cur = end; Ok(u32::from_le_bytes(slice.try_into().unwrap())) } fn read_i64(bytes: &[u8], cur: &mut usize) -> Result { let end = cur.checked_add(8).ok_or(BinaryDecodeError::UnexpectedEof)?; - let slice = bytes.get(*cur..end).ok_or(BinaryDecodeError::UnexpectedEof)?; + let slice = bytes + .get(*cur..end) + .ok_or(BinaryDecodeError::UnexpectedEof)?; *cur = end; Ok(i64::from_le_bytes(slice.try_into().unwrap())) } fn read_u64(bytes: &[u8], cur: &mut usize) -> Result { let end = 
cur.checked_add(8).ok_or(BinaryDecodeError::UnexpectedEof)?; - let slice = bytes.get(*cur..end).ok_or(BinaryDecodeError::UnexpectedEof)?; + let slice = bytes + .get(*cur..end) + .ok_or(BinaryDecodeError::UnexpectedEof)?; *cur = end; Ok(u64::from_le_bytes(slice.try_into().unwrap())) } fn read_f64(bytes: &[u8], cur: &mut usize) -> Result { let end = cur.checked_add(8).ok_or(BinaryDecodeError::UnexpectedEof)?; - let slice = bytes.get(*cur..end).ok_or(BinaryDecodeError::UnexpectedEof)?; + let slice = bytes + .get(*cur..end) + .ok_or(BinaryDecodeError::UnexpectedEof)?; *cur = end; Ok(f64::from_le_bytes(slice.try_into().unwrap())) } -fn read_bytes<'a>(bytes: &'a [u8], cur: &mut usize, n: usize) -> Result<&'a [u8], BinaryDecodeError> { +fn read_bytes<'a>( + bytes: &'a [u8], + cur: &mut usize, + n: usize, +) -> Result<&'a [u8], BinaryDecodeError> { let end = cur.checked_add(n).ok_or(BinaryDecodeError::UnexpectedEof)?; - let slice = bytes.get(*cur..end).ok_or(BinaryDecodeError::UnexpectedEof)?; + let slice = bytes + .get(*cur..end) + .ok_or(BinaryDecodeError::UnexpectedEof)?; *cur = end; Ok(slice) } @@ -234,25 +286,25 @@ fn decode_value(bytes: &[u8], cur: &mut usize, depth: u32) -> Result= MAX_DEPTH { return Err(BinaryDecodeError::DepthLimitExceeded); } - match read_u8(bytes, cur)? 
{ - TAG_NULL => Ok(IValue::NULL), - TAG_FALSE => Ok(false.into()), - TAG_TRUE => Ok(true.into()), - TAG_I64 => Ok(read_i64(bytes, cur)?.into()), - TAG_U64 => Ok(read_u64(bytes, cur)?.into()), - TAG_F64 => { + let raw_tag = read_u8(bytes, cur)?; + let tag = Tag::try_from(raw_tag).map_err(BinaryDecodeError::UnknownTag)?; + match tag { + Tag::Null => Ok(IValue::NULL), + Tag::False => Ok(false.into()), + Tag::True => Ok(true.into()), + Tag::I64 => Ok(read_i64(bytes, cur)?.into()), + Tag::U64 => Ok(read_u64(bytes, cur)?.into()), + Tag::F64 => { let v = read_f64(bytes, cur)?; - Ok(INumber::try_from(v) - .map(Into::into) - .unwrap_or(IValue::NULL)) + Ok(INumber::try_from(v).map(Into::into).unwrap_or(IValue::NULL)) } - TAG_STRING => { + Tag::String => { let len = read_u32(bytes, cur)? as usize; let raw = read_bytes(bytes, cur, len)?; let s = std::str::from_utf8(raw).map_err(|_| BinaryDecodeError::InvalidUtf8)?; Ok(IString::from(s).into()) } - TAG_OBJECT => { + Tag::Object => { let count = read_u32(bytes, cur)? as usize; // Each entry needs at least 5 bytes: 4-byte key-len + 1-byte value tag. let hint = count.min((bytes.len() - *cur) / 5); @@ -260,13 +312,14 @@ fn decode_value(bytes: &[u8], cur: &mut usize, depth: u32) -> Result { + Tag::ArrayHetero => { let count = read_u32(bytes, cur)? as usize; // Each element needs at least 1 byte (tag). let hint = count.min(bytes.len() - *cur); @@ -277,46 +330,58 @@ fn decode_value(bytes: &[u8], cur: &mut usize, depth: u32) -> Result { + Tag::ArrayI8 => { let count = read_u32(bytes, cur)? 
as usize; let raw = read_bytes(bytes, cur, count)?; - let typed: &[i8] = unsafe { std::slice::from_raw_parts(raw.as_ptr() as *const i8, count) }; - IArray::try_from(typed).map(Into::into).map_err(|_| BinaryDecodeError::AllocError) + let typed: &[i8] = + unsafe { std::slice::from_raw_parts(raw.as_ptr() as *const i8, count) }; + IArray::try_from(typed) + .map(Into::into) + .map_err(|_| BinaryDecodeError::AllocError) } - TAG_ARRAY_U8 => { + Tag::ArrayU8 => { let count = read_u32(bytes, cur)? as usize; let raw = read_bytes(bytes, cur, count)?; - IArray::try_from(raw).map(Into::into).map_err(|_| BinaryDecodeError::AllocError) + IArray::try_from(raw) + .map(Into::into) + .map_err(|_| BinaryDecodeError::AllocError) } - TAG_ARRAY_I16 => decode_primitive_array::(bytes, cur, 2), - TAG_ARRAY_U16 => decode_primitive_array::(bytes, cur, 2), - TAG_ARRAY_F16 => { + Tag::ArrayI16 => decode_primitive_array::(bytes, cur, 2), + Tag::ArrayU16 => decode_primitive_array::(bytes, cur, 2), + Tag::ArrayF16 => { let count = read_u32(bytes, cur)? as usize; - let byte_len = count.checked_mul(2).ok_or(BinaryDecodeError::UnexpectedEof)?; + let byte_len = count + .checked_mul(2) + .ok_or(BinaryDecodeError::UnexpectedEof)?; let raw = read_bytes(bytes, cur, byte_len)?; let vec: Vec = raw .chunks_exact(2) .map(|c| f16::from_le_bytes(c.try_into().unwrap())) .collect(); - IArray::try_from(vec).map(Into::into).map_err(|_| BinaryDecodeError::AllocError) + IArray::try_from(vec) + .map(Into::into) + .map_err(|_| BinaryDecodeError::AllocError) } - TAG_ARRAY_BF16 => { + Tag::ArrayBF16 => { let count = read_u32(bytes, cur)? 
as usize; - let byte_len = count.checked_mul(2).ok_or(BinaryDecodeError::UnexpectedEof)?; + let byte_len = count + .checked_mul(2) + .ok_or(BinaryDecodeError::UnexpectedEof)?; let raw = read_bytes(bytes, cur, byte_len)?; let vec: Vec = raw .chunks_exact(2) .map(|c| bf16::from_le_bytes(c.try_into().unwrap())) .collect(); - IArray::try_from(vec).map(Into::into).map_err(|_| BinaryDecodeError::AllocError) + IArray::try_from(vec) + .map(Into::into) + .map_err(|_| BinaryDecodeError::AllocError) } - TAG_ARRAY_I32 => decode_primitive_array::(bytes, cur, 4), - TAG_ARRAY_U32 => decode_primitive_array::(bytes, cur, 4), - TAG_ARRAY_F32 => decode_primitive_array::(bytes, cur, 4), - TAG_ARRAY_I64 => decode_primitive_array::(bytes, cur, 8), - TAG_ARRAY_U64 => decode_primitive_array::(bytes, cur, 8), - TAG_ARRAY_F64 => decode_primitive_array::(bytes, cur, 8), - tag => Err(BinaryDecodeError::UnknownTag(tag)), + Tag::ArrayI32 => decode_primitive_array::(bytes, cur, 4), + Tag::ArrayU32 => decode_primitive_array::(bytes, cur, 4), + Tag::ArrayF32 => decode_primitive_array::(bytes, cur, 4), + Tag::ArrayI64 => decode_primitive_array::(bytes, cur, 8), + Tag::ArrayU64 => decode_primitive_array::(bytes, cur, 8), + Tag::ArrayF64 => decode_primitive_array::(bytes, cur, 8), } } @@ -343,13 +408,19 @@ impl_from_le_bytes!(i64, 8); impl_from_le_bytes!(u64, 8); impl_from_le_bytes!(f64, 8); -fn decode_primitive_array(bytes: &[u8], cur: &mut usize, elem_size: usize) -> Result +fn decode_primitive_array( + bytes: &[u8], + cur: &mut usize, + elem_size: usize, +) -> Result where T: FromLeBytes, IArray: TryFrom>, { let count = read_u32(bytes, cur)? 
as usize; - let byte_len = count.checked_mul(elem_size).ok_or(BinaryDecodeError::UnexpectedEof)?; + let byte_len = count + .checked_mul(elem_size) + .ok_or(BinaryDecodeError::UnexpectedEof)?; let raw = read_bytes(bytes, cur, byte_len)?; let mut vec: Vec = Vec::with_capacity(count); for chunk in raw.chunks_exact(elem_size) { @@ -418,64 +489,95 @@ mod tests { let v: IValue = arr.into(); let result = round_trip(&v); let result_arr = result.as_array().unwrap(); - assert!(matches!(result_arr.as_slice(), ArraySliceRef::Heterogeneous(_))); + assert!(matches!( + result_arr.as_slice(), + ArraySliceRef::Heterogeneous(_) + )); assert_eq!(result_arr.len(), 4); } #[test] fn test_f32_array_preserves_tag() { - let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type(crate::FloatType::F32))); + let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( + crate::FloatType::F32, + ))); let json = r#"[1.5, 2.5, 3.5]"#; let mut de = serde_json::Deserializer::from_str(json); let v = seed.deserialize(&mut de).unwrap(); - assert!(matches!(v.as_array().unwrap().as_slice(), ArraySliceRef::F32(_))); + assert!(matches!( + v.as_array().unwrap().as_slice(), + ArraySliceRef::F32(_) + )); let result = round_trip(&v); let arr = result.as_array().unwrap(); - assert!(matches!(arr.as_slice(), ArraySliceRef::F32(_)), "F32 tag should survive encode/decode"); + assert!( + matches!(arr.as_slice(), ArraySliceRef::F32(_)), + "F32 tag should survive encode/decode" + ); assert_eq!(arr.len(), 3); } #[test] fn test_f16_array_preserves_tag() { - let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type(crate::FloatType::F16))); + let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( + crate::FloatType::F16, + ))); let json = r#"[0.5, 1.0, 1.5]"#; let mut de = serde_json::Deserializer::from_str(json); let v = seed.deserialize(&mut de).unwrap(); - assert!(matches!(v.as_array().unwrap().as_slice(), ArraySliceRef::F16(_))); + assert!(matches!( + 
v.as_array().unwrap().as_slice(), + ArraySliceRef::F16(_) + )); let result = round_trip(&v); let arr = result.as_array().unwrap(); - assert!(matches!(arr.as_slice(), ArraySliceRef::F16(_)), "F16 tag should survive encode/decode"); + assert!( + matches!(arr.as_slice(), ArraySliceRef::F16(_)), + "F16 tag should survive encode/decode" + ); } #[test] fn test_bf16_array_preserves_tag() { - let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type(crate::FloatType::BF16))); + let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( + crate::FloatType::BF16, + ))); let json = r#"[1.0, 2.0, 3.0]"#; let mut de = serde_json::Deserializer::from_str(json); let v = seed.deserialize(&mut de).unwrap(); let result = round_trip(&v); let arr = result.as_array().unwrap(); - assert!(matches!(arr.as_slice(), ArraySliceRef::BF16(_)), "BF16 tag should survive encode/decode"); + assert!( + matches!(arr.as_slice(), ArraySliceRef::BF16(_)), + "BF16 tag should survive encode/decode" + ); } #[test] fn test_f64_array_preserves_tag() { - let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type(crate::FloatType::F64))); + let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( + crate::FloatType::F64, + ))); let json = r#"[1.0, 2.0, 3.0]"#; let mut de = serde_json::Deserializer::from_str(json); let v = seed.deserialize(&mut de).unwrap(); let result = round_trip(&v); let arr = result.as_array().unwrap(); - assert!(matches!(arr.as_slice(), ArraySliceRef::F64(_)), "F64 tag should survive encode/decode"); + assert!( + matches!(arr.as_slice(), ArraySliceRef::F64(_)), + "F64 tag should survive encode/decode" + ); } #[test] fn test_nested_object_with_typed_arrays() { - let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type(crate::FloatType::F32))); + let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( + crate::FloatType::F32, + ))); let json = r#"{"a": [1.0, 2.0], "b": "text", "c": [3.0, 4.0]}"#; let mut de = 
serde_json::Deserializer::from_str(json); let v = seed.deserialize(&mut de).unwrap(); @@ -484,8 +586,14 @@ mod tests { let obj = result.as_object().unwrap(); let a = obj.get("a").unwrap().as_array().unwrap(); let c = obj.get("c").unwrap().as_array().unwrap(); - assert!(matches!(a.as_slice(), ArraySliceRef::F32(_)), "nested F32 array 'a' should survive"); - assert!(matches!(c.as_slice(), ArraySliceRef::F32(_)), "nested F32 array 'c' should survive"); + assert!( + matches!(a.as_slice(), ArraySliceRef::F32(_)), + "nested F32 array 'a' should survive" + ); + assert!( + matches!(c.as_slice(), ArraySliceRef::F32(_)), + "nested F32 array 'c' should survive" + ); assert_eq!(obj.get("b").unwrap().as_string().unwrap().as_str(), "text"); } @@ -494,7 +602,10 @@ mod tests { let v: IValue = 42i64.into(); let bytes = encode(&v); for len in 0..bytes.len() { - assert!(decode(&bytes[..len]).is_err(), "truncated at {len} should fail"); + assert!( + decode(&bytes[..len]).is_err(), + "truncated at {len} should fail" + ); } } @@ -522,13 +633,13 @@ mod tests { #[test] fn test_depth_limit() { // Build MAX_DEPTH+1 levels of nested single-element hetero arrays. - // Each level: TAG_ARRAY_HETERO (1) + count=1 (4) = 5 bytes, then recurse. + // Each level: ArrayHetero tag (1) + count=1 (4) = 5 bytes, then recurse. let mut bytes: Vec = Vec::new(); for _ in 0..=super::MAX_DEPTH { - bytes.push(TAG_ARRAY_HETERO); + bytes.push(super::Tag::ArrayHetero as u8); bytes.extend_from_slice(&1u32.to_le_bytes()); } - bytes.push(TAG_NULL); + bytes.push(super::Tag::Null as u8); assert_eq!(decode(&bytes), Err(BinaryDecodeError::DepthLimitExceeded)); } @@ -537,10 +648,10 @@ mod tests { // MAX_DEPTH-1 array wrappers: the leaf is decoded at depth=MAX_DEPTH-1, which is allowed. 
let mut bytes: Vec = Vec::new(); for _ in 0..super::MAX_DEPTH - 1 { - bytes.push(TAG_ARRAY_HETERO); + bytes.push(super::Tag::ArrayHetero as u8); bytes.extend_from_slice(&1u32.to_le_bytes()); } - bytes.push(TAG_NULL); + bytes.push(super::Tag::Null as u8); assert!(decode(&bytes).is_ok()); } } From 746ac877665f83d31e12106ed0226fdae55b5ea5 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 19 Feb 2026 10:22:16 +0200 Subject: [PATCH 14/33] update fuzz parameters in ci --- .github/actions/fuzz_tests/action.yml | 4 ++-- .github/workflows/ci.yml | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/actions/fuzz_tests/action.yml b/.github/actions/fuzz_tests/action.yml index 16774d5..7b736ac 100644 --- a/.github/actions/fuzz_tests/action.yml +++ b/.github/actions/fuzz_tests/action.yml @@ -8,7 +8,7 @@ inputs: fuzz_time: description: 'Maximum time in seconds to run fuzzing' required: false - default: '120' + default: '300' cargo_fuzz_version: description: 'Version of cargo-fuzz to install' required: false @@ -26,5 +26,5 @@ runs: - name: Run Fuzz Tests shell: bash working-directory: fuzz - run: cargo fuzz run ${{ inputs.fuzz_target }} --release -- -max_total_time=${{ inputs.fuzz_time }} + run: cargo fuzz run ${{ inputs.fuzz_target }} --release -- -max_total_time=${{ inputs.fuzz_time }} -ignore_ooms=1 -rss_limit_mb=0 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f690705..2f7b13d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,8 @@ jobs: matrix: fuzz_target: - fuzz_json_de - # Add more fuzz targets here as needed + - fuzz_binary_decode + - fuzz_binary_roundtrip steps: - name: Checkout repository uses: actions/checkout@v4 From f3d275d4c2f378bd903c32611426b81f461aede3 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 19 Feb 2026 10:24:21 +0200 Subject: [PATCH 15/33] fmt --- src/de.rs | 3 +-- src/lib.rs | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/de.rs b/src/de.rs 
index 837fe5f..5cc7cfe 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1173,8 +1173,7 @@ mod tests { let serialized = serde_json::to_string(&original).unwrap(); - let reload_seed = - IValueDeserSeed::new(Some(FPHAConfig::new_with_type(fp_type))); + let reload_seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(fp_type))); let mut de = serde_json::Deserializer::from_str(&serialized); let roundtripped = reload_seed.deserialize(&mut de).unwrap(); diff --git a/src/lib.rs b/src/lib.rs index 5634589..a4e63d4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -49,10 +49,10 @@ pub use value::{ BoolMut, Destructured, DestructuredMut, DestructuredRef, IValue, ValueIndex, ValueType, }; -mod de; -mod ser; /// Binary encode/decode for [`IValue`], preserving typed array tags. pub mod binary; +mod de; +mod ser; pub use binary::{decode, encode, BinaryDecodeError}; pub use de::{from_value, FPHAConfig, IValueDeserSeed}; pub use ser::to_value; From 954fa389e6c3e75161994d75a9b36d01246c7758 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 19 Feb 2026 10:41:08 +0200 Subject: [PATCH 16/33] remove print in fuzz --- fuzz/fuzz_targets/fuzz_binary_decode.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/fuzz/fuzz_targets/fuzz_binary_decode.rs b/fuzz/fuzz_targets/fuzz_binary_decode.rs index 5dc49a2..265f06c 100644 --- a/fuzz/fuzz_targets/fuzz_binary_decode.rs +++ b/fuzz/fuzz_targets/fuzz_binary_decode.rs @@ -4,6 +4,5 @@ use ijson::binary::decode; use libfuzzer_sys::fuzz_target; fuzz_target!(|data: &[u8]| { - println!("data: {:?}", data); let _ = decode(data); }); From 82228326298d4a55d07cdd6d1609859328b73271 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 19 Feb 2026 12:00:34 +0200 Subject: [PATCH 17/33] wrap with zstd compression --- Cargo.toml | 2 ++ src/binary.rs | 17 +++++++++++++++++ src/lib.rs | 2 +- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index ed0021a..c642743 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,9 +32,11 @@ ctor = { 
version = "0.1.16", optional = true } paste = "1.0.15" half = "2.0.0" thiserror = "2.0.18" +zstd = "0.13" [dev-dependencies] mockalloc = "0.1.2" ctor = "0.1.16" rand = "0.8.4" +zstd = "0.13" diff --git a/src/binary.rs b/src/binary.rs index 98062d9..1beb1f5 100644 --- a/src/binary.rs +++ b/src/binary.rs @@ -74,6 +74,8 @@ pub enum BinaryDecodeError { AllocError, /// Nesting depth exceeded the limit. DepthLimitExceeded, + /// Decompression failed (zstd error). + DecompressError, } const MAX_DEPTH: u32 = 128; @@ -86,6 +88,7 @@ impl fmt::Display for BinaryDecodeError { BinaryDecodeError::InvalidUtf8 => write!(f, "invalid UTF-8 in string"), BinaryDecodeError::AllocError => write!(f, "memory allocation failed"), BinaryDecodeError::DepthLimitExceeded => write!(f, "nesting depth limit exceeded"), + BinaryDecodeError::DecompressError => write!(f, "zstd decompression failed"), } } } @@ -98,6 +101,20 @@ pub fn encode(value: &IValue) -> Vec { out } +/// Encodes an [`IValue`] tree and compresses the result with zstd (level 3). +/// +/// Use [`decode_compressed`] to decode the output. +pub fn encode_compressed(value: &IValue) -> Vec { + let raw = encode(value); + zstd::bulk::compress(&raw, 3).expect("zstd compress") +} + +/// Decodes an [`IValue`] tree from bytes produced by [`encode_compressed`]. 
+pub fn decode_compressed(bytes: &[u8]) -> Result { + let raw = zstd::decode_all(bytes).map_err(|_| BinaryDecodeError::DecompressError)?; + decode(&raw) +} + fn push_tag(tag: Tag, out: &mut Vec) { out.push(tag as u8); } diff --git a/src/lib.rs b/src/lib.rs index a4e63d4..4315245 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -53,7 +53,7 @@ pub use value::{ pub mod binary; mod de; mod ser; -pub use binary::{decode, encode, BinaryDecodeError}; +pub use binary::{decode, decode_compressed, encode, encode_compressed, BinaryDecodeError}; pub use de::{from_value, FPHAConfig, IValueDeserSeed}; pub use ser::to_value; From 4bfdfec582cf15930c2e01010d5522044d359f7f Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 19 Feb 2026 14:01:03 +0200 Subject: [PATCH 18/33] misc --- Cargo.toml | 1 + src/binary.rs | 15 ++++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c642743..14b699d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ serde = { workspace = true } serde_json = { workspace = true } ctor = { version = "0.1.16", optional = true } paste = "1.0.15" +bytemuck = "1.25.0" half = "2.0.0" thiserror = "2.0.18" zstd = "0.13" diff --git a/src/binary.rs b/src/binary.rs index 1beb1f5..1b76a45 100644 --- a/src/binary.rs +++ b/src/binary.rs @@ -1,5 +1,6 @@ use std::fmt; +use bytemuck; use half::{bf16, f16}; use crate::array::ArraySliceRef; @@ -76,6 +77,8 @@ pub enum BinaryDecodeError { DepthLimitExceeded, /// Decompression failed (zstd error). DecompressError, + /// Failed to cast slice. 
+ CastError, } const MAX_DEPTH: u32 = 128; @@ -89,6 +92,7 @@ impl fmt::Display for BinaryDecodeError { BinaryDecodeError::AllocError => write!(f, "memory allocation failed"), BinaryDecodeError::DepthLimitExceeded => write!(f, "nesting depth limit exceeded"), BinaryDecodeError::DecompressError => write!(f, "zstd decompression failed"), + BinaryDecodeError::CastError => write!(f, "failed to cast slice"), } } } @@ -106,7 +110,9 @@ pub fn encode(value: &IValue) -> Vec { /// Use [`decode_compressed`] to decode the output. pub fn encode_compressed(value: &IValue) -> Vec { let raw = encode(value); - zstd::bulk::compress(&raw, 3).expect("zstd compress") + zstd::bulk::Compressor::default() + .compress(&raw) + .expect("zstd compress") } /// Decodes an [`IValue`] tree from bytes produced by [`encode_compressed`]. @@ -164,8 +170,7 @@ fn encode_array(a: &IArray, out: &mut Vec) { ArraySliceRef::I8(s) => { push_tag(Tag::ArrayI8, out); out.extend_from_slice(&len.to_le_bytes()); - let bytes = unsafe { std::slice::from_raw_parts(s.as_ptr() as *const u8, s.len()) }; - out.extend_from_slice(bytes); + out.extend_from_slice(bytemuck::cast_slice::(s)); } ArraySliceRef::U8(s) => { push_tag(Tag::ArrayU8, out); @@ -350,8 +355,8 @@ fn decode_value(bytes: &[u8], cur: &mut usize, depth: u32) -> Result { let count = read_u32(bytes, cur)? 
as usize; let raw = read_bytes(bytes, cur, count)?; - let typed: &[i8] = - unsafe { std::slice::from_raw_parts(raw.as_ptr() as *const i8, count) }; + let typed: &[i8] = bytemuck::try_cast_slice::(raw) + .map_err(|_| BinaryDecodeError::CastError)?; IArray::try_from(typed) .map(Into::into) .map_err(|_| BinaryDecodeError::AllocError) From b6e1ac71f46967e02f094694d0b5787aa0721941 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Sun, 22 Feb 2026 11:49:08 +0200 Subject: [PATCH 19/33] move to cbor based implementation --- .github/workflows/ci.yml | 4 +- Cargo.toml | 2 +- examples/size_profile.rs | 205 ++++++ fuzz/Cargo.toml | 8 +- ...z_binary_decode.rs => fuzz_cbor_decode.rs} | 2 +- ...ry_roundtrip.rs => fuzz_cbor_roundtrip.rs} | 6 +- src/binary.rs | 679 ------------------ src/cbor.rs | 476 ++++++++++++ src/lib.rs | 6 +- 9 files changed, 695 insertions(+), 693 deletions(-) create mode 100644 examples/size_profile.rs rename fuzz/fuzz_targets/{fuzz_binary_decode.rs => fuzz_cbor_decode.rs} (79%) rename fuzz/fuzz_targets/{fuzz_binary_roundtrip.rs => fuzz_cbor_roundtrip.rs} (73%) delete mode 100644 src/binary.rs create mode 100644 src/cbor.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2f7b13d..79a6cad 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,8 +24,8 @@ jobs: matrix: fuzz_target: - fuzz_json_de - - fuzz_binary_decode - - fuzz_binary_roundtrip + - fuzz_cbor_decode + - fuzz_cbor_roundtrip steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/Cargo.toml b/Cargo.toml index 14b699d..dce0325 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ serde = { workspace = true } serde_json = { workspace = true } ctor = { version = "0.1.16", optional = true } paste = "1.0.15" -bytemuck = "1.25.0" +ciborium = "0.2" half = "2.0.0" thiserror = "2.0.18" zstd = "0.13" diff --git a/examples/size_profile.rs b/examples/size_profile.rs new file mode 100644 index 0000000..3bbfbe4 --- /dev/null +++ 
b/examples/size_profile.rs @@ -0,0 +1,205 @@ +/// Compares CBOR and zstd-compressed CBOR sizes vs JSON for representative IValue documents. +/// +/// Run with: +/// cargo run --example size_profile +use ijson::{cbor, FPHAConfig, FloatType, IValue, IValueDeserSeed}; +use serde::de::DeserializeSeed; +use serde::Deserialize; + +struct Case { + name: &'static str, + value: IValue, +} + +fn json_size(v: &IValue) -> usize { + serde_json::to_string(v).unwrap().len() +} + +fn cbor_size(v: &IValue) -> usize { + cbor::encode(v).len() +} + +fn cbor_zstd_size(v: &IValue) -> usize { + cbor::encode_compressed(v).len() +} + +fn from_json(s: &str) -> IValue { + IValue::deserialize(&mut serde_json::Deserializer::from_str(s)).unwrap() +} + +fn from_json_fpha(s: &str, ft: FloatType) -> IValue { + let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(ft))); + seed.deserialize(&mut serde_json::Deserializer::from_str(s)) + .unwrap() +} + +fn make_cases() -> Vec { + // ── typed float arrays ────────────────────────────────────────────────── + let n = 1000usize; + let fp32_json = format!( + "[{}]", + (0..n) + .map(|i| format!("{:.6}", i as f32 * 0.001)) + .collect::>() + .join(",") + ); + let fp64_json = format!( + "[{}]", + (0..n) + .map(|i| format!("{:.15}", i as f64 * 0.001)) + .collect::>() + .join(",") + ); + + // ── string-heavy object ───────────────────────────────────────────────── + let string_obj_json = format!( + "{{{}}}", + (0..50) + .map(|i| format!("\"key_{i}\":\"value_{i}_some_longer_string_here\"")) + .collect::>() + .join(",") + ); + + // ── heterogeneous object ──────────────────────────────────────────────── + let hetero_json = r#"{ + "name": "Alice", + "age": 30, + "scores": [1, 2, 3, null, true, "bonus"], + "meta": {"active": true, "level": 42} + }"#; + + // ── nested typed arrays ───────────────────────────────────────────────── + let small_fp32 = format!( + "[{}]", + (0..100) + .map(|i| format!("{:.4}", i as f32 * 0.1)) + .collect::>() + .join(",") + ); + 
let nested_fp32_json = format!("{{\"a\":{small_fp32},\"b\":{small_fp32},\"label\":\"test\"}}"); + + // ── big mixed JSON: array of 200 records, each with scalars + fp32 embedding ─ + // Simulates a realistic workload: repeated schema, repeated key names, + // mix of strings / integers / booleans, and a typed float sub-array. + let big_mixed_json = { + let records: Vec = (0..200) + .map(|i| { + let embedding: String = (0..32) + .map(|j| format!("{:.6}", (i as f32 * 0.01 + j as f32 * 0.001).sin())) + .collect::>() + .join(","); + format!( + r#"{{"id":{i},"name":"user_{i}","active":{},"score":{:.4},"tags":["alpha","beta","gamma"],"embedding":[{embedding}]}}"#, + i % 2 == 0, + i as f64 * 1.5, + ) + }) + .collect(); + format!("[{}]", records.join(",")) + }; + + // ── repeated strings: 500 objects sharing the same schema and many identical values ─ + // Targets RED-141886: string-reuse gap between in-memory and RDB representation. + // Keys ("status", "region", "tier", "owner") and values ("active", "us-east-1", + // "premium", "team-a") repeat across every record, stressing string deduplication. 
+ let repeated_strings_json = { + let statuses = ["active", "inactive", "pending"]; + let regions = ["us-east-1", "eu-west-1", "ap-southeast-1"]; + let tiers = ["free", "standard", "premium"]; + let owners = ["team-a", "team-b", "team-c", "team-d"]; + let records: Vec<String> = (0..500) + .map(|i| { + format!( + r#"{{"id":{i},"status":"{}","region":"{}","tier":"{}","owner":"{}","count":{}}}"#, + statuses[i % statuses.len()], + regions[i % regions.len()], + tiers[i % tiers.len()], + owners[i % owners.len()], + i * 10, + ) + }) + .collect(); + format!("[{}]", records.join(",")) + }; + + vec![ + Case { + name: "FP32 array (1000 elements)", + value: from_json_fpha(&fp32_json, FloatType::F32), + }, + Case { + name: "FP64 array (1000 elements)", + value: from_json_fpha(&fp64_json, FloatType::F64), + }, + Case { + name: "Heterogeneous array (1000 numbers, no hint)", + value: from_json(&fp32_json), + }, + Case { + name: "String-heavy object (50 keys)", + value: from_json(&string_obj_json), + }, + Case { + name: "Mixed object (hetero)", + value: from_json(hetero_json), + }, + Case { + name: "Nested FP32 arrays + string", + value: from_json_fpha(&nested_fp32_json, FloatType::F32), + }, + Case { + name: "Big mixed JSON (200 records, hetero embed)", + value: from_json(&big_mixed_json), + }, + Case { + name: "Repeated strings (500 records, RED-141886)", + value: from_json(&repeated_strings_json), + }, + ] +} + +fn pct(new: usize, base: usize) -> String { + let p = (new as f64 - base as f64) / base as f64 * 100.0; + let sign = if p < 0.0 { "" } else { "+" }; + format!("{sign}{p:.1}%") +} + +fn main() { + let cases = make_cases(); + + let name_w = 42usize; + let col_w = 12usize; + + println!( + "\n{:<name_w$} {:>col_w$} {:>col_w$} {:>col_w$} {:>col_w$}", + "Document", "JSON (B)", "cbor (B)", "cbor Δ%", "cbor+z (B)", + ); + println!( + "{:<name_w$} {:>col_w$}", + "", "cbor+z Δ%", + ); + println!("{}", "-".repeat(name_w + col_w * 5 + 4)); + + for c in &cases { + let j = json_size(&c.value); + let cb = 
cbor_size(&c.value); + let cbz = cbor_zstd_size(&c.value); + println!( + "{:col_w$} {:>col_w$} {:>col_w$} {:>col_w$}", + c.name, + j, + cb, + pct(cb, j), + cbz, + ); + println!( + "{:col_w$}", + "", + pct(cbz, j), + ); + println!(); + } + + println!("Δ%: relative to JSON size. Negative = smaller than JSON."); + println!(); +} diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index c804eb8..f876d47 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -28,15 +28,15 @@ doc = false bench = false [[bin]] -name = "fuzz_binary_decode" -path = "fuzz_targets/fuzz_binary_decode.rs" +name = "fuzz_cbor_decode" +path = "fuzz_targets/fuzz_cbor_decode.rs" test = false doc = false bench = false [[bin]] -name = "fuzz_binary_roundtrip" -path = "fuzz_targets/fuzz_binary_roundtrip.rs" +name = "fuzz_cbor_roundtrip" +path = "fuzz_targets/fuzz_cbor_roundtrip.rs" test = false doc = false bench = false diff --git a/fuzz/fuzz_targets/fuzz_binary_decode.rs b/fuzz/fuzz_targets/fuzz_cbor_decode.rs similarity index 79% rename from fuzz/fuzz_targets/fuzz_binary_decode.rs rename to fuzz/fuzz_targets/fuzz_cbor_decode.rs index 265f06c..c58951d 100644 --- a/fuzz/fuzz_targets/fuzz_binary_decode.rs +++ b/fuzz/fuzz_targets/fuzz_cbor_decode.rs @@ -1,6 +1,6 @@ #![no_main] -use ijson::binary::decode; +use ijson::cbor::decode; use libfuzzer_sys::fuzz_target; fuzz_target!(|data: &[u8]| { diff --git a/fuzz/fuzz_targets/fuzz_binary_roundtrip.rs b/fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs similarity index 73% rename from fuzz/fuzz_targets/fuzz_binary_roundtrip.rs rename to fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs index a3001fc..27bac38 100644 --- a/fuzz/fuzz_targets/fuzz_binary_roundtrip.rs +++ b/fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs @@ -1,6 +1,6 @@ #![no_main] -use ijson::{binary, IValue}; +use ijson::{cbor, IValue}; use ijson_fuzz::JsonValue; use libfuzzer_sys::fuzz_target; use serde::Deserialize; @@ -12,8 +12,8 @@ fuzz_target!(|value: JsonValue| { return; }; - let encoded = binary::encode(&original); - let 
decoded = binary::decode(&encoded).expect("encode->decode round-trip must not fail"); + let encoded = cbor::encode(&original); + let decoded = cbor::decode(&encoded).expect("encode->decode round-trip must not fail"); assert_eq!( original, decoded, diff --git a/src/binary.rs b/src/binary.rs deleted file mode 100644 index 1b76a45..0000000 --- a/src/binary.rs +++ /dev/null @@ -1,679 +0,0 @@ -use std::fmt; - -use bytemuck; -use half::{bf16, f16}; - -use crate::array::ArraySliceRef; -use crate::{DestructuredRef, IArray, INumber, IObject, IString, IValue}; - -#[repr(u8)] -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum Tag { - Null = 0x00, - False = 0x01, - True = 0x02, - I64 = 0x03, - U64 = 0x04, - F64 = 0x05, - String = 0x06, - Object = 0x07, - ArrayHetero = 0x08, - ArrayI8 = 0x10, - ArrayU8 = 0x11, - ArrayI16 = 0x12, - ArrayU16 = 0x13, - ArrayF16 = 0x14, - ArrayBF16 = 0x15, - ArrayI32 = 0x16, - ArrayU32 = 0x17, - ArrayF32 = 0x18, - ArrayI64 = 0x19, - ArrayU64 = 0x1A, - ArrayF64 = 0x1B, -} - -impl TryFrom for Tag { - type Error = u8; - fn try_from(v: u8) -> Result { - match v { - 0x00 => Ok(Tag::Null), - 0x01 => Ok(Tag::False), - 0x02 => Ok(Tag::True), - 0x03 => Ok(Tag::I64), - 0x04 => Ok(Tag::U64), - 0x05 => Ok(Tag::F64), - 0x06 => Ok(Tag::String), - 0x07 => Ok(Tag::Object), - 0x08 => Ok(Tag::ArrayHetero), - 0x10 => Ok(Tag::ArrayI8), - 0x11 => Ok(Tag::ArrayU8), - 0x12 => Ok(Tag::ArrayI16), - 0x13 => Ok(Tag::ArrayU16), - 0x14 => Ok(Tag::ArrayF16), - 0x15 => Ok(Tag::ArrayBF16), - 0x16 => Ok(Tag::ArrayI32), - 0x17 => Ok(Tag::ArrayU32), - 0x18 => Ok(Tag::ArrayF32), - 0x19 => Ok(Tag::ArrayI64), - 0x1A => Ok(Tag::ArrayU64), - 0x1B => Ok(Tag::ArrayF64), - other => Err(other), - } - } -} - -/// Error returned when decoding fails. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum BinaryDecodeError { - /// The input was too short to decode the next value. - UnexpectedEof, - /// An unknown type tag was encountered. - UnknownTag(u8), - /// A string was not valid UTF-8. 
- InvalidUtf8, - /// An array allocation failed. - AllocError, - /// Nesting depth exceeded the limit. - DepthLimitExceeded, - /// Decompression failed (zstd error). - DecompressError, - /// Failed to cast slice. - CastError, -} - -const MAX_DEPTH: u32 = 128; - -impl fmt::Display for BinaryDecodeError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - BinaryDecodeError::UnexpectedEof => write!(f, "unexpected end of input"), - BinaryDecodeError::UnknownTag(t) => write!(f, "unknown type tag: 0x{:02X}", t), - BinaryDecodeError::InvalidUtf8 => write!(f, "invalid UTF-8 in string"), - BinaryDecodeError::AllocError => write!(f, "memory allocation failed"), - BinaryDecodeError::DepthLimitExceeded => write!(f, "nesting depth limit exceeded"), - BinaryDecodeError::DecompressError => write!(f, "zstd decompression failed"), - BinaryDecodeError::CastError => write!(f, "failed to cast slice"), - } - } -} - -/// Encodes an [`IValue`] tree into a compact binary representation that -/// preserves the [`ArrayTag`](crate::array::ArrayTag) of every array. -pub fn encode(value: &IValue) -> Vec { - let mut out = Vec::new(); - encode_into(value, &mut out); - out -} - -/// Encodes an [`IValue`] tree and compresses the result with zstd (level 3). -/// -/// Use [`decode_compressed`] to decode the output. -pub fn encode_compressed(value: &IValue) -> Vec { - let raw = encode(value); - zstd::bulk::Compressor::default() - .compress(&raw) - .expect("zstd compress") -} - -/// Decodes an [`IValue`] tree from bytes produced by [`encode_compressed`]. 
-pub fn decode_compressed(bytes: &[u8]) -> Result { - let raw = zstd::decode_all(bytes).map_err(|_| BinaryDecodeError::DecompressError)?; - decode(&raw) -} - -fn push_tag(tag: Tag, out: &mut Vec) { - out.push(tag as u8); -} - -fn encode_into(value: &IValue, out: &mut Vec) { - match value.destructure_ref() { - DestructuredRef::Null => push_tag(Tag::Null, out), - DestructuredRef::Bool(false) => push_tag(Tag::False, out), - DestructuredRef::Bool(true) => push_tag(Tag::True, out), - DestructuredRef::Number(n) => encode_number(n, out), - DestructuredRef::String(s) => encode_string(s, out), - DestructuredRef::Array(a) => encode_array(a, out), - DestructuredRef::Object(o) => encode_object(o, out), - } -} - -fn encode_number(n: &INumber, out: &mut Vec) { - if n.has_decimal_point() { - push_tag(Tag::F64, out); - out.extend_from_slice(&n.to_f64().unwrap().to_le_bytes()); - } else if let Some(v) = n.to_i64() { - push_tag(Tag::I64, out); - out.extend_from_slice(&v.to_le_bytes()); - } else { - push_tag(Tag::U64, out); - out.extend_from_slice(&n.to_u64().unwrap().to_le_bytes()); - } -} - -fn encode_string(s: &IString, out: &mut Vec) { - let bytes = s.as_str().as_bytes(); - push_tag(Tag::String, out); - out.extend_from_slice(&(bytes.len() as u32).to_le_bytes()); - out.extend_from_slice(bytes); -} - -fn encode_array(a: &IArray, out: &mut Vec) { - let len = a.len() as u32; - match a.as_slice() { - ArraySliceRef::Heterogeneous(s) => { - push_tag(Tag::ArrayHetero, out); - out.extend_from_slice(&len.to_le_bytes()); - for v in s { - encode_into(v, out); - } - } - ArraySliceRef::I8(s) => { - push_tag(Tag::ArrayI8, out); - out.extend_from_slice(&len.to_le_bytes()); - out.extend_from_slice(bytemuck::cast_slice::(s)); - } - ArraySliceRef::U8(s) => { - push_tag(Tag::ArrayU8, out); - out.extend_from_slice(&len.to_le_bytes()); - out.extend_from_slice(s); - } - ArraySliceRef::I16(s) => encode_typed_array(Tag::ArrayI16, s, out), - ArraySliceRef::U16(s) => encode_typed_array(Tag::ArrayU16, s, 
out), - ArraySliceRef::F16(s) => encode_typed_array(Tag::ArrayF16, s, out), - ArraySliceRef::BF16(s) => encode_typed_array(Tag::ArrayBF16, s, out), - ArraySliceRef::I32(s) => encode_typed_array(Tag::ArrayI32, s, out), - ArraySliceRef::U32(s) => encode_typed_array(Tag::ArrayU32, s, out), - ArraySliceRef::F32(s) => encode_typed_array(Tag::ArrayF32, s, out), - ArraySliceRef::I64(s) => encode_typed_array(Tag::ArrayI64, s, out), - ArraySliceRef::U64(s) => encode_typed_array(Tag::ArrayU64, s, out), - ArraySliceRef::F64(s) => encode_typed_array(Tag::ArrayF64, s, out), - } -} - -trait ToLeBytes { - fn to_le_bytes_vec(&self) -> impl AsRef<[u8]>; -} - -macro_rules! impl_to_le_bytes { - ($ty:ty) => { - impl ToLeBytes for $ty { - fn to_le_bytes_vec(&self) -> impl AsRef<[u8]> { - self.to_le_bytes() - } - } - }; -} - -impl_to_le_bytes!(i16); -impl_to_le_bytes!(u16); -impl_to_le_bytes!(f16); -impl_to_le_bytes!(bf16); -impl_to_le_bytes!(i32); -impl_to_le_bytes!(u32); -impl_to_le_bytes!(f32); -impl_to_le_bytes!(i64); -impl_to_le_bytes!(u64); -impl_to_le_bytes!(f64); - -fn encode_typed_array(tag: Tag, s: &[T], out: &mut Vec) { - push_tag(tag, out); - out.extend_from_slice(&(s.len() as u32).to_le_bytes()); - for v in s { - out.extend_from_slice(v.to_le_bytes_vec().as_ref()); - } -} - -fn encode_object(o: &IObject, out: &mut Vec) { - push_tag(Tag::Object, out); - out.extend_from_slice(&(o.len() as u32).to_le_bytes()); - for (k, v) in o { - let key_bytes = k.as_str().as_bytes(); - out.extend_from_slice(&(key_bytes.len() as u32).to_le_bytes()); - out.extend_from_slice(key_bytes); - encode_into(v, out); - } -} - -/// Decodes an [`IValue`] tree from bytes produced by [`encode`]. -/// -/// # Errors -/// -/// Returns [`BinaryDecodeError`] if the bytes are malformed. 
-pub fn decode(bytes: &[u8]) -> Result { - let mut cur = 0usize; - decode_value(bytes, &mut cur, 0) -} - -fn read_u8(bytes: &[u8], cur: &mut usize) -> Result { - bytes - .get(*cur) - .copied() - .map(|b| { - *cur += 1; - b - }) - .ok_or(BinaryDecodeError::UnexpectedEof) -} - -fn read_u32(bytes: &[u8], cur: &mut usize) -> Result { - let end = cur.checked_add(4).ok_or(BinaryDecodeError::UnexpectedEof)?; - let slice = bytes - .get(*cur..end) - .ok_or(BinaryDecodeError::UnexpectedEof)?; - *cur = end; - Ok(u32::from_le_bytes(slice.try_into().unwrap())) -} - -fn read_i64(bytes: &[u8], cur: &mut usize) -> Result { - let end = cur.checked_add(8).ok_or(BinaryDecodeError::UnexpectedEof)?; - let slice = bytes - .get(*cur..end) - .ok_or(BinaryDecodeError::UnexpectedEof)?; - *cur = end; - Ok(i64::from_le_bytes(slice.try_into().unwrap())) -} - -fn read_u64(bytes: &[u8], cur: &mut usize) -> Result { - let end = cur.checked_add(8).ok_or(BinaryDecodeError::UnexpectedEof)?; - let slice = bytes - .get(*cur..end) - .ok_or(BinaryDecodeError::UnexpectedEof)?; - *cur = end; - Ok(u64::from_le_bytes(slice.try_into().unwrap())) -} - -fn read_f64(bytes: &[u8], cur: &mut usize) -> Result { - let end = cur.checked_add(8).ok_or(BinaryDecodeError::UnexpectedEof)?; - let slice = bytes - .get(*cur..end) - .ok_or(BinaryDecodeError::UnexpectedEof)?; - *cur = end; - Ok(f64::from_le_bytes(slice.try_into().unwrap())) -} - -fn read_bytes<'a>( - bytes: &'a [u8], - cur: &mut usize, - n: usize, -) -> Result<&'a [u8], BinaryDecodeError> { - let end = cur.checked_add(n).ok_or(BinaryDecodeError::UnexpectedEof)?; - let slice = bytes - .get(*cur..end) - .ok_or(BinaryDecodeError::UnexpectedEof)?; - *cur = end; - Ok(slice) -} - -fn decode_value(bytes: &[u8], cur: &mut usize, depth: u32) -> Result { - if depth >= MAX_DEPTH { - return Err(BinaryDecodeError::DepthLimitExceeded); - } - let raw_tag = read_u8(bytes, cur)?; - let tag = Tag::try_from(raw_tag).map_err(BinaryDecodeError::UnknownTag)?; - match tag { - 
Tag::Null => Ok(IValue::NULL), - Tag::False => Ok(false.into()), - Tag::True => Ok(true.into()), - Tag::I64 => Ok(read_i64(bytes, cur)?.into()), - Tag::U64 => Ok(read_u64(bytes, cur)?.into()), - Tag::F64 => { - let v = read_f64(bytes, cur)?; - Ok(INumber::try_from(v).map(Into::into).unwrap_or(IValue::NULL)) - } - Tag::String => { - let len = read_u32(bytes, cur)? as usize; - let raw = read_bytes(bytes, cur, len)?; - let s = std::str::from_utf8(raw).map_err(|_| BinaryDecodeError::InvalidUtf8)?; - Ok(IString::from(s).into()) - } - Tag::Object => { - let count = read_u32(bytes, cur)? as usize; - // Each entry needs at least 5 bytes: 4-byte key-len + 1-byte value tag. - let hint = count.min((bytes.len() - *cur) / 5); - let mut obj = IObject::with_capacity(hint); - for _ in 0..count { - let key_len = read_u32(bytes, cur)? as usize; - let key_raw = read_bytes(bytes, cur, key_len)?; - let key = - std::str::from_utf8(key_raw).map_err(|_| BinaryDecodeError::InvalidUtf8)?; - let val = decode_value(bytes, cur, depth + 1)?; - obj.insert(key, val); - } - Ok(obj.into()) - } - Tag::ArrayHetero => { - let count = read_u32(bytes, cur)? as usize; - // Each element needs at least 1 byte (tag). - let hint = count.min(bytes.len() - *cur); - let mut arr = IArray::with_capacity(hint).map_err(|_| BinaryDecodeError::AllocError)?; - for _ in 0..count { - let v = decode_value(bytes, cur, depth + 1)?; - arr.push(v).map_err(|_| BinaryDecodeError::AllocError)?; - } - Ok(arr.into()) - } - Tag::ArrayI8 => { - let count = read_u32(bytes, cur)? as usize; - let raw = read_bytes(bytes, cur, count)?; - let typed: &[i8] = bytemuck::try_cast_slice::(raw) - .map_err(|_| BinaryDecodeError::CastError)?; - IArray::try_from(typed) - .map(Into::into) - .map_err(|_| BinaryDecodeError::AllocError) - } - Tag::ArrayU8 => { - let count = read_u32(bytes, cur)? 
as usize; - let raw = read_bytes(bytes, cur, count)?; - IArray::try_from(raw) - .map(Into::into) - .map_err(|_| BinaryDecodeError::AllocError) - } - Tag::ArrayI16 => decode_primitive_array::(bytes, cur, 2), - Tag::ArrayU16 => decode_primitive_array::(bytes, cur, 2), - Tag::ArrayF16 => { - let count = read_u32(bytes, cur)? as usize; - let byte_len = count - .checked_mul(2) - .ok_or(BinaryDecodeError::UnexpectedEof)?; - let raw = read_bytes(bytes, cur, byte_len)?; - let vec: Vec = raw - .chunks_exact(2) - .map(|c| f16::from_le_bytes(c.try_into().unwrap())) - .collect(); - IArray::try_from(vec) - .map(Into::into) - .map_err(|_| BinaryDecodeError::AllocError) - } - Tag::ArrayBF16 => { - let count = read_u32(bytes, cur)? as usize; - let byte_len = count - .checked_mul(2) - .ok_or(BinaryDecodeError::UnexpectedEof)?; - let raw = read_bytes(bytes, cur, byte_len)?; - let vec: Vec = raw - .chunks_exact(2) - .map(|c| bf16::from_le_bytes(c.try_into().unwrap())) - .collect(); - IArray::try_from(vec) - .map(Into::into) - .map_err(|_| BinaryDecodeError::AllocError) - } - Tag::ArrayI32 => decode_primitive_array::(bytes, cur, 4), - Tag::ArrayU32 => decode_primitive_array::(bytes, cur, 4), - Tag::ArrayF32 => decode_primitive_array::(bytes, cur, 4), - Tag::ArrayI64 => decode_primitive_array::(bytes, cur, 8), - Tag::ArrayU64 => decode_primitive_array::(bytes, cur, 8), - Tag::ArrayF64 => decode_primitive_array::(bytes, cur, 8), - } -} - -trait FromLeBytes: Copy + Sized + 'static { - fn from_le_bytes_slice(s: &[u8]) -> Self; -} - -macro_rules! 
impl_from_le_bytes { - ($ty:ty, $size:expr) => { - impl FromLeBytes for $ty { - fn from_le_bytes_slice(s: &[u8]) -> Self { - Self::from_le_bytes(s.try_into().unwrap()) - } - } - }; -} - -impl_from_le_bytes!(i16, 2); -impl_from_le_bytes!(u16, 2); -impl_from_le_bytes!(i32, 4); -impl_from_le_bytes!(u32, 4); -impl_from_le_bytes!(f32, 4); -impl_from_le_bytes!(i64, 8); -impl_from_le_bytes!(u64, 8); -impl_from_le_bytes!(f64, 8); - -fn decode_primitive_array( - bytes: &[u8], - cur: &mut usize, - elem_size: usize, -) -> Result -where - T: FromLeBytes, - IArray: TryFrom>, -{ - let count = read_u32(bytes, cur)? as usize; - let byte_len = count - .checked_mul(elem_size) - .ok_or(BinaryDecodeError::UnexpectedEof)?; - let raw = read_bytes(bytes, cur, byte_len)?; - let mut vec: Vec = Vec::with_capacity(count); - for chunk in raw.chunks_exact(elem_size) { - vec.push(T::from_le_bytes_slice(chunk)); - } - IArray::try_from(vec) - .map(Into::into) - .map_err(|_| BinaryDecodeError::AllocError) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::array::ArraySliceRef; - use crate::IValueDeserSeed; - use serde::de::DeserializeSeed; - - fn round_trip(value: &IValue) -> IValue { - let bytes = encode(value); - decode(&bytes).expect("decode should succeed") - } - - #[test] - fn test_null() { - let v: IValue = IValue::NULL; - assert_eq!(round_trip(&v), v); - } - - #[test] - fn test_bool() { - let t: IValue = true.into(); - let f: IValue = false.into(); - assert_eq!(round_trip(&t), t); - assert_eq!(round_trip(&f), f); - } - - #[test] - fn test_numbers() { - let cases: Vec = vec![ - 0i64.into(), - 42i64.into(), - (-1i64).into(), - i64::MAX.into(), - u64::MAX.into(), - 1.5f64.into(), - (-3.14f64).into(), - ]; - for v in &cases { - assert_eq!(round_trip(v), *v); - } - } - - #[test] - fn test_string() { - let v: IValue = IString::from("hello world").into(); - assert_eq!(round_trip(&v), v); - } - - #[test] - fn test_heterogeneous_array() { - let mut arr = IArray::new(); - 
arr.push(IValue::NULL).unwrap(); - arr.push(IValue::from(true)).unwrap(); - arr.push(IValue::from(42i64)).unwrap(); - arr.push(IValue::from(IString::from("hi"))).unwrap(); - let v: IValue = arr.into(); - let result = round_trip(&v); - let result_arr = result.as_array().unwrap(); - assert!(matches!( - result_arr.as_slice(), - ArraySliceRef::Heterogeneous(_) - )); - assert_eq!(result_arr.len(), 4); - } - - #[test] - fn test_f32_array_preserves_tag() { - let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( - crate::FloatType::F32, - ))); - let json = r#"[1.5, 2.5, 3.5]"#; - let mut de = serde_json::Deserializer::from_str(json); - let v = seed.deserialize(&mut de).unwrap(); - assert!(matches!( - v.as_array().unwrap().as_slice(), - ArraySliceRef::F32(_) - )); - - let result = round_trip(&v); - let arr = result.as_array().unwrap(); - assert!( - matches!(arr.as_slice(), ArraySliceRef::F32(_)), - "F32 tag should survive encode/decode" - ); - assert_eq!(arr.len(), 3); - } - - #[test] - fn test_f16_array_preserves_tag() { - let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( - crate::FloatType::F16, - ))); - let json = r#"[0.5, 1.0, 1.5]"#; - let mut de = serde_json::Deserializer::from_str(json); - let v = seed.deserialize(&mut de).unwrap(); - assert!(matches!( - v.as_array().unwrap().as_slice(), - ArraySliceRef::F16(_) - )); - - let result = round_trip(&v); - let arr = result.as_array().unwrap(); - assert!( - matches!(arr.as_slice(), ArraySliceRef::F16(_)), - "F16 tag should survive encode/decode" - ); - } - - #[test] - fn test_bf16_array_preserves_tag() { - let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( - crate::FloatType::BF16, - ))); - let json = r#"[1.0, 2.0, 3.0]"#; - let mut de = serde_json::Deserializer::from_str(json); - let v = seed.deserialize(&mut de).unwrap(); - - let result = round_trip(&v); - let arr = result.as_array().unwrap(); - assert!( - matches!(arr.as_slice(), ArraySliceRef::BF16(_)), - "BF16 
tag should survive encode/decode" - ); - } - - #[test] - fn test_f64_array_preserves_tag() { - let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( - crate::FloatType::F64, - ))); - let json = r#"[1.0, 2.0, 3.0]"#; - let mut de = serde_json::Deserializer::from_str(json); - let v = seed.deserialize(&mut de).unwrap(); - - let result = round_trip(&v); - let arr = result.as_array().unwrap(); - assert!( - matches!(arr.as_slice(), ArraySliceRef::F64(_)), - "F64 tag should survive encode/decode" - ); - } - - #[test] - fn test_nested_object_with_typed_arrays() { - let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( - crate::FloatType::F32, - ))); - let json = r#"{"a": [1.0, 2.0], "b": "text", "c": [3.0, 4.0]}"#; - let mut de = serde_json::Deserializer::from_str(json); - let v = seed.deserialize(&mut de).unwrap(); - - let result = round_trip(&v); - let obj = result.as_object().unwrap(); - let a = obj.get("a").unwrap().as_array().unwrap(); - let c = obj.get("c").unwrap().as_array().unwrap(); - assert!( - matches!(a.as_slice(), ArraySliceRef::F32(_)), - "nested F32 array 'a' should survive" - ); - assert!( - matches!(c.as_slice(), ArraySliceRef::F32(_)), - "nested F32 array 'c' should survive" - ); - assert_eq!(obj.get("b").unwrap().as_string().unwrap().as_str(), "text"); - } - - #[test] - fn test_truncated_input_returns_error() { - let v: IValue = 42i64.into(); - let bytes = encode(&v); - for len in 0..bytes.len() { - assert!( - decode(&bytes[..len]).is_err(), - "truncated at {len} should fail" - ); - } - } - - #[test] - fn test_unknown_tag_returns_error() { - let bytes = [0xFF]; - assert_eq!(decode(&bytes), Err(BinaryDecodeError::UnknownTag(0xFF))); - } - - #[test] - fn test_object_huge_count_does_not_oom() { - // TAG_OBJECT with count=0x94940606 (~2.5 billion) followed by no actual data. - // Must return an error, not OOM. 
- let bytes = [0x07, 0x06, 0x06, 0x94, 0x94]; - assert!(decode(&bytes).is_err()); - } - - #[test] - fn test_hetero_array_huge_count_does_not_oom() { - // TAG_ARRAY_HETERO with count=0xFFFFFFFF followed by no data. - let bytes = [0x08, 0xFF, 0xFF, 0xFF, 0xFF]; - assert!(decode(&bytes).is_err()); - } - - #[test] - fn test_depth_limit() { - // Build MAX_DEPTH+1 levels of nested single-element hetero arrays. - // Each level: ArrayHetero tag (1) + count=1 (4) = 5 bytes, then recurse. - let mut bytes: Vec = Vec::new(); - for _ in 0..=super::MAX_DEPTH { - bytes.push(super::Tag::ArrayHetero as u8); - bytes.extend_from_slice(&1u32.to_le_bytes()); - } - bytes.push(super::Tag::Null as u8); - assert_eq!(decode(&bytes), Err(BinaryDecodeError::DepthLimitExceeded)); - } - - #[test] - fn test_depth_limit_exact() { - // MAX_DEPTH-1 array wrappers: the leaf is decoded at depth=MAX_DEPTH-1, which is allowed. - let mut bytes: Vec = Vec::new(); - for _ in 0..super::MAX_DEPTH - 1 { - bytes.push(super::Tag::ArrayHetero as u8); - bytes.extend_from_slice(&1u32.to_le_bytes()); - } - bytes.push(super::Tag::Null as u8); - assert!(decode(&bytes).is_ok()); - } -} diff --git a/src/cbor.rs b/src/cbor.rs new file mode 100644 index 0000000..f920eaa --- /dev/null +++ b/src/cbor.rs @@ -0,0 +1,476 @@ +//! CBOR encode/decode for [`IValue`], preserving typed array tags via RFC 8746. +//! +//! Typed homogeneous arrays are encoded as `Tag(rfc8746_tag, Bytes(raw_le_bytes))`. +//! BF16 arrays use private tag `0x10000` (no standard RFC 8746 equivalent). +//! +//! Use [`encode`] / [`decode`] for raw CBOR, or [`encode_compressed`] / +//! [`decode_compressed`] for zstd-compressed CBOR. + +use std::fmt; + +use ciborium::value::{Integer, Value}; +use half::{bf16, f16}; + +use crate::array::ArraySliceRef; +use crate::{DestructuredRef, IArray, INumber, IObject, IString, IValue}; + +// RFC 8746 typed array tags (little-endian variants where applicable). 
+// Bit layout within the tag byte: 0b010_f_s_e_ll +// f=0 integer, f=1 float | s=0 unsigned, s=1 signed | e=1 little-endian | ll = width +const TAG_U8: u64 = 64; // uint8 (endianness irrelevant) +const TAG_I8: u64 = 72; // sint8 (endianness irrelevant) +const TAG_U16_LE: u64 = 69; +const TAG_I16_LE: u64 = 77; +const TAG_F16_LE: u64 = 84; // IEEE 754 binary16 LE +const TAG_U32_LE: u64 = 70; +const TAG_I32_LE: u64 = 78; +const TAG_F32_LE: u64 = 85; // IEEE 754 binary32 LE +const TAG_U64_LE: u64 = 71; +const TAG_I64_LE: u64 = 79; +const TAG_F64_LE: u64 = 86; // IEEE 754 binary64 LE + +/// Private CBOR tag for BF16 arrays (no RFC 8746 standard tag exists for BF16). +const TAG_BF16_LE: u64 = 0x10000; + +const MAX_DEPTH: u32 = 128; + +/// Error returned when CBOR decoding fails. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CborDecodeError { + /// The CBOR stream was malformed or could not be parsed. + DecodeError, + /// An unrecognised CBOR tag was encountered where a typed array was expected. + UnknownTag(u64), + /// A CBOR map key was not a text string. + InvalidValue, + /// An array allocation failed. + AllocError, + /// Nesting depth exceeded the limit. + DepthLimitExceeded, + /// Failed to reinterpret a byte slice. + CastError, + /// Zstd decompression failed. 
+ DecompressError, +} + +impl fmt::Display for CborDecodeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CborDecodeError::DecodeError => write!(f, "CBOR decode error"), + CborDecodeError::UnknownTag(t) => write!(f, "unknown CBOR tag: {t}"), + CborDecodeError::InvalidValue => write!(f, "unexpected CBOR value type"), + CborDecodeError::AllocError => write!(f, "memory allocation failed"), + CborDecodeError::DepthLimitExceeded => write!(f, "nesting depth limit exceeded"), + CborDecodeError::CastError => write!(f, "failed to cast byte slice"), + CborDecodeError::DecompressError => write!(f, "zstd decompression failed"), + } + } +} + +impl std::error::Error for CborDecodeError {} + +// ── Encode ──────────────────────────────────────────────────────────────────── + +/// Encodes an [`IValue`] tree into CBOR bytes, preserving typed array tags via +/// RFC 8746. +pub fn encode(value: &IValue) -> Vec<u8> { + let cbor = ivalue_to_cbor(value); + let mut out = Vec::new(); + ciborium::into_writer(&cbor, &mut out).expect("write to Vec never fails"); + out +} + +/// Encodes an [`IValue`] tree as CBOR and then compresses it with zstd (level 3). +/// +/// Use [`decode_compressed`] to decode the output. 
+pub fn encode_compressed(value: &IValue) -> Vec { + let raw = encode(value); + zstd::bulk::Compressor::default() + .compress(&raw) + .expect("zstd compress") +} + +fn ivalue_to_cbor(value: &IValue) -> Value { + match value.destructure_ref() { + DestructuredRef::Null => Value::Null, + DestructuredRef::Bool(b) => Value::Bool(b), + DestructuredRef::Number(n) => number_to_cbor(n), + DestructuredRef::String(s) => Value::Text(s.as_str().to_owned()), + DestructuredRef::Array(a) => array_to_cbor(a), + DestructuredRef::Object(o) => object_to_cbor(o), + } +} + +fn number_to_cbor(n: &INumber) -> Value { + if n.has_decimal_point() { + Value::Float(n.to_f64().unwrap()) + } else if let Some(i) = n.to_i64() { + Value::Integer(Integer::from(i)) + } else { + Value::Integer(Integer::from(n.to_u64().unwrap())) + } +} + +fn array_to_cbor(a: &IArray) -> Value { + match a.as_slice() { + ArraySliceRef::Heterogeneous(s) => Value::Array(s.iter().map(ivalue_to_cbor).collect()), + ArraySliceRef::I8(s) => typed_le_tag(TAG_I8, s), + ArraySliceRef::U8(s) => Value::Tag(TAG_U8, Box::new(Value::Bytes(s.to_vec()))), + ArraySliceRef::I16(s) => typed_le_tag(TAG_I16_LE, s), + ArraySliceRef::U16(s) => typed_le_tag(TAG_U16_LE, s), + ArraySliceRef::F16(s) => typed_le_tag(TAG_F16_LE, s), + ArraySliceRef::BF16(s) => typed_le_tag(TAG_BF16_LE, s), + ArraySliceRef::I32(s) => typed_le_tag(TAG_I32_LE, s), + ArraySliceRef::U32(s) => typed_le_tag(TAG_U32_LE, s), + ArraySliceRef::F32(s) => typed_le_tag(TAG_F32_LE, s), + ArraySliceRef::I64(s) => typed_le_tag(TAG_I64_LE, s), + ArraySliceRef::U64(s) => typed_le_tag(TAG_U64_LE, s), + ArraySliceRef::F64(s) => typed_le_tag(TAG_F64_LE, s), + } +} + +fn object_to_cbor(o: &IObject) -> Value { + Value::Map( + o.iter() + .map(|(k, v)| (Value::Text(k.as_str().to_owned()), ivalue_to_cbor(v))) + .collect(), + ) +} + +trait ToLeBytes { + fn to_le_bytes_vec(&self) -> impl AsRef<[u8]>; +} + +macro_rules! 
impl_to_le_bytes { + ($ty:ty) => { + impl ToLeBytes for $ty { + fn to_le_bytes_vec(&self) -> impl AsRef<[u8]> { + self.to_le_bytes() + } + } + }; +} + +impl_to_le_bytes!(i8); +impl_to_le_bytes!(i16); +impl_to_le_bytes!(u16); +impl_to_le_bytes!(f16); +impl_to_le_bytes!(bf16); +impl_to_le_bytes!(i32); +impl_to_le_bytes!(u32); +impl_to_le_bytes!(f32); +impl_to_le_bytes!(i64); +impl_to_le_bytes!(u64); +impl_to_le_bytes!(f64); + +fn typed_le_tag(tag: u64, s: &[T]) -> Value { + let mut bytes = Vec::new(); + for v in s { + bytes.extend_from_slice(v.to_le_bytes_vec().as_ref()); + } + Value::Tag(tag, Box::new(Value::Bytes(bytes))) +} + +// ── Decode ──────────────────────────────────────────────────────────────────── + +/// Decodes an [`IValue`] tree from CBOR bytes produced by [`encode`]. +pub fn decode(bytes: &[u8]) -> Result { + let cbor: Value = + ciborium::from_reader(bytes).map_err(|_| CborDecodeError::DecodeError)?; + cbor_to_ivalue(cbor, 0) +} + +/// Decodes an [`IValue`] tree from bytes produced by [`encode_compressed`]. 
+pub fn decode_compressed(bytes: &[u8]) -> Result { + let raw = zstd::decode_all(bytes).map_err(|_| CborDecodeError::DecompressError)?; + decode(&raw) +} + +fn cbor_to_ivalue(val: Value, depth: u32) -> Result { + if depth >= MAX_DEPTH { + return Err(CborDecodeError::DepthLimitExceeded); + } + match val { + Value::Null => Ok(IValue::NULL), + Value::Bool(b) => Ok(b.into()), + Value::Float(f) => { + Ok(INumber::try_from(f).map(Into::into).unwrap_or(IValue::NULL)) + } + Value::Integer(i) => { + if let Ok(v) = i64::try_from(i.clone()) { + Ok(IValue::from(v)) + } else if let Ok(v) = u64::try_from(i) { + Ok(IValue::from(v)) + } else { + Err(CborDecodeError::InvalidValue) + } + } + Value::Text(s) => Ok(IString::from(s.as_str()).into()), + Value::Array(arr) => { + let hint = arr.len().min(1024); + let mut out = + IArray::with_capacity(hint).map_err(|_| CborDecodeError::AllocError)?; + for v in arr { + let iv = cbor_to_ivalue(v, depth + 1)?; + out.push(iv).map_err(|_| CborDecodeError::AllocError)?; + } + Ok(out.into()) + } + Value::Map(entries) => { + let mut obj = IObject::with_capacity(entries.len()); + for (k, v) in entries { + let key = match k { + Value::Text(s) => s, + _ => return Err(CborDecodeError::InvalidValue), + }; + let val = cbor_to_ivalue(v, depth + 1)?; + obj.insert(&key, val); + } + Ok(obj.into()) + } + Value::Tag(tag, inner) => decode_typed_array(tag, *inner), + Value::Bytes(_) => Err(CborDecodeError::InvalidValue), + _ => Err(CborDecodeError::InvalidValue), + } +} + +fn decode_typed_array(tag: u64, inner: Value) -> Result { + let bytes = match inner { + Value::Bytes(b) => b, + _ => return Err(CborDecodeError::InvalidValue), + }; + match tag { + TAG_U8 => IArray::try_from(bytes.as_slice()) + .map(Into::into) + .map_err(|_| CborDecodeError::AllocError), + TAG_I8 => decode_le_array::(&bytes, 1), + TAG_U16_LE => decode_le_array::(&bytes, 2), + TAG_I16_LE => decode_le_array::(&bytes, 2), + TAG_F16_LE => decode_le_array::(&bytes, 2), + TAG_BF16_LE => 
decode_le_array::(&bytes, 2), + TAG_U32_LE => decode_le_array::(&bytes, 4), + TAG_I32_LE => decode_le_array::(&bytes, 4), + TAG_F32_LE => decode_le_array::(&bytes, 4), + TAG_U64_LE => decode_le_array::(&bytes, 8), + TAG_I64_LE => decode_le_array::(&bytes, 8), + TAG_F64_LE => decode_le_array::(&bytes, 8), + other => Err(CborDecodeError::UnknownTag(other)), + } +} + +trait FromLeBytes: Copy + Sized + 'static { + fn from_le_bytes_slice(s: &[u8]) -> Self; +} + +macro_rules! impl_from_le_bytes { + ($ty:ty) => { + impl FromLeBytes for $ty { + fn from_le_bytes_slice(s: &[u8]) -> Self { + Self::from_le_bytes(s.try_into().unwrap()) + } + } + }; +} + +impl_from_le_bytes!(i8); +impl_from_le_bytes!(i16); +impl_from_le_bytes!(u16); +impl_from_le_bytes!(f16); +impl_from_le_bytes!(bf16); +impl_from_le_bytes!(i32); +impl_from_le_bytes!(u32); +impl_from_le_bytes!(f32); +impl_from_le_bytes!(i64); +impl_from_le_bytes!(u64); +impl_from_le_bytes!(f64); + +fn decode_le_array(bytes: &[u8], elem_size: usize) -> Result +where + T: FromLeBytes, + IArray: TryFrom>, +{ + let mut vec: Vec = Vec::with_capacity(bytes.len() / elem_size); + for chunk in bytes.chunks_exact(elem_size) { + vec.push(T::from_le_bytes_slice(chunk)); + } + IArray::try_from(vec) + .map(Into::into) + .map_err(|_| CborDecodeError::AllocError) +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use crate::array::ArraySliceRef; + use crate::IValueDeserSeed; + use serde::de::DeserializeSeed; + + fn round_trip(value: &IValue) -> IValue { + let bytes = encode(value); + decode(&bytes).expect("decode should succeed") + } + + #[test] + fn test_null() { + assert_eq!(round_trip(&IValue::NULL), IValue::NULL); + } + + #[test] + fn test_bool() { + let t: IValue = true.into(); + let f: IValue = false.into(); + assert_eq!(round_trip(&t), t); + assert_eq!(round_trip(&f), f); + } + + #[test] + fn test_numbers() { + let cases: Vec = vec![ + 0i64.into(), + 
42i64.into(), + (-1i64).into(), + i64::MAX.into(), + u64::MAX.into(), + 1.5f64.into(), + (-3.14f64).into(), + ]; + for v in &cases { + assert_eq!(round_trip(v), *v); + } + } + + #[test] + fn test_string() { + let v: IValue = IString::from("hello world").into(); + assert_eq!(round_trip(&v), v); + } + + #[test] + fn test_heterogeneous_array() { + let mut arr = IArray::new(); + arr.push(IValue::NULL).unwrap(); + arr.push(IValue::from(true)).unwrap(); + arr.push(IValue::from(42i64)).unwrap(); + arr.push(IValue::from(IString::from("hi"))).unwrap(); + let v: IValue = arr.into(); + let result = round_trip(&v); + let result_arr = result.as_array().unwrap(); + assert!(matches!(result_arr.as_slice(), ArraySliceRef::Heterogeneous(_))); + assert_eq!(result_arr.len(), 4); + } + + #[test] + fn test_f32_array_preserves_tag() { + let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( + crate::FloatType::F32, + ))); + let json = r#"[1.5, 2.5, 3.5]"#; + let mut de = serde_json::Deserializer::from_str(json); + let v = seed.deserialize(&mut de).unwrap(); + assert!(matches!(v.as_array().unwrap().as_slice(), ArraySliceRef::F32(_))); + + let result = round_trip(&v); + let arr = result.as_array().unwrap(); + assert!( + matches!(arr.as_slice(), ArraySliceRef::F32(_)), + "F32 tag should survive CBOR encode/decode" + ); + assert_eq!(arr.len(), 3); + } + + #[test] + fn test_f16_array_preserves_tag() { + let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( + crate::FloatType::F16, + ))); + let json = r#"[0.5, 1.0, 1.5]"#; + let mut de = serde_json::Deserializer::from_str(json); + let v = seed.deserialize(&mut de).unwrap(); + assert!(matches!(v.as_array().unwrap().as_slice(), ArraySliceRef::F16(_))); + + let result = round_trip(&v); + let arr = result.as_array().unwrap(); + assert!( + matches!(arr.as_slice(), ArraySliceRef::F16(_)), + "F16 tag should survive CBOR encode/decode" + ); + } + + #[test] + fn test_bf16_array_preserves_tag() { + let seed = 
IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( + crate::FloatType::BF16, + ))); + let json = r#"[1.0, 2.0, 3.0]"#; + let mut de = serde_json::Deserializer::from_str(json); + let v = seed.deserialize(&mut de).unwrap(); + + let result = round_trip(&v); + let arr = result.as_array().unwrap(); + assert!( + matches!(arr.as_slice(), ArraySliceRef::BF16(_)), + "BF16 tag should survive CBOR encode/decode" + ); + } + + #[test] + fn test_f64_array_preserves_tag() { + let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( + crate::FloatType::F64, + ))); + let json = r#"[1.0, 2.0, 3.0]"#; + let mut de = serde_json::Deserializer::from_str(json); + let v = seed.deserialize(&mut de).unwrap(); + + let result = round_trip(&v); + let arr = result.as_array().unwrap(); + assert!( + matches!(arr.as_slice(), ArraySliceRef::F64(_)), + "F64 tag should survive CBOR encode/decode" + ); + } + + #[test] + fn test_nested_object_with_typed_arrays() { + let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( + crate::FloatType::F32, + ))); + let json = r#"{"a": [1.0, 2.0], "b": "text", "c": [3.0, 4.0]}"#; + let mut de = serde_json::Deserializer::from_str(json); + let v = seed.deserialize(&mut de).unwrap(); + + let result = round_trip(&v); + let obj = result.as_object().unwrap(); + let a = obj.get("a").unwrap().as_array().unwrap(); + let c = obj.get("c").unwrap().as_array().unwrap(); + assert!(matches!(a.as_slice(), ArraySliceRef::F32(_))); + assert!(matches!(c.as_slice(), ArraySliceRef::F32(_))); + assert_eq!(obj.get("b").unwrap().as_string().unwrap().as_str(), "text"); + } + + #[test] + fn test_compressed_round_trip() { + let seed = IValueDeserSeed::new(Some(crate::FPHAConfig::new_with_type( + crate::FloatType::F32, + ))); + let json = r#"[1.5, 2.5, 3.5, 4.5, 5.5]"#; + let mut de = serde_json::Deserializer::from_str(json); + let v = seed.deserialize(&mut de).unwrap(); + + let bytes = encode_compressed(&v); + let result = 
decode_compressed(&bytes).expect("decode_compressed should succeed"); + assert!(matches!(result.as_array().unwrap().as_slice(), ArraySliceRef::F32(_))); + } + + #[test] + fn test_small_integers_compact() { + // Small integers should be encoded more compactly in CBOR than custom binary. + let v: IValue = 42i64.into(); + let cbor_bytes = encode(&v); + // 42 fits in a single CBOR byte (major type 0, value 24 triggers 1-byte header + 1-byte value) + // Either way it's much smaller than the custom binary's fixed 9 bytes. + assert!(cbor_bytes.len() < 9, "expected CBOR to be smaller than 9-byte fixed encoding"); + } +} diff --git a/src/lib.rs b/src/lib.rs index 4315245..ecebc69 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -49,11 +49,11 @@ pub use value::{ BoolMut, Destructured, DestructuredMut, DestructuredRef, IValue, ValueIndex, ValueType, }; -/// Binary encode/decode for [`IValue`], preserving typed array tags. -pub mod binary; +/// CBOR encode/decode for [`IValue`] using RFC 8746 typed array tags. +pub mod cbor; mod de; mod ser; -pub use binary::{decode, decode_compressed, encode, encode_compressed, BinaryDecodeError}; +pub use cbor::{decode, decode_compressed, encode, encode_compressed, CborDecodeError}; pub use de::{from_value, FPHAConfig, IValueDeserSeed}; pub use ser::to_value; From 2a547e2056d85b2105ac4cdba60679b2a9ab0a6e Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Sun, 22 Feb 2026 11:50:40 +0200 Subject: [PATCH 20/33] . --- examples/size_profile.rs | 205 --------------------------------------- 1 file changed, 205 deletions(-) delete mode 100644 examples/size_profile.rs diff --git a/examples/size_profile.rs b/examples/size_profile.rs deleted file mode 100644 index 3bbfbe4..0000000 --- a/examples/size_profile.rs +++ /dev/null @@ -1,205 +0,0 @@ -/// Compares CBOR and zstd-compressed CBOR sizes vs JSON for representative IValue documents. 
-/// -/// Run with: -/// cargo run --example size_profile -use ijson::{cbor, FPHAConfig, FloatType, IValue, IValueDeserSeed}; -use serde::de::DeserializeSeed; -use serde::Deserialize; - -struct Case { - name: &'static str, - value: IValue, -} - -fn json_size(v: &IValue) -> usize { - serde_json::to_string(v).unwrap().len() -} - -fn cbor_size(v: &IValue) -> usize { - cbor::encode(v).len() -} - -fn cbor_zstd_size(v: &IValue) -> usize { - cbor::encode_compressed(v).len() -} - -fn from_json(s: &str) -> IValue { - IValue::deserialize(&mut serde_json::Deserializer::from_str(s)).unwrap() -} - -fn from_json_fpha(s: &str, ft: FloatType) -> IValue { - let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(ft))); - seed.deserialize(&mut serde_json::Deserializer::from_str(s)) - .unwrap() -} - -fn make_cases() -> Vec { - // ── typed float arrays ────────────────────────────────────────────────── - let n = 1000usize; - let fp32_json = format!( - "[{}]", - (0..n) - .map(|i| format!("{:.6}", i as f32 * 0.001)) - .collect::>() - .join(",") - ); - let fp64_json = format!( - "[{}]", - (0..n) - .map(|i| format!("{:.15}", i as f64 * 0.001)) - .collect::>() - .join(",") - ); - - // ── string-heavy object ───────────────────────────────────────────────── - let string_obj_json = format!( - "{{{}}}", - (0..50) - .map(|i| format!("\"key_{i}\":\"value_{i}_some_longer_string_here\"")) - .collect::>() - .join(",") - ); - - // ── heterogeneous object ──────────────────────────────────────────────── - let hetero_json = r#"{ - "name": "Alice", - "age": 30, - "scores": [1, 2, 3, null, true, "bonus"], - "meta": {"active": true, "level": 42} - }"#; - - // ── nested typed arrays ───────────────────────────────────────────────── - let small_fp32 = format!( - "[{}]", - (0..100) - .map(|i| format!("{:.4}", i as f32 * 0.1)) - .collect::>() - .join(",") - ); - let nested_fp32_json = format!("{{\"a\":{small_fp32},\"b\":{small_fp32},\"label\":\"test\"}}"); - - // ── big mixed JSON: array of 200 
records, each with scalars + fp32 embedding ─ - // Simulates a realistic workload: repeated schema, repeated key names, - // mix of strings / integers / booleans, and a typed float sub-array. - let big_mixed_json = { - let records: Vec = (0..200) - .map(|i| { - let embedding: String = (0..32) - .map(|j| format!("{:.6}", (i as f32 * 0.01 + j as f32 * 0.001).sin())) - .collect::>() - .join(","); - format!( - r#"{{"id":{i},"name":"user_{i}","active":{},"score":{:.4},"tags":["alpha","beta","gamma"],"embedding":[{embedding}]}}"#, - i % 2 == 0, - i as f64 * 1.5, - ) - }) - .collect(); - format!("[{}]", records.join(",")) - }; - - // ── repeated strings: 500 objects sharing the same schema and many identical values ─ - // Targets RED-141886: string-reuse gap between in-memory and RDB representation. - // Keys ("status", "region", "tier", "owner") and values ("active", "us-east-1", - // "premium", "team-a") repeat across every record, stressing string deduplication. - let repeated_strings_json = { - let statuses = ["active", "inactive", "pending"]; - let regions = ["us-east-1", "eu-west-1", "ap-southeast-1"]; - let tiers = ["free", "standard", "premium"]; - let owners = ["team-a", "team-b", "team-c", "team-d"]; - let records: Vec = (0..500) - .map(|i| { - format!( - r#"{{"id":{i},"status":"{}","region":"{}","tier":"{}","owner":"{}","count":{}}}"#, - statuses[i % statuses.len()], - regions[i % regions.len()], - tiers[i % tiers.len()], - owners[i % owners.len()], - i * 10, - ) - }) - .collect(); - format!("[{}]", records.join(",")) - }; - - vec![ - Case { - name: "FP32 array (1000 elements)", - value: from_json_fpha(&fp32_json, FloatType::F32), - }, - Case { - name: "FP64 array (1000 elements)", - value: from_json_fpha(&fp64_json, FloatType::F64), - }, - Case { - name: "Heterogeneous array (1000 numbers, no hint)", - value: from_json(&fp32_json), - }, - Case { - name: "String-heavy object (50 keys)", - value: from_json(&string_obj_json), - }, - Case { - name: "Mixed object 
(hetero)", - value: from_json(hetero_json), - }, - Case { - name: "Nested FP32 arrays + string", - value: from_json_fpha(&nested_fp32_json, FloatType::F32), - }, - Case { - name: "Big mixed JSON (200 records, hetero embed)", - value: from_json(&big_mixed_json), - }, - Case { - name: "Repeated strings (500 records, RED-141886)", - value: from_json(&repeated_strings_json), - }, - ] -} - -fn pct(new: usize, base: usize) -> String { - let p = (new as f64 - base as f64) / base as f64 * 100.0; - let sign = if p < 0.0 { "" } else { "+" }; - format!("{sign}{p:.1}%") -} - -fn main() { - let cases = make_cases(); - - let name_w = 42usize; - let col_w = 12usize; - - println!( - "\n{:col_w$} {:>col_w$} {:>col_w$} {:>col_w$}", - "Document", "JSON (B)", "cbor (B)", "cbor Δ%", "cbor+z (B)", - ); - println!( - "{:col_w$}", - "", "cbor+z Δ%", - ); - println!("{}", "-".repeat(name_w + col_w * 5 + 4)); - - for c in &cases { - let j = json_size(&c.value); - let cb = cbor_size(&c.value); - let cbz = cbor_zstd_size(&c.value); - println!( - "{:col_w$} {:>col_w$} {:>col_w$} {:>col_w$}", - c.name, - j, - cb, - pct(cb, j), - cbz, - ); - println!( - "{:col_w$}", - "", - pct(cbz, j), - ); - println!(); - } - - println!("Δ%: relative to JSON size. Negative = smaller than JSON."); - println!(); -} From b40b8d881bf370cf4af131e76209f6cc9c7c5169 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Sun, 22 Feb 2026 11:52:53 +0200 Subject: [PATCH 21/33] fmt --- src/cbor.rs | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/cbor.rs b/src/cbor.rs index f920eaa..acef892 100644 --- a/src/cbor.rs +++ b/src/cbor.rs @@ -175,8 +175,7 @@ fn typed_le_tag(tag: u64, s: &[T]) -> Value { /// Decodes an [`IValue`] tree from CBOR bytes produced by [`encode`]. 
pub fn decode(bytes: &[u8]) -> Result { - let cbor: Value = - ciborium::from_reader(bytes).map_err(|_| CborDecodeError::DecodeError)?; + let cbor: Value = ciborium::from_reader(bytes).map_err(|_| CborDecodeError::DecodeError)?; cbor_to_ivalue(cbor, 0) } @@ -193,9 +192,7 @@ fn cbor_to_ivalue(val: Value, depth: u32) -> Result { match val { Value::Null => Ok(IValue::NULL), Value::Bool(b) => Ok(b.into()), - Value::Float(f) => { - Ok(INumber::try_from(f).map(Into::into).unwrap_or(IValue::NULL)) - } + Value::Float(f) => Ok(INumber::try_from(f).map(Into::into).unwrap_or(IValue::NULL)), Value::Integer(i) => { if let Ok(v) = i64::try_from(i.clone()) { Ok(IValue::from(v)) @@ -208,8 +205,7 @@ fn cbor_to_ivalue(val: Value, depth: u32) -> Result { Value::Text(s) => Ok(IString::from(s.as_str()).into()), Value::Array(arr) => { let hint = arr.len().min(1024); - let mut out = - IArray::with_capacity(hint).map_err(|_| CborDecodeError::AllocError)?; + let mut out = IArray::with_capacity(hint).map_err(|_| CborDecodeError::AllocError)?; for v in arr { let iv = cbor_to_ivalue(v, depth + 1)?; out.push(iv).map_err(|_| CborDecodeError::AllocError)?; @@ -357,7 +353,10 @@ mod tests { let v: IValue = arr.into(); let result = round_trip(&v); let result_arr = result.as_array().unwrap(); - assert!(matches!(result_arr.as_slice(), ArraySliceRef::Heterogeneous(_))); + assert!(matches!( + result_arr.as_slice(), + ArraySliceRef::Heterogeneous(_) + )); assert_eq!(result_arr.len(), 4); } @@ -369,7 +368,10 @@ mod tests { let json = r#"[1.5, 2.5, 3.5]"#; let mut de = serde_json::Deserializer::from_str(json); let v = seed.deserialize(&mut de).unwrap(); - assert!(matches!(v.as_array().unwrap().as_slice(), ArraySliceRef::F32(_))); + assert!(matches!( + v.as_array().unwrap().as_slice(), + ArraySliceRef::F32(_) + )); let result = round_trip(&v); let arr = result.as_array().unwrap(); @@ -388,7 +390,10 @@ mod tests { let json = r#"[0.5, 1.0, 1.5]"#; let mut de = serde_json::Deserializer::from_str(json); let v = 
seed.deserialize(&mut de).unwrap(); - assert!(matches!(v.as_array().unwrap().as_slice(), ArraySliceRef::F16(_))); + assert!(matches!( + v.as_array().unwrap().as_slice(), + ArraySliceRef::F16(_) + )); let result = round_trip(&v); let arr = result.as_array().unwrap(); @@ -461,7 +466,10 @@ mod tests { let bytes = encode_compressed(&v); let result = decode_compressed(&bytes).expect("decode_compressed should succeed"); - assert!(matches!(result.as_array().unwrap().as_slice(), ArraySliceRef::F32(_))); + assert!(matches!( + result.as_array().unwrap().as_slice(), + ArraySliceRef::F32(_) + )); } #[test] @@ -471,6 +479,9 @@ mod tests { let cbor_bytes = encode(&v); // 42 fits in a single CBOR byte (major type 0, value 24 triggers 1-byte header + 1-byte value) // Either way it's much smaller than the custom binary's fixed 9 bytes. - assert!(cbor_bytes.len() < 9, "expected CBOR to be smaller than 9-byte fixed encoding"); + assert!( + cbor_bytes.len() < 9, + "expected CBOR to be smaller than 9-byte fixed encoding" + ); } } From 043b76ccdbdd22f3a48b4ca945cbc6f3d9e54108 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Sun, 22 Feb 2026 12:17:41 +0200 Subject: [PATCH 22/33] CR --- src/cbor.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/cbor.rs b/src/cbor.rs index acef892..caf0a1b 100644 --- a/src/cbor.rs +++ b/src/cbor.rs @@ -285,6 +285,9 @@ where T: FromLeBytes, IArray: TryFrom>, { + if bytes.len() % elem_size != 0 { + return Err(CborDecodeError::CastError); + } let mut vec: Vec = Vec::with_capacity(bytes.len() / elem_size); for chunk in bytes.chunks_exact(elem_size) { vec.push(T::from_le_bytes_slice(chunk)); @@ -484,4 +487,36 @@ mod tests { "expected CBOR to be smaller than 9-byte fixed encoding" ); } + + #[test] + fn test_misaligned_typed_array_is_rejected() { + // Hand-craft valid CBOR: Tag(85, Bytes[13 bytes]). 
+ // 13 bytes is not divisible by 4 (F32 element size), so decode must + // return an error rather than silently truncating to 3 elements. + // + // CBOR layout: + // 0xD8 0x55 — tag 85 (RFC 8746 F32-LE), 2-byte form (tag >= 24) + // 0x4D — byte string, length 13 (0x40 | 13) + // [0u8; 13] — 13 zero bytes (misaligned: 13 % 4 != 0) + let mut bytes = vec![0xD8, 0x55, 0x4D]; + bytes.extend_from_slice(&[0u8; 13]); + assert_eq!( + decode(&bytes), + Err(CborDecodeError::CastError), + "F32 typed array with 13 bytes (not a multiple of 4) must be rejected" + ); + } + + #[test] + fn test_misaligned_f16_array_is_rejected() { + // Tag 84 (F16-LE), byte string of 5 bytes (not divisible by 2). + // 0xD8 0x54 — tag 84; 0x45 — byte string length 5 + let mut bytes = vec![0xD8, 0x54, 0x45]; + bytes.extend_from_slice(&[0u8; 5]); + assert_eq!( + decode(&bytes), + Err(CborDecodeError::CastError), + "F16 typed array with 5 bytes (not a multiple of 2) must be rejected" + ); + } } From 8fdfd04447cb98f5fc52cda9af19bacea557b052 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 26 Feb 2026 14:24:03 +0200 Subject: [PATCH 23/33] move to fork of ciborium --- Cargo.toml | 7 +-- src/cbor.rs | 121 ++++++++++++++-------------------------------------- 2 files changed, 36 insertions(+), 92 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dce0325..4ef9042 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,8 +30,10 @@ serde = { workspace = true } serde_json = { workspace = true } ctor = { version = "0.1.16", optional = true } paste = "1.0.15" -ciborium = "0.2" -half = "2.0.0" +ciborium = { git = "https://github.com/AvivDavid23/ciborium", branch = "main" } +ciborium-ll = { git = "https://github.com/AvivDavid23/ciborium", branch = "main" } +bytemuck = "1" +half = { version = "2.0.0", features = ["bytemuck"] } thiserror = "2.0.18" zstd = "0.13" @@ -40,4 +42,3 @@ mockalloc = "0.1.2" ctor = "0.1.16" rand = "0.8.4" zstd = "0.13" - diff --git a/src/cbor.rs b/src/cbor.rs index caf0a1b..e54bd5c 100644 
--- a/src/cbor.rs +++ b/src/cbor.rs @@ -7,27 +7,22 @@ //! [`decode_compressed`] for zstd-compressed CBOR. use std::fmt; +use std::mem::size_of; +use bytemuck::Pod; use ciborium::value::{Integer, Value}; +use ciborium_ll::tag; use half::{bf16, f16}; use crate::array::ArraySliceRef; use crate::{DestructuredRef, IArray, INumber, IObject, IString, IValue}; -// RFC 8746 typed array tags (little-endian variants where applicable). -// Bit layout within the tag byte: 0b010_f_s_e_ll -// f=0 integer, f=1 float | s=0 unsigned, s=1 signed | e=1 little-endian | ll = width -const TAG_U8: u64 = 64; // uint8 (endianness irrelevant) -const TAG_I8: u64 = 72; // sint8 (endianness irrelevant) -const TAG_U16_LE: u64 = 69; -const TAG_I16_LE: u64 = 77; -const TAG_F16_LE: u64 = 84; // IEEE 754 binary16 LE -const TAG_U32_LE: u64 = 70; -const TAG_I32_LE: u64 = 78; -const TAG_F32_LE: u64 = 85; // IEEE 754 binary32 LE -const TAG_U64_LE: u64 = 71; -const TAG_I64_LE: u64 = 79; -const TAG_F64_LE: u64 = 86; // IEEE 754 binary64 LE +use tag::{ + TYPED_F16_LE as TAG_F16_LE, TYPED_F32_LE as TAG_F32_LE, TYPED_F64_LE as TAG_F64_LE, + TYPED_I16_LE as TAG_I16_LE, TYPED_I32_LE as TAG_I32_LE, TYPED_I64_LE as TAG_I64_LE, + TYPED_I8 as TAG_I8, TYPED_U16_LE as TAG_U16_LE, TYPED_U32_LE as TAG_U32_LE, + TYPED_U64_LE as TAG_U64_LE, TYPED_U8 as TAG_U8, +}; /// Private CBOR tag for BF16 arrays (no RFC 8746 standard tag exists for BF16). const TAG_BF16_LE: u64 = 0x10000; @@ -137,38 +132,11 @@ fn object_to_cbor(o: &IObject) -> Value { ) } -trait ToLeBytes { - fn to_le_bytes_vec(&self) -> impl AsRef<[u8]>; -} - -macro_rules! 
impl_to_le_bytes { - ($ty:ty) => { - impl ToLeBytes for $ty { - fn to_le_bytes_vec(&self) -> impl AsRef<[u8]> { - self.to_le_bytes() - } - } - }; -} - -impl_to_le_bytes!(i8); -impl_to_le_bytes!(i16); -impl_to_le_bytes!(u16); -impl_to_le_bytes!(f16); -impl_to_le_bytes!(bf16); -impl_to_le_bytes!(i32); -impl_to_le_bytes!(u32); -impl_to_le_bytes!(f32); -impl_to_le_bytes!(i64); -impl_to_le_bytes!(u64); -impl_to_le_bytes!(f64); - -fn typed_le_tag(tag: u64, s: &[T]) -> Value { - let mut bytes = Vec::new(); - for v in s { - bytes.extend_from_slice(v.to_le_bytes_vec().as_ref()); - } - Value::Tag(tag, Box::new(Value::Bytes(bytes))) +fn typed_le_tag(tag: u64, s: &[T]) -> Value { + Value::Tag( + tag, + Box::new(Value::Bytes(bytemuck::cast_slice(s).to_vec())), + ) } // ── Decode ──────────────────────────────────────────────────────────────────── @@ -239,59 +207,34 @@ fn decode_typed_array(tag: u64, inner: Value) -> Result TAG_U8 => IArray::try_from(bytes.as_slice()) .map(Into::into) .map_err(|_| CborDecodeError::AllocError), - TAG_I8 => decode_le_array::(&bytes, 1), - TAG_U16_LE => decode_le_array::(&bytes, 2), - TAG_I16_LE => decode_le_array::(&bytes, 2), - TAG_F16_LE => decode_le_array::(&bytes, 2), - TAG_BF16_LE => decode_le_array::(&bytes, 2), - TAG_U32_LE => decode_le_array::(&bytes, 4), - TAG_I32_LE => decode_le_array::(&bytes, 4), - TAG_F32_LE => decode_le_array::(&bytes, 4), - TAG_U64_LE => decode_le_array::(&bytes, 8), - TAG_I64_LE => decode_le_array::(&bytes, 8), - TAG_F64_LE => decode_le_array::(&bytes, 8), + TAG_I8 => decode_le_array::(&bytes), + TAG_U16_LE => decode_le_array::(&bytes), + TAG_I16_LE => decode_le_array::(&bytes), + TAG_F16_LE => decode_le_array::(&bytes), + TAG_BF16_LE => decode_le_array::(&bytes), + TAG_U32_LE => decode_le_array::(&bytes), + TAG_I32_LE => decode_le_array::(&bytes), + TAG_F32_LE => decode_le_array::(&bytes), + TAG_U64_LE => decode_le_array::(&bytes), + TAG_I64_LE => decode_le_array::(&bytes), + TAG_F64_LE => 
decode_le_array::(&bytes), other => Err(CborDecodeError::UnknownTag(other)), } } -trait FromLeBytes: Copy + Sized + 'static { - fn from_le_bytes_slice(s: &[u8]) -> Self; -} - -macro_rules! impl_from_le_bytes { - ($ty:ty) => { - impl FromLeBytes for $ty { - fn from_le_bytes_slice(s: &[u8]) -> Self { - Self::from_le_bytes(s.try_into().unwrap()) - } - } - }; -} - -impl_from_le_bytes!(i8); -impl_from_le_bytes!(i16); -impl_from_le_bytes!(u16); -impl_from_le_bytes!(f16); -impl_from_le_bytes!(bf16); -impl_from_le_bytes!(i32); -impl_from_le_bytes!(u32); -impl_from_le_bytes!(f32); -impl_from_le_bytes!(i64); -impl_from_le_bytes!(u64); -impl_from_le_bytes!(f64); - -fn decode_le_array(bytes: &[u8], elem_size: usize) -> Result +fn decode_le_array(bytes: &[u8]) -> Result where - T: FromLeBytes, + T: Pod, IArray: TryFrom>, { + let elem_size = size_of::(); if bytes.len() % elem_size != 0 { return Err(CborDecodeError::CastError); } - let mut vec: Vec = Vec::with_capacity(bytes.len() / elem_size); - for chunk in bytes.chunks_exact(elem_size) { - vec.push(T::from_le_bytes_slice(chunk)); - } + let vec: Vec = bytes + .chunks_exact(elem_size) + .map(bytemuck::pod_read_unaligned) + .collect(); IArray::try_from(vec) .map(Into::into) .map_err(|_| CborDecodeError::AllocError) From 2351ace6690ef976a22703562773fa9e02f69830 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 26 Feb 2026 15:40:49 +0200 Subject: [PATCH 24/33] fix CR --- src/array.rs | 6 +++++- src/de.rs | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/array.rs b/src/array.rs index 8d725c8..b658dbb 100644 --- a/src/array.rs +++ b/src/array.rs @@ -1151,15 +1151,19 @@ impl IArray { } /// Pushes a new item onto the back of the array with a specific floating-point type, potentially losing precision. + /// If the item is not a number, it is pushed as is. 
pub(crate) fn push_with_fp_type( &mut self, item: impl Into, fp_type: FloatType, ) -> Result<(), IJsonError> { + let item = item.into(); + if !item.is_number() { + return self.push(item); + } let desired_tag = fp_type.into(); let current_tag = self.header().type_tag(); let len = self.len(); - let item = item.into(); let can_fit = || match fp_type { FloatType::F16 => item.to_f16_lossy().map_or(false, |v| v.is_finite()), FloatType::BF16 => item.to_bf16_lossy().map_or(false, |v| v.is_finite()), diff --git a/src/de.rs b/src/de.rs index 5cc7cfe..5a5650c 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1162,6 +1162,44 @@ mod tests { let _error = seed.deserialize(&mut deserializer).unwrap_err(); } + #[test] + fn test_fpha_outer_array_of_objects_succeeds() { + // The classic embedding use-case: outer array holds objects, not numbers. + // Before the fix, push_with_fp_type would error on the object element. + let json = r#"[{"embedding": [1.0, 2.0]}, {"embedding": [3.0, 4.0]}]"#; + let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::F16))); + let mut deserializer = serde_json::Deserializer::from_str(json); + let value = seed.deserialize(&mut deserializer).unwrap(); + + let arr = value.as_array().unwrap(); + assert_eq!(arr.len(), 2); + assert!(matches!(arr.as_slice(), ArraySliceRef::Heterogeneous(_))); + + // Inner arrays should still be typed f16 + assert!(matches!( + arr[0].as_object().unwrap().get("embedding").unwrap().as_array().unwrap().as_slice(), + ArraySliceRef::F16(_) + )); + } + + #[test] + fn test_fpha_outer_array_of_nested_arrays_succeeds() { + // Outer array holds inner float arrays; outer must become heterogeneous. 
+ let json = r#"[[1.0, 2.0], [3.0, 4.0]]"#; + let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::F16))); + let mut deserializer = serde_json::Deserializer::from_str(json); + let value = seed.deserialize(&mut deserializer).unwrap(); + + let arr = value.as_array().unwrap(); + assert_eq!(arr.len(), 2); + assert!(matches!(arr.as_slice(), ArraySliceRef::Heterogeneous(_))); + // Inner arrays should still be typed f16 + assert!(matches!( + arr[0].as_array().unwrap().as_slice(), + ArraySliceRef::F16(_) + )); + } + #[test] fn test_ser_deser_roundtrip_preserves_type() { let json = r#"[0.2, 1.0, 1.2]"#; From a62bd2d26439c661b3a7fde1d295faa014b83b00 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 26 Feb 2026 15:41:59 +0200 Subject: [PATCH 25/33] fmt --- src/de.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/de.rs b/src/de.rs index 5a5650c..f36e4ac 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1177,7 +1177,14 @@ mod tests { // Inner arrays should still be typed f16 assert!(matches!( - arr[0].as_object().unwrap().get("embedding").unwrap().as_array().unwrap().as_slice(), + arr[0] + .as_object() + .unwrap() + .get("embedding") + .unwrap() + .as_array() + .unwrap() + .as_slice(), ArraySliceRef::F16(_) )); } From 00cefdc1eebe1957d8691c38735c66004921661c Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 26 Feb 2026 15:46:38 +0200 Subject: [PATCH 26/33] misc: JsonValue in fuzz tests to use serde --- fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs | 2 +- fuzz/fuzz_targets/fuzz_json_de.rs | 2 +- fuzz/src/lib.rs | 40 ++---------------------- 3 files changed, 4 insertions(+), 40 deletions(-) diff --git a/fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs b/fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs index 27bac38..406b5b1 100644 --- a/fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs +++ b/fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs @@ -6,7 +6,7 @@ use libfuzzer_sys::fuzz_target; use serde::Deserialize; fuzz_target!(|value: JsonValue| { - let json_string = 
value.to_json_string(); + let json_string = serde_json::to_string(&value).unwrap(); let mut deserializer = serde_json::Deserializer::from_str(&json_string); let Ok(original) = IValue::deserialize(&mut deserializer) else { return; diff --git a/fuzz/fuzz_targets/fuzz_json_de.rs b/fuzz/fuzz_targets/fuzz_json_de.rs index 9193a40..6a32bed 100644 --- a/fuzz/fuzz_targets/fuzz_json_de.rs +++ b/fuzz/fuzz_targets/fuzz_json_de.rs @@ -6,7 +6,7 @@ use libfuzzer_sys::fuzz_target; use serde::Deserialize; fuzz_target!(|value: JsonValue| { - let json_string = value.to_json_string(); + let json_string = serde_json::to_string(&value).unwrap(); let mut deserializer = serde_json::Deserializer::from_str(&json_string); let _ = IValue::deserialize(&mut deserializer); }); diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs index e68f013..90015a9 100644 --- a/fuzz/src/lib.rs +++ b/fuzz/src/lib.rs @@ -3,6 +3,7 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Arbitrary, Serialize, Deserialize)] pub enum JsonValue { + #[serde(rename = "null")] Null, Bool(bool), Integer(u64), @@ -10,41 +11,4 @@ pub enum JsonValue { Str(String), Array(Vec), Object(Vec<(String, JsonValue)>), -} - -impl JsonValue { - pub fn to_json_string(&self) -> String { - match self { - JsonValue::Null => "null".to_string(), - JsonValue::Bool(b) => b.to_string(), - JsonValue::Integer(n) => n.to_string(), - JsonValue::Float(n) => { - if n.is_finite() { - n.to_string() - } else { - "0".to_string() - } - } - JsonValue::Str(s) => { - format!("\"{}\"", s.replace('\\', "\\\\").replace('"', "\\\"")) - } - JsonValue::Array(arr) => { - let items: Vec = arr.iter().map(|v| v.to_json_string()).collect(); - format!("[{}]", items.join(",")) - } - JsonValue::Object(obj) => { - let items: Vec = obj - .iter() - .map(|(k, v)| { - format!( - "\"{}\":{}", - k.replace('\\', "\\\\").replace('"', "\\\""), - v.to_json_string() - ) - }) - .collect(); - format!("{{{}}}", items.join(",")) - } - } - } -} +} \ No newline at end of file From 
be16c77d504064c50c861f1280868aa3bab0aaa2 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Thu, 26 Feb 2026 15:57:54 +0200 Subject: [PATCH 27/33] fmt --- fuzz/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs index 90015a9..d31d3ea 100644 --- a/fuzz/src/lib.rs +++ b/fuzz/src/lib.rs @@ -11,4 +11,4 @@ pub enum JsonValue { Str(String), Array(Vec), Object(Vec<(String, JsonValue)>), -} \ No newline at end of file +} From 432f62eed1ad2535b88254834f8f3619a04def6f Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Sun, 1 Mar 2026 09:09:25 +0200 Subject: [PATCH 28/33] revert to manual string in fuzz, fix push_with_fp case --- fuzz/src/lib.rs | 38 +++++++++++++++++++++++++++++++++++++- src/array.rs | 6 +++--- src/de.rs | 6 +++++- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs index d31d3ea..e68f013 100644 --- a/fuzz/src/lib.rs +++ b/fuzz/src/lib.rs @@ -3,7 +3,6 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Arbitrary, Serialize, Deserialize)] pub enum JsonValue { - #[serde(rename = "null")] Null, Bool(bool), Integer(u64), @@ -12,3 +11,40 @@ pub enum JsonValue { Array(Vec), Object(Vec<(String, JsonValue)>), } + +impl JsonValue { + pub fn to_json_string(&self) -> String { + match self { + JsonValue::Null => "null".to_string(), + JsonValue::Bool(b) => b.to_string(), + JsonValue::Integer(n) => n.to_string(), + JsonValue::Float(n) => { + if n.is_finite() { + n.to_string() + } else { + "0".to_string() + } + } + JsonValue::Str(s) => { + format!("\"{}\"", s.replace('\\', "\\\\").replace('"', "\\\"")) + } + JsonValue::Array(arr) => { + let items: Vec = arr.iter().map(|v| v.to_json_string()).collect(); + format!("[{}]", items.join(",")) + } + JsonValue::Object(obj) => { + let items: Vec = obj + .iter() + .map(|(k, v)| { + format!( + "\"{}\":{}", + k.replace('\\', "\\\\").replace('"', "\\\""), + v.to_json_string() + ) + }) + .collect(); + format!("{{{}}}", 
items.join(",")) + } + } + } +} diff --git a/src/array.rs b/src/array.rs index b658dbb..29d4f16 100644 --- a/src/array.rs +++ b/src/array.rs @@ -1158,12 +1158,12 @@ impl IArray { fp_type: FloatType, ) -> Result<(), IJsonError> { let item = item.into(); - if !item.is_number() { - return self.push(item); - } let desired_tag = fp_type.into(); let current_tag = self.header().type_tag(); let len = self.len(); + if !item.is_number() || (current_tag != desired_tag && len > 0) { + return self.push(item); + } let can_fit = || match fp_type { FloatType::F16 => item.to_f16_lossy().map_or(false, |v| v.is_finite()), FloatType::BF16 => item.to_bf16_lossy().map_or(false, |v| v.is_finite()), diff --git a/src/de.rs b/src/de.rs index f36e4ac..e1c9068 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1123,7 +1123,11 @@ mod tests { let json = r#"[1, "string", 3.5]"#; let seed = IValueDeserSeed::new(Some(FPHAConfig::new_with_type(FloatType::F32))); let mut deserializer = serde_json::Deserializer::from_str(json); - let _error = seed.deserialize(&mut deserializer).unwrap_err(); + let value = seed.deserialize(&mut deserializer).unwrap(); + + let arr = value.as_array().unwrap(); + assert!(matches!(arr.as_slice(), ArraySliceRef::Heterogeneous(_))); + assert_eq!(arr.len(), 3); } #[test] From 0d11fe8c0db3dc353d1b36ab2960627f9878ed74 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Sun, 1 Mar 2026 09:39:10 +0200 Subject: [PATCH 29/33] cr --- fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs | 2 +- fuzz/fuzz_targets/fuzz_json_de.rs | 2 +- src/cbor.rs | 64 ++++++++++++++++-------- 3 files changed, 46 insertions(+), 22 deletions(-) diff --git a/fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs b/fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs index 406b5b1..27bac38 100644 --- a/fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs +++ b/fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs @@ -6,7 +6,7 @@ use libfuzzer_sys::fuzz_target; use serde::Deserialize; fuzz_target!(|value: JsonValue| { - let json_string = serde_json::to_string(&value).unwrap(); 
+ let json_string = value.to_json_string(); let mut deserializer = serde_json::Deserializer::from_str(&json_string); let Ok(original) = IValue::deserialize(&mut deserializer) else { return; diff --git a/fuzz/fuzz_targets/fuzz_json_de.rs b/fuzz/fuzz_targets/fuzz_json_de.rs index 6a32bed..9193a40 100644 --- a/fuzz/fuzz_targets/fuzz_json_de.rs +++ b/fuzz/fuzz_targets/fuzz_json_de.rs @@ -6,7 +6,7 @@ use libfuzzer_sys::fuzz_target; use serde::Deserialize; fuzz_target!(|value: JsonValue| { - let json_string = serde_json::to_string(&value).unwrap(); + let json_string = value.to_json_string(); let mut deserializer = serde_json::Deserializer::from_str(&json_string); let _ = IValue::deserialize(&mut deserializer); }); diff --git a/src/cbor.rs b/src/cbor.rs index e54bd5c..b67fda0 100644 --- a/src/cbor.rs +++ b/src/cbor.rs @@ -7,13 +7,50 @@ //! [`decode_compressed`] for zstd-compressed CBOR. use std::fmt; -use std::mem::size_of; use bytemuck::Pod; use ciborium::value::{Integer, Value}; use ciborium_ll::tag; use half::{bf16, f16}; +/// Converts a typed slice to/from little-endian bytes using each type's own +/// `to_le_bytes` / `from_le_bytes`, which are correct on every host endianness. +trait LeBytes: Pod + Copy { + fn slice_to_le_bytes(s: &[Self]) -> Vec; + fn slice_from_le_bytes(bytes: &[u8]) -> Result, CborDecodeError>; +} + +macro_rules! impl_le_bytes { + ($($t:ty => $n:literal),* $(,)?) => {$( + impl LeBytes for $t { + fn slice_to_le_bytes(s: &[Self]) -> Vec { + s.iter().flat_map(|v| v.to_le_bytes()).collect() + } + fn slice_from_le_bytes(bytes: &[u8]) -> Result, CborDecodeError> { + if bytes.len() % $n != 0 { + return Err(CborDecodeError::CastError); + } + Ok(bytes + .chunks_exact($n) + .map(|c| { + // SAFETY: `chunks_exact($n)` guarantees every chunk + // is exactly $n bytes, matching [u8; $n]. 
+ let arr: [u8; $n] = c.try_into().unwrap(); + Self::from_le_bytes(arr) + }) + .collect()) + } + } + )*}; +} + +impl_le_bytes!( + i8 => 1, u8 => 1, + i16 => 2, u16 => 2, f16 => 2, bf16 => 2, + i32 => 4, u32 => 4, f32 => 4, + i64 => 8, u64 => 8, f64 => 8, +); + use crate::array::ArraySliceRef; use crate::{DestructuredRef, IArray, INumber, IObject, IString, IValue}; @@ -110,7 +147,7 @@ fn array_to_cbor(a: &IArray) -> Value { match a.as_slice() { ArraySliceRef::Heterogeneous(s) => Value::Array(s.iter().map(ivalue_to_cbor).collect()), ArraySliceRef::I8(s) => typed_le_tag(TAG_I8, s), - ArraySliceRef::U8(s) => Value::Tag(TAG_U8, Box::new(Value::Bytes(s.to_vec()))), + ArraySliceRef::U8(s) => typed_le_tag(TAG_U8, s), ArraySliceRef::I16(s) => typed_le_tag(TAG_I16_LE, s), ArraySliceRef::U16(s) => typed_le_tag(TAG_U16_LE, s), ArraySliceRef::F16(s) => typed_le_tag(TAG_F16_LE, s), @@ -132,11 +169,8 @@ fn object_to_cbor(o: &IObject) -> Value { ) } -fn typed_le_tag(tag: u64, s: &[T]) -> Value { - Value::Tag( - tag, - Box::new(Value::Bytes(bytemuck::cast_slice(s).to_vec())), - ) +fn typed_le_tag(tag: u64, s: &[T]) -> Value { + Value::Tag(tag, Box::new(Value::Bytes(T::slice_to_le_bytes(s)))) } // ── Decode ──────────────────────────────────────────────────────────────────── @@ -204,9 +238,7 @@ fn decode_typed_array(tag: u64, inner: Value) -> Result _ => return Err(CborDecodeError::InvalidValue), }; match tag { - TAG_U8 => IArray::try_from(bytes.as_slice()) - .map(Into::into) - .map_err(|_| CborDecodeError::AllocError), + TAG_U8 => decode_le_array::(&bytes), TAG_I8 => decode_le_array::(&bytes), TAG_U16_LE => decode_le_array::(&bytes), TAG_I16_LE => decode_le_array::(&bytes), @@ -224,18 +256,10 @@ fn decode_typed_array(tag: u64, inner: Value) -> Result fn decode_le_array(bytes: &[u8]) -> Result where - T: Pod, + T: LeBytes, IArray: TryFrom>, { - let elem_size = size_of::(); - if bytes.len() % elem_size != 0 { - return Err(CborDecodeError::CastError); - } - let vec: Vec = bytes - 
.chunks_exact(elem_size) - .map(bytemuck::pod_read_unaligned) - .collect(); - IArray::try_from(vec) + IArray::try_from(T::slice_from_le_bytes(bytes)?) .map(Into::into) .map_err(|_| CborDecodeError::AllocError) } From ebcad7643c440b8aa8285d0dd621f08bdd50ca45 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Sun, 1 Mar 2026 09:54:04 +0200 Subject: [PATCH 30/33] remove dead code --- src/array.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/array.rs b/src/array.rs index 29d4f16..319895c 100644 --- a/src/array.rs +++ b/src/array.rs @@ -1171,10 +1171,6 @@ impl IArray { FloatType::F64 => item.to_f64_lossy().map_or(false, |v| v.is_finite()), }; - if (desired_tag != current_tag && len > 0) || !can_fit() { - return Err(IJsonError::OutOfRange(fp_type)); - } - // We can fit the item into the array, so we can push it directly if len == 0 { From c1ff908332fc3b877f3322af5047e0163567bc21 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Sun, 1 Mar 2026 10:02:51 +0200 Subject: [PATCH 31/33] . --- src/array.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/array.rs b/src/array.rs index 319895c..864f85e 100644 --- a/src/array.rs +++ b/src/array.rs @@ -1171,6 +1171,10 @@ impl IArray { FloatType::F64 => item.to_f64_lossy().map_or(false, |v| v.is_finite()), }; + if !can_fit() { + return Err(IJsonError::OutOfRange(fp_type)); + } + // We can fit the item into the array, so we can push it directly if len == 0 { From 95369f0c3e7663f418513661c27c35adf2c70150 Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Tue, 3 Mar 2026 09:15:14 +0200 Subject: [PATCH 32/33] CR --- fuzz/src/lib.rs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs index e68f013..a77a999 100644 --- a/fuzz/src/lib.rs +++ b/fuzz/src/lib.rs @@ -18,15 +18,9 @@ impl JsonValue { JsonValue::Null => "null".to_string(), JsonValue::Bool(b) => b.to_string(), JsonValue::Integer(n) => n.to_string(), - JsonValue::Float(n) => { - if n.is_finite() { - 
n.to_string() - } else { - "0".to_string() - } - } + JsonValue::Float(n) => n.to_string(), JsonValue::Str(s) => { - format!("\"{}\"", s.replace('\\', "\\\\").replace('"', "\\\"")) + format!(r#""{}"#, s.replace('\\', r"\\").replace('"', r#"\""#)) } JsonValue::Array(arr) => { let items: Vec = arr.iter().map(|v| v.to_json_string()).collect(); @@ -37,8 +31,8 @@ impl JsonValue { .iter() .map(|(k, v)| { format!( - "\"{}\":{}", - k.replace('\\', "\\\\").replace('"', "\\\""), + r#""{}":{}"#, + k.replace('\\', r"\\").replace('"', r#"\""#), v.to_json_string() ) }) From 9675e630b9e2fadb392f1ac4a63d0465c3d2264c Mon Sep 17 00:00:00 2001 From: avivdavid23 Date: Tue, 3 Mar 2026 10:08:37 +0200 Subject: [PATCH 33/33] move to arbitrary-json crate in fuzz tests --- fuzz/Cargo.toml | 1 + fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs | 6 ++-- fuzz/fuzz_targets/fuzz_json_de.rs | 6 ++-- fuzz/src/lib.rs | 43 ------------------------ 4 files changed, 7 insertions(+), 49 deletions(-) diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index f876d47..74da9c7 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -16,6 +16,7 @@ libfuzzer-sys = "0.4" arbitrary = { version = "1.3", features = ["derive"] } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } +arbitrary-json = "=0.1.1" [dependencies.ijson] path = ".." 
diff --git a/fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs b/fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs index 27bac38..7da09a5 100644 --- a/fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs +++ b/fuzz/fuzz_targets/fuzz_cbor_roundtrip.rs @@ -1,12 +1,12 @@ #![no_main] +use arbitrary_json::ArbitraryValue; use ijson::{cbor, IValue}; -use ijson_fuzz::JsonValue; use libfuzzer_sys::fuzz_target; use serde::Deserialize; -fuzz_target!(|value: JsonValue| { - let json_string = value.to_json_string(); +fuzz_target!(|value: ArbitraryValue| { + let json_string = value.to_string(); let mut deserializer = serde_json::Deserializer::from_str(&json_string); let Ok(original) = IValue::deserialize(&mut deserializer) else { return; diff --git a/fuzz/fuzz_targets/fuzz_json_de.rs b/fuzz/fuzz_targets/fuzz_json_de.rs index 9193a40..b5a35cf 100644 --- a/fuzz/fuzz_targets/fuzz_json_de.rs +++ b/fuzz/fuzz_targets/fuzz_json_de.rs @@ -1,12 +1,12 @@ #![no_main] +use arbitrary_json::ArbitraryValue; use ijson::IValue; -use ijson_fuzz::JsonValue; use libfuzzer_sys::fuzz_target; use serde::Deserialize; -fuzz_target!(|value: JsonValue| { - let json_string = value.to_json_string(); +fuzz_target!(|value: ArbitraryValue| { + let json_string = value.to_string(); let mut deserializer = serde_json::Deserializer::from_str(&json_string); let _ = IValue::deserialize(&mut deserializer); }); diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs index a77a999..8b13789 100644 --- a/fuzz/src/lib.rs +++ b/fuzz/src/lib.rs @@ -1,44 +1 @@ -use arbitrary::Arbitrary; -use serde::{Deserialize, Serialize}; -#[derive(Debug, Arbitrary, Serialize, Deserialize)] -pub enum JsonValue { - Null, - Bool(bool), - Integer(u64), - Float(f64), - Str(String), - Array(Vec), - Object(Vec<(String, JsonValue)>), -} - -impl JsonValue { - pub fn to_json_string(&self) -> String { - match self { - JsonValue::Null => "null".to_string(), - JsonValue::Bool(b) => b.to_string(), - JsonValue::Integer(n) => n.to_string(), - JsonValue::Float(n) => n.to_string(), - 
JsonValue::Str(s) => { - format!(r#""{}"#, s.replace('\\', r"\\").replace('"', r#"\""#)) - } - JsonValue::Array(arr) => { - let items: Vec = arr.iter().map(|v| v.to_json_string()).collect(); - format!("[{}]", items.join(",")) - } - JsonValue::Object(obj) => { - let items: Vec = obj - .iter() - .map(|(k, v)| { - format!( - r#""{}":{}"#, - k.replace('\\', r"\\").replace('"', r#"\""#), - v.to_json_string() - ) - }) - .collect(); - format!("{{{}}}", items.join(",")) - } - } - } -}