From 83ccc09bf6f3dee6417e3e2a91025f33acb653c7 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Fri, 30 May 2025 21:39:34 +0200 Subject: [PATCH 1/4] Propose new ClassName API --- src/descriptors.rs | 145 +++++++++++++-------------------------------- 1 file changed, 40 insertions(+), 105 deletions(-) diff --git a/src/descriptors.rs b/src/descriptors.rs index a2b6cc7..a9f7cf2 100644 --- a/src/descriptors.rs +++ b/src/descriptors.rs @@ -1,38 +1,20 @@ use std::{ borrow::Cow, fmt::{self, Write}, + ops::Deref, }; use crate::ParseError; -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub struct UnqualifiedSegment<'a> { - pub name: Cow<'a, str>, -} - // Returns the unqualified segment and the following char (either '/' or ';') // or an error. This only extracts the unqualified segment at the start of // the given data, and ignores anything following. -fn parse_unqualified_segment<'a>( - data: &Cow<'a, str>, - start_index: usize, -) -> Result<(UnqualifiedSegment<'a>, char), ParseError> { +fn parse_unqualified_segment(data: &str, start_index: usize) -> Result<(&str, char), ParseError> { for (ix, c) in data[start_index..].char_indices() { match c { '/' if ix == 0 => fail!("Unexpected / at start of unqualified segment"), ';' if ix == 0 => fail!("Unexpected ; at start of unqualified segment"), - '/' | ';' => { - let name = match data { - Cow::Borrowed(borrowed_str) => { - Cow::Borrowed(&borrowed_str[start_index..start_index + ix]) - } - Cow::Owned(ref owned_str) => { - Cow::Owned(owned_str[start_index..start_index + ix].to_string()) - } - }; - let segment = UnqualifiedSegment { name }; - return Ok((segment, c)); - } + '/' | ';' => return Ok((&data[start_index..start_index + ix], c)), '.' | '[' | '<' | '>' => fail!("Disallowed character in unqualified segment"), _ => (), }; @@ -40,22 +22,34 @@ fn parse_unqualified_segment<'a>( fail!("Unterminated unqualified segment"); } -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub struct ClassName<'a> { - pub segments: Vec>, +/// Represents a valid binary class or interface name in the syntax of +/// the [JVM Spec](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.2.1). +#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct ClassName<'a>(Cow<'a, str>); + +impl<'a> From> for Cow<'a, str> { + fn from(value: ClassName<'a>) -> Self { + value.0 + } } -impl ClassName<'_> { +impl<'a> Deref for ClassName<'a> { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'a> ClassName<'a> { fn byte_len(&self) -> usize { - self.segments - .iter() - .fold(0, |sum, segment| sum + segment.name.len() + 1) + self.0.len() + 1 // one for the terminating ';' } } + impl<'a> fmt::Display for ClassName<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - let segments: Vec> = self.segments.iter().map(|s| s.name.clone()).collect(); - write!(f, "{}", segments.join("/")) + write!(f, "{}", self.0) } } @@ -65,19 +59,20 @@ fn parse_class_descriptor<'a>( data: &Cow<'a, str>, index: usize, ) -> Result, ParseError> { - let mut segments = vec![]; - let mut remaining_index = index; + let mut next_index = index; loop { - match parse_unqualified_segment(data, remaining_index)? { + match parse_unqualified_segment(data, next_index)? { (segment, ';') => { - segments.push(segment); - return Ok(ClassName { segments }); - } - (segment, '/') => { - remaining_index += segment.name.len() + 1; - segments.push(segment); - continue; + return Ok(ClassName(match data { + Cow::Borrowed(data) => { + Cow::Borrowed(&data[index..(next_index + segment.len())]) + } + Cow::Owned(data) => { + Cow::Owned(data[index..(next_index + segment.len())].to_string()) + } + })) } + (segment, '/') => next_index += segment.len() + 1, _ => panic!("Got unexpected return value from parse_unqualified_segment"), } } @@ -487,19 +482,7 @@ mod tests { parameters.next().unwrap(), FieldDescriptor { dimensions: 0, - field_type: FieldType::Object(ClassName { - segments: vec![ - UnqualifiedSegment { - name: Cow::Borrowed("java") - }, - UnqualifiedSegment { - name: Cow::Borrowed("lang") - }, - UnqualifiedSegment { - name: Cow::Borrowed("Object") - }, - ], - }), + field_type: FieldType::Object(ClassName("java/lang/Object".into())), }, ); assert!(parameters.next().is_none()); @@ -521,19 +504,7 @@ mod tests { parameters.next().unwrap(), FieldDescriptor { dimensions: 0, - field_type: FieldType::Object(ClassName { - segments: vec![ - UnqualifiedSegment { - name: Cow::Borrowed("java") - }, - UnqualifiedSegment { - name: Cow::Borrowed("lang") - }, - UnqualifiedSegment { - name: Cow::Borrowed("Object") - }, - ], - }), + field_type: FieldType::Object(ClassName("java/lang/Object".into())), }, ); assert!(parameters.next().is_none()); @@ -552,38 +523,14 @@ mod tests { parameters.next().unwrap(), FieldDescriptor { dimensions: 0, - field_type: FieldType::Object(ClassName { - segments: vec![ - UnqualifiedSegment { - name: Cow::Borrowed("java") - }, - UnqualifiedSegment { - name: Cow::Borrowed("lang") - }, - UnqualifiedSegment { - name: Cow::Borrowed("Object") - }, - ], - }), + field_type: FieldType::Object(ClassName("java/lang/Object".into())), }, ); assert_eq!( parameters.next().unwrap(), FieldDescriptor { dimensions: 0, - field_type: FieldType::Object(ClassName { - segments: vec![ - UnqualifiedSegment { - name: Cow::Borrowed("java") - }, - UnqualifiedSegment { - name: Cow::Borrowed("lang") - }, - UnqualifiedSegment { - name: Cow::Borrowed("String") - }, - ], - }), + field_type: FieldType::Object(ClassName("java/lang/String".into())), }, ); assert!(parameters.next().is_none()); @@ -606,19 +553,7 @@ mod tests { return_type, ReturnDescriptor::Return(FieldDescriptor { dimensions: 0, - field_type: FieldType::Object(ClassName { - segments: vec![ - UnqualifiedSegment { - name: Cow::Borrowed("java") - }, - UnqualifiedSegment { - name: Cow::Borrowed("lang") - }, - UnqualifiedSegment { - name: Cow::Borrowed("Object") - }, - ], - }), + field_type: FieldType::Object(ClassName("java/lang/Object".into())), }), ); From 38c8e4012f7a1063913363e52dccac9cce394ff9 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sat, 31 May 2025 11:55:00 +0200 Subject: [PATCH 2/4] Update byte_len to make ClassName more standalone This decouples the ClassName byte_len implementation from the descriptor usage, and makes ClassName usable in other contexts. --- src/descriptors.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/descriptors.rs b/src/descriptors.rs index a9f7cf2..f0d8e51 100644 --- a/src/descriptors.rs +++ b/src/descriptors.rs @@ -43,7 +43,7 @@ impl<'a> Deref for ClassName<'a> { impl<'a> ClassName<'a> { fn byte_len(&self) -> usize { - self.0.len() + 1 // one for the terminating ';' + self.0.len() } } @@ -94,7 +94,8 @@ pub enum FieldType<'a> { impl FieldType<'_> { fn byte_len(&self) -> usize { match self { - FieldType::Object(class_name) => 1 + class_name.byte_len(), + // +1 for the beginning 'L'; +1 for the terminating ';' + FieldType::Object(class_name) => 2 + class_name.byte_len(), _ => 1, } } From 02417d7696ecdfc85e181a458f923d2d9d8825c6 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sat, 31 May 2025 11:43:58 +0200 Subject: [PATCH 3/4] Wrap ClassFile#this_class, #super_class, and #interfaces with ClassName --- src/descriptors.rs | 50 ++++++++++++++++++++++++++++++++++++---------- src/lib.rs | 24 +++++++++++++--------- 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/src/descriptors.rs b/src/descriptors.rs index f0d8e51..97f8fc7 100644 --- a/src/descriptors.rs +++ b/src/descriptors.rs @@ -1,25 +1,26 @@ use std::{ borrow::Cow, + convert::TryFrom, fmt::{self, Write}, ops::Deref, }; use crate::ParseError; -// Returns the unqualified segment and the following char (either '/' or ';') +// Returns the unqualified segment and the following char ('/', ';', or None) // or an error. This only extracts the unqualified segment at the start of // the given data, and ignores anything following. -fn parse_unqualified_segment(data: &str, start_index: usize) -> Result<(&str, char), ParseError> { - for (ix, c) in data[start_index..].char_indices() { +fn parse_unqualified_segment(data: &str) -> Result<(&str, Option), ParseError> { + for (ix, c) in data.char_indices() { match c { - '/' if ix == 0 => fail!("Unexpected / at start of unqualified segment"), - ';' if ix == 0 => fail!("Unexpected ; at start of unqualified segment"), - '/' | ';' => return Ok((&data[start_index..start_index + ix], c)), + '/' if ix == 0 => fail!("Unexpected '/' at start of unqualified segment"), + ';' if ix == 0 => fail!("Unexpected ';' at start of unqualified segment"), + '/' | ';' => return Ok((&data[0..ix], Some(c))), '.' | '[' | '<' | '>' => fail!("Disallowed character in unqualified segment"), _ => (), }; } - fail!("Unterminated unqualified segment"); + Ok((data, None)) } /// Represents a valid binary class or interface name in the syntax of @@ -27,6 +28,24 @@ fn parse_unqualified_segment(data: &str, start_index: usize) -> Result<(&str, ch #[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct ClassName<'a>(Cow<'a, str>); +impl<'a> TryFrom> for ClassName<'a> { + type Error = ParseError; + + // `value` (as a whole) must consist of a sequence of unqualified segements + fn try_from(value: Cow<'a, str>) -> Result { + let mut index = 0; + loop { + match parse_unqualified_segment(&value[index..])? { + (_, None) => break, + (_, Some(';')) => fail!("Disallowed ';' in class name"), + (segment, Some('/')) => index += segment.len() + 1, + _ => panic!("Got unexpected return value from parse_unqualified_segment"), + } + } + Ok(Self(value)) + } +} + impl<'a> From> for Cow<'a, str> { fn from(value: ClassName<'a>) -> Self { value.0 @@ -61,8 +80,8 @@ fn parse_class_descriptor<'a>( ) -> Result, ParseError> { let mut next_index = index; loop { - match parse_unqualified_segment(data, next_index)? { - (segment, ';') => { + match parse_unqualified_segment(&data[next_index..])? { + (segment, Some(';')) => { return Ok(ClassName(match data { Cow::Borrowed(data) => { Cow::Borrowed(&data[index..(next_index + segment.len())]) @@ -72,7 +91,8 @@ fn parse_class_descriptor<'a>( } })) } - (segment, '/') => next_index += segment.len() + 1, + (segment, Some('/')) => next_index += segment.len() + 1, + (_, None) => fail!("Unterminated unqualified segment"), _ => panic!("Got unexpected return value from parse_unqualified_segment"), } } @@ -632,4 +652,14 @@ mod tests { assert!(parse_method_descriptor(&chars_ok, 0).is_ok()); assert!(parse_method_descriptor(&chars_bad, 0).is_err()); } + + #[test] + fn test_classname_parsing() { + assert!(ClassName::try_from(Cow::Borrowed("java/lang/Object")).is_ok()); + assert!(ClassName::try_from(Cow::Borrowed("/bad/classname")).is_err()); + assert!(ClassName::try_from(Cow::Borrowed("another//bad/one")).is_err()); + assert!(ClassName::try_from(Cow::Borrowed("yet/another/bad/one;")).is_err()); + assert!(ClassName::try_from(Cow::Borrowed("also/bogus;one")).is_err()); + assert!(ClassName::try_from(Cow::Borrowed("Ldefinitely/not/ok;")).is_err()); + } } diff --git a/src/lib.rs b/src/lib.rs index db4c2eb..e6a2971 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,6 +17,7 @@ pub mod names; use std::borrow::Cow; use std::collections::HashSet; +use std::convert::TryFrom; use std::ops::Deref; #[cfg(not(feature = "threadsafe"))] @@ -30,7 +31,7 @@ use crate::constant_pool::{ ConstantPoolIter, }; use crate::descriptors::{ - parse_field_descriptor, parse_method_descriptor, FieldDescriptor, MethodDescriptor, + parse_field_descriptor, parse_method_descriptor, ClassName, FieldDescriptor, MethodDescriptor, ReturnDescriptor, }; pub use crate::error::ParseError; @@ -94,12 +95,15 @@ fn read_interfaces<'a>( bytes: &'a [u8], ix: &mut usize, pool: &[CafeRc>], -) -> Result>, ParseError> { +) -> Result>, ParseError> { let count = read_u2(bytes, ix)?; let mut interfaces = Vec::with_capacity(count.into()); for i in 0..count { - interfaces - .push(read_cp_classinfo(bytes, ix, pool).map_err(|e| err!(e, "interface {}", i))?); + interfaces.push( + read_cp_classinfo(bytes, ix, pool) + .and_then(ClassName::try_from) + .map_err(|e| err!(e, "interface {}", i))?, + ); } Ok(interfaces) } @@ -320,9 +324,9 @@ pub struct ClassFile<'a> { pub minor_version: u16, constant_pool: Vec>>, pub access_flags: ClassAccessFlags, - pub this_class: Cow<'a, str>, - pub super_class: Option>, - pub interfaces: Vec>, + pub this_class: ClassName<'a>, + pub super_class: Option>, + pub interfaces: Vec>, pub fields: Vec>, pub methods: Vec>, pub attributes: Vec>, @@ -394,9 +398,11 @@ pub fn parse_class_with_options<'a>( ); } } - let this_class = - read_cp_classinfo(raw_bytes, &mut ix, &constant_pool).map_err(|e| err!(e, "this_class"))?; + let this_class = read_cp_classinfo(raw_bytes, &mut ix, &constant_pool) + .and_then(ClassName::try_from) + .map_err(|e| err!(e, "this_class"))?; let super_class = read_cp_classinfo_opt(raw_bytes, &mut ix, &constant_pool) + .and_then(|name| name.map(ClassName::try_from).transpose()) .map_err(|e| err!(e, "super_class"))?; let interfaces = read_interfaces(raw_bytes, &mut ix, &constant_pool)?; let fields = read_fields(raw_bytes, &mut ix, &constant_pool, opts)?; From 18c5d3ddb070c7ac1087d055272c391055274a7f Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 1 Jun 2025 19:49:58 -0400 Subject: [PATCH 4/4] Documentation tweaks --- src/descriptors.rs | 1 + src/lib.rs | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/descriptors.rs b/src/descriptors.rs index 97f8fc7..eadf427 100644 --- a/src/descriptors.rs +++ b/src/descriptors.rs @@ -139,6 +139,7 @@ impl fmt::Display for FieldType<'_> { #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct FieldDescriptor<'a> { + /// Non-zero for array types denoting the arrays dimensions, otherwise `0`. pub dimensions: u8, pub field_type: FieldType<'a>, } diff --git a/src/lib.rs b/src/lib.rs index e6a2971..e1683e1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -354,11 +354,11 @@ impl Default for ParseOptions { impl ParseOptions { /// Turns on or off parsing of bytecode from the Code attributes of methods. If parsing - /// is enabled, the CodeData structure's optional bytecode field will be populated - /// (or parsing will fail entirely if bytecode parsing failed). If parsing is disabled, - /// the CodeData structure's optional bytecode field will be set to None. Parsing is - /// enabled by default, but can be disabled to speed up parsing in cases where the - /// parsed bytecode is not needed. + /// is enabled, the [CodeData](crate::attributes::CodeData) structure's optional bytecode + /// field will be populated (or parsing will fail entirely if bytecode parsing failed). + /// If parsing is disabled, the [CodeData](crate::attributes::CodeData) structure's optional + /// bytecode field will be set to None. Parsing is enabled by default, but can be disabled + /// to speed up parsing in cases where the parsed bytecode is not needed. pub fn parse_bytecode(&mut self, parse: bool) -> &mut ParseOptions { self.parse_bytecode = parse; self