diff --git a/src/descriptors.rs b/src/descriptors.rs index a2b6cc7..eadf427 100644 --- a/src/descriptors.rs +++ b/src/descriptors.rs @@ -1,61 +1,74 @@ use std::{ borrow::Cow, + convert::TryFrom, fmt::{self, Write}, + ops::Deref, }; use crate::ParseError; -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub struct UnqualifiedSegment<'a> { - pub name: Cow<'a, str>, -} - -// Returns the unqualified segment and the following char (either '/' or ';') +// Returns the unqualified segment and the following char ('/', ';', or None) // or an error. This only extracts the unqualified segment at the start of // the given data, and ignores anything following. -fn parse_unqualified_segment<'a>( - data: &Cow<'a, str>, - start_index: usize, -) -> Result<(UnqualifiedSegment<'a>, char), ParseError> { - for (ix, c) in data[start_index..].char_indices() { +fn parse_unqualified_segment(data: &str) -> Result<(&str, Option), ParseError> { + for (ix, c) in data.char_indices() { match c { - '/' if ix == 0 => fail!("Unexpected / at start of unqualified segment"), - ';' if ix == 0 => fail!("Unexpected ; at start of unqualified segment"), - '/' | ';' => { - let name = match data { - Cow::Borrowed(borrowed_str) => { - Cow::Borrowed(&borrowed_str[start_index..start_index + ix]) - } - Cow::Owned(ref owned_str) => { - Cow::Owned(owned_str[start_index..start_index + ix].to_string()) - } - }; - let segment = UnqualifiedSegment { name }; - return Ok((segment, c)); - } + '/' if ix == 0 => fail!("Unexpected '/' at start of unqualified segment"), + ';' if ix == 0 => fail!("Unexpected ';' at start of unqualified segment"), + '/' | ';' => return Ok((&data[0..ix], Some(c))), '.' | '[' | '<' | '>' => fail!("Disallowed character in unqualified segment"), _ => (), }; } - fail!("Unterminated unqualified segment"); + Ok((data, None)) } -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub struct ClassName<'a> { - pub segments: Vec>, +/// Represents a valid binary class or interface name in the syntax of +/// the [JVM Spec](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.2.1). +#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct ClassName<'a>(Cow<'a, str>); + +impl<'a> TryFrom> for ClassName<'a> { + type Error = ParseError; + + // `value` (as a whole) must consist of a sequence of unqualified segements + fn try_from(value: Cow<'a, str>) -> Result { + let mut index = 0; + loop { + match parse_unqualified_segment(&value[index..])? { + (_, None) => break, + (_, Some(';')) => fail!("Disallowed ';' in class name"), + (segment, Some('/')) => index += segment.len() + 1, + _ => panic!("Got unexpected return value from parse_unqualified_segment"), + } + } + Ok(Self(value)) + } +} + +impl<'a> From> for Cow<'a, str> { + fn from(value: ClassName<'a>) -> Self { + value.0 + } } -impl ClassName<'_> { +impl<'a> Deref for ClassName<'a> { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'a> ClassName<'a> { fn byte_len(&self) -> usize { - self.segments - .iter() - .fold(0, |sum, segment| sum + segment.name.len() + 1) + self.0.len() } } + impl<'a> fmt::Display for ClassName<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - let segments: Vec> = self.segments.iter().map(|s| s.name.clone()).collect(); - write!(f, "{}", segments.join("/")) + write!(f, "{}", self.0) } } @@ -65,19 +78,21 @@ fn parse_class_descriptor<'a>( data: &Cow<'a, str>, index: usize, ) -> Result, ParseError> { - let mut segments = vec![]; - let mut remaining_index = index; + let mut next_index = index; loop { - match parse_unqualified_segment(data, remaining_index)? { - (segment, ';') => { - segments.push(segment); - return Ok(ClassName { segments }); - } - (segment, '/') => { - remaining_index += segment.name.len() + 1; - segments.push(segment); - continue; + match parse_unqualified_segment(&data[next_index..])? { + (segment, Some(';')) => { + return Ok(ClassName(match data { + Cow::Borrowed(data) => { + Cow::Borrowed(&data[index..(next_index + segment.len())]) + } + Cow::Owned(data) => { + Cow::Owned(data[index..(next_index + segment.len())].to_string()) + } + })) } + (segment, Some('/')) => next_index += segment.len() + 1, + (_, None) => fail!("Unterminated unqualified segment"), _ => panic!("Got unexpected return value from parse_unqualified_segment"), } } @@ -99,7 +114,8 @@ pub enum FieldType<'a> { impl FieldType<'_> { fn byte_len(&self) -> usize { match self { - FieldType::Object(class_name) => 1 + class_name.byte_len(), + // +1 for the beginning 'L'; +1 for the terminating ';' + FieldType::Object(class_name) => 2 + class_name.byte_len(), _ => 1, } } @@ -123,6 +139,7 @@ impl fmt::Display for FieldType<'_> { #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct FieldDescriptor<'a> { + /// Non-zero for array types denoting the arrays dimensions, otherwise `0`. pub dimensions: u8, pub field_type: FieldType<'a>, } @@ -487,19 +504,7 @@ mod tests { parameters.next().unwrap(), FieldDescriptor { dimensions: 0, - field_type: FieldType::Object(ClassName { - segments: vec![ - UnqualifiedSegment { - name: Cow::Borrowed("java") - }, - UnqualifiedSegment { - name: Cow::Borrowed("lang") - }, - UnqualifiedSegment { - name: Cow::Borrowed("Object") - }, - ], - }), + field_type: FieldType::Object(ClassName("java/lang/Object".into())), }, ); assert!(parameters.next().is_none()); @@ -521,19 +526,7 @@ mod tests { parameters.next().unwrap(), FieldDescriptor { dimensions: 0, - field_type: FieldType::Object(ClassName { - segments: vec![ - UnqualifiedSegment { - name: Cow::Borrowed("java") - }, - UnqualifiedSegment { - name: Cow::Borrowed("lang") - }, - UnqualifiedSegment { - name: Cow::Borrowed("Object") - }, - ], - }), + field_type: FieldType::Object(ClassName("java/lang/Object".into())), }, ); assert!(parameters.next().is_none()); @@ -552,38 +545,14 @@ mod tests { parameters.next().unwrap(), FieldDescriptor { dimensions: 0, - field_type: FieldType::Object(ClassName { - segments: vec![ - UnqualifiedSegment { - name: Cow::Borrowed("java") - }, - UnqualifiedSegment { - name: Cow::Borrowed("lang") - }, - UnqualifiedSegment { - name: Cow::Borrowed("Object") - }, - ], - }), + field_type: FieldType::Object(ClassName("java/lang/Object".into())), }, ); assert_eq!( parameters.next().unwrap(), FieldDescriptor { dimensions: 0, - field_type: FieldType::Object(ClassName { - segments: vec![ - UnqualifiedSegment { - name: Cow::Borrowed("java") - }, - UnqualifiedSegment { - name: Cow::Borrowed("lang") - }, - UnqualifiedSegment { - name: Cow::Borrowed("String") - }, - ], - }), + field_type: FieldType::Object(ClassName("java/lang/String".into())), }, ); assert!(parameters.next().is_none()); @@ -606,19 +575,7 @@ mod tests { return_type, ReturnDescriptor::Return(FieldDescriptor { dimensions: 0, - field_type: FieldType::Object(ClassName { - segments: vec![ - UnqualifiedSegment { - name: Cow::Borrowed("java") - }, - UnqualifiedSegment { - name: Cow::Borrowed("lang") - }, - UnqualifiedSegment { - name: Cow::Borrowed("Object") - }, - ], - }), + field_type: FieldType::Object(ClassName("java/lang/Object".into())), }), ); @@ -696,4 +653,14 @@ mod tests { assert!(parse_method_descriptor(&chars_ok, 0).is_ok()); assert!(parse_method_descriptor(&chars_bad, 0).is_err()); } + + #[test] + fn test_classname_parsing() { + assert!(ClassName::try_from(Cow::Borrowed("java/lang/Object")).is_ok()); + assert!(ClassName::try_from(Cow::Borrowed("/bad/classname")).is_err()); + assert!(ClassName::try_from(Cow::Borrowed("another//bad/one")).is_err()); + assert!(ClassName::try_from(Cow::Borrowed("yet/another/bad/one;")).is_err()); + assert!(ClassName::try_from(Cow::Borrowed("also/bogus;one")).is_err()); + assert!(ClassName::try_from(Cow::Borrowed("Ldefinitely/not/ok;")).is_err()); + } } diff --git a/src/lib.rs b/src/lib.rs index db4c2eb..e1683e1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,6 +17,7 @@ pub mod names; use std::borrow::Cow; use std::collections::HashSet; +use std::convert::TryFrom; use std::ops::Deref; #[cfg(not(feature = "threadsafe"))] @@ -30,7 +31,7 @@ use crate::constant_pool::{ ConstantPoolIter, }; use crate::descriptors::{ - parse_field_descriptor, parse_method_descriptor, FieldDescriptor, MethodDescriptor, + parse_field_descriptor, parse_method_descriptor, ClassName, FieldDescriptor, MethodDescriptor, ReturnDescriptor, }; pub use crate::error::ParseError; @@ -94,12 +95,15 @@ fn read_interfaces<'a>( bytes: &'a [u8], ix: &mut usize, pool: &[CafeRc>], -) -> Result>, ParseError> { +) -> Result>, ParseError> { let count = read_u2(bytes, ix)?; let mut interfaces = Vec::with_capacity(count.into()); for i in 0..count { - interfaces - .push(read_cp_classinfo(bytes, ix, pool).map_err(|e| err!(e, "interface {}", i))?); + interfaces.push( + read_cp_classinfo(bytes, ix, pool) + .and_then(ClassName::try_from) + .map_err(|e| err!(e, "interface {}", i))?, + ); } Ok(interfaces) } @@ -320,9 +324,9 @@ pub struct ClassFile<'a> { pub minor_version: u16, constant_pool: Vec>>, pub access_flags: ClassAccessFlags, - pub this_class: Cow<'a, str>, - pub super_class: Option>, - pub interfaces: Vec>, + pub this_class: ClassName<'a>, + pub super_class: Option>, + pub interfaces: Vec>, pub fields: Vec>, pub methods: Vec>, pub attributes: Vec>, @@ -350,11 +354,11 @@ impl Default for ParseOptions { impl ParseOptions { /// Turns on or off parsing of bytecode from the Code attributes of methods. If parsing - /// is enabled, the CodeData structure's optional bytecode field will be populated - /// (or parsing will fail entirely if bytecode parsing failed). If parsing is disabled, - /// the CodeData structure's optional bytecode field will be set to None. Parsing is - /// enabled by default, but can be disabled to speed up parsing in cases where the - /// parsed bytecode is not needed. + /// is enabled, the [CodeData](crate::attributes::CodeData) structure's optional bytecode + /// field will be populated (or parsing will fail entirely if bytecode parsing failed). + /// If parsing is disabled, the [CodeData](crate::attributes::CodeData) structure's optional + /// bytecode field will be set to None. Parsing is enabled by default, but can be disabled + /// to speed up parsing in cases where the parsed bytecode is not needed. pub fn parse_bytecode(&mut self, parse: bool) -> &mut ParseOptions { self.parse_bytecode = parse; self @@ -394,9 +398,11 @@ pub fn parse_class_with_options<'a>( ); } } - let this_class = - read_cp_classinfo(raw_bytes, &mut ix, &constant_pool).map_err(|e| err!(e, "this_class"))?; + let this_class = read_cp_classinfo(raw_bytes, &mut ix, &constant_pool) + .and_then(ClassName::try_from) + .map_err(|e| err!(e, "this_class"))?; let super_class = read_cp_classinfo_opt(raw_bytes, &mut ix, &constant_pool) + .and_then(|name| name.map(ClassName::try_from).transpose()) .map_err(|e| err!(e, "super_class"))?; let interfaces = read_interfaces(raw_bytes, &mut ix, &constant_pool)?; let fields = read_fields(raw_bytes, &mut ix, &constant_pool, opts)?;