Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 79 additions & 112 deletions src/descriptors.rs
Original file line number Diff line number Diff line change
@@ -1,61 +1,74 @@
use std::{
borrow::Cow,
convert::TryFrom,
fmt::{self, Write},
ops::Deref,
};

use crate::ParseError;

/// A single unqualified name segment, i.e. one `/`-separated piece of a
/// class name.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct UnqualifiedSegment<'a> {
/// The text of the segment, without any surrounding `/` or `;`.
pub name: Cow<'a, str>,
}

// Returns the unqualified segment and the following char (either '/' or ';')
// Returns the unqualified segment and the following char ('/', ';', or None)
// or an error. This only extracts the unqualified segment at the start of
// the given data, and ignores anything following.
fn parse_unqualified_segment<'a>(
data: &Cow<'a, str>,
start_index: usize,
) -> Result<(UnqualifiedSegment<'a>, char), ParseError> {
for (ix, c) in data[start_index..].char_indices() {
fn parse_unqualified_segment(data: &str) -> Result<(&str, Option<char>), ParseError> {
for (ix, c) in data.char_indices() {
match c {
'/' if ix == 0 => fail!("Unexpected / at start of unqualified segment"),
';' if ix == 0 => fail!("Unexpected ; at start of unqualified segment"),
'/' | ';' => {
let name = match data {
Cow::Borrowed(borrowed_str) => {
Cow::Borrowed(&borrowed_str[start_index..start_index + ix])
}
Cow::Owned(ref owned_str) => {
Cow::Owned(owned_str[start_index..start_index + ix].to_string())
}
};
let segment = UnqualifiedSegment { name };
return Ok((segment, c));
}
'/' if ix == 0 => fail!("Unexpected '/' at start of unqualified segment"),
';' if ix == 0 => fail!("Unexpected ';' at start of unqualified segment"),
'/' | ';' => return Ok((&data[0..ix], Some(c))),
'.' | '[' | '<' | '>' => fail!("Disallowed character in unqualified segment"),
_ => (),
};
}
fail!("Unterminated unqualified segment");
Ok((data, None))
}

#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct ClassName<'a> {
pub segments: Vec<UnqualifiedSegment<'a>>,
/// Represents a valid binary class or interface name in the syntax of
/// the [JVM Spec](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.2.1).
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct ClassName<'a>(Cow<'a, str>);

impl<'a> TryFrom<Cow<'a, str>> for ClassName<'a> {
    type Error = ParseError;

    /// Validates `value` as a class name and wraps it without copying.
    ///
    /// `value` (as a whole) must consist of a sequence of one or more
    /// non-empty unqualified segments separated by single `/` characters.
    ///
    /// # Errors
    ///
    /// Returns a `ParseError` if any segment is empty (this includes an
    /// empty input and a trailing `/`), if the name contains `;`, or if
    /// `parse_unqualified_segment` rejects a segment.
    fn try_from(value: Cow<'a, str>) -> Result<Self, Self::Error> {
        let mut index = 0;
        loop {
            match parse_unqualified_segment(&value[index..])? {
                // The segment parser only rejects an empty segment when it is
                // followed by a separator, so an empty *final* segment (as in
                // "" or "a/b/") has to be caught here.
                (segment, None) if segment.is_empty() => fail!("Empty segment in class name"),
                (_, None) => break,
                (_, Some(';')) => fail!("Disallowed ';' in class name"),
                // Step over the segment and its trailing '/' separator.
                (segment, Some('/')) => index += segment.len() + 1,
                // The segment parser only ever reports '/' or ';' as terminators.
                _ => unreachable!("Got unexpected return value from parse_unqualified_segment"),
            }
        }
        Ok(Self(value))
    }
}

impl<'a> From<ClassName<'a>> for Cow<'a, str> {
fn from(value: ClassName<'a>) -> Self {
value.0
}
}

impl ClassName<'_> {
impl<'a> Deref for ClassName<'a> {
type Target = str;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl<'a> ClassName<'a> {
fn byte_len(&self) -> usize {
self.segments
.iter()
.fold(0, |sum, segment| sum + segment.name.len() + 1)
self.0.len()
}
}

impl<'a> fmt::Display for ClassName<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
let segments: Vec<Cow<'a, str>> = self.segments.iter().map(|s| s.name.clone()).collect();
write!(f, "{}", segments.join("/"))
write!(f, "{}", self.0)
}
}

Expand All @@ -65,19 +78,21 @@ fn parse_class_descriptor<'a>(
data: &Cow<'a, str>,
index: usize,
) -> Result<ClassName<'a>, ParseError> {
let mut segments = vec![];
let mut remaining_index = index;
let mut next_index = index;
loop {
match parse_unqualified_segment(data, remaining_index)? {
(segment, ';') => {
segments.push(segment);
return Ok(ClassName { segments });
}
(segment, '/') => {
remaining_index += segment.name.len() + 1;
segments.push(segment);
continue;
match parse_unqualified_segment(&data[next_index..])? {
(segment, Some(';')) => {
return Ok(ClassName(match data {
Cow::Borrowed(data) => {
Cow::Borrowed(&data[index..(next_index + segment.len())])
}
Cow::Owned(data) => {
Cow::Owned(data[index..(next_index + segment.len())].to_string())
}
}))
}
(segment, Some('/')) => next_index += segment.len() + 1,
(_, None) => fail!("Unterminated unqualified segment"),
_ => panic!("Got unexpected return value from parse_unqualified_segment"),
}
}
Expand All @@ -99,7 +114,8 @@ pub enum FieldType<'a> {
impl FieldType<'_> {
fn byte_len(&self) -> usize {
match self {
FieldType::Object(class_name) => 1 + class_name.byte_len(),
// +1 for the beginning 'L'; +1 for the terminating ';'
FieldType::Object(class_name) => 2 + class_name.byte_len(),
_ => 1,
}
}
Expand All @@ -123,6 +139,7 @@ impl fmt::Display for FieldType<'_> {

/// A field descriptor: a field type together with an array dimension count.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct FieldDescriptor<'a> {
/// The number of array dimensions for array types (non-zero); `0` for
/// non-array types.
pub dimensions: u8,
/// The field's type.
pub field_type: FieldType<'a>,
}
Expand Down Expand Up @@ -487,19 +504,7 @@ mod tests {
parameters.next().unwrap(),
FieldDescriptor {
dimensions: 0,
field_type: FieldType::Object(ClassName {
segments: vec![
UnqualifiedSegment {
name: Cow::Borrowed("java")
},
UnqualifiedSegment {
name: Cow::Borrowed("lang")
},
UnqualifiedSegment {
name: Cow::Borrowed("Object")
},
],
}),
field_type: FieldType::Object(ClassName("java/lang/Object".into())),
},
);
assert!(parameters.next().is_none());
Expand All @@ -521,19 +526,7 @@ mod tests {
parameters.next().unwrap(),
FieldDescriptor {
dimensions: 0,
field_type: FieldType::Object(ClassName {
segments: vec![
UnqualifiedSegment {
name: Cow::Borrowed("java")
},
UnqualifiedSegment {
name: Cow::Borrowed("lang")
},
UnqualifiedSegment {
name: Cow::Borrowed("Object")
},
],
}),
field_type: FieldType::Object(ClassName("java/lang/Object".into())),
},
);
assert!(parameters.next().is_none());
Expand All @@ -552,38 +545,14 @@ mod tests {
parameters.next().unwrap(),
FieldDescriptor {
dimensions: 0,
field_type: FieldType::Object(ClassName {
segments: vec![
UnqualifiedSegment {
name: Cow::Borrowed("java")
},
UnqualifiedSegment {
name: Cow::Borrowed("lang")
},
UnqualifiedSegment {
name: Cow::Borrowed("Object")
},
],
}),
field_type: FieldType::Object(ClassName("java/lang/Object".into())),
},
);
assert_eq!(
parameters.next().unwrap(),
FieldDescriptor {
dimensions: 0,
field_type: FieldType::Object(ClassName {
segments: vec![
UnqualifiedSegment {
name: Cow::Borrowed("java")
},
UnqualifiedSegment {
name: Cow::Borrowed("lang")
},
UnqualifiedSegment {
name: Cow::Borrowed("String")
},
],
}),
field_type: FieldType::Object(ClassName("java/lang/String".into())),
},
);
assert!(parameters.next().is_none());
Expand All @@ -606,19 +575,7 @@ mod tests {
return_type,
ReturnDescriptor::Return(FieldDescriptor {
dimensions: 0,
field_type: FieldType::Object(ClassName {
segments: vec![
UnqualifiedSegment {
name: Cow::Borrowed("java")
},
UnqualifiedSegment {
name: Cow::Borrowed("lang")
},
UnqualifiedSegment {
name: Cow::Borrowed("Object")
},
],
}),
field_type: FieldType::Object(ClassName("java/lang/Object".into())),
}),
);

Expand Down Expand Up @@ -696,4 +653,14 @@ mod tests {
assert!(parse_method_descriptor(&chars_ok, 0).is_ok());
assert!(parse_method_descriptor(&chars_bad, 0).is_err());
}

#[test]
fn test_classname_parsing() {
    // A well-formed name: non-empty segments separated by single slashes.
    let accepted = ClassName::try_from(Cow::Borrowed("java/lang/Object"));
    assert!(accepted.is_ok());

    // Every one of these must be rejected.
    let rejected = [
        "/bad/classname",       // leading '/'
        "another//bad/one",     // empty segment in the middle
        "yet/another/bad/one;", // trailing ';'
        "also/bogus;one",       // embedded ';'
        "Ldefinitely/not/ok;",  // descriptor syntax, not a bare class name
    ];
    for name in rejected.iter() {
        assert!(ClassName::try_from(Cow::Borrowed(*name)).is_err());
    }
}
}
34 changes: 20 additions & 14 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pub mod names;

use std::borrow::Cow;
use std::collections::HashSet;
use std::convert::TryFrom;
use std::ops::Deref;

#[cfg(not(feature = "threadsafe"))]
Expand All @@ -30,7 +31,7 @@ use crate::constant_pool::{
ConstantPoolIter,
};
use crate::descriptors::{
parse_field_descriptor, parse_method_descriptor, FieldDescriptor, MethodDescriptor,
parse_field_descriptor, parse_method_descriptor, ClassName, FieldDescriptor, MethodDescriptor,
ReturnDescriptor,
};
pub use crate::error::ParseError;
Expand Down Expand Up @@ -94,12 +95,15 @@ fn read_interfaces<'a>(
bytes: &'a [u8],
ix: &mut usize,
pool: &[CafeRc<ConstantPoolEntry<'a>>],
) -> Result<Vec<Cow<'a, str>>, ParseError> {
) -> Result<Vec<ClassName<'a>>, ParseError> {
let count = read_u2(bytes, ix)?;
let mut interfaces = Vec::with_capacity(count.into());
for i in 0..count {
interfaces
.push(read_cp_classinfo(bytes, ix, pool).map_err(|e| err!(e, "interface {}", i))?);
interfaces.push(
read_cp_classinfo(bytes, ix, pool)
.and_then(ClassName::try_from)
.map_err(|e| err!(e, "interface {}", i))?,
);
}
Ok(interfaces)
}
Expand Down Expand Up @@ -320,9 +324,9 @@ pub struct ClassFile<'a> {
pub minor_version: u16,
constant_pool: Vec<CafeRc<ConstantPoolEntry<'a>>>,
pub access_flags: ClassAccessFlags,
pub this_class: Cow<'a, str>,
pub super_class: Option<Cow<'a, str>>,
pub interfaces: Vec<Cow<'a, str>>,
pub this_class: ClassName<'a>,
pub super_class: Option<ClassName<'a>>,
pub interfaces: Vec<ClassName<'a>>,
pub fields: Vec<FieldInfo<'a>>,
pub methods: Vec<MethodInfo<'a>>,
pub attributes: Vec<AttributeInfo<'a>>,
Expand Down Expand Up @@ -350,11 +354,11 @@ impl Default for ParseOptions {

impl ParseOptions {
/// Turns on or off parsing of bytecode from the Code attributes of methods. If parsing
/// is enabled, the CodeData structure's optional bytecode field will be populated
/// (or parsing will fail entirely if bytecode parsing failed). If parsing is disabled,
/// the CodeData structure's optional bytecode field will be set to None. Parsing is
/// enabled by default, but can be disabled to speed up parsing in cases where the
/// parsed bytecode is not needed.
/// is enabled, the [CodeData](crate::attributes::CodeData) structure's optional bytecode
/// field will be populated (or parsing will fail entirely if bytecode parsing failed).
/// If parsing is disabled, the [CodeData](crate::attributes::CodeData) structure's optional
/// bytecode field will be set to None. Parsing is enabled by default, but can be disabled
/// to speed up parsing in cases where the parsed bytecode is not needed.
pub fn parse_bytecode(&mut self, parse: bool) -> &mut ParseOptions {
self.parse_bytecode = parse;
self
Expand Down Expand Up @@ -394,9 +398,11 @@ pub fn parse_class_with_options<'a>(
);
}
}
let this_class =
read_cp_classinfo(raw_bytes, &mut ix, &constant_pool).map_err(|e| err!(e, "this_class"))?;
let this_class = read_cp_classinfo(raw_bytes, &mut ix, &constant_pool)
.and_then(ClassName::try_from)
.map_err(|e| err!(e, "this_class"))?;
let super_class = read_cp_classinfo_opt(raw_bytes, &mut ix, &constant_pool)
.and_then(|name| name.map(ClassName::try_from).transpose())
.map_err(|e| err!(e, "super_class"))?;
let interfaces = read_interfaces(raw_bytes, &mut ix, &constant_pool)?;
let fields = read_fields(raw_bytes, &mut ix, &constant_pool, opts)?;
Expand Down