From 30fe5036d498537d305dd97035bb0db8aafe9020 Mon Sep 17 00:00:00 2001 From: VegOwOtenks Date: Fri, 30 Aug 2024 15:33:54 +0200 Subject: [PATCH] Code cleanup --- src/accessmasks.rs | 107 +++++++++++++ src/bytecode.rs | 115 +++++++++++--- src/classfile.rs | 374 +++++++++++++++++++++++++++++---------------- src/classstore.rs | 118 ++++++++++++++ src/jvm.rs | 62 ++++++++ src/main.rs | 12 +- src/stackframe.rs | 61 ++++++++ 7 files changed, 691 insertions(+), 158 deletions(-) create mode 100644 src/accessmasks.rs create mode 100644 src/classstore.rs create mode 100644 src/jvm.rs create mode 100644 src/stackframe.rs diff --git a/src/accessmasks.rs b/src/accessmasks.rs new file mode 100644 index 0000000..bc5f5b1 --- /dev/null +++ b/src/accessmasks.rs @@ -0,0 +1,107 @@ +use core::fmt::{Display, Formatter, Debug}; + +#[derive(Debug, Copy, Clone)] +pub enum MethodAccessFlag { + Public = 0x0001, // Declared public; may be accessed from outside its package. + Private = 0x0002, // Declared private; accessible only within the defining class and other classes belonging to the same nest (§5.4.4). + Protected = 0x0004, // Declared protected; may be accessed within subclasses. + Static = 0x0008, // Declared static. + Final = 0x0010, // Declared final; must not be overridden (§5.4.5). + Synchronized = 0x0020, // Declared synchronized; invocation is wrapped by a monitor use. + Bridge = 0x0040, // A bridge method, generated by the compiler. + Varargs = 0x0080, // Declared with variable number of arguments. + Native = 0x0100, // Declared native; implemented in a language other than the Java programming language. + Abstract = 0x0400, // Declared abstract; no implementation is provided. + Strict = 0x0800, // In a class file whose major version number is at least 46 and at most 60: Declared strict. + Synthetic = 0x1000, // Declared synthetic; not present in the source code. +} + +pub struct MethodAccessFlagMask { + pub mask: u16, +} + +impl Debug for MethodAccessFlagMask { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + let mut flag_vec = Vec::new(); + + let flags = [MethodAccessFlag::Public,MethodAccessFlag::Private,MethodAccessFlag::Protected,MethodAccessFlag::Static,MethodAccessFlag::Final,MethodAccessFlag::Synchronized,MethodAccessFlag::Bridge,MethodAccessFlag::Varargs,MethodAccessFlag::Native,MethodAccessFlag::Abstract,MethodAccessFlag::Strict,MethodAccessFlag::Synthetic]; + + for flag in flags { + if (flag as u16 & self.mask) != 0 { + flag_vec.push(flag) + } + } + + f.debug_list().entries(flag_vec) + .finish() + } +} + +#[derive(Debug, Copy, Clone)] +pub enum ClassAccessFlag { + Public = 0x0001, // Declared public; may be accessed from outside its package. + Final = 0x0010, // Declared final; no subclasses allowed. + Super = 0x0020, // Treat superclass methods specially when invoked by the invokespecial instruction. + Interface = 0x0200, // Is an interface, not a class. + Abstract = 0x0400, // Declared abstract; must not be instantiated. + Synthetic = 0x1000, // Declared synthetic; not present in the source code. + Annotation = 0x2000, // Declared as an annotation interface. + Enum = 0x4000, // Declared as an enum class. + Module = 0x8000, // Is a module, not a class or interface. +} + +pub struct ClassAccessFlagMask { + pub mask: u16, +} + +impl Debug for ClassAccessFlagMask { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + let mut flag_vec = Vec::new(); + + let flags = [ClassAccessFlag::Public,ClassAccessFlag::Final,ClassAccessFlag::Super,ClassAccessFlag::Interface,ClassAccessFlag::Abstract,ClassAccessFlag::Synthetic,ClassAccessFlag::Annotation,ClassAccessFlag::Enum,ClassAccessFlag::Module]; + for flag in flags { + if (flag as u16 & self.mask) != 0 { + flag_vec.push(flag) + } + } + + f.debug_list().entries(flag_vec) + .finish() + } +} + +#[derive(Debug, Clone, Copy)] +#[repr(u16)] +pub enum FieldAccessFlag { + Public = 0x0001, // Declared public; may be accessed from outside its package. + Private = 0x0002, // Declared private; accessible only within the defining class and other classes belonging to the same nest (§5.4.4). + Protected = 0x0004, // Declared protected; may be accessed within subclasses. + Static = 0x0008, // Declared static. + Final = 0x0010, // Declared final; never directly assigned to after object construction (JLS §17.5). + Volatile = 0x0040, // Declared volatile; cannot be cached. + Transient = 0x0080, // Declared transient; not written or read by a persistent object manager. + Synthetic = 0x1000, // Declared synthetic; not present in the source code. + Enum = 0x4000, // Declared as an element of an enum class. + } + + pub struct FieldAccessFlagMask { + pub mask: u16 + } + +impl Debug for FieldAccessFlagMask { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + let mut flag_vec = Vec::new(); + + let flags = [FieldAccessFlag::Public,FieldAccessFlag::Private,FieldAccessFlag::Protected,FieldAccessFlag::Static,FieldAccessFlag::Final,FieldAccessFlag::Volatile,FieldAccessFlag::Transient,FieldAccessFlag::Synthetic,FieldAccessFlag::Enum,]; + + for flag in flags { + if (flag as u16 & self.mask) != 0 { + flag_vec.push(flag) + } + } + + f.debug_list().entries(flag_vec) + .finish() + } +} + diff --git a/src/bytecode.rs b/src/bytecode.rs index 78ac273..67d17e9 100644 --- a/src/bytecode.rs +++ b/src/bytecode.rs @@ -1,5 +1,4 @@ use core::fmt::Debug; -use core::fmt; pub struct Bytecode { pub code: Box<[u8]> @@ -14,20 +13,57 @@ impl Bytecode { let opcode = self.code[i]; let (instruction, offset) = match opcode { + 0x00 => (Instruction::NoOperation(), 1), + 0x01 => (Instruction::StoreIntoIntArray(), 1), + 0x02 => (Instruction::PushConstIntM1(), 1), + 0x03 => (Instruction::PushConstInt0(), 1), + 0x04 => (Instruction::PushConstInt1(), 1), + 0x05 => (Instruction::PushConstInt2(), 1), + 0x06 => (Instruction::PushConstInt3(), 1), + 0x07 => (Instruction::PushConstInt4(), 1), + 0x08 => (Instruction::PushConstInt5(), 1), + 0x0E => (Instruction::PushConstDouble0(), 1), + 0x0F => (Instruction::PushConstDouble1(), 1), + + 0x11 => (Instruction::LoadShortImmediate((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), 0x12 => (Instruction::LoadConstant(self.code[i+1]), 2), + 0x14 => (Instruction::LoadConstant64((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), + + 0x26 => (Instruction::LoadDouble0(), 1), + 0x27 => (Instruction::LoadDouble1(), 1), + 0x28 => (Instruction::LoadDouble2(), 1), + 0x29 => (Instruction::LoadDouble3(), 1), 0x2A => (Instruction::LoadReference0(), 1), 0x2B => (Instruction::LoadReference1(), 1), 0x2C => (Instruction::LoadReference2(), 1), 0x2D => (Instruction::LoadReference3(), 1), + + 0x4B => (Instruction::StoreReference0(), 1), + 0x4C => (Instruction::StoreReference1(), 1), + 0x4D => (Instruction::StoreReference2(), 1), + 0x4E => (Instruction::StoreReference3(), 1), + + 0x57 => (Instruction::Pop(), 1), 0x59 => (Instruction::Duplicate(), 1), + + 0x6D => (Instruction::DivideLong(), 1), + + 0x7A => (Instruction::ShiftIntRight(), 1), + + 0x80 => (Instruction::OrInt(), 1), + + 0xAC => (Instruction::ReturnInt(), 1), + 0xB0 => (Instruction::ReturnReference(), 1), 0xB1 => (Instruction::ReturnVoid(), 1), - 0xB2 => (Instruction::GetStatic(self.code[i+1], self.code[i+2]), 3), - 0xB4 => (Instruction::GetField(self.code[i+1], self.code[i+2]), 3), - 0xB5 => (Instruction::PutField(self.code[i+1], self.code[i+2]), 3), - 0xB6 => (Instruction::InvokeVirtual(self.code[i+1], self.code[i+2]), 3), - 0xB7 => (Instruction::InvokeSpecial(self.code[i+1], self.code[i+2]), 3), - 0xBB => (Instruction::NewObject(self.code[i+1], self.code[i+2]), 3), + 0xB2 => (Instruction::GetStatic((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), + 0xB3 => (Instruction::PutStatic((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), + 0xB4 => (Instruction::GetField((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), + 0xB5 => (Instruction::PutField((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), + 0xB6 => (Instruction::InvokeVirtual((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), + 0xB7 => (Instruction::InvokeSpecial((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), + 0xBA => (Instruction::InvokeDynamic((self.code[i+1] as u16) << 8 | self.code[i+2] as u16, (self.code[i+3] as u16) << 8 | self.code[i+4] as u16), 5), + 0xBB => (Instruction::NewObject((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), _ => (Instruction::Unknown(opcode), 1) }; @@ -50,19 +86,56 @@ impl Debug for Bytecode { #[derive(Debug)] #[repr(u8)] pub enum Instruction { - LoadConstant(u8) = 0x12, // Push from constant pool - LoadReference0() = 0x2A, // Load local variable reference onto stack - LoadReference1() = 0x2B, // Load local variable reference onto stack - LoadReference2() = 0x2C, // Load local variable reference onto stack - LoadReference3() = 0x2D, // Load local variable reference onto stack - Duplicate() = 0x59, // duplicate top stack value - ReturnReference() = 0xB0, // return top-ref from current function - ReturnVoid() = 0xB1, // return void from function - GetStatic(u8, u8) = 0xB2, // get static field from class - GetField(u8, u8) = 0xB4, // get field from class - PutField(u8, u8) = 0xB5, // set field to a value - InvokeVirtual(u8, u8) = 0xB6, // invoke function on a class - InvokeSpecial(u8, u8) = 0xB7, // invoke instance method - NewObject(u8, u8) = 0xBB, // Create a new object from a constant-pool reference + NoOperation() = 0x00, // No-Operation + StoreIntoIntArray() = 0x01, // ..., arrayref, index, value + PushConstIntM1() = 0x02, // Push -1 + PushConstInt0() = 0x03, // Push 0 + PushConstInt1() = 0x04, // Push 1 + PushConstInt2() = 0x05, // Push 2 + PushConstInt3() = 0x06, // Push 3 + PushConstInt4() = 0x07, // Push 4 + PushConstInt5() = 0x08, // Push 5 + PushConstDouble0() = 0x0E, // Push 0.0 + PushConstDouble1() = 0x0F, // Push 1.0 + + LoadShortImmediate(u16) = 0x11, // push immediate short + LoadConstant(u8) = 0x12, // Push from constant pool + LoadConstant64(u16) = 0x14, // Push Long or Double from constant pool + + LoadDouble0() = 0x26, // Load local double variable reference onto stack + LoadDouble1() = 0x27, // Load local double variable reference onto stack + LoadDouble2() = 0x28, // Load local double variable reference onto stack + LoadDouble3() = 0x29, // Load local double variable reference onto stack + LoadReference0() = 0x2A, // Load local reference variable reference onto stack + LoadReference1() = 0x2B, // Load local reference variable reference onto stack + LoadReference2() = 0x2C, // Load local reference variable reference onto stack + LoadReference3() = 0x2D, // Load local reference variable reference onto stack + + StoreReference0() = 0x4B, // store reference into local variable + StoreReference1() = 0x4C, // store reference into local variable + StoreReference2() = 0x4D, // store reference into local variable + StoreReference3() = 0x4E, // store reference into local variable + + Pop() = 0x57, // Pop top stack value + Duplicate() = 0x59, // duplicate top stack value + + DivideLong() = 0x6D, // long division + + ShiftIntRight() = 0x7a, // shift int + + OrInt() = 0x80, // value, value => or + + ReturnInt() = 0xAC, // return integer from function + + ReturnReference() = 0xB0, // return top-ref from current function + ReturnVoid() = 0xB1, // return void from function + GetStatic(u16) = 0xB2, // get static field from class + PutStatic(u16) = 0xB3, // set static field on class + GetField(u16) = 0xB4, // get field from class + PutField(u16) = 0xB5, // set field to a value + InvokeVirtual(u16) = 0xB6, // invoke function on a class + InvokeSpecial(u16) = 0xB7, // invoke instance method + InvokeDynamic(u16, u16) = 0xBA, // invoke dynamic function + NewObject(u16) = 0xBB, // Create a new object from a constant-pool reference Unknown(u8), } diff --git a/src/classfile.rs b/src/classfile.rs index 0c015be..de94e93 100644 --- a/src/classfile.rs +++ b/src/classfile.rs @@ -4,6 +4,7 @@ use core::fmt::{Display, Formatter, Debug}; use core::str::Utf8Error; use crate::bytecode::Bytecode; +use crate::accessmasks::*; #[derive(Debug)] pub enum Error { @@ -48,24 +49,23 @@ impl From for Error { #[derive(Debug)] pub struct JavaClassFile { - minor_version: u16, - major_version: u16, + pub minor_version: u16, + pub major_version: u16, - constant_pool: Box<[ConstantPoolInfo]>, - - access_flags: ClassAccessFlagMask, - - this_class: u16, - super_class: u16, - - interfaces: Box<[u16]>, - - fields: Box<[FieldInfo]>, - - methods: Box<[MethodInfo]>, - - attributes: Box<[AttributeInfo]>, + pub constant_pool: Box<[ConstantPoolInfo]>, + + pub access_flags: ClassAccessFlagMask, + + pub this_class: u16, + pub super_class: u16, + + pub interfaces: Box<[u16]>, + + pub fields: Box<[FieldInfo]>, + pub methods: Box<[MethodInfo]>, + + pub attributes: Box<[AttributeInfo]>, } impl JavaClassFile { @@ -160,6 +160,35 @@ impl JavaClassFile { } ) } + + pub fn get_classname(&self) -> Result { + let class_info_entry = pool_entry(&self.constant_pool, self.this_class as usize)?; + + let class_info_entry = match class_info_entry { + ConstantPoolInfo::Class(data) => data, + _ => return Err(Error::BadFileError(format!("Invalid this_class index, expected index to ClassInfo but found {:?}", class_info_entry))) + }; + + let name_entry = pool_entry(&self.constant_pool, class_info_entry.name_index.into())?; + let name_entry = match name_entry { + ConstantPoolInfo::Utf8(utf8data) => utf8data, + _ => return Err(Error::BadFileError(format!("Invalid name_index class_info from this_class, expected index to Utf8 but found {:?}", name_entry))) + }; + + return Ok(name_entry.utf8.clone()); + } + + + pub fn find_method_index(&self, name: &String) -> Option { + + for (index, method_info) in self.methods.iter().enumerate() { + if method_info.name == *name { + return Some(index); + } + } + + return None; + } } #[derive(Debug)] @@ -432,57 +461,54 @@ impl ConstantPoolInfo { } } -#[derive(Debug, Clone, Copy)] -#[repr(u16)] -pub enum FieldAccessFlag { - Public = 0x0001, // Declared public; may be accessed from outside its package. - Private = 0x0002, // Declared private; accessible only within the defining class and other classes belonging to the same nest (§5.4.4). - Protected = 0x0004, // Declared protected; may be accessed within subclasses. - Static = 0x0008, // Declared static. - Final = 0x0010, // Declared final; never directly assigned to after object construction (JLS §17.5). - Volatile = 0x0040, // Declared volatile; cannot be cached. - Transient = 0x0080, // Declared transient; not written or read by a persistent object manager. - Synthetic = 0x1000, // Declared synthetic; not present in the source code. - Enum = 0x4000, // Declared as an element of an enum class. - } - - pub struct FieldAccessFlagMask { - mask: u16 - } - -impl Debug for FieldAccessFlagMask { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { - let mut flag_vec = Vec::new(); - - let flags = [FieldAccessFlag::Public,FieldAccessFlag::Private,FieldAccessFlag::Protected,FieldAccessFlag::Static,FieldAccessFlag::Final,FieldAccessFlag::Volatile,FieldAccessFlag::Transient,FieldAccessFlag::Synthetic,FieldAccessFlag::Enum,]; - - for flag in flags { - if (flag as u16 & self.mask) != 0 { - flag_vec.push(flag) - } - } - - f.debug_list().entries(flag_vec) - .finish() - } -} - #[derive(Debug)] pub struct FieldInfo { access_flags: FieldAccessFlagMask, - name_index: u16, - descriptor_index: u16, + name: String, + descriptor: AbstractTypeDescription, attributes: Box<[AttributeInfo]>, } impl FieldInfo { fn from_reader(reader: &mut dyn Read, pool: &Box<[ConstantPoolInfo]>) -> Result { + let access_flags = FieldAccessFlagMask { mask: read_u16(reader)? }; + let name = { + let name_index = read_u16(reader)?; + + let name_entry = pool_entry(pool, name_index.into())?; + + match name_entry { + ConstantPoolInfo::Utf8(utf8info) => utf8info.utf8.clone(), + _ => return Err(Error::BadFileError(format!("Bad index into constant pool, expected type Utf8 but found {:?}", name_entry))), + } + }; + let descriptor: AbstractTypeDescription = { + let descriptor_index = read_u16(reader)?; + + let descriptor_entry = pool_entry(pool, descriptor_index.into())?; + + match descriptor_entry { + ConstantPoolInfo::Utf8(utf8info) => { + let borrow = &utf8info.utf8; + + let (length_parsed, type_desc) = AbstractTypeDescription::parse_first(borrow)?; + if length_parsed != borrow.len() { + Err(Error::BadFileError(format!("Bad field descriptor found: {}", borrow)))? + } + + type_desc + } + _ => return Err(Error::BadFileError(format!("Bad index into constant pool, expected type Utf8 but found {:?}", descriptor_entry))), + } + }; + let attributes = AttributeInfo::array_from_reader(reader, pool, true)?; + Ok( FieldInfo { - access_flags: FieldAccessFlagMask { mask: read_u16(reader)? }, - name_index: read_u16(reader)?, - descriptor_index: read_u16(reader)?, - attributes: AttributeInfo::array_from_reader(reader, pool, true)?, + access_flags, + name, + descriptor, + attributes } ) } @@ -563,11 +589,11 @@ impl ExceptionTableEntry { #[derive(Debug)] pub struct CodeAttributeData { - max_stack: u16, - max_locals: u16, - code: Bytecode, - exception_table: Box<[ExceptionTableEntry]>, - attributes: Box<[AttributeInfo]>, + pub max_stack: u16, + pub max_locals: u16, + pub code: Bytecode, + pub exception_table: Box<[ExceptionTableEntry]>, + pub attributes: Box<[AttributeInfo]>, } impl CodeAttributeData { @@ -716,8 +742,8 @@ pub enum AttributeData { #[derive(Debug)] pub struct AttributeInfo { - attribute_name_index: u16, - data: AttributeData + pub attribute_name_index: u16, + pub data: AttributeData } impl AttributeInfo { @@ -788,94 +814,163 @@ impl AttributeInfo { } } -#[derive(Debug)] -pub struct MethodInfo { - access_flags: MethodAccessFlagMask, - name_index: u16, - descriptor_index: u16, - attributes: Box<[AttributeInfo]>, +#[repr(u8)] +#[derive(Debug)] +pub enum AbstractTypeKind { + Void() = b'V', // void + Byte() = b'B', // signed byte + Char() = b'C', // Unicode character code point in the Basic Multilingual Plane, encoded with UTF-16 + Double() = b'D', // double-precision floating-point value + Float() = b'F', // single-precision floating-point value + Int() = b'I', // integer + Long() = b'J', // long integer + Classname(String) = b'L', // an instance of class ClassName + Short() = b'S', // signed short + Boolean() = b'Z', // true or false } -impl MethodInfo { - fn from_reader(reader: &mut dyn Read, pool: &Box<[ConstantPoolInfo]>) -> Result { +#[derive(Debug)] +pub struct AbstractTypeDescription { + array_level: u8, + kind: AbstractTypeKind, +} + +impl AbstractTypeDescription { + fn parse_first(s: &str) -> Result<(usize, Self), Error> { + let mut offset: usize = 0; + let arrays_parsed = s.trim_start_matches("["); + let array_level = (s.len() - arrays_parsed.len()).try_into(); + let array_level: u8 = match array_level { + Ok(s) => s, + Err(_e) => return Err(Error::BadFileError(format!("Too many array levels in method descriptor! Max is 255 but found {}", s.len() - arrays_parsed.len()))), + }; + offset += array_level as usize; + + let type_char = arrays_parsed.chars().nth(0).ok_or(Error::BadFileError("Missing type char in method descriptor".to_string()))?; + offset += 1; + let kind = match type_char { + 'B' => AbstractTypeKind::Byte(), + 'C' => AbstractTypeKind::Char(), + 'D' => AbstractTypeKind::Double(), + 'F' => AbstractTypeKind::Float(), + 'I' => AbstractTypeKind::Int(), + 'J' => AbstractTypeKind::Long(), + 'S' => AbstractTypeKind::Short(), + 'Z' => AbstractTypeKind::Boolean(), + 'V' => AbstractTypeKind::Void(), + 'L' => { + let semicolon_index = s.get(offset..).unwrap().find(";").ok_or(Error::BadFileError(format!("Missing ';' in type descriptor: {}", s)))?; + let classname_start = offset; + let classname_end = offset + semicolon_index; + let classname_string = s.get(classname_start..classname_end).unwrap(); + + offset += classname_string.len() + 1; + + AbstractTypeKind::Classname(classname_string.to_string()) + } + _ => return Err(Error::BadFileError(format!("Invalid Type character: '{}' in string \"{}\"", type_char, s))), + }; + + return Ok((offset, AbstractTypeDescription { array_level, kind })) + } +} + +#[derive(Debug)] +pub struct MethodDescriptor { + argument_types: Box<[AbstractTypeDescription]>, + return_type: AbstractTypeDescription, +} + +impl TryFrom<&String> for MethodDescriptor { + type Error = Error; + + fn try_from(s: &String) -> Result { + + let mut total_offset: usize = 0; + s.strip_prefix("(") + .ok_or(Error::BadFileError(format!("Bad method descriptor: '{}'", s)))?; + total_offset += 1; + + let mut args = Vec::new(); + + while ! s.get(total_offset..).unwrap().starts_with(")") { + let (offset, arg_type) = AbstractTypeDescription::parse_first(s.get(total_offset..).unwrap())?; + + total_offset += offset; + args.push(arg_type); + } + + s.get(total_offset..).unwrap() + .strip_prefix(")") + .ok_or(Error::BadFileError(format!("Bad method descriptor")))?; + total_offset += 1; + + + let (offset, return_type) = AbstractTypeDescription::parse_first(s.get(total_offset..).unwrap())?; + if offset != s.get(total_offset..).unwrap().len() { + return Err(Error::BadFileError(format!("Trailing characters in method descriptor string: \"{}\"", s))) + } + Ok( - MethodInfo { - access_flags: MethodAccessFlagMask { mask: read_u16(reader)? }, - name_index: read_u16(reader)?, - descriptor_index: read_u16(reader)?, - attributes: AttributeInfo::array_from_reader(reader, pool, true)? + MethodDescriptor { + argument_types: args.into_boxed_slice(), + return_type, } ) } } -#[derive(Debug, Copy, Clone)] -pub enum MethodAccessFlag { - Public = 0x0001, // Declared public; may be accessed from outside its package. - Private = 0x0002, // Declared private; accessible only within the defining class and other classes belonging to the same nest (§5.4.4). - Protected = 0x0004, // Declared protected; may be accessed within subclasses. - Static = 0x0008, // Declared static. - Final = 0x0010, // Declared final; must not be overridden (§5.4.5). - Synchronized = 0x0020, // Declared synchronized; invocation is wrapped by a monitor use. - Bridge = 0x0040, // A bridge method, generated by the compiler. - Varargs = 0x0080, // Declared with variable number of arguments. - Native = 0x0100, // Declared native; implemented in a language other than the Java programming language. - Abstract = 0x0400, // Declared abstract; no implementation is provided. - Strict = 0x0800, // In a class file whose major version number is at least 46 and at most 60: Declared strictfp. - Synthetic = 0x1000, // Declared synthetic; not present in the source code. +#[derive(Debug)] +pub struct MethodInfo { + pub access_flags: MethodAccessFlagMask, + pub name: String, + pub descriptor: MethodDescriptor, + pub code_attribute_index: usize, + pub attributes: Box<[AttributeInfo]>, } -pub struct MethodAccessFlagMask { - mask: u16, -} +impl MethodInfo { + fn from_reader(reader: &mut dyn Read, pool: &Box<[ConstantPoolInfo]>) -> Result { + let access_flags = MethodAccessFlagMask { mask: read_u16(reader)? }; + let name = { + let name_index = read_u16(reader)?; -impl Debug for MethodAccessFlagMask { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { - let mut flag_vec = Vec::new(); + let name_entry = pool_entry(pool, name_index.into())?; - let flags = [MethodAccessFlag::Public,MethodAccessFlag::Private,MethodAccessFlag::Protected,MethodAccessFlag::Static,MethodAccessFlag::Final,MethodAccessFlag::Synchronized,MethodAccessFlag::Bridge,MethodAccessFlag::Varargs,MethodAccessFlag::Native,MethodAccessFlag::Abstract,MethodAccessFlag::Strict,MethodAccessFlag::Synthetic]; - - for flag in flags { - if (flag as u16 & self.mask) != 0 { - flag_vec.push(flag) + match name_entry { + ConstantPoolInfo::Utf8(utf8info) => utf8info.utf8.clone(), + _ => return Err(Error::BadFileError(format!("Bad index into constant pool, expected type Utf8 but found {:?}", name_entry))), } - } + }; + let descriptor: MethodDescriptor = { + let descriptor_index = read_u16(reader)?; - f.debug_list().entries(flag_vec) - .finish() - } -} + let descriptor_entry = pool_entry(pool, descriptor_index.into())?; -#[derive(Debug, Copy, Clone)] -pub enum ClassAccessFlag { - Public = 0x0001, // Declared public; may be accessed from outside its package. - Final = 0x0010, // Declared final; no subclasses allowed. - Super = 0x0020, // Treat superclass methods specially when invoked by the invokespecial instruction. - Interface = 0x0200, // Is an interface, not a class. - Abstract = 0x0400, // Declared abstract; must not be instantiated. - Synthetic = 0x1000, // Declared synthetic; not present in the source code. - Annotation = 0x2000, // Declared as an annotation interface. - Enum = 0x4000, // Declared as an enum class. - Module = 0x8000, // Is a module, not a class or interface. -} + match descriptor_entry { + ConstantPoolInfo::Utf8(utf8info) => { + let borrow = &utf8info.utf8; -pub struct ClassAccessFlagMask { - mask: u16, -} - -impl Debug for ClassAccessFlagMask { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { - let mut flag_vec = Vec::new(); - - let flags = [ClassAccessFlag::Public,ClassAccessFlag::Final,ClassAccessFlag::Super,ClassAccessFlag::Interface,ClassAccessFlag::Abstract,ClassAccessFlag::Synthetic,ClassAccessFlag::Annotation,ClassAccessFlag::Enum,ClassAccessFlag::Module]; - for flag in flags { - if (flag as u16 & self.mask) != 0 { - flag_vec.push(flag) + borrow.try_into()? + } + _ => return Err(Error::BadFileError(format!("Bad index into constant pool, expected type Utf8 but found {:?}", descriptor_entry))), } - } + }; + let attributes = AttributeInfo::array_from_reader(reader, pool, true)?; - f.debug_list().entries(flag_vec) - .finish() + let code_attribute_index = attributes.iter() + .position(|info| match info.data {AttributeData::Code(_) => true, _ => false }) + .unwrap_or(attributes.len()); + + Ok( + MethodInfo { + access_flags, + name, + descriptor, + code_attribute_index, + attributes + } + ) } } @@ -953,3 +1048,14 @@ fn read_u8(reader: &mut dyn Read) -> Result { return Ok(u8::from_be_bytes(u8_buffer)); } + +fn pool_entry<'a>(pool: &Box<[ConstantPoolInfo]>, index: usize) -> Result<&ConstantPoolInfo, Error> { + if index == 0 { + return Err(Error::BadFileError(format!("Bad pool index: 0"))); + } + if index - 1 >= pool.len() { + return Err(Error::BadFileError(format!("Bad pool index: {}", index - 1))); + } + + return Ok(&pool[index - 1]); +} diff --git a/src/classstore.rs b/src/classstore.rs new file mode 100644 index 0000000..0868da7 --- /dev/null +++ b/src/classstore.rs @@ -0,0 +1,118 @@ +use core::fmt::{Display, Formatter}; + +use std::collections::HashMap; +use std::error::Error as ErrorTrait; +use std::fs::File; +use std::path::PathBuf; + +use crate::classfile::JavaClassFile; +use crate::classfile; + +#[derive(Debug)] +pub struct ClassStore { + class_ids: HashMap, + classes: Vec, + class_path_fragments: Vec, +} + +#[derive(Debug)] +pub enum Error { + ClassNotFoundError(String), + IOError(std::io::Error), + ClassFileError(String, classfile::Error), +} + +impl From for Error { + fn from(value: std::io::Error) -> Self { + return Error::IOError( + value + ); + } +} + +impl From for Error { + fn from(value: classfile::Error) -> Self { + return Error::ClassFileError( + "An error occured while loading a classfile".to_string(), + value + ); + } +} + +impl ErrorTrait for Error {} +impl Display for Error { + fn fmt(&self, formatter: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + writeln!(formatter, "{self}")?; + if let Some(e) = self.source() { + writeln!(formatter, "\tCaused by: {e:?}")?; + } + Ok(()) + } +} + +impl ClassStore { + pub fn new() -> Self { + let current_dir_path = PathBuf::from("./"); + + ClassStore { + class_ids: HashMap::new(), + classes: Vec::new(), + class_path_fragments: vec![current_dir_path], + } + } + + pub fn load_class_from_file(&mut self, class_file_path: &PathBuf) -> Result { + let mut file_reader = File::open(class_file_path)?; + let classfile = JavaClassFile::new(&mut file_reader)?; + + let classname = classfile.get_classname()?; + self.class_ids.insert(classname.clone(), self.classes.len()); + self.classes.push(classfile); + + return Ok(classname); + } + + pub fn load_class(&mut self, classname: &String) -> Result { + let mut path_buf = PathBuf::new(); + + for class_path in &self.class_path_fragments { + path_buf.push(class_path); + path_buf.push(&classname); + path_buf.set_extension("class"); + + if path_buf.is_file() { + return self.load_class_from_file(&path_buf); + } + }; + + return Err(Error::ClassNotFoundError(format!("Could not find class '{}' in classpath", classname))); + } + + pub fn have_class(&mut self, classname: &String) -> bool { + return self.class_ids.contains_key(classname); + } + + pub fn get_class(&mut self, classname: &String) -> Result<(&JavaClassFile, usize), Error> { + let class_id = self.class_ids.get(classname); + + return match class_id { + Some(id) => Ok((&self.classes[*id], *id)), + None => Err(Error::ClassNotFoundError(format!("Could not locate class '{}'", classname))), + } + } + + pub fn get_or_load_class(&mut self, classname: &String) -> Result<(&JavaClassFile, usize), Error> { + if self.have_class(classname) { + return Ok(self.get_class(classname)?); + } else { + let real_class_name = self.load_class(classname)?; + return Ok(self.get_class(&real_class_name)?); + } + } + + pub fn class_id_from_name(&mut self, classname: &String) -> Option<&usize> { + return self.class_ids.get(classname); + } +} + + diff --git a/src/jvm.rs b/src/jvm.rs new file mode 100644 index 0000000..d85c608 --- /dev/null +++ b/src/jvm.rs @@ -0,0 +1,62 @@ +use core::fmt::{Display, Formatter}; +use std::error::Error as ErrorTrait; + +use crate::classstore; +use crate::classstore::ClassStore; +use crate::stackframe::StackFrame; + +#[derive(Debug)] +pub enum Error { + ClassStoreError(classstore::Error), + BadNameError(String), +} + +impl From for Error { + fn from(value: classstore::Error) -> Self { + return Error::ClassStoreError(value); + } +} + +impl ErrorTrait for Error {} +impl Display for Error { + fn fmt(&self, formatter: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + writeln!(formatter, "{self}")?; + if let Some(e) = self.source() { + writeln!(formatter, "\tCaused by: {e:?}")?; + } + Ok(()) + } +} + +#[derive(Debug)] +pub struct JVM { + class_store: ClassStore, + stack_frames: Vec, +} + +impl JVM { + pub fn new() -> Self { + return JVM { + class_store: ClassStore::new(), + stack_frames: Vec::new(), + } + } + + pub fn load_class(&mut self, name: &String) -> Result { + return self.class_store.load_class(name); + } + + pub fn invoke_static(&mut self, class_name: &String, method_name: &String) -> Result<(), Error> { + + let (class_file, class_index) = self.class_store.get_or_load_class(class_name)?; + + let method_index = class_file.find_method_index(method_name) + .ok_or(Error::BadNameError(format!("Could not find method '{}' in class '{}'", method_name, class_name)))?; + + let new_frame = StackFrame::new(class_file, class_index, method_index.try_into().expect(&format!("Bad method index: {}", method_index))); + + self.stack_frames.push(new_frame); + + return Ok(()); + } +} diff --git a/src/main.rs b/src/main.rs index 48d3164..b79cc82 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,16 @@ -use std::fs::File; mod classfile; +mod classstore; mod bytecode; +mod jvm; +mod stackframe; +mod accessmasks; fn main() { - let class_file = classfile::JavaClassFile::new(&mut File::open("class/Enumerator$EnumeratorIterator.class").unwrap()).unwrap(); + let mut jvm = jvm::JVM::new(); + let loaded_name = jvm.load_class(&"class/Main".to_string()).expect("Could not load class"); - println!("{:#?}", class_file); + jvm.invoke_static(&loaded_name, &"main".to_string()).expect("failed to call main() on supplied class"); + + println!("{:#?}", jvm); } diff --git a/src/stackframe.rs b/src/stackframe.rs new file mode 100644 index 0000000..9e332fa --- /dev/null +++ b/src/stackframe.rs @@ -0,0 +1,61 @@ + +use crate::classfile::{ JavaClassFile, AttributeData }; + +#[derive(Copy, Clone, Debug)] +pub enum LocalVariable { + Boolean(bool), + Byte(u8), + Char(u16), + Short(u16), + Int(u32), + Float(u32), + Reference(u32), + ReturnAddress(u32), + Double0(u32), + Double1(u32), + Long0(u32), + Long1(u32), + Empty(), +} + +#[derive(Debug)] +pub struct OperandStack { + stack: Box<[LocalVariable]>, + depth: u16, +} + +impl OperandStack { + fn new(size: u16) -> Self { + return OperandStack { + stack: vec![LocalVariable::Empty(); size.into()].into_boxed_slice(), + depth: 0, + } + } +} + +#[derive(Debug)] +pub struct StackFrame { + locals: Box<[LocalVariable]>, + operand_stack: OperandStack, + class_id: usize, + method_index: u16, + instruction_pointer: u32, +} + +impl StackFrame { + pub fn new(classfile: &JavaClassFile, class_id: usize, method_index: u16) -> Self { + let method_info = &classfile.methods[method_index as usize]; + let code_data = match &method_info.attributes[method_info.code_attribute_index].data { + AttributeData::Code(data) => data, + _ => unreachable!(), + }; + + StackFrame { + locals: vec![LocalVariable::Empty(); code_data.max_locals.into()].into_boxed_slice(), + operand_stack: OperandStack::new(code_data.max_stack), + class_id, + method_index, + instruction_pointer: 0, + } + } +}