From 2042007242428c8a866535f966b1abeffafd81ce Mon Sep 17 00:00:00 2001 From: VegOwOtenks Date: Mon, 2 Sep 2024 11:28:00 +0200 Subject: [PATCH] Entrypoint call entry method --- src/accessmasks.rs | 37 ++++++++ src/bytecode.rs | 128 ++++++++++++++------------- src/classfile.rs | 160 ++++++++++++++++++++++++++++++--- src/classstore.rs | 41 ++++++--- src/constantpool.rs | 32 +++---- src/heap_area.rs | 39 ++++++++ src/jvm.rs | 210 ++++++++++++++++++++++++++++++++++++++++++-- src/main.rs | 16 +++- src/stackframe.rs | 29 +++--- 9 files changed, 573 insertions(+), 119 deletions(-) create mode 100644 src/heap_area.rs diff --git a/src/accessmasks.rs b/src/accessmasks.rs index 7392803..33b7dbf 100644 --- a/src/accessmasks.rs +++ b/src/accessmasks.rs @@ -1,6 +1,7 @@ use core::fmt::{Formatter, Debug}; #[derive(Debug, Copy, Clone)] +#[repr(u16)] pub enum MethodAccessFlag { Public = 0x0001, // Declared public; may be accessed from outside its package. Private = 0x0002, // Declared private; accessible only within the defining class and other classes belonging to the same nest (§5.4.4). @@ -16,6 +17,25 @@ pub enum MethodAccessFlag { Synthetic = 0x1000, // Declared synthetic; not present in the source code. } +impl MethodAccessFlag { + pub fn discriminant(&self) -> u16 { + return match self { + Self::Public => 0x0001, + Self::Private => 0x0002, + Self::Protected => 0x0004, + Self::Static => 0x0008, + Self::Final => 0x0010, + Self::Synchronized => 0x0020, + Self::Bridge => 0x0040, + Self::Varargs => 0x0080, + Self::Native => 0x0100, + Self::Abstract => 0x0400, + Self::Strict => 0x0800, + Self::Synthetic => 0x1000, + } + } +} + pub struct MethodAccessFlagMask { pub mask: u16, } @@ -38,6 +58,7 @@ impl Debug for MethodAccessFlagMask { } #[derive(Debug, Copy, Clone)] +#[repr(u16)] pub enum ClassAccessFlag { Public = 0x0001, // Declared public; may be accessed from outside its package. Final = 0x0010, // Declared final; no subclasses allowed. 
@@ -50,6 +71,22 @@ pub enum ClassAccessFlag { Module = 0x8000, // Is a module, not a class or interface. } +impl ClassAccessFlag { + pub fn discriminant(&self) -> u16 { + return match self { + Self::Public => 0x0001, // Declared public; may be accessed from outside its package. + Self::Final => 0x0010, // Declared final; no subclasses allowed. + Self::Super => 0x0020, // Treat superclass methods specially when invoked by the invokespecial instruction. + Self::Interface => 0x0200, // Is an interface, not a class. + Self::Abstract => 0x0400, // Declared abstract; must not be instantiated. + Self::Synthetic => 0x1000, // Declared synthetic; not present in the source code. + Self::Annotation => 0x2000, // Declared as an annotation interface. + Self::Enum => 0x4000, // Declared as an enum class. + Self::Module => 0x8000, // Is a module, not a class or interface. + } + } +} + pub struct ClassAccessFlagMask { pub mask: u16, } diff --git a/src/bytecode.rs b/src/bytecode.rs index 67d17e9..690865c 100644 --- a/src/bytecode.rs +++ b/src/bytecode.rs @@ -1,71 +1,78 @@ use core::fmt::Debug; pub struct Bytecode { - pub code: Box<[u8]> + pub bytes: Box<[u8]> } impl Bytecode { - pub fn opcodes(&self) -> Box<[Instruction]> { - let mut v = Vec::with_capacity(self.code.len()); + + pub fn next_instruction(&self, offset: usize) -> (Instruction, usize) { + let opcode = self.bytes[offset]; + + match opcode { + 0x00 => (Instruction::NoOperation(), 1), + 0x01 => (Instruction::StoreIntoIntArray(), 1), + 0x02 => (Instruction::PushConstIntM1(), 1), + 0x03 => (Instruction::PushConstInt0(), 1), + 0x04 => (Instruction::PushConstInt1(), 1), + 0x05 => (Instruction::PushConstInt2(), 1), + 0x06 => (Instruction::PushConstInt3(), 1), + 0x07 => (Instruction::PushConstInt4(), 1), + 0x08 => (Instruction::PushConstInt5(), 1), + 0x0E => (Instruction::PushConstDouble0(), 1), + 0x0F => (Instruction::PushConstDouble1(), 1), + + 0x11 => (Instruction::LoadShortImmediate((self.bytes[offset+1] as u16) << 8 | 
self.bytes[offset+2] as u16), 3), + 0x12 => (Instruction::LoadConstant(self.bytes[offset+1]), 2), + 0x14 => (Instruction::LoadConstant64((self.bytes[offset+1] as u16) << 8 | self.bytes[offset+2] as u16), 3), + + 0x26 => (Instruction::LoadDouble0(), 1), + 0x27 => (Instruction::LoadDouble1(), 1), + 0x28 => (Instruction::LoadDouble2(), 1), + 0x29 => (Instruction::LoadDouble3(), 1), + 0x2A => (Instruction::LoadReference0(), 1), + 0x2B => (Instruction::LoadReference1(), 1), + 0x2C => (Instruction::LoadReference2(), 1), + 0x2D => (Instruction::LoadReference3(), 1), + + 0x4B => (Instruction::StoreReference0(), 1), + 0x4C => (Instruction::StoreReference1(), 1), + 0x4D => (Instruction::StoreReference2(), 1), + 0x4E => (Instruction::StoreReference3(), 1), + + 0x57 => (Instruction::Pop(), 1), + 0x59 => (Instruction::Duplicate(), 1), + + 0x6D => (Instruction::DivideLong(), 1), + + 0x7A => (Instruction::ShiftIntRight(), 1), + + 0x80 => (Instruction::OrInt(), 1), + + 0xAC => (Instruction::ReturnInt(), 1), + + 0xB0 => (Instruction::ReturnReference(), 1), + 0xB1 => (Instruction::ReturnVoid(), 1), + 0xB2 => (Instruction::GetStatic((self.bytes[offset+1] as u16) << 8 | self.bytes[offset+2] as u16), 3), + 0xB3 => (Instruction::PutStatic((self.bytes[offset+1] as u16) << 8 | self.bytes[offset+2] as u16), 3), + 0xB4 => (Instruction::GetField((self.bytes[offset+1] as u16) << 8 | self.bytes[offset+2] as u16), 3), + 0xB5 => (Instruction::PutField((self.bytes[offset+1] as u16) << 8 | self.bytes[offset+2] as u16), 3), + 0xB6 => (Instruction::InvokeVirtual((self.bytes[offset+1] as u16) << 8 | self.bytes[offset+2] as u16), 3), + 0xB7 => (Instruction::InvokeSpecial((self.bytes[offset+1] as u16) << 8 | self.bytes[offset+2] as u16), 3), + 0xB8 => (Instruction::InvokeStatic((self.bytes[offset+1] as u16) << 8 | self.bytes[offset+2] as u16), 3), + 0xBA => (Instruction::InvokeDynamic((self.bytes[offset+1] as u16) << 8 | self.bytes[offset+2] as u16, (self.bytes[offset+3] as u16) << 8 | 
self.bytes[offset+4] as u16), 5), + 0xBB => (Instruction::NewObject((self.bytes[offset+1] as u16) << 8 | self.bytes[offset+2] as u16), 3), + _ => (Instruction::Unknown(opcode), 1) + } + } + + pub fn instructions(&self) -> Box<[Instruction]> { + let mut v = Vec::with_capacity(self.bytes.len()); let mut i = 0; - while i < self.code.len() { - let opcode = self.code[i]; + while i < self.bytes.len() { - let (instruction, offset) = match opcode { - 0x00 => (Instruction::NoOperation(), 1), - 0x01 => (Instruction::StoreIntoIntArray(), 1), - 0x02 => (Instruction::PushConstIntM1(), 1), - 0x03 => (Instruction::PushConstInt0(), 1), - 0x04 => (Instruction::PushConstInt1(), 1), - 0x05 => (Instruction::PushConstInt2(), 1), - 0x06 => (Instruction::PushConstInt3(), 1), - 0x07 => (Instruction::PushConstInt4(), 1), - 0x08 => (Instruction::PushConstInt5(), 1), - 0x0E => (Instruction::PushConstDouble0(), 1), - 0x0F => (Instruction::PushConstDouble1(), 1), - - 0x11 => (Instruction::LoadShortImmediate((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), - 0x12 => (Instruction::LoadConstant(self.code[i+1]), 2), - 0x14 => (Instruction::LoadConstant64((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), - - 0x26 => (Instruction::LoadDouble0(), 1), - 0x27 => (Instruction::LoadDouble1(), 1), - 0x28 => (Instruction::LoadDouble2(), 1), - 0x29 => (Instruction::LoadDouble3(), 1), - 0x2A => (Instruction::LoadReference0(), 1), - 0x2B => (Instruction::LoadReference1(), 1), - 0x2C => (Instruction::LoadReference2(), 1), - 0x2D => (Instruction::LoadReference3(), 1), - - 0x4B => (Instruction::StoreReference0(), 1), - 0x4C => (Instruction::StoreReference1(), 1), - 0x4D => (Instruction::StoreReference2(), 1), - 0x4E => (Instruction::StoreReference3(), 1), - - 0x57 => (Instruction::Pop(), 1), - 0x59 => (Instruction::Duplicate(), 1), - - 0x6D => (Instruction::DivideLong(), 1), - - 0x7A => (Instruction::ShiftIntRight(), 1), - - 0x80 => (Instruction::OrInt(), 1), - - 0xAC => 
(Instruction::ReturnInt(), 1), - - 0xB0 => (Instruction::ReturnReference(), 1), - 0xB1 => (Instruction::ReturnVoid(), 1), - 0xB2 => (Instruction::GetStatic((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), - 0xB3 => (Instruction::PutStatic((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), - 0xB4 => (Instruction::GetField((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), - 0xB5 => (Instruction::PutField((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), - 0xB6 => (Instruction::InvokeVirtual((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), - 0xB7 => (Instruction::InvokeSpecial((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), - 0xBA => (Instruction::InvokeDynamic((self.code[i+1] as u16) << 8 | self.code[i+2] as u16, (self.code[i+3] as u16) << 8 | self.code[i+4] as u16), 5), - 0xBB => (Instruction::NewObject((self.code[i+1] as u16) << 8 | self.code[i+2] as u16), 3), - _ => (Instruction::Unknown(opcode), 1) - }; + let (instruction, offset) = self.next_instruction(i); v.push(instruction); i += offset; @@ -78,7 +85,7 @@ impl Bytecode { impl Debug for Bytecode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { f.debug_list() - .entries(self.opcodes()) + .entries(self.instructions()) .finish() } } @@ -135,6 +142,7 @@ pub enum Instruction { PutField(u16) = 0xB5, // set field to a value InvokeVirtual(u16) = 0xB6, // invoke function on a class InvokeSpecial(u16) = 0xB7, // invoke instance method + InvokeStatic(u16) = 0xB8, // invoke static function InvokeDynamic(u16, u16) = 0xBA, // invoke dynamic function NewObject(u16) = 0xBB, // Create a new object from a constant-pool reference Unknown(u8), diff --git a/src/classfile.rs b/src/classfile.rs index 47d39fb..2a55654 100644 --- a/src/classfile.rs +++ b/src/classfile.rs @@ -1,11 +1,12 @@ use std::io::Read; use std::error::Error as ErrorTrait; use core::fmt::{Display, Formatter, Debug}; +use core::mem::{ discriminant, Discriminant }; use 
core::str::Utf8Error; use crate::accessmasks::*; use crate::bytecode::Bytecode; -use crate::constantpool::{ ConstantPoolInfo, ConstantUtf8Info }; +use crate::constantpool::{ ConstantPoolInfo, ConstantUtf8Info, ConstantMethodRefInfo, ConstantClassInfo, ConstantNameAndTypeInfo }; #[derive(Debug)] pub enum Error { @@ -173,7 +174,7 @@ impl JavaClassFile { let name_entry = pool_entry(&self.constant_pool, class_info_entry.name_index.into())?; let name_entry = match name_entry { ConstantPoolInfo::Utf8(utf8data) => utf8data, - _ => return Err(Error::BadFileError(format!("Invalid name_index class_info from this_class, expected index to Utf8 but found {:?}", name_entry))) + _ => return Err(Error::BadFileError(format!("Invalid class_info.name_index from this_class, expected index to Utf8 but found {:?}", name_entry))) }; return Ok(name_entry.utf8.clone()); @@ -190,6 +191,77 @@ impl JavaClassFile { return None; } + + pub fn typed_pool_entry(&self, index: u16, variant: Discriminant) -> Result<&ConstantPoolInfo, Error> { + let pool_entry = &self.constant_pool[(index - 1) as usize]; + + if discriminant(pool_entry) != variant { + return Err(Error::BadFileError(format!("Expected constant pool entry {} in class {} to be of type {:#?} but found {:#?}", index, self.get_classname()?, variant, discriminant(pool_entry)))); + } + + return Ok(pool_entry); + } + + pub fn pool_methodref_entry(&self, index: u16) -> Result<&ConstantMethodRefInfo, Error> { + let pool_entry = self.typed_pool_entry( + index, + discriminant( + &ConstantPoolInfo::MethodRef( + ConstantMethodRefInfo { + class_index: 0, + name_and_type_index: 0 + } + ) + ) + )?; + + let methodref_entry = match pool_entry { + ConstantPoolInfo::MethodRef(data) => data, + _ => unreachable!(), + }; + + return Ok(methodref_entry); + } + + pub fn pool_class_entry(&self, index: u16) -> Result<&ConstantClassInfo, Error> { + let pool_entry = self.typed_pool_entry(index, discriminant(&ConstantPoolInfo::Class(ConstantClassInfo {name_index: 
0})))?; + + return match pool_entry { + ConstantPoolInfo::Class(data) => Ok(data), + _ => unreachable!(), + }; + } + + pub fn pool_utf8_entry(&self, index: u16) -> Result<&ConstantUtf8Info, Error> { + let pool_entry = self.typed_pool_entry(index, discriminant(&ConstantPoolInfo::Utf8(ConstantUtf8Info {utf8: "".to_string()})))?; + + return match pool_entry { + ConstantPoolInfo::Utf8(data) => Ok(data), + _ => unreachable!(), + }; + } + + pub fn pool_nameandtype_entry(&self, index: u16) -> Result<&ConstantNameAndTypeInfo, Error> { + let pool_entry = self.typed_pool_entry(index, discriminant(&ConstantPoolInfo::NameAndType(ConstantNameAndTypeInfo {name_index: 0, descriptor_index: 0})))?; + + return match pool_entry { + ConstantPoolInfo::NameAndType(data) => Ok(data), + _ => unreachable!(), + }; + } + + pub fn gather_methodref(&self, index: u16) -> Result<(&String, &String, &String), Error> { + let methodref = self.pool_methodref_entry(index)?; + let class_entry = self.pool_class_entry(methodref.class_index)?; + let class_name_entry = self.pool_utf8_entry(class_entry.name_index)?; + let name_and_type_entry = self.pool_nameandtype_entry(methodref.name_and_type_index)?; + + let class_name = &class_name_entry.utf8; + let method_name = &self.pool_utf8_entry(name_and_type_entry.name_index)?.utf8; + let method_descriptor = &self.pool_utf8_entry(name_and_type_entry.descriptor_index)?.utf8; + + return Ok((class_name, method_name, method_descriptor)); + } } #[derive(Debug)] @@ -352,7 +424,7 @@ impl CodeAttributeData { CodeAttributeData { max_stack, max_locals, - code: Bytecode { code }, + code: Bytecode { bytes: code }, exception_table, attributes } @@ -546,7 +618,7 @@ impl AttributeInfo { } #[repr(u8)] -#[derive(Debug)] +#[derive(Debug, Eq, PartialEq)] pub enum AbstractTypeKind { Void() = b'V', // void Byte() = b'B', // signed byte @@ -560,10 +632,43 @@ pub enum AbstractTypeKind { Boolean() = b'Z', // true or false } -#[derive(Debug)] +impl Into for &AbstractTypeKind { + fn 
into(self) -> String { + match self { + AbstractTypeKind::Void() => "V".to_string(), + AbstractTypeKind::Byte() => "B".to_string(), + AbstractTypeKind::Char() => "C".to_string(), + AbstractTypeKind::Double() => "D".to_string(), + AbstractTypeKind::Float() => "F".to_string(), + AbstractTypeKind::Int() => "I".to_string(), + AbstractTypeKind::Long() => "J".to_string(), + AbstractTypeKind::Classname(name) => "L".to_string() + &name + ";", + AbstractTypeKind::Short() => "S".to_string(), + AbstractTypeKind::Boolean() => "Z".to_string(), + } + } +} + +#[derive(Debug, Eq, PartialEq)] pub struct AbstractTypeDescription { - array_level: u8, - kind: AbstractTypeKind, + pub array_level: u8, + pub kind: AbstractTypeKind, +} + +impl Into for &AbstractTypeDescription { + fn into(self) -> String { + let name_len = match self.kind { + AbstractTypeKind::Classname(ref s) => 2 + s.len(), + _ => 1, + }; + let mut s = String::with_capacity(self.array_level as usize + name_len); + + s.push_str(&"[".repeat(self.array_level as usize)); + let kind_string: String = (&self.kind).into(); + s.push_str(&kind_string); + + s + } } impl AbstractTypeDescription { @@ -606,10 +711,28 @@ impl AbstractTypeDescription { } } -#[derive(Debug)] +#[derive(Debug, Eq, PartialEq)] pub struct MethodDescriptor { - argument_types: Box<[AbstractTypeDescription]>, - return_type: AbstractTypeDescription, + pub argument_types: Box<[AbstractTypeDescription]>, + pub return_type: AbstractTypeDescription, +} + +impl MethodDescriptor { + pub fn source_string(&self) -> String { + let mut s = "".to_string(); + + s += "("; + for argument_type in &self.argument_types { + let arg_string: String = argument_type.into(); + s.push_str(&arg_string); + } + s += ")"; + + let return_string: String = (&self.return_type).into(); + s.push_str(&return_string); + + s + } } impl TryFrom<&String> for MethodDescriptor { @@ -651,6 +774,12 @@ impl TryFrom<&String> for MethodDescriptor { } } +impl Into for &MethodDescriptor { + fn into(self) -> 
String { + return self.source_string() + } +} + #[derive(Debug)] pub struct MethodInfo { pub access_flags: MethodAccessFlagMask, @@ -703,6 +832,17 @@ impl MethodInfo { } ) } + + pub fn get_code_attribute(&self) -> Option<&CodeAttributeData> { + return if self.code_attribute_index != self.attributes.len() { + match &self.attributes[self.code_attribute_index].data { + AttributeData::Code(data) => Some(data), + _ => None, + } + } else { + None + }; + } } diff --git a/src/classstore.rs b/src/classstore.rs index 0868da7..715302b 100644 --- a/src/classstore.rs +++ b/src/classstore.rs @@ -11,7 +11,7 @@ use crate::classfile; #[derive(Debug)] pub struct ClassStore { class_ids: HashMap, - classes: Vec, + classes: Vec<(bool, JavaClassFile)>, // was_init, class_file class_path_fragments: Vec, } @@ -61,18 +61,22 @@ impl ClassStore { } } - pub fn load_class_from_file(&mut self, class_file_path: &PathBuf) -> Result { + pub fn add_class(&mut self, class_file: JavaClassFile, was_init: bool) { + return self.classes.push((was_init, class_file)); + } + + pub fn load_class_from_file(&mut self, class_file_path: &PathBuf) -> Result { let mut file_reader = File::open(class_file_path)?; let classfile = JavaClassFile::new(&mut file_reader)?; let classname = classfile.get_classname()?; - self.class_ids.insert(classname.clone(), self.classes.len()); - self.classes.push(classfile); + self.class_ids.insert(classname, self.classes.len()); + self.classes.push((false, classfile)); - return Ok(classname); + return Ok(self.classes.len() - 1); } - pub fn load_class(&mut self, classname: &String) -> Result { + pub fn load_class(&mut self, classname: &String) -> Result { let mut path_buf = PathBuf::new(); for class_path in &self.class_path_fragments { @@ -88,15 +92,15 @@ impl ClassStore { return Err(Error::ClassNotFoundError(format!("Could not find class '{}' in classpath", classname))); } - pub fn have_class(&mut self, classname: &String) -> bool { + pub fn have_class(&self, classname: &String) -> 
bool { return self.class_ids.contains_key(classname); } - pub fn get_class(&mut self, classname: &String) -> Result<(&JavaClassFile, usize), Error> { + pub fn get_class(&self, classname: &String) -> Result<(&JavaClassFile, usize), Error> { let class_id = self.class_ids.get(classname); return match class_id { - Some(id) => Ok((&self.classes[*id], *id)), + Some(id) => Ok((&self.classes[*id].1, *id)), None => Err(Error::ClassNotFoundError(format!("Could not locate class '{}'", classname))), } } @@ -105,14 +109,27 @@ impl ClassStore { if self.have_class(classname) { return Ok(self.get_class(classname)?); } else { - let real_class_name = self.load_class(classname)?; - return Ok(self.get_class(&real_class_name)?); + let class_idx = self.load_class(classname)?; + return Ok((&self.classes[class_idx].1, class_idx)); } } - pub fn class_id_from_name(&mut self, classname: &String) -> Option<&usize> { + pub fn class_file_from_idx(&self, idx: usize) -> Option<&JavaClassFile> { + return match self.classes.get(idx) { + Some((_was_init, class_file)) => Some(class_file), + None => None, + } + } + + pub fn class_idx_from_name(&self, classname: &String) -> Option<&usize> { return self.class_ids.get(classname); } + + pub fn was_init(&self, classname: &String) -> Option<&bool> { + let (was_init, _) = self.classes.get(*self.class_idx_from_name(classname).unwrap()).unwrap(); + + return Some(was_init); + } } diff --git a/src/constantpool.rs b/src/constantpool.rs index 0398d26..88f869b 100644 --- a/src/constantpool.rs +++ b/src/constantpool.rs @@ -3,66 +3,66 @@ use std::io::Read; use crate::classfile::Error; use crate::classfile::{read_u16, read_u8, read_f32, read_f64, read_u64, read_i32}; -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct ConstantClassInfo { pub name_index: u16, } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct ConstantFieldRefInfo { pub class_index: u16, pub name_and_type_index: u16, } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct 
ConstantMethodRefInfo { pub class_index: u16, pub name_and_type_index: u16, } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct ConstantInterfaceMethodRefInfo { pub class_index: u16, pub name_and_type_index: u16, } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct ConstantStringInfo { pub string_index: u16, } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct ConstantIntegerInfo { pub value: i32, } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct ConstantFloatInfo { pub value: f32, } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct ConstantLongInfo { pub value: u64, } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct ConstantDoubleInfo { pub value: f64, } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct ConstantNameAndTypeInfo { pub name_index: u16, pub descriptor_index: u16, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ConstantUtf8Info { pub utf8: String } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] #[repr(u8)] pub enum ConstantMethodHandleType { RefGetField(u8) = 1, @@ -97,25 +97,25 @@ impl TryFrom for ConstantMethodHandleType { } } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct ConstantMethodHandleInfo { reference_kind: ConstantMethodHandleType, reference_index: u16, } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct ConstantMethodTypeInfo { descriptor_index: u16, } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct ConstantInvokeDynamicInfo { bootstrap_method_attr_index: u16, name_and_type_index: u16, } -#[derive(Debug)] +#[derive(Debug, Clone)] #[repr(u8)] pub enum ConstantPoolInfo { Class(ConstantClassInfo) = 7, diff --git a/src/heap_area.rs b/src/heap_area.rs new file mode 100644 index 0000000..918ee55 --- /dev/null +++ b/src/heap_area.rs @@ -0,0 +1,39 @@ +use std::collections::HashMap; + +use crate::stackframe::Value; +use crate::classfile::{ AbstractTypeDescription, MethodInfo }; + +pub struct HeapArea { + pub object_area: ObjectArea, + 
pub static_area: StaticArea, +} + +type ObjectReference=u32; + +pub struct ObjectArea { + compartments: Vec, +} + +pub struct ObjectCompartment { + objects: Box<[HeapObject]>, +} + +pub struct HeapObject { + fields: Box<[ObjectField]>, +} + +pub struct StaticArea { + static_objects: HashMap, +} + +pub struct StaticObject { + pub class_index: usize, + pub fields: Box<[ObjectField]>, + pub methods: Box<[MethodInfo]>, +} + +pub struct ObjectField { + pub type_description: AbstractTypeDescription, + pub value: Value, +} + diff --git a/src/jvm.rs b/src/jvm.rs index d85c608..385bb26 100644 --- a/src/jvm.rs +++ b/src/jvm.rs @@ -1,14 +1,28 @@ use core::fmt::{Display, Formatter}; + use std::error::Error as ErrorTrait; +use crate::accessmasks::{ ClassAccessFlagMask, ClassAccessFlag, MethodAccessFlagMask, MethodAccessFlag}; +use crate::bytecode::{ Bytecode, Instruction }; +use crate::classfile; +use crate::classfile::{ JavaClassFile, MethodInfo, MethodDescriptor, AbstractTypeDescription, AbstractTypeKind, AttributeInfo, AttributeData, CodeAttributeData }; use crate::classstore; use crate::classstore::ClassStore; -use crate::stackframe::StackFrame; +use crate::constantpool::{ ConstantPoolInfo, ConstantClassInfo, ConstantUtf8Info, ConstantMethodRefInfo, ConstantNameAndTypeInfo}; +use crate::stackframe::{ StackFrame, Value }; #[derive(Debug)] pub enum Error { ClassStoreError(classstore::Error), + ClassFileError(classfile::Error), BadNameError(String), + RunTimeError(String), +} + +impl From for Error { + fn from(value: classfile::Error) -> Self { + return Error::ClassFileError(value); + } } impl From for Error { @@ -42,21 +56,203 @@ impl JVM { } } - pub fn load_class(&mut self, name: &String) -> Result { - return self.class_store.load_class(name); + pub fn entrypoint(&mut self, class_name: &String, method_name: &String, arguments: &[Value]) -> Result<(), Error> { + let entry_class = JavaClassFile { + minor_version: 0, + major_version: 0, + constant_pool: Box::new([ + 
ConstantPoolInfo::Class(ConstantClassInfo { name_index: 2 }), + ConstantPoolInfo::Utf8(ConstantUtf8Info { utf8: "::EntryPoint".to_string() }), + ConstantPoolInfo::Utf8(ConstantUtf8Info { utf8: "Code".to_string() }), + ConstantPoolInfo::MethodRef(ConstantMethodRefInfo { class_index: 5, name_and_type_index: 6}), + ConstantPoolInfo::Class(ConstantClassInfo { name_index: 7 }), + ConstantPoolInfo::NameAndType(ConstantNameAndTypeInfo { name_index: 8, descriptor_index: 9 }), + ConstantPoolInfo::Utf8(ConstantUtf8Info { utf8: class_name.to_string() }), + ConstantPoolInfo::Utf8(ConstantUtf8Info { utf8: method_name.to_string() }), + ConstantPoolInfo::Utf8(ConstantUtf8Info { utf8: "()V".to_string() }), + ] + ), + access_flags: ClassAccessFlagMask { mask: ClassAccessFlag::Super.discriminant() }, + this_class: 1, + super_class: 0, + interfaces: Box::new([]), + fields: Box::new([]), + methods: Box::new([ + MethodInfo { + access_flags: MethodAccessFlagMask { + mask: MethodAccessFlag::Public.discriminant() | MethodAccessFlag::Static.discriminant() + }, + name: "call_main".to_string(), + descriptor: MethodDescriptor { + argument_types: Box::new([]), + return_type: AbstractTypeDescription { + array_level: 0, + kind: AbstractTypeKind::Void(), + } + }, + code_attribute_index: 0, + attributes: Box::new([ + AttributeInfo { + attribute_name_index: 3, + data: AttributeData::Code( + CodeAttributeData { + max_stack: 0, + max_locals: 0, + code: Bytecode { + bytes: Box::new([ + 0xB8_u8.to_be(), // invokestatic + 0x04_u16.to_be_bytes()[0], // index 4 into the constant + 0x04_u16.to_be_bytes()[1], // pool + ]), + }, + exception_table: Box::new([]), + attributes: Box::new([]), + } + ) + } + ]) + } + ]), + attributes: Box::new([]), + }; + + self.stack_frames.push( + StackFrame::new(&entry_class, 0, 0, arguments), + ); + + self.class_store.add_class(entry_class, true); + + Ok(()) } - pub fn invoke_static(&mut self, class_name: &String, method_name: &String) -> Result<(), Error> { + pub fn run(&mut 
self) -> Result<(), Error> { + while self.stack_frames.len() != 0 { + let jvm_op = self.bytecode_loop()?; + match jvm_op { + JVMCallbackOperation::PopFrame() => self.stack_frames.truncate(self.stack_frames.len() - 1), + JVMCallbackOperation::PushFrame(frame) => self.stack_frames.push(frame), + JVMCallbackOperation::LoadClass(name) => { + self.class_store.load_class(&name)?; + () + }, + JVMCallbackOperation::InitClass(name) => { + self.init_class(*self.class_store.class_idx_from_name(&name).unwrap()); + } + } + } - let (class_file, class_index) = self.class_store.get_or_load_class(class_name)?; + Ok(()) + } + + pub fn init_class(&mut self, class_idx: usize) { + let class_file = self.class_store.class_file_from_idx(class_idx).unwrap(); + let clinit_idx = class_file.find_method_index(&"".to_string()); + + // TODO: ConstantValue Attributes (final) + // TODO: Static Stuff + + } + + fn prepare_invoke_static(&mut self, class_index: usize, method_name: &String, arguments: &[Value]) -> Result<(), Error> { + + let class_file = self.class_store.class_file_from_idx(class_index).unwrap(); let method_index = class_file.find_method_index(method_name) - .ok_or(Error::BadNameError(format!("Could not find method '{}' in class '{}'", method_name, class_name)))?; + .ok_or(Error::BadNameError(format!("Could not find method '{}' in class '{}'", method_name, class_file.get_classname()?)))?; - let new_frame = StackFrame::new(class_file, class_index, method_index.try_into().expect(&format!("Bad method index: {}", method_index))); + let new_frame = StackFrame::new( + class_file, + class_index, + method_index.try_into().expect(&format!("Bad method index: {}", method_index)), + arguments + ); self.stack_frames.push(new_frame); return Ok(()); } + + fn bytecode_loop(&mut self) -> Result { + + let frame = { + let frame_index = self.stack_frames.len() - 1; + &mut self.stack_frames[frame_index] + }; + let class = self.class_store.class_file_from_idx(frame.class_index).unwrap(); + let method = & 
class.methods[frame.method_index as usize]; + let code_attr = method.get_code_attribute().unwrap(); + let bytecode = & code_attr.code; + + while frame.instruction_pointer as usize != bytecode.bytes.len() { + let (instruction, offset) = bytecode.next_instruction(frame.instruction_pointer as usize); + frame.instruction_pointer += offset as u32; + + match instruction { + Instruction::InvokeStatic(methodref_index) => { + let (supplied_class_name, supplied_method_name, supplied_descriptor_string) = class.gather_methodref(methodref_index)?; + + if ! self.class_store.have_class(supplied_class_name) { + // rewind the bytecode offset, I'll need to execute this instruction again + frame.instruction_pointer -= offset as u32; + + return Ok(JVMCallbackOperation::LoadClass(supplied_class_name.to_string())); + } + if ! self.class_store.was_init(supplied_class_name).unwrap() { + // rewind the bytecode offset, I'll need to execute this instruction again + frame.instruction_pointer -= offset as u32; + + return Ok(JVMCallbackOperation::InitClass(supplied_class_name.to_string())); + } + + let (callee_class_file, callee_class_index) = self.class_store.get_class(supplied_class_name)?; + // TODO: Throw exception on fail + let callee_method_index = callee_class_file.find_method_index(supplied_method_name).unwrap(); + // TODO: Throw exception on fail + let callee_method_info = &callee_class_file.methods[callee_method_index]; + + let supplied_descriptor: MethodDescriptor = supplied_descriptor_string.try_into()?; + // TODO: Throw exception on fail + + if supplied_descriptor != callee_method_info.descriptor { + // TODO: Throw exception on fail + return Err(Error::RunTimeError(format!( + "Mismatched method descriptors between caller and callee: Caller ({}) wanted '{}' but found '{}' on Callee ({})", + class.get_classname().unwrap(), + supplied_descriptor_string, + callee_method_info.descriptor.source_string(), + supplied_class_name, + ))); + } + + let arguments = Vec::new(); + + let new_frame 
= StackFrame::new( + callee_class_file, + callee_class_index, + callee_method_index as u16, + &arguments.into_boxed_slice(), + ); + + //println!("{} {} {}", class_name, method_name, method_descriptor); + + return Ok(JVMCallbackOperation::PushFrame(new_frame)); + }, + + _ => { + return Err(Error::RunTimeError(format!("Opcode not implemented yet: {:?}", instruction))) + }, + + } + } + + Ok(JVMCallbackOperation::PopFrame()) + } + +} + +enum JVMCallbackOperation { + PopFrame(), + PushFrame(StackFrame), + LoadClass(String), + InitClass(String), } diff --git a/src/main.rs b/src/main.rs index eed4a3f..f19fe18 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,3 @@ - mod classfile; mod classstore; mod bytecode; @@ -6,12 +5,23 @@ mod jvm; mod stackframe; mod accessmasks; mod constantpool; +mod heap_area; + +use std::fs::File; + +use crate::stackframe::Value; +use crate::classfile::JavaClassFile; fn main() { let mut jvm = jvm::JVM::new(); - let loaded_name = jvm.load_class(&"class/Main".to_string()).expect("Could not load class"); - jvm.invoke_static(&loaded_name, &"main".to_string()).expect("failed to call main() on supplied class"); + jvm.entrypoint( + &"java/Math".to_string(), + &"v".to_string(), + &[]//&[Value::Int(1), Value::Int(2)], + ).expect("failed to call main() on supplied class"); + + jvm.run().unwrap(); println!("{:#?}", jvm); } diff --git a/src/stackframe.rs b/src/stackframe.rs index 9e332fa..0521986 100644 --- a/src/stackframe.rs +++ b/src/stackframe.rs @@ -2,7 +2,7 @@ use crate::classfile::{ JavaClassFile, AttributeData }; #[derive(Copy, Clone, Debug)] -pub enum LocalVariable { +pub enum Value { Boolean(bool), Byte(u8), Char(u16), @@ -20,14 +20,14 @@ pub enum LocalVariable { #[derive(Debug)] pub struct OperandStack { - stack: Box<[LocalVariable]>, + stack: Box<[Value]>, depth: u16, } impl OperandStack { fn new(size: u16) -> Self { return OperandStack { - stack: vec![LocalVariable::Empty(); size.into()].into_boxed_slice(), + stack: vec![Value::Empty(); 
size.into()].into_boxed_slice(), depth: 0, } } @@ -35,25 +35,32 @@ impl OperandStack { #[derive(Debug)] pub struct StackFrame { - locals: Box<[LocalVariable]>, - operand_stack: OperandStack, - class_id: usize, - method_index: u16, - instruction_pointer: u32, + pub locals: Box<[Value]>, + pub operand_stack: OperandStack, + pub class_index: usize, + pub method_index: u16, + pub instruction_pointer: u32, } impl StackFrame { - pub fn new(classfile: &JavaClassFile, class_id: usize, method_index: u16) -> Self { + pub fn new(classfile: &JavaClassFile, class_index: usize, method_index: u16, arguments: &[Value]) -> Self { let method_info = &classfile.methods[method_index as usize]; let code_data = match &method_info.attributes[method_info.code_attribute_index].data { AttributeData::Code(data) => data, _ => unreachable!(), }; + let mut locals = vec![Value::Empty(); code_data.max_locals.into()].into_boxed_slice(); + + assert!(locals.len() >= arguments.len()); + + for (index, v) in arguments.iter().enumerate() { + locals[index] = *v; + } StackFrame { - locals: vec![LocalVariable::Empty(); code_data.max_locals.into()].into_boxed_slice(), + locals, operand_stack: OperandStack::new(code_data.max_stack), - class_id, + class_index, method_index, instruction_pointer: 0, }