commit 10646275cb4b249789ec30284a3b59e8a9065f06
Author: VegOwOtenks
Date:   Thu Aug 29 14:48:40 2024 +0200

    Broken attribute parsing

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..67ac981
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "jvm"
+version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..41b476a
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,4 @@
+[package]
+name = "jvm"
+version = "0.1.0"
+edition = "2021"
diff --git a/src/classfile.rs b/src/classfile.rs
new file mode 100644
index 0000000..ade0917
--- /dev/null
+++ b/src/classfile.rs
@@ -0,0 +1,724 @@
+//! Parser for the JVM class file format.
+//!
+//! All multi-byte values in a class file are big-endian; the `read_*` helpers at
+//! the bottom of this module decode them from any [`Read`] implementation.
+
+use std::io::Read;
+use std::error::Error as ErrorTrait;
+use core::fmt::{Display, Formatter, Debug};
+use core::str::Utf8Error;
+
+/// Everything that can go wrong while parsing a class file.
+#[derive(Debug)]
+pub enum Error {
+    /// The input is not a structurally valid class file.
+    BadFileError(String),
+    /// The underlying reader failed or ended early.
+    IOError(std::io::Error),
+    /// A `CONSTANT_Utf8` entry was not valid UTF-8.
+    Utf8Error(Utf8Error),
+    /// A length read from the file does not fit into `usize` on this platform.
+    PlatformIntError(core::num::TryFromIntError),
+    /// A byte did not map to any variant of the enum being decoded.
+    BadEnumError(String),
+}
+
+impl ErrorTrait for Error {
+    /// Returns the wrapped lower-level error for the variants that carry one.
+    fn source(&self) -> Option<&(dyn ErrorTrait + 'static)> {
+        match self {
+            Error::IOError(e) => Some(e),
+            Error::Utf8Error(e) => Some(e),
+            Error::PlatformIntError(e) => Some(e),
+            Error::BadFileError(_) | Error::BadEnumError(_) => None,
+        }
+    }
+}
+
+impl Display for Error {
+    /// Writes a one-line description of the error.
+    ///
+    /// Each variant is formatted explicitly: writing `{self}` here would call
+    /// this very function again and recurse until the stack overflows.
+    fn fmt(&self, formatter: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
+        match self {
+            Error::BadFileError(message) => write!(formatter, "bad class file: {message}"),
+            Error::IOError(e) => write!(formatter, "I/O error: {e}"),
+            Error::Utf8Error(e) => write!(formatter, "invalid UTF-8 constant: {e}"),
+            Error::PlatformIntError(e) => write!(formatter, "length out of range: {e}"),
+            Error::BadEnumError(message) => write!(formatter, "bad enum value: {message}"),
+        }
+    }
+}
+
+impl From<std::io::Error> for Error {
+    fn from(value: std::io::Error) -> Self {
+        Error::IOError(value)
+    }
+}
+
+impl From<Utf8Error> for Error {
+    fn from(value: Utf8Error) -> Self {
+        Error::Utf8Error(value)
+    }
+}
+
+impl From<core::num::TryFromIntError> for Error {
+    fn from(value: core::num::TryFromIntError) -> Self {
+        Error::PlatformIntError(value)
+    }
+}
+
+/// A parsed class file; the fields follow the on-disk order.
+#[derive(Debug)]
+pub struct JavaClassFile {
+    minor_version: u16,
+    major_version: u16,
+
+    /// Slot `i` holds the entry with constant pool index `i + 1`.
+    constant_pool: Box<[ConstantPoolInfo]>,
+
+    access_flags: AccessFlagMask,
+
+    this_class: u16,
+    super_class: u16,
+
+    interfaces: Box<[u16]>,
+
+    fields: Box<[FieldInfo]>,
+
+    methods: Box<[MethodInfo]>,
+
+    attributes: Box<[AttributeInfo]>,
+}
+
+impl JavaClassFile {
+    /// Parses a complete class file from `reader`.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::BadFileError`] for a wrong magic number or a malformed
+    /// structure, [`Error::IOError`] when `reader` fails or ends early, and the
+    /// remaining [`Error`] variants when a constant cannot be decoded.
+    pub fn new(reader: &mut dyn Read) -> Result<Self, Error> {
+        let magic = read_u32(reader)?;
+        if magic != 0xCAFEBABE {
+            return Err(Error::BadFileError(format!(
+                "Expected magic bytes 0xCAFEBABE but found {:#X}",
+                magic
+            )));
+        }
+
+        let minor_version = read_u16(reader)?;
+        let major_version = read_u16(reader)?;
+        let constant_pool = ConstantPoolInfo::pool_from_reader(reader)?;
+        let access_flags = AccessFlagMask { mask: read_u16(reader)? };
+        let this_class = read_u16(reader)?;
+        let super_class = read_u16(reader)?;
+
+        let interface_count = read_u16(reader)?;
+        let interfaces = (0..interface_count)
+            .map(|_| read_u16(&mut *reader))
+            .collect::<Result<Box<[u16]>, Error>>()?;
+
+        let field_count = read_u16(reader)?;
+        let fields = (0..field_count)
+            .map(|_| FieldInfo::from_reader(&mut *reader, &constant_pool))
+            .collect::<Result<Box<[FieldInfo]>, Error>>()?;
+
+        let method_count = read_u16(reader)?;
+        let methods = (0..method_count)
+            .map(|_| MethodInfo::from_reader(&mut *reader, &constant_pool))
+            .collect::<Result<Box<[MethodInfo]>, Error>>()?;
+
+        let attributes = AttributeInfo::array_from_reader(reader, &constant_pool)?;
+
+        Ok(JavaClassFile {
+            minor_version,
+            major_version,
+            constant_pool,
+            access_flags,
+            this_class,
+            super_class,
+            interfaces,
+            fields,
+            methods,
+            attributes,
+        })
+    }
+}
+
+/// `CONSTANT_Class`: refers to a class by the pool index of its name.
+#[derive(Debug)]
+pub struct ConstantClassInfo {
+    name_index: u16,
+}
+
+/// `CONSTANT_Fieldref`: a field, identified by its class and name-and-type entries.
+#[derive(Debug)]
+pub struct ConstantFieldRefInfo {
+    class_index: u16,
+    name_and_type_index: u16,
+}
+
+/// `CONSTANT_Methodref`: a class method, identified like a field reference.
+#[derive(Debug)]
+pub struct ConstantMethodRefInfo {
+    class_index: u16,
+    name_and_type_index: u16,
+}
+
+/// `CONSTANT_InterfaceMethodref`: an interface method reference.
+#[derive(Debug)]
+pub struct ConstantInterfaceMethodRefInfo {
+    class_index: u16,
+    name_and_type_index: u16,
+}
+
+/// `CONSTANT_String`: a string literal, stored as the index of its UTF-8 entry.
+#[derive(Debug)]
+pub struct ConstantStringInfo {
+    string_index: u16,
+}
+
+/// `CONSTANT_Integer`: a 32-bit signed integer constant.
+#[derive(Debug)]
+pub struct ConstantIntegerInfo {
+    value: i32,
+}
+
+/// `CONSTANT_Float`: a 32-bit floating-point constant.
+#[derive(Debug)]
+pub struct ConstantFloatInfo {
+    value: f32,
+}
+
+/// `CONSTANT_Long`: a 64-bit integer constant, kept as its raw bit pattern.
+#[derive(Debug)]
+pub struct ConstantLongInfo {
+    // NOTE(review): Java's `long` is signed; consider `i64` here.
+    value: u64,
+}
+
+/// `CONSTANT_Double`: a 64-bit floating-point constant.
+#[derive(Debug)]
+pub struct ConstantDoubleInfo {
+    value: f64,
+}
+
+/// `CONSTANT_NameAndType`: pool indices of a member's name and descriptor.
+#[derive(Debug)]
+pub struct ConstantNameAndTypeInfo {
+    name_index: u16,
+    descriptor_index: u16,
+}
+
+/// `CONSTANT_Utf8`: a decoded string constant.
+#[derive(Debug)]
+pub struct ConstantUtf8Info {
+    utf8: String,
+}
+
+/// The `reference_kind` of a method handle; each variant carries its raw byte.
+#[derive(Debug)]
+#[repr(u8)]
+pub enum ConstantMethodHandleType {
+    RefGetField(u8) = 1,
+    RefGetStatic(u8) = 2,
+    RefPutField(u8) = 3,
+    RefPutStatic(u8) = 4,
+    RefInvokeVirtual(u8) = 5,
+    RefInvokeStatic(u8) = 6,
+    RefInvokeSpecial(u8) = 7,
+    RefNewInvokeSpecial(u8) = 8,
+    RefInvokeInterface(u8) = 9,
+}
+
+impl TryFrom<u8> for ConstantMethodHandleType {
+    type Error = Error;
+
+    /// Maps a reference kind byte (1 through 9) to its variant.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::BadEnumError`] for any other value.
+    fn try_from(value: u8) -> Result<Self, Self::Error> {
+        match value {
+            1 => Ok(Self::RefGetField(value)),
+            2 => Ok(Self::RefGetStatic(value)),
+            3 => Ok(Self::RefPutField(value)),
+            4 => Ok(Self::RefPutStatic(value)),
+            5 => Ok(Self::RefInvokeVirtual(value)),
+            6 => Ok(Self::RefInvokeStatic(value)),
+            7 => Ok(Self::RefInvokeSpecial(value)),
+            8 => Ok(Self::RefNewInvokeSpecial(value)),
+            9 => Ok(Self::RefInvokeInterface(value)),
+            _ => Err(Error::BadEnumError(format!(
+                "Unexpected MethodHandleType: {}",
+                value
+            ))),
+        }
+    }
+}
+
+/// `CONSTANT_MethodHandle`: a reference kind plus the pool index it applies to.
+#[derive(Debug)]
+pub struct ConstantMethodHandleInfo {
+    reference_kind: ConstantMethodHandleType,
+    reference_index: u16,
+}
+
+/// `CONSTANT_MethodType`: the pool index of a method descriptor.
+#[derive(Debug)]
+pub struct ConstantMethodTypeInfo {
+    descriptor_index: u16,
+}
+
+/// `CONSTANT_InvokeDynamic`: a bootstrap method index and a name-and-type index.
+#[derive(Debug)]
+pub struct ConstantInvokeDynamicInfo {
+    bootstrap_method_attr_index: u16,
+    name_and_type_index: u16,
+}
+
+/// One constant pool slot; each discriminant is the tag byte used in the file.
+#[derive(Debug)]
+#[repr(u8)]
+pub enum ConstantPoolInfo {
+    /// Second slot of a `ConstantLong`/`ConstantDouble`. It never appears in the
+    /// file; it keeps pool indices aligned because 8-byte constants occupy two.
+    Unusable = 0,
+    ConstantClass(ConstantClassInfo) = 7,
+    ConstantFieldRef(ConstantFieldRefInfo) = 9,
+    ConstantMethodRef(ConstantMethodRefInfo) = 10,
+    ConstantInterfaceMethodRef(ConstantInterfaceMethodRefInfo) = 11,
+    ConstantString(ConstantStringInfo) = 8,
+    ConstantInteger(ConstantIntegerInfo) = 3,
+    ConstantFloat(ConstantFloatInfo) = 4,
+    ConstantLong(ConstantLongInfo) = 5,
+    ConstantDouble(ConstantDoubleInfo) = 6,
+    ConstantNameAndType(ConstantNameAndTypeInfo) = 12,
+    ConstantUtf8(ConstantUtf8Info) = 1,
+    ConstantMethodHandle(ConstantMethodHandleInfo) = 15,
+    ConstantMethodType(ConstantMethodTypeInfo) = 16,
+    ConstantInvokeDynamic(ConstantInvokeDynamicInfo) = 18,
+}
+
+impl ConstantPoolInfo {
+    /// Reads `constant_pool_count` followed by the constant pool table.
+    ///
+    /// The returned slice has `constant_pool_count - 1` slots, so slot `i` holds
+    /// pool index `i + 1`; see [`ConstantPoolInfo::lookup`].
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::BadFileError`] when the count is zero or when an 8-byte
+    /// constant starts in the last slot, plus any error from reading an entry.
+    fn pool_from_reader(reader: &mut dyn Read) -> Result<Box<[ConstantPoolInfo]>, Error> {
+        let constant_pool_count = read_u16(reader)?;
+        // `checked_sub` rejects a count of zero instead of underflowing.
+        let slot_count = match constant_pool_count.checked_sub(1) {
+            Some(count) => usize::from(count),
+            None => {
+                return Err(Error::BadFileError(
+                    "constant_pool_count must be at least 1".to_string(),
+                ))
+            }
+        };
+
+        let mut pool = Vec::with_capacity(slot_count);
+        while pool.len() < slot_count {
+            let entry = ConstantPoolInfo::from_reader(reader)?;
+            let is_wide = matches!(
+                entry,
+                ConstantPoolInfo::ConstantLong(_) | ConstantPoolInfo::ConstantDouble(_)
+            );
+            pool.push(entry);
+
+            if is_wide {
+                if pool.len() == slot_count {
+                    return Err(Error::BadFileError(
+                        "8-byte constant in the last constant pool slot".to_string(),
+                    ));
+                }
+                pool.push(ConstantPoolInfo::Unusable);
+            }
+        }
+
+        Ok(pool.into_boxed_slice())
+    }
+
+    /// Returns the entry at the 1-based constant pool `index`, or `None` when
+    /// `index` is zero or past the end of `pool`.
+    fn lookup(pool: &[ConstantPoolInfo], index: u16) -> Option<&ConstantPoolInfo> {
+        pool.get(usize::from(index).checked_sub(1)?)
+    }
+
+    /// Reads one constant pool entry: a tag byte followed by the tag's payload.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::BadFileError`] for a tag this parser does not know. The
+    /// input is untrusted, so an unknown tag must not panic.
+    fn from_reader(reader: &mut dyn Read) -> Result<Self, Error> {
+        let tag = read_u8(reader)?;
+
+        Ok(match tag {
+            1 => {
+                let length = read_u16(reader)?;
+                let bytes = read_buffer(reader, length.into())?;
+
+                // NOTE(review): class files use "modified UTF-8"; strict decoding
+                // rejects its two-byte NUL and surrogate-pair encodings.
+                ConstantPoolInfo::ConstantUtf8(ConstantUtf8Info {
+                    utf8: std::str::from_utf8(&bytes)?.to_string(),
+                })
+            }
+
+            3 => ConstantPoolInfo::ConstantInteger(ConstantIntegerInfo {
+                value: read_i32(reader)?,
+            }),
+
+            4 => ConstantPoolInfo::ConstantFloat(ConstantFloatInfo {
+                value: read_f32(reader)?,
+            }),
+
+            5 => ConstantPoolInfo::ConstantLong(ConstantLongInfo {
+                value: read_u64(reader)?,
+            }),
+
+            6 => ConstantPoolInfo::ConstantDouble(ConstantDoubleInfo {
+                value: read_f64(reader)?,
+            }),
+
+            7 => ConstantPoolInfo::ConstantClass(ConstantClassInfo {
+                name_index: read_u16(reader)?,
+            }),
+
+            8 => ConstantPoolInfo::ConstantString(ConstantStringInfo {
+                string_index: read_u16(reader)?,
+            }),
+
+            9 => ConstantPoolInfo::ConstantFieldRef(ConstantFieldRefInfo {
+                class_index: read_u16(reader)?,
+                name_and_type_index: read_u16(reader)?,
+            }),
+
+            10 => ConstantPoolInfo::ConstantMethodRef(ConstantMethodRefInfo {
+                class_index: read_u16(reader)?,
+                name_and_type_index: read_u16(reader)?,
+            }),
+
+            11 => ConstantPoolInfo::ConstantInterfaceMethodRef(ConstantInterfaceMethodRefInfo {
+                class_index: read_u16(reader)?,
+                name_and_type_index: read_u16(reader)?,
+            }),
+
+            12 => ConstantPoolInfo::ConstantNameAndType(ConstantNameAndTypeInfo {
+                name_index: read_u16(reader)?,
+                descriptor_index: read_u16(reader)?,
+            }),
+
+            15 => ConstantPoolInfo::ConstantMethodHandle(ConstantMethodHandleInfo {
+                reference_kind: ConstantMethodHandleType::try_from(read_u8(reader)?)?,
+                reference_index: read_u16(reader)?,
+            }),
+
+            16 => ConstantPoolInfo::ConstantMethodType(ConstantMethodTypeInfo {
+                descriptor_index: read_u16(reader)?,
+            }),
+
+            18 => ConstantPoolInfo::ConstantInvokeDynamic(ConstantInvokeDynamicInfo {
+                bootstrap_method_attr_index: read_u16(reader)?,
+                name_and_type_index: read_u16(reader)?,
+            }),
+
+            _ => {
+                return Err(Error::BadFileError(format!(
+                    "Unknown constant pool tag: {}",
+                    tag
+                )))
+            }
+        })
+    }
+}
+
+/// Class-level access and property flags; each discriminant is the flag's bit.
+#[derive(Debug, Clone, Copy)]
+#[repr(u16)]
+pub enum AccessFlag {
+    AccessPublic = 0x0001, // Declared public; may be accessed from outside its package.
+    AccessFinal = 0x0010, // Declared final; no subclasses allowed.
+    AccessSuper = 0x0020, // Treat superclass methods specially when invoked by the invokespecial instruction.
+    AccessInterface = 0x0200, // Is an interface, not a class.
+    AccessAbstract = 0x0400, // Declared abstract; must not be instantiated.
+    AccessSynthetic = 0x1000, // Declared synthetic; not present in the source code.
+    AccessAnnotation = 0x2000, // Declared as an annotation type.
+    AccessEnum = 0x4000, // Declared as an enum type.
+}
+
+/// A raw 16-bit `access_flags` value; its `Debug` output lists the set flags.
+pub struct AccessFlagMask {
+    mask: u16,
+}
+
+impl Debug for AccessFlagMask {
+    /// Formats the mask as a list of the [`AccessFlag`] variants whose bit is set.
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
+        // NOTE(review): these are the class-level flags, but the mask is also used
+        // for fields and methods, whose bits have other meanings - confirm.
+        const FLAGS: [AccessFlag; 8] = [
+            AccessFlag::AccessPublic,
+            AccessFlag::AccessFinal,
+            AccessFlag::AccessSuper,
+            AccessFlag::AccessInterface,
+            AccessFlag::AccessAbstract,
+            AccessFlag::AccessSynthetic,
+            AccessFlag::AccessAnnotation,
+            AccessFlag::AccessEnum,
+        ];
+
+        f.debug_list()
+            .entries(FLAGS.iter().filter(|&&flag| (flag as u16 & self.mask) != 0))
+            .finish()
+    }
+}
+
+/// One entry of the class file's `fields` table.
+#[derive(Debug)]
+pub struct FieldInfo {
+    access_flags: AccessFlagMask,
+    name_index: u16,
+    descriptor_index: u16,
+    attributes: Box<[AttributeInfo]>,
+}
+
+impl FieldInfo {
+    /// Reads one field; `pool` is used to resolve the names of its attributes.
+    fn from_reader(reader: &mut dyn Read, pool: &[ConstantPoolInfo]) -> Result<Self, Error> {
+        Ok(FieldInfo {
+            access_flags: AccessFlagMask { mask: read_u16(reader)? },
+            name_index: read_u16(reader)?,
+            descriptor_index: read_u16(reader)?,
+            attributes: AttributeInfo::array_from_reader(reader, pool)?,
+        })
+    }
+}
+
+/// Body of a `ConstantValue` attribute: the pool index of the constant.
+#[derive(Debug)]
+pub struct ConstantValueAttributeData {
+    constant_value_index: u16,
+}
+
+/// Raw bytes of an attribute this parser does not decode.
+#[derive(Debug)]
+pub struct UnknownAttributeData {
+    info: Box<[u8]>,
+}
+
+/// One `LineNumberTable` row: a bytecode offset and its source line.
+#[derive(Debug)]
+pub struct LineNumberTableEntry {
+    start_pc: u16,
+    line_number: u16,
+}
+
+impl LineNumberTableEntry {
+    /// Reads `start_pc` and then `line_number`.
+    fn from_reader(reader: &mut dyn Read) -> Result<Self, Error> {
+        Ok(LineNumberTableEntry {
+            start_pc: read_u16(reader)?,
+            line_number: read_u16(reader)?,
+        })
+    }
+}
+
+/// Body of a `LineNumberTable` attribute.
+#[derive(Debug)]
+pub struct LineNumberTableAttributeData {
+    entries: Box<[LineNumberTableEntry]>,
+}
+
+impl LineNumberTableAttributeData {
+    /// Reads the 16-bit entry count and then that many entries.
+    fn from_reader(reader: &mut dyn Read) -> Result<Self, Error> {
+        let length = read_u16(reader)?;
+        let entries = (0..length)
+            .map(|_| LineNumberTableEntry::from_reader(&mut *reader))
+            .collect::<Result<Box<[LineNumberTableEntry]>, Error>>()?;
+
+        Ok(LineNumberTableAttributeData { entries })
+    }
+}
+
+/// The decoded body of an attribute.
+#[derive(Debug)]
+pub enum AttributeData {
+    ConstantValue(ConstantValueAttributeData),
+    LineNumberTable(LineNumberTableAttributeData),
+    UnknownAttribute(UnknownAttributeData),
+}
+
+/// An attribute: the pool index of its name plus its decoded body.
+#[derive(Debug)]
+pub struct AttributeInfo {
+    attribute_name_index: u16,
+    data: AttributeData,
+}
+
+impl AttributeInfo {
+    /// Reads a 16-bit attribute count and then that many attributes.
+    fn array_from_reader(
+        reader: &mut dyn Read,
+        pool: &[ConstantPoolInfo],
+    ) -> Result<Box<[Self]>, Error> {
+        let length = read_u16(reader)?;
+
+        (0..length)
+            .map(|_| AttributeInfo::from_reader(&mut *reader, pool))
+            .collect()
+    }
+
+    /// Reads one attribute: name index, byte length, then the body.
+    ///
+    /// The body is read into a buffer of exactly the declared length before it
+    /// is decoded, so the stream stays aligned on the next structure whatever
+    /// the body contains.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::BadFileError`] when the name index does not refer to a
+    /// UTF-8 constant or a known attribute's body has trailing bytes; a body
+    /// that is too short surfaces as [`Error::IOError`].
+    fn from_reader(reader: &mut dyn Read, pool: &[ConstantPoolInfo]) -> Result<Self, Error> {
+        let attribute_name_index: u16 = read_u16(reader)?;
+        let attribute_byte_size: usize = read_u32(reader)?.try_into()?;
+
+        let name = match ConstantPoolInfo::lookup(pool, attribute_name_index) {
+            Some(ConstantPoolInfo::ConstantUtf8(ConstantUtf8Info { utf8 })) => utf8.as_str(),
+            _ => {
+                return Err(Error::BadFileError(format!(
+                    "Bad name index for attribute info: {}",
+                    attribute_name_index
+                )))
+            }
+        };
+
+        let info = read_buffer(reader, attribute_byte_size)?;
+        let data = match name {
+            "ConstantValue" => {
+                let mut body: &[u8] = &info;
+                let value = ConstantValueAttributeData {
+                    constant_value_index: read_u16(&mut body)?,
+                };
+                Self::ensure_consumed(name, body)?;
+                AttributeData::ConstantValue(value)
+            }
+
+            "LineNumberTable" => {
+                let mut body: &[u8] = &info;
+                let table = LineNumberTableAttributeData::from_reader(&mut body)?;
+                Self::ensure_consumed(name, body)?;
+                AttributeData::LineNumberTable(table)
+            }
+
+            _ => AttributeData::UnknownAttribute(UnknownAttributeData { info }),
+        };
+
+        Ok(AttributeInfo {
+            attribute_name_index,
+            data,
+        })
+    }
+
+    /// Fails when `rest`, the undecoded tail of attribute `name`, is not empty.
+    fn ensure_consumed(name: &str, rest: &[u8]) -> Result<(), Error> {
+        if rest.is_empty() {
+            Ok(())
+        } else {
+            Err(Error::BadFileError(format!(
+                "Attribute {} has {} unexpected trailing bytes",
+                name,
+                rest.len()
+            )))
+        }
+    }
+}
+
+/// One entry of the class file's `methods` table.
+#[derive(Debug)]
+pub struct MethodInfo {
+    access_flags: AccessFlagMask,
+    name_index: u16,
+    descriptor_index: u16,
+    attributes: Box<[AttributeInfo]>,
+}
+
+impl MethodInfo {
+    /// Reads one method; `pool` is used to resolve the names of its attributes.
+    fn from_reader(reader: &mut dyn Read, pool: &[ConstantPoolInfo]) -> Result<Self, Error> {
+        Ok(MethodInfo {
+            access_flags: AccessFlagMask { mask: read_u16(reader)? },
+            name_index: read_u16(reader)?,
+            descriptor_index: read_u16(reader)?,
+            attributes: AttributeInfo::array_from_reader(reader, pool)?,
+        })
+    }
+}
+
+/// Reads exactly `size` bytes into a freshly allocated buffer.
+fn read_buffer(reader: &mut dyn Read, size: usize) -> Result<Box<[u8]>, Error> {
+    // The buffer must have `size` initialised elements: `read_exact` fills the
+    // slice it is given, and `Vec::with_capacity` alone yields an empty slice,
+    // which made every attribute body come back empty and unconsumed.
+    let mut buffer = vec![0u8; size].into_boxed_slice();
+
+    reader.read_exact(&mut buffer)?;
+
+    Ok(buffer)
+}
+
+/// Reads exactly `N` bytes into a fixed-size array.
+fn read_array<const N: usize>(reader: &mut dyn Read) -> Result<[u8; N], Error> {
+    let mut buffer = [0u8; N];
+
+    reader.read_exact(&mut buffer)?;
+
+    Ok(buffer)
+}
+
+/// Reads a big-endian 64-bit float.
+fn read_f64(reader: &mut dyn Read) -> Result<f64, Error> {
+    Ok(f64::from_be_bytes(read_array::<8>(reader)?))
+}
+
+/// Reads a big-endian 32-bit float.
+fn read_f32(reader: &mut dyn Read) -> Result<f32, Error> {
+    Ok(f32::from_be_bytes(read_array::<4>(reader)?))
+}
+
+/// Reads a big-endian signed 32-bit integer.
+fn read_i32(reader: &mut dyn Read) -> Result<i32, Error> {
+    Ok(i32::from_be_bytes(read_array::<4>(reader)?))
+}
+
+/// Reads a big-endian unsigned 64-bit integer.
+fn read_u64(reader: &mut dyn Read) -> Result<u64, Error> {
+    Ok(u64::from_be_bytes(read_array::<8>(reader)?))
+}
+
+/// Reads a big-endian unsigned 32-bit integer.
+fn read_u32(reader: &mut dyn Read) -> Result<u32, Error> {
+    Ok(u32::from_be_bytes(read_array::<4>(reader)?))
+}
+
+/// Reads a big-endian unsigned 16-bit integer.
+fn read_u16(reader: &mut dyn Read) -> Result<u16, Error> {
+    Ok(u16::from_be_bytes(read_array::<2>(reader)?))
+}
+
+/// Reads a single byte.
+fn read_u8(reader: &mut dyn Read) -> Result<u8, Error> {
+    Ok(u8::from_be_bytes(read_array::<1>(reader)?))
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..e100b2c
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,16 @@
+use std::fs::File;
+use std::io::BufReader;
+
+mod classfile;
+
+/// Parses `Main.class` from the working directory and pretty-prints it.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // The parser issues many tiny reads, so buffer the file instead of paying
+    // for a system call per field.
+    let mut reader = BufReader::new(File::open("Main.class")?);
+    let class_file = classfile::JavaClassFile::new(&mut reader)?;
+
+    println!("{:#?}", class_file);
+
+    Ok(())
+}