// jvm/src/classfile.rs
// Web-view export: 693 lines, 20 KiB, Rust.
// Revision timestamp shown by the viewer: 2024-08-29 14:48:40 +02:00
use std::io::Read;
use std::error::Error as ErrorTrait;
use core::fmt::{Display, Formatter, Debug};
use core::str::Utf8Error;
/// Errors that can occur while parsing a class file.
#[derive(Debug)]
pub enum Error {
    /// The input violates the expected class file structure; the message says how.
    BadFileError(String),
    /// The underlying reader failed (including a premature end of input).
    IOError(std::io::Error),
    /// A string in the file was not valid UTF-8.
    Utf8Error(Utf8Error),
    /// A size read from the file does not fit the platform's integer type.
    PlatformIntError(core::num::TryFromIntError),
    /// A numeric value did not map to any known enum variant.
    BadEnumError(String),
}

impl ErrorTrait for Error {
    /// Returns the wrapped lower-level error for the variants that carry one.
    fn source(&self) -> Option<&(dyn ErrorTrait + 'static)> {
        match self {
            Error::IOError(e) => Some(e),
            Error::Utf8Error(e) => Some(e),
            Error::PlatformIntError(e) => Some(e),
            Error::BadFileError(_) | Error::BadEnumError(_) => None,
        }
    }
}

impl Display for Error {
    /// Writes a one-line description, followed by a `Caused by:` line when the
    /// error wraps a lower-level one.
    ///
    /// The description is produced per variant. Formatting `self` with `{}` here
    /// would call this function again and recurse until the stack overflows.
    fn fmt(&self, formatter: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
        match self {
            Error::BadFileError(msg) => write!(formatter, "bad class file: {msg}")?,
            Error::IOError(_) => write!(formatter, "I/O error while reading class file")?,
            Error::Utf8Error(_) => write!(formatter, "invalid UTF-8 in class file")?,
            Error::PlatformIntError(_) => {
                write!(formatter, "value does not fit the platform's integer type")?
            }
            Error::BadEnumError(msg) => write!(formatter, "bad enum value: {msg}")?,
        }
        if let Some(e) = self.source() {
            write!(formatter, "\n\tCaused by: {e:?}")?;
        }
        Ok(())
    }
}
impl From<std::io::Error> for Error {
fn from(value: std::io::Error) -> Self {
return Error::IOError(
value
);
}
}
impl From<Utf8Error> for Error {
fn from(value: Utf8Error) -> Self {
return Error::Utf8Error(value);
}
}
impl From<core::num::TryFromIntError> for Error {
fn from (value: core::num::TryFromIntError) -> Self {
return Error::PlatformIntError(value);
}
}
#[derive(Debug)]
pub struct JavaClassFile {
minor_version: u16,
major_version: u16,
constant_pool: Box<[ConstantPoolInfo]>,
access_flags: AccessFlagMask,
this_class: u16,
super_class: u16,
interfaces: Box<[u16]>,
fields: Box<[FieldInfo]>,
methods: Box<[MethodInfo]>,
attributes: Box<[AttributeInfo]>,
}
impl JavaClassFile {
pub fn new(reader: &mut dyn Read) -> Result<Self, Error> {
{
let magic = read_u32(reader)?;
if magic != 0xCAFEBABE {
return Err(
Error::BadFileError(
format!("Expected magic bytes 0xCAFEBABE but found {:#X}", magic)
)
)
}
}
let minor_version = read_u16(reader)?;
let major_version = read_u16(reader)?;
let constant_pool = {
let constant_pool_count = read_u16(reader)?;
let constant_pool_size = constant_pool_count - 1;
let mut constant_pool_vec = Vec::with_capacity(constant_pool_size.into());
let mut i = 0;
while i < constant_pool_size {
let cpinfo = ConstantPoolInfo::from_reader(reader)?;
i += match cpinfo {
ConstantPoolInfo::ConstantDouble(_) | ConstantPoolInfo::ConstantLong(_) => 2,
_ => 1
};
constant_pool_vec.push(cpinfo);
}
constant_pool_vec.into_boxed_slice()
};
let access_flags = AccessFlagMask { mask: read_u16(reader)? };
let this_class = read_u16(reader)?;
let super_class = read_u16(reader)?;
let interfaces = {
let length = read_u16(reader)?;
let mut if_vec = Vec::<u16>::with_capacity(length.into());
for _i in 0..length {
if_vec.push(read_u16(reader)?);
}
if_vec.into_boxed_slice()
};
let fields = {
let length = read_u16(reader)?;
let mut fields_vec = Vec::with_capacity(length.into());
for _i in 0..length {
fields_vec.push(FieldInfo::from_reader(reader, &constant_pool)?);
}
fields_vec.into_boxed_slice()
};
let methods = {
let length = read_u16(reader)?;
let mut methods_vec = Vec::with_capacity(length.into());
for _i in 0..length {
methods_vec.push(MethodInfo::from_reader(reader, &constant_pool)?);
}
methods_vec.into_boxed_slice()
};
let attributes = AttributeInfo::array_from_reader(reader, &constant_pool)?;
Ok(
JavaClassFile {
minor_version,
major_version,
constant_pool,
access_flags,
this_class,
super_class,
interfaces,
fields,
methods,
attributes,
}
)
}
}
/// Payload of a `ConstantPoolInfo::ConstantClass` entry (tag 7).
#[derive(Debug)]
pub struct ConstantClassInfo {
/// Raw `u16` read right after the tag; stored without being resolved.
name_index: u16,
}
/// Payload of a `ConstantPoolInfo::ConstantFieldRef` entry (tag 9).
#[derive(Debug)]
pub struct ConstantFieldRefInfo {
/// First `u16` after the tag; stored without being resolved.
class_index: u16,
/// Second `u16` after the tag; stored without being resolved.
name_and_type_index: u16,
}
/// Payload of a `ConstantPoolInfo::ConstantMethodRef` entry (tag 10).
#[derive(Debug)]
pub struct ConstantMethodRefInfo {
/// First `u16` after the tag; stored without being resolved.
class_index: u16,
/// Second `u16` after the tag; stored without being resolved.
name_and_type_index: u16,
}
/// Payload of a `ConstantPoolInfo::ConstantInterfaceMethodRef` entry (tag 11).
#[derive(Debug)]
pub struct ConstantInterfaceMethodRefInfo {
/// First `u16` after the tag; stored without being resolved.
class_index: u16,
/// Second `u16` after the tag; stored without being resolved.
name_and_type_index: u16,
}
/// Payload of a `ConstantPoolInfo::ConstantString` entry (tag 8).
#[derive(Debug)]
pub struct ConstantStringInfo {
/// Raw `u16` read right after the tag; stored without being resolved.
string_index: u16,
}
/// Payload of a `ConstantPoolInfo::ConstantInteger` entry (tag 3).
#[derive(Debug)]
pub struct ConstantIntegerInfo {
/// Four big-endian bytes decoded as a signed 32-bit integer.
value: i32,
}
/// Payload of a `ConstantPoolInfo::ConstantFloat` entry (tag 4).
#[derive(Debug)]
pub struct ConstantFloatInfo {
/// Four big-endian bytes decoded with `f32::from_be_bytes`.
value: f32,
}
/// Payload of a `ConstantPoolInfo::ConstantLong` entry (tag 5).
#[derive(Debug)]
pub struct ConstantLongInfo {
/// Eight bytes assembled by `read_u64` (high 32 bits first) as an unsigned value.
// NOTE(review): Java `long` is signed; consumers presumably need to
// reinterpret this as `i64` — confirm at the use site.
value: u64,
}
/// Payload of a `ConstantPoolInfo::ConstantDouble` entry (tag 6).
#[derive(Debug)]
pub struct ConstantDoubleInfo {
/// Eight big-endian bytes decoded with `f64::from_be_bytes`.
value: f64,
}
/// Payload of a `ConstantPoolInfo::ConstantNameAndType` entry (tag 12).
#[derive(Debug)]
pub struct ConstantNameAndTypeInfo {
/// First `u16` after the tag; stored without being resolved.
name_index: u16,
/// Second `u16` after the tag; stored without being resolved.
descriptor_index: u16,
}
/// Payload of a `ConstantPoolInfo::ConstantUtf8` entry (tag 1).
#[derive(Debug)]
pub struct ConstantUtf8Info {
/// Text decoded with strict UTF-8 validation from the length-prefixed bytes
/// that follow the tag.
utf8: String
}
#[derive(Debug)]
#[repr(u8)]
pub enum ConstantMethodHandleType {
RefGetField(u8) = 1,
RefGetStatic(u8) = 2,
RefPutField(u8) = 3,
RefPutStatic(u8) = 4,
RefInvokeVirtual(u8) = 5,
RefInvokeStatic(u8) = 6,
RefInvokeSpecial(u8) = 7,
RefNewInvokeSpecial(u8) = 8,
RefInvokeInterface(u8) = 9,
}
impl TryFrom<u8> for ConstantMethodHandleType {
type Error = Error;
fn try_from(value: u8) -> Result<Self, Self::Error> {
return match value {
1 => Ok(Self::RefGetField(value)),
2 => Ok(Self::RefGetStatic(value)),
3 => Ok(Self::RefPutField(value)),
4 => Ok(Self::RefPutStatic(value)),
5 => Ok(Self::RefInvokeVirtual(value)),
6 => Ok(Self::RefInvokeStatic(value)),
7 => Ok(Self::RefInvokeSpecial(value)),
8 => Ok(Self::RefNewInvokeSpecial(value)),
9 => Ok(Self::RefInvokeInterface(value)),
_ => Err(
Error::BadEnumError(format!("Unexpected MethodHandleType: {}", value))
),
}
}
}
/// Payload of a `ConstantPoolInfo::ConstantMethodHandle` entry (tag 15).
#[derive(Debug)]
pub struct ConstantMethodHandleInfo {
/// Kind byte, validated through `ConstantMethodHandleType::try_from`.
reference_kind: ConstantMethodHandleType,
/// Raw `u16` read after the kind byte; stored without being resolved.
reference_index: u16,
}
/// Payload of a `ConstantPoolInfo::ConstantMethodType` entry (tag 16).
#[derive(Debug)]
pub struct ConstantMethodTypeInfo {
/// Raw `u16` read right after the tag; stored without being resolved.
descriptor_index: u16,
}
/// Payload of a `ConstantPoolInfo::ConstantInvokeDynamic` entry (tag 18).
#[derive(Debug)]
pub struct ConstantInvokeDynamicInfo {
/// First `u16` after the tag; stored without being resolved.
bootstrap_method_attr_index: u16,
/// Second `u16` after the tag; stored without being resolved.
name_and_type_index: u16,
}
#[derive(Debug)]
#[repr(u8)]
pub enum ConstantPoolInfo {
ConstantClass(ConstantClassInfo) = 7,
ConstantFieldRef(ConstantFieldRefInfo) = 9,
ConstantMethodRef(ConstantMethodRefInfo) = 10,
ConstantInterfaceMethodRef(ConstantInterfaceMethodRefInfo) = 11,
ConstantString(ConstantStringInfo) = 8,
ConstantInteger(ConstantIntegerInfo) = 3,
ConstantFloat(ConstantFloatInfo) = 4,
ConstantLong(ConstantLongInfo) = 5,
ConstantDouble(ConstantDoubleInfo) = 6,
ConstantNameAndType(ConstantNameAndTypeInfo) = 12,
ConstantUtf8(ConstantUtf8Info) = 1,
ConstantMethodHandle(ConstantMethodHandleInfo) = 15,
ConstantMethodType(ConstantMethodTypeInfo) = 16,
ConstantInvokeDynamic(ConstantInvokeDynamicInfo) = 18,
}
impl ConstantPoolInfo {
fn from_reader(reader: &mut dyn Read) -> Result<Self, Error> {
let tag = read_u8(reader)?;
Ok(
match tag {
1 => {
ConstantPoolInfo::ConstantUtf8(
ConstantUtf8Info {
utf8: {
let length = read_u16(reader)?;
let mut buffer: Box<[u8]> = vec![0; length.into()].into_boxed_slice();
reader.read_exact(&mut buffer)?;
std::str::from_utf8(&buffer)?.to_string()
}
}
)
}
3 => {
ConstantPoolInfo::ConstantInteger(
ConstantIntegerInfo {
value: read_i32(reader)?
}
)
}
4 => {
ConstantPoolInfo::ConstantFloat(
ConstantFloatInfo {
value: read_f32(reader)?
}
)
}
5 => {
ConstantPoolInfo::ConstantLong(
ConstantLongInfo {
value: read_u64(reader)?
}
)
}
6 => {
ConstantPoolInfo::ConstantDouble(
ConstantDoubleInfo {
value: read_f64(reader)?
}
)
}
7 => {
ConstantPoolInfo::ConstantClass(
ConstantClassInfo {
name_index: read_u16(reader)?
}
)
}
8 => {
ConstantPoolInfo::ConstantString(
ConstantStringInfo {
string_index: read_u16(reader)?
}
)
}
9 => {
ConstantPoolInfo::ConstantFieldRef(
ConstantFieldRefInfo {
class_index: read_u16(reader)?,
name_and_type_index: read_u16(reader)?
}
)
}
10 => {
ConstantPoolInfo::ConstantMethodRef(
ConstantMethodRefInfo {
class_index: read_u16(reader)?,
name_and_type_index: read_u16(reader)?
}
)
}
11 => {
ConstantPoolInfo::ConstantInterfaceMethodRef(
ConstantInterfaceMethodRefInfo {
class_index: read_u16(reader)?,
name_and_type_index: read_u16(reader)?
}
)
}
12 => {
ConstantPoolInfo::ConstantNameAndType(
ConstantNameAndTypeInfo {
name_index: read_u16(reader)?,
descriptor_index: read_u16(reader)?
}
)
}
15 => {
ConstantPoolInfo::ConstantMethodHandle(
ConstantMethodHandleInfo {
reference_kind: ConstantMethodHandleType::try_from(read_u8(reader)?)?,
reference_index: read_u16(reader)?,
}
)
}
16 => {
ConstantPoolInfo::ConstantMethodType(
ConstantMethodTypeInfo {
descriptor_index: read_u16(reader)?,
}
)
}
18 => {
ConstantPoolInfo::ConstantInvokeDynamic(
ConstantInvokeDynamicInfo {
bootstrap_method_attr_index: read_u16(reader)?,
name_and_type_index: read_u16(reader)?,
}
)
}
_ => unreachable!()
}
)
}
}
/// Individual access flag bits recognised by `AccessFlagMask`'s `Debug` output.
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum AccessFlag {
    /// Declared public; may be accessed from outside its package.
    AccessPublic = 0x0001,
    /// Declared final; no subclasses allowed.
    AccessFinal = 0x0010,
    /// Treat superclass methods specially when invoked by the invokespecial instruction.
    AccessSuper = 0x0020,
    /// Is an interface, not a class.
    AccessInterface = 0x0200,
    /// Declared abstract; must not be instantiated.
    AccessAbstract = 0x0400,
    /// Declared synthetic; not present in the source code.
    AccessSynthetic = 0x1000,
    /// Declared as an annotation type.
    AccessAnnotation = 0x2000,
    /// Declared as an enum type.
    AccessEnum = 0x4000,
}

/// Raw 16-bit access flag word as read from the class file.
pub struct AccessFlagMask {
    mask: u16
}

impl Debug for AccessFlagMask {
    /// Formats the mask as a list of the `AccessFlag` variants whose bit is set.
    ///
    /// Bits that do not correspond to any `AccessFlag` variant are not shown.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
        // Checked in ascending bit order, which is also the output order.
        const KNOWN_FLAGS: [AccessFlag; 8] = [
            AccessFlag::AccessPublic,
            AccessFlag::AccessFinal,
            AccessFlag::AccessSuper,
            AccessFlag::AccessInterface,
            AccessFlag::AccessAbstract,
            AccessFlag::AccessSynthetic,
            AccessFlag::AccessAnnotation,
            AccessFlag::AccessEnum,
        ];

        let set_flags = KNOWN_FLAGS
            .iter()
            .filter(|flag| (**flag as u16) & self.mask != 0);
        f.debug_list().entries(set_flags).finish()
    }
}
/// One entry of the class file's field table.
#[derive(Debug)]
pub struct FieldInfo {
    /// Raw access flag bits of the field.
    access_flags: AccessFlagMask,
    /// Raw `u16`; stored without being resolved.
    name_index: u16,
    /// Raw `u16`; stored without being resolved.
    descriptor_index: u16,
    /// Attributes attached to this field.
    attributes: Box<[AttributeInfo]>,
}

impl FieldInfo {
    /// Reads one field entry: flags, name index, descriptor index, then its
    /// attribute table (in that order).
    fn from_reader(reader: &mut dyn Read, pool: &Box<[ConstantPoolInfo]>) -> Result<Self, Error> {
        let mask = read_u16(reader)?;
        let name_index = read_u16(reader)?;
        let descriptor_index = read_u16(reader)?;
        let attributes = AttributeInfo::array_from_reader(reader, pool)?;

        Ok(Self {
            access_flags: AccessFlagMask { mask },
            name_index,
            descriptor_index,
            attributes,
        })
    }
}
/// Payload of `AttributeData::ConstantValue`.
// NOTE(review): nothing visible in this file constructs this type; the only
// code that would is commented out in `AttributeInfo::from_reader`.
#[derive(Debug)]
pub struct ConstantValueAttributeData {
/// Raw `u16` index; presumably refers to a constant pool entry — confirm.
constant_value_index: u16,
}
/// Payload of `AttributeData::UnknownAttribute`: the attribute body kept as raw bytes.
#[derive(Debug)]
pub struct UnknownAttributeData {
/// Exactly as many bytes as the attribute's declared length.
info: Box<[u8]>
}
/// One row of a line number table: two consecutive big-endian `u16` values.
#[derive(Debug)]
pub struct LineNumberTableEntry {
    /// First `u16` of the row.
    start_pc: u16,
    /// Second `u16` of the row.
    line_number: u16,
}

impl LineNumberTableEntry {
    /// Reads `start_pc` followed by `line_number` from `reader`.
    fn from_reader(reader: &mut dyn Read) -> Result<Self, Error> {
        // Struct fields are evaluated top to bottom, matching the wire order.
        Ok(Self {
            start_pc: read_u16(reader)?,
            line_number: read_u16(reader)?,
        })
    }
}
/// Payload of `AttributeData::LineNumberTable`: a counted list of entries.
#[derive(Debug)]
pub struct LineNumberTableAttributeData {
    /// Entries in the order they were read.
    entries: Box<[LineNumberTableEntry]>
}

impl LineNumberTableAttributeData {
    /// Reads a `u16` entry count followed by that many `LineNumberTableEntry` rows.
    fn from_reader(reader: &mut dyn Read) -> Result<Self, Error> {
        let entry_count = usize::from(read_u16(reader)?);
        let mut rows: Vec<LineNumberTableEntry> = Vec::with_capacity(entry_count);
        while rows.len() < entry_count {
            rows.push(LineNumberTableEntry::from_reader(reader)?);
        }
        Ok(Self {
            entries: rows.into_boxed_slice(),
        })
    }
}
/// Decoded body of an attribute.
// NOTE(review): `AttributeInfo::from_reader` currently only produces
// `UnknownAttribute`; the arms for the other variants are commented out there.
#[derive(Debug)]
pub enum AttributeData {
/// A `ConstantValue` attribute body.
ConstantValue(ConstantValueAttributeData),
/// A `LineNumberTable` attribute body.
LineNumberTable(LineNumberTableAttributeData),
/// Any attribute kept as raw, undecoded bytes.
UnknownAttribute(UnknownAttributeData),
}
#[derive(Debug)]
pub struct AttributeInfo {
attribute_name_index: u16,
data: AttributeData
}
impl AttributeInfo {
fn array_from_reader(reader: &mut dyn Read, pool: &Box<[ConstantPoolInfo]>) -> Result<Box<[Self]>, Error> {
let length = read_u16(reader)?;
let mut attr_vec = Vec::with_capacity(length.into());
for _i in 0..length {
2024-08-29 14:54:00 +02:00
let attribute = AttributeInfo::from_reader(reader, &pool)?;
println!("{:#?}", attribute);
attr_vec.push(attribute);
2024-08-29 14:48:40 +02:00
}
Ok(attr_vec.into_boxed_slice())
}
fn from_reader(reader: &mut dyn Read, pool: &Box<[ConstantPoolInfo]>) -> Result<Self, Error> {
let attribute_name_index: u16 = read_u16(reader)?;
let attribute_byte_size: usize = read_u32(reader)?.try_into()?;
let data = {
let name_entry = &pool[attribute_name_index as usize];
let utf8 = match name_entry {
ConstantPoolInfo::ConstantUtf8( ConstantUtf8Info { utf8 } ) => utf8,
_ => return Err(Error::BadFileError(format!("Bad name index for attribute info: {}", attribute_name_index))),
};
match &utf8[..] {
// "ConstantValue" => AttributeData::ConstantValue(
// ConstantValueAttributeData {
// constant_value_index: read_u16(reader)?,
// }
// ),
//
// "LineNumberTable" => AttributeData::LineNumberTable(
// LineNumberTableAttributeData::from_reader(reader)?
// ),
&_ => AttributeData::UnknownAttribute(
UnknownAttributeData {
info: read_buffer(reader, attribute_byte_size)?,
}
)
}
};
Ok(
AttributeInfo {
attribute_name_index,
data
}
)
}
}
/// One entry of the class file's method table.
#[derive(Debug)]
pub struct MethodInfo {
    /// Raw access flag bits of the method.
    access_flags: AccessFlagMask,
    /// Raw `u16`; stored without being resolved.
    name_index: u16,
    /// Raw `u16`; stored without being resolved.
    descriptor_index: u16,
    /// Attributes attached to this method.
    attributes: Box<[AttributeInfo]>,
}

impl MethodInfo {
    /// Reads one method entry: flags, name index, descriptor index, then its
    /// attribute table (in that order).
    fn from_reader(reader: &mut dyn Read, pool: &Box<[ConstantPoolInfo]>) -> Result<Self, Error> {
        let mask = read_u16(reader)?;
        let name_index = read_u16(reader)?;
        let descriptor_index = read_u16(reader)?;
        let attributes = AttributeInfo::array_from_reader(reader, pool)?;

        Ok(Self {
            access_flags: AccessFlagMask { mask },
            name_index,
            descriptor_index,
            attributes,
        })
    }
}
fn read_buffer(reader: &mut dyn Read, size: usize) -> Result<Box<[u8]>, Error> {
2024-08-29 14:54:00 +02:00
let mut buffer: Box<[u8]> = vec![0; size].into_boxed_slice();
2024-08-29 14:48:40 +02:00
reader.read_exact(&mut buffer)?;
Ok(buffer)
}
/// Reads eight bytes and interprets them as a big-endian IEEE 754 double.
///
/// Fails with the reader's error if fewer than eight bytes are available.
fn read_f64(reader: &mut dyn Read) -> Result<f64, std::io::Error> {
    let mut raw = [0u8; 8];
    reader.read_exact(&mut raw).map(|()| f64::from_be_bytes(raw))
}
/// Reads four bytes and interprets them as a big-endian IEEE 754 float.
///
/// Fails with the reader's error if fewer than four bytes are available.
fn read_f32(reader: &mut dyn Read) -> Result<f32, std::io::Error> {
    let mut raw = [0u8; 4];
    reader.read_exact(&mut raw).map(|()| f32::from_be_bytes(raw))
}
/// Reads four bytes and interprets them as a big-endian signed 32-bit integer.
///
/// Fails with the reader's error if fewer than four bytes are available.
fn read_i32(reader: &mut dyn Read) -> Result<i32, std::io::Error> {
    let mut raw = [0u8; 4];
    reader.read_exact(&mut raw).map(|()| i32::from_be_bytes(raw))
}
/// Reads a big-endian unsigned 64-bit integer as two consecutive `read_u32`
/// calls: the upper half first, then the lower half.
fn read_u64(reader: &mut dyn Read) -> Result<u64, std::io::Error> {
    let upper = u64::from(read_u32(reader)?);
    let lower = u64::from(read_u32(reader)?);
    Ok(upper << 32 | lower)
}
/// Reads four bytes and interprets them as a big-endian unsigned 32-bit integer.
///
/// Fails with the reader's error if fewer than four bytes are available.
fn read_u32(reader: &mut dyn Read) -> Result<u32, std::io::Error> {
    let mut raw = [0u8; 4];
    reader.read_exact(&mut raw).map(|()| u32::from_be_bytes(raw))
}
/// Reads two bytes and interprets them as a big-endian unsigned 16-bit integer.
///
/// Fails with the reader's error if fewer than two bytes are available.
fn read_u16(reader: &mut dyn Read) -> Result<u16, std::io::Error> {
    let mut raw = [0u8; 2];
    reader.read_exact(&mut raw).map(|()| u16::from_be_bytes(raw))
}
/// Reads a single byte.
///
/// Fails with the reader's error if no byte is available.
fn read_u8(reader: &mut dyn Read) -> Result<u8, std::io::Error> {
    let mut byte = [0u8; 1];
    reader.read_exact(&mut byte)?;
    Ok(byte[0])
}