feat: constant-pool parsing, backtraces

This commit is contained in:
vegowotenks 2025-07-11 23:06:56 +02:00
parent 5f3e7b761e
commit 7da2a30cbb
7 changed files with 154 additions and 35 deletions

View file

@ -47,6 +47,7 @@ library
array
, base >=4.7 && <5
, bytestring
, text
default-language: Haskell2010
executable java-classfile-exe
@ -63,6 +64,7 @@ executable java-classfile-exe
, base >=4.7 && <5
, bytestring
, java-classfile
, text
default-language: Haskell2010
test-suite java-classfile-test
@ -80,4 +82,5 @@ test-suite java-classfile-test
, base >=4.7 && <5
, bytestring
, java-classfile
, text
default-language: Haskell2010

View file

@ -23,6 +23,7 @@ dependencies:
- array
- base >= 4.7 && < 5
- bytestring
- text
ghc-options:
- -Wall

View file

@ -1,15 +1,25 @@
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE InstanceSigs #-}
module Language.Java.Classfile.ConstantPool.Entry (Entry(..)) where
import GHC.Generics (Generic, Generically(..))
import Language.Java.Classfile.Extractable (Extractable)
import Language.Java.Classfile.Extractable (Extractable (extract))
import Language.Java.Classfile.Extractable.WithTag (Word8Tag)
import Language.Java.Classfile.ConstantPool.References (Utf8Reference, ClassReference, NameAndTypeReference, MethodHandleReferenceKind, OpaqueReference, BootstrapMethodIndex)
import Data.Int (Int32, Int64)
import Data.Word (Word16)
import Data.Text (Text)
import Language.Java.Classfile.Extract (Extract, bytes)
import qualified Data.Text.Encoding as Text
import qualified Data.ByteString.Lazy as ByteString
import qualified Data.ByteString as StrictByteString
data Entry
= Integer (Word8Tag 3 Int32)
= Utf8 (Word8Tag 1 (SizedText Word16))
| Integer (Word8Tag 3 Int32)
| Float (Word8Tag 4 Float)
| Long (Word8Tag 5 Int64)
| Double (Word8Tag 6 Double)
@ -31,3 +41,17 @@ data Entry
data MethodHandleInfo = MethodHandleInfo MethodHandleReferenceKind OpaqueReference
deriving stock (Show, Generic)
deriving Extractable via Generically MethodHandleInfo
newtype SizedText sizeType = SizedText Text
deriving stock (Show)
instance (Integral sizeType, Extractable sizeType) => Extractable (SizedText sizeType) where
extract :: Extract (SizedText sizeType)
extract = do
size <- extract @sizeType
slice <- StrictByteString.concat . ByteString.toChunks <$> bytes (fromIntegral size) -- TODO: Replace java zeroes with normal zeroes, normalize 2x3 bytes into 4 byte utf8
case Text.decodeUtf8' slice of
Left err -> fail $ show err
Right t -> pure $ SizedText t

View file

@ -1,24 +1,45 @@
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE InstanceSigs #-}
module Language.Java.Classfile.Extract (Extract(), bytes, runExtract, expectRaw, expectEqual) where
module Language.Java.Classfile.Extract (Extract(), bytes, runExtract, expectRaw, expectEqual, traceType, traceConstructor, traceField, traceIndex) where
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as ByteString
import Control.Applicative (Alternative (empty, (<|>)))
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Typeable (Typeable, typeOf)
data Extract a = Extract (Continuation a)
newtype Extract a = Extract (Continuation a)
deriving (Functor)
type Continuation a = ByteString -> Reply a
newtype Expected = Expected Text
deriving Show
newtype Actual = Actual Text
deriving Show
newtype TypeName = TypeName Text
deriving Show
data Reason
= EndOfInput
| UnexpectedValue Expected Actual TypeName
| Custom Text
| Unknown
deriving Show
data Trace
= InType Text
| InConstructor Text
| InField Text
| AtIndex Word
deriving Show
data Reply a
= Done ByteString a -- rest, result
| Fail
| Fail [Trace] Reason
deriving (Functor)
instance Functor Extract where
fmap :: (a -> b) -> Extract a -> Extract b
fmap f (Extract cont) = Extract $ \ input -> f <$> cont input
instance Applicative Extract where
pure :: a -> Extract a
pure x = Extract $ \ rest -> Done rest x
@ -26,8 +47,8 @@ instance Applicative Extract where
(<*>) (Extract computeF) (Extract computeX) = Extract $ \ input -> case computeF input of
Done rest f -> case computeX rest of
Done rest' x -> Done rest' (f x)
Fail -> Fail
Fail -> Fail
Fail ts r -> Fail ts r
Fail ts r -> Fail ts r
instance Monad Extract where
(>>=) :: Extract a -> (a -> Extract b) -> Extract b
@ -35,16 +56,20 @@ instance Monad Extract where
Done rest a -> let
(Extract computeB) = f a
in computeB rest
Fail -> Fail
Fail ts r -> Fail ts r
instance Alternative Extract where
empty :: Extract a
empty = Extract $ const Fail
empty = Extract $ const (Fail [] Unknown)
(<|>) :: Extract a -> Extract a -> Extract a
(<|>) (Extract left) (Extract right) = Extract $ \ input -> case left input of
Fail -> right input
Fail _ _ -> right input
t -> t
instance MonadFail Extract where
fail :: String -> Extract a
fail = Extract . const . Fail [] . Custom . Text.pack
-- | Get a specified count of bytes. Fail if there are not enough bytes available.
bytes :: Word -> Extract ByteString
@ -52,22 +77,36 @@ bytes count = Extract $ \ input -> let
count' = fromIntegral count
(bs, rest) = ByteString.splitAt count' input
in if ByteString.length bs /= count'
then Fail
then Fail [] EndOfInput
else Done rest bs
-- | Fail if the specified bytestring is not immediately parseable
expectRaw :: ByteString -> Extract ByteString
expectRaw e = Extract $ \ input -> let
(actual, rest) = ByteString.splitAt (ByteString.length e) input
in case actual == e of
True -> Done rest actual
False -> Fail
False -> Fail [] (mismatchReason e actual)
expectEqual :: Eq a => Extract a -> a -> Extract a
-- | Fail if the Extractor doesn't yield the specified value
expectEqual :: (Eq a, Show a, Typeable a) => Extract a -> a -> Extract a
expectEqual (Extract computeActual) expected = Extract $ \ input -> case computeActual input of
Done rest actual -> if actual == expected
then Done rest actual
else Fail
Fail -> Fail
else Fail [] (mismatchReason expected actual)
failed -> failed
mismatchReason :: (Typeable a, Show a) => a -> a -> Reason
mismatchReason expected actual = UnexpectedValue (Expected expectedText) (Actual actualText) (TypeName typeName)
where
actualText = textShow actual
expectedText = textShow expected
typeName = textShow $ typeOf expected
textShow :: Show a => a -> Text
textShow = Text.pack . show
{- It seems I cannot define a lawful monad instance
instance Monad Extract where
@ -77,8 +116,32 @@ instance Monad Extract where
Fail -> Fail
-}
runExtract :: ByteString -> Extract b -> Maybe (ByteString, b)
runExtract string (Extract computation) = case computation string of
Fail -> Nothing
Done rest x -> Just (rest, x)
-- | Apply the extractor to the bytestring, returns the result and the rest on success, otherwise a best-effort traceback.
runExtract :: ByteString -> Extract b -> Either (Reason, [Trace]) (ByteString, b)
runExtract string (Extract computation) = case computation string of
Fail trace reason -> Left (reason, trace)
Done rest x -> Right (rest, x)
-- | Trace the contained extractor, the backtrace will supply a hint with the given typeName
traceType :: Text -> Extract a -> Extract a
traceType typeName (Extract computeA) = Extract $ \ input -> case computeA input of
Fail ts r -> Fail (InType typeName:ts) r
t -> t
traceConstructor :: Text -> Extract a -> Extract a
traceConstructor conName (Extract computeA) = Extract $ \ input -> case computeA input of
Fail ts r -> Fail (InConstructor conName:ts) r
t -> t
traceField :: Text -> Extract a -> Extract a
traceField fieldName (Extract computeA) = Extract $ \ input -> case computeA input of
Fail ts r -> Fail (InField fieldName:ts) r
t -> t
traceIndex :: Word -> Extract a -> Extract a
traceIndex index (Extract computeA) = Extract $ \ input -> case computeA input of
Fail ts r -> Fail (AtIndex index:ts) r
t -> t

View file

@ -2,23 +2,29 @@
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DataKinds #-}
module Language.Java.Classfile.Extractable (Extractable(extract), expectConstant) where
import Language.Java.Classfile.Extract (Extract, bytes, expectEqual)
import Language.Java.Classfile.Extract (Extract, bytes, expectEqual, traceType, traceConstructor, traceField, traceIndex)
import Data.Word (Word8, Word16, Word32)
import qualified Data.ByteString.Lazy as ByteString
import Data.ByteString.Lazy (ByteString)
import Data.Bits (Bits(shiftL, (.|.)))
import GHC.Generics (U1 (U1), (:*:) ((:*:)), (:+:) (L1, R1), M1 (M1), K1 (K1), Generic (Rep, to), Generically (Generically))
import GHC.Generics (U1 (U1), (:*:) ((:*:)), (:+:) (L1, R1), M1 (M1), K1 (K1), Generic (Rep, to), Generically (Generically), Meta(..))
import Control.Applicative ((<|>))
import Data.Array.IArray (Array, listArray, Ix)
import Control.Monad (replicateM)
import Control.Monad (forM)
import Data.Int (Int32, Int64)
import Language.Java.Classfile.FromBigEndian (FromBigEndian(fromBigEndian))
import GHC.Float (castWord32ToFloat, castWord64ToDouble)
import qualified Data.Text as Text
import GHC.TypeLits (symbolVal, KnownSymbol)
import Data.Proxy (Proxy(Proxy))
import Data.Typeable (Typeable)
class Extractable a where
extract :: Extract a
@ -29,11 +35,11 @@ instance Extractable Word8 where
instance Extractable Word16 where
extract :: Extract Word16
extract = fromBigEndian . build <$> bytes 2
extract = build <$> bytes 2
instance Extractable Word32 where
extract :: Extract Word32
extract = fromBigEndian . build <$> bytes 4
extract = build <$> bytes 4
instance Extractable Int32 where
extract :: Extract Int32
@ -59,14 +65,14 @@ instance (Extractable index, Extractable element, Ix index, Integral index) => E
extract :: Extract (Array index element)
extract = do
count <- extract
elements <- replicateM (fromIntegral count) extract
elements <- forM [1..fromIntegral count] $ \ i -> traceIndex i extract
pure $ listArray (1, count) elements
deriving via Generically () instance Extractable ()
deriving via Generically (a, b) instance (Extractable a, Extractable b) => Extractable (a, b)
deriving via Generically (a, b, c) instance (Extractable a, Extractable b, Extractable c) => Extractable (a, b, c)
expectConstant :: (Extractable a, Eq a) => a -> Extract a
expectConstant :: (Extractable a, Eq a, Show a, Typeable a) => a -> Extract a
expectConstant = expectEqual extract
build :: (Bits a, Num a) => ByteString -> a
@ -95,8 +101,24 @@ instance (GenericExtractable l, GenericExtractable r) => GenericExtractable (l :
genericExtract = liftA2 (:*:) genericExtract genericExtract
-- meta information constructors
instance GenericExtractable a => GenericExtractable (M1 i c a) where
genericExtract :: Extract (M1 i c a i2)
instance (GenericExtractable a, KnownSymbol name) => GenericExtractable (M1 tag (MetaData name module_ package isNewtype) a) where
genericExtract :: Extract (M1 tag (MetaData name module_ package isNewtype) a i)
genericExtract = M1 <$> traceType typeName genericExtract
where
typeName = Text.pack $ symbolVal (Proxy @name)
instance (GenericExtractable a, KnownSymbol name) => GenericExtractable (M1 tag (MetaCons name fixity isRecord) a) where
genericExtract :: Extract (M1 tag (MetaCons name fixity isRecord) a i)
genericExtract = M1 <$> traceConstructor conName genericExtract
where
conName = Text.pack $ symbolVal (Proxy @name)
instance (GenericExtractable a, KnownSymbol name) => GenericExtractable (M1 tag (MetaSel (Just name) isUnpacked isStrictSource isStrictCompiler) a) where
genericExtract = M1 <$> traceField fieldName genericExtract
where
fieldName = Text.pack $ symbolVal (Proxy @name)
instance (GenericExtractable a) => GenericExtractable (M1 tag (MetaSel Nothing isUnpacked isStrictSource isStrictCompiler) a) where
genericExtract = M1 <$> genericExtract
-- fields !! yay

View file

@ -12,6 +12,7 @@ import GHC.TypeLits (Natural, natVal, KnownNat)
import Data.Kind (Type)
import Control.Monad (void)
import Data.Proxy (Proxy(Proxy))
import Data.Typeable ( Typeable )
import Data.Word (Word8)
type Word8Tag value a = WithNumericTag value Word8 a
@ -20,7 +21,7 @@ type WithNumericTag :: Natural -> Type -> Type -> Type
newtype WithNumericTag value tagType a = Tagged a
deriving newtype Show
instance (KnownNat value, Extractable a, Extractable tagType, Eq tagType, Num tagType) => Extractable (WithNumericTag value tagType a) where
instance (KnownNat value, Extractable a, Extractable tagType, Eq tagType, Num tagType, Show tagType, Typeable tagType) => Extractable (WithNumericTag value tagType a) where
extract :: Extract (WithNumericTag value tagType a)
extract = do
void $ expectConstant @tagType (fromIntegral . natVal $ Proxy @value)

View file

@ -5,10 +5,15 @@ import Language.Java.Classfile.Extractable (Extractable, extract, expectConstant
import Language.Java.Classfile.Extract (Extract)
import Data.Hex (Hex (Hex))
data Magic = Magic (Hex Word32)
newtype Magic = Magic (Hex Word32)
deriving Show
instance Extractable Magic where
extract :: Extract Magic
extract = Magic . Hex <$> expectConstant 0xCAFEBABE
-- >>> :set -XOverloadedLists
-- >>> import Language.Java.Classfile.Extract (runExtract)
-- >>> runExtract [0xCA, 0xFE, 0xBA, 0xBE] (Hex <$> (extract :: Extract (Word32)))
-- Just ("",Hex 0xbebafeca)