feat: constant-pool parsing, backtraces

This commit is contained in:
vegowotenks 2025-07-11 23:06:56 +02:00
parent 5f3e7b761e
commit 7da2a30cbb
7 changed files with 154 additions and 35 deletions

View file

@ -47,6 +47,7 @@ library
array array
, base >=4.7 && <5 , base >=4.7 && <5
, bytestring , bytestring
, text
default-language: Haskell2010 default-language: Haskell2010
executable java-classfile-exe executable java-classfile-exe
@ -63,6 +64,7 @@ executable java-classfile-exe
, base >=4.7 && <5 , base >=4.7 && <5
, bytestring , bytestring
, java-classfile , java-classfile
, text
default-language: Haskell2010 default-language: Haskell2010
test-suite java-classfile-test test-suite java-classfile-test
@ -80,4 +82,5 @@ test-suite java-classfile-test
, base >=4.7 && <5 , base >=4.7 && <5
, bytestring , bytestring
, java-classfile , java-classfile
, text
default-language: Haskell2010 default-language: Haskell2010

View file

@ -23,6 +23,7 @@ dependencies:
- array - array
- base >= 4.7 && < 5 - base >= 4.7 && < 5
- bytestring - bytestring
- text
ghc-options: ghc-options:
- -Wall - -Wall

View file

@ -1,15 +1,25 @@
{-# LANGUAGE DerivingVia #-} {-# LANGUAGE DerivingVia #-}
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DataKinds #-} {-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE InstanceSigs #-}
module Language.Java.Classfile.ConstantPool.Entry (Entry(..)) where module Language.Java.Classfile.ConstantPool.Entry (Entry(..)) where
import GHC.Generics (Generic, Generically(..)) import GHC.Generics (Generic, Generically(..))
import Language.Java.Classfile.Extractable (Extractable) import Language.Java.Classfile.Extractable (Extractable (extract))
import Language.Java.Classfile.Extractable.WithTag (Word8Tag) import Language.Java.Classfile.Extractable.WithTag (Word8Tag)
import Language.Java.Classfile.ConstantPool.References (Utf8Reference, ClassReference, NameAndTypeReference, MethodHandleReferenceKind, OpaqueReference, BootstrapMethodIndex) import Language.Java.Classfile.ConstantPool.References (Utf8Reference, ClassReference, NameAndTypeReference, MethodHandleReferenceKind, OpaqueReference, BootstrapMethodIndex)
import Data.Int (Int32, Int64) import Data.Int (Int32, Int64)
import Data.Word (Word16)
import Data.Text (Text)
import Language.Java.Classfile.Extract (Extract, bytes)
import qualified Data.Text.Encoding as Text
import qualified Data.ByteString.Lazy as ByteString
import qualified Data.ByteString as StrictByteString
data Entry data Entry
= Integer (Word8Tag 3 Int32) = Utf8 (Word8Tag 1 (SizedText Word16))
| Integer (Word8Tag 3 Int32)
| Float (Word8Tag 4 Float) | Float (Word8Tag 4 Float)
| Long (Word8Tag 5 Int64) | Long (Word8Tag 5 Int64)
| Double (Word8Tag 6 Double) | Double (Word8Tag 6 Double)
@ -31,3 +41,17 @@ data Entry
data MethodHandleInfo = MethodHandleInfo MethodHandleReferenceKind OpaqueReference data MethodHandleInfo = MethodHandleInfo MethodHandleReferenceKind OpaqueReference
deriving stock (Show, Generic) deriving stock (Show, Generic)
deriving Extractable via Generically MethodHandleInfo deriving Extractable via Generically MethodHandleInfo
newtype SizedText sizeType = SizedText Text
deriving stock (Show)
instance (Integral sizeType, Extractable sizeType) => Extractable (SizedText sizeType) where
extract :: Extract (SizedText sizeType)
extract = do
size <- extract @sizeType
slice <- StrictByteString.concat . ByteString.toChunks <$> bytes (fromIntegral size) -- TODO: Replace java zeroes with normal zeroes, normalize 2x3 bytes into 4 byte utf8
case Text.decodeUtf8' slice of
Left err -> fail $ show err
Right t -> pure $ SizedText t

View file

@ -1,24 +1,45 @@
{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE InstanceSigs #-} {-# LANGUAGE InstanceSigs #-}
module Language.Java.Classfile.Extract (Extract(), bytes, runExtract, expectRaw, expectEqual) where module Language.Java.Classfile.Extract (Extract(), bytes, runExtract, expectRaw, expectEqual, traceType, traceConstructor, traceField, traceIndex) where
import Data.ByteString.Lazy (ByteString) import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as ByteString import qualified Data.ByteString.Lazy as ByteString
import Control.Applicative (Alternative (empty, (<|>))) import Control.Applicative (Alternative (empty, (<|>)))
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Typeable (Typeable, typeOf)
data Extract a = Extract (Continuation a) newtype Extract a = Extract (Continuation a)
deriving (Functor)
type Continuation a = ByteString -> Reply a type Continuation a = ByteString -> Reply a
newtype Expected = Expected Text
deriving Show
newtype Actual = Actual Text
deriving Show
newtype TypeName = TypeName Text
deriving Show
data Reason
= EndOfInput
| UnexpectedValue Expected Actual TypeName
| Custom Text
| Unknown
deriving Show
data Trace
= InType Text
| InConstructor Text
| InField Text
| AtIndex Word
deriving Show
data Reply a data Reply a
= Done ByteString a -- rest, result = Done ByteString a -- rest, result
| Fail | Fail [Trace] Reason
deriving (Functor) deriving (Functor)
instance Functor Extract where
fmap :: (a -> b) -> Extract a -> Extract b
fmap f (Extract cont) = Extract $ \ input -> f <$> cont input
instance Applicative Extract where instance Applicative Extract where
pure :: a -> Extract a pure :: a -> Extract a
pure x = Extract $ \ rest -> Done rest x pure x = Extract $ \ rest -> Done rest x
@ -26,8 +47,8 @@ instance Applicative Extract where
(<*>) (Extract computeF) (Extract computeX) = Extract $ \ input -> case computeF input of (<*>) (Extract computeF) (Extract computeX) = Extract $ \ input -> case computeF input of
Done rest f -> case computeX rest of Done rest f -> case computeX rest of
Done rest' x -> Done rest' (f x) Done rest' x -> Done rest' (f x)
Fail -> Fail Fail ts r -> Fail ts r
Fail -> Fail Fail ts r -> Fail ts r
instance Monad Extract where instance Monad Extract where
(>>=) :: Extract a -> (a -> Extract b) -> Extract b (>>=) :: Extract a -> (a -> Extract b) -> Extract b
@ -35,16 +56,20 @@ instance Monad Extract where
Done rest a -> let Done rest a -> let
(Extract computeB) = f a (Extract computeB) = f a
in computeB rest in computeB rest
Fail -> Fail Fail ts r -> Fail ts r
instance Alternative Extract where instance Alternative Extract where
empty :: Extract a empty :: Extract a
empty = Extract $ const Fail empty = Extract $ const (Fail [] Unknown)
(<|>) :: Extract a -> Extract a -> Extract a (<|>) :: Extract a -> Extract a -> Extract a
(<|>) (Extract left) (Extract right) = Extract $ \ input -> case left input of (<|>) (Extract left) (Extract right) = Extract $ \ input -> case left input of
Fail -> right input Fail _ _ -> right input
t -> t t -> t
instance MonadFail Extract where
fail :: String -> Extract a
fail = Extract . const . Fail [] . Custom . Text.pack
-- | Get a specified count of bytes. Fail if there are not enough bytes available. -- | Get a specified count of bytes. Fail if there are not enough bytes available.
bytes :: Word -> Extract ByteString bytes :: Word -> Extract ByteString
@ -52,22 +77,36 @@ bytes count = Extract $ \ input -> let
count' = fromIntegral count count' = fromIntegral count
(bs, rest) = ByteString.splitAt count' input (bs, rest) = ByteString.splitAt count' input
in if ByteString.length bs /= count' in if ByteString.length bs /= count'
then Fail then Fail [] EndOfInput
else Done rest bs else Done rest bs
-- | Fail if the specified bytestring is not immediately parseable
expectRaw :: ByteString -> Extract ByteString expectRaw :: ByteString -> Extract ByteString
expectRaw e = Extract $ \ input -> let expectRaw e = Extract $ \ input -> let
(actual, rest) = ByteString.splitAt (ByteString.length e) input (actual, rest) = ByteString.splitAt (ByteString.length e) input
in case actual == e of in case actual == e of
True -> Done rest actual True -> Done rest actual
False -> Fail False -> Fail [] (mismatchReason e actual)
expectEqual :: Eq a => Extract a -> a -> Extract a -- | Fail if the Extractor doesn't yield the specified value
expectEqual :: (Eq a, Show a, Typeable a) => Extract a -> a -> Extract a
expectEqual (Extract computeActual) expected = Extract $ \ input -> case computeActual input of expectEqual (Extract computeActual) expected = Extract $ \ input -> case computeActual input of
Done rest actual -> if actual == expected Done rest actual -> if actual == expected
then Done rest actual then Done rest actual
else Fail else Fail [] (mismatchReason expected actual)
Fail -> Fail failed -> failed
mismatchReason :: (Typeable a, Show a) => a -> a -> Reason
mismatchReason expected actual = UnexpectedValue (Expected expectedText) (Actual actualText) (TypeName typeName)
where
actualText = textShow actual
expectedText = textShow expected
typeName = textShow $ typeOf expected
textShow :: Show a => a -> Text
textShow = Text.pack . show
{- It seems I cannot define a lawful monad instance {- It seems I cannot define a lawful monad instance
instance Monad Extract where instance Monad Extract where
@ -77,8 +116,32 @@ instance Monad Extract where
Fail -> Fail Fail -> Fail
-} -}
runExtract :: ByteString -> Extract b -> Maybe (ByteString, b) -- | Apply the extractor to the bytestring, returns the result and the rest on success, otherwise a best-effort traceback.
runExtract string (Extract computation) = case computation string of
Fail -> Nothing runExtract :: ByteString -> Extract b -> Either (Reason, [Trace]) (ByteString, b)
Done rest x -> Just (rest, x) runExtract string (Extract computation) = case computation string of
Fail trace reason -> Left (reason, trace)
Done rest x -> Right (rest, x)
-- | Trace the contained extractor, the backtrace will supply a hint with the given typeName
traceType :: Text -> Extract a -> Extract a
traceType typeName (Extract computeA) = Extract $ \ input -> case computeA input of
Fail ts r -> Fail (InType typeName:ts) r
t -> t
traceConstructor :: Text -> Extract a -> Extract a
traceConstructor conName (Extract computeA) = Extract $ \ input -> case computeA input of
Fail ts r -> Fail (InConstructor conName:ts) r
t -> t
traceField :: Text -> Extract a -> Extract a
traceField fieldName (Extract computeA) = Extract $ \ input -> case computeA input of
Fail ts r -> Fail (InField fieldName:ts) r
t -> t
traceIndex :: Word -> Extract a -> Extract a
traceIndex index (Extract computeA) = Extract $ \ input -> case computeA input of
Fail ts r -> Fail (AtIndex index:ts) r
t -> t

View file

@ -2,23 +2,29 @@
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE UndecidableInstances #-} {-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeApplications #-}
{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE DerivingVia #-} {-# LANGUAGE DerivingVia #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DataKinds #-}
module Language.Java.Classfile.Extractable (Extractable(extract), expectConstant) where module Language.Java.Classfile.Extractable (Extractable(extract), expectConstant) where
import Language.Java.Classfile.Extract (Extract, bytes, expectEqual) import Language.Java.Classfile.Extract (Extract, bytes, expectEqual, traceType, traceConstructor, traceField, traceIndex)
import Data.Word (Word8, Word16, Word32) import Data.Word (Word8, Word16, Word32)
import qualified Data.ByteString.Lazy as ByteString import qualified Data.ByteString.Lazy as ByteString
import Data.ByteString.Lazy (ByteString) import Data.ByteString.Lazy (ByteString)
import Data.Bits (Bits(shiftL, (.|.))) import Data.Bits (Bits(shiftL, (.|.)))
import GHC.Generics (U1 (U1), (:*:) ((:*:)), (:+:) (L1, R1), M1 (M1), K1 (K1), Generic (Rep, to), Generically (Generically)) import GHC.Generics (U1 (U1), (:*:) ((:*:)), (:+:) (L1, R1), M1 (M1), K1 (K1), Generic (Rep, to), Generically (Generically), Meta(..))
import Control.Applicative ((<|>)) import Control.Applicative ((<|>))
import Data.Array.IArray (Array, listArray, Ix) import Data.Array.IArray (Array, listArray, Ix)
import Control.Monad (replicateM) import Control.Monad (forM)
import Data.Int (Int32, Int64) import Data.Int (Int32, Int64)
import Language.Java.Classfile.FromBigEndian (FromBigEndian(fromBigEndian))
import GHC.Float (castWord32ToFloat, castWord64ToDouble) import GHC.Float (castWord32ToFloat, castWord64ToDouble)
import qualified Data.Text as Text
import GHC.TypeLits (symbolVal, KnownSymbol)
import Data.Proxy (Proxy(Proxy))
import Data.Typeable (Typeable)
class Extractable a where class Extractable a where
extract :: Extract a extract :: Extract a
@ -29,11 +35,11 @@ instance Extractable Word8 where
instance Extractable Word16 where instance Extractable Word16 where
extract :: Extract Word16 extract :: Extract Word16
extract = fromBigEndian . build <$> bytes 2 extract = build <$> bytes 2
instance Extractable Word32 where instance Extractable Word32 where
extract :: Extract Word32 extract :: Extract Word32
extract = fromBigEndian . build <$> bytes 4 extract = build <$> bytes 4
instance Extractable Int32 where instance Extractable Int32 where
extract :: Extract Int32 extract :: Extract Int32
@ -59,14 +65,14 @@ instance (Extractable index, Extractable element, Ix index, Integral index) => E
extract :: Extract (Array index element) extract :: Extract (Array index element)
extract = do extract = do
count <- extract count <- extract
elements <- replicateM (fromIntegral count) extract elements <- forM [1..fromIntegral count] $ \ i -> traceIndex i extract
pure $ listArray (1, count) elements pure $ listArray (1, count) elements
deriving via Generically () instance Extractable () deriving via Generically () instance Extractable ()
deriving via Generically (a, b) instance (Extractable a, Extractable b) => Extractable (a, b) deriving via Generically (a, b) instance (Extractable a, Extractable b) => Extractable (a, b)
deriving via Generically (a, b, c) instance (Extractable a, Extractable b, Extractable c) => Extractable (a, b, c) deriving via Generically (a, b, c) instance (Extractable a, Extractable b, Extractable c) => Extractable (a, b, c)
expectConstant :: (Extractable a, Eq a) => a -> Extract a expectConstant :: (Extractable a, Eq a, Show a, Typeable a) => a -> Extract a
expectConstant = expectEqual extract expectConstant = expectEqual extract
build :: (Bits a, Num a) => ByteString -> a build :: (Bits a, Num a) => ByteString -> a
@ -95,8 +101,24 @@ instance (GenericExtractable l, GenericExtractable r) => GenericExtractable (l :
genericExtract = liftA2 (:*:) genericExtract genericExtract genericExtract = liftA2 (:*:) genericExtract genericExtract
-- meta information constructors -- meta information constructors
instance GenericExtractable a => GenericExtractable (M1 i c a) where instance (GenericExtractable a, KnownSymbol name) => GenericExtractable (M1 tag (MetaData name module_ package isNewtype) a) where
genericExtract :: Extract (M1 i c a i2) genericExtract :: Extract (M1 tag (MetaData name module_ package isNewtype) a i)
genericExtract = M1 <$> traceType typeName genericExtract
where
typeName = Text.pack $ symbolVal (Proxy @name)
instance (GenericExtractable a, KnownSymbol name) => GenericExtractable (M1 tag (MetaCons name fixity isRecord) a) where
genericExtract :: Extract (M1 tag (MetaCons name fixity isRecord) a i)
genericExtract = M1 <$> traceConstructor conName genericExtract
where
conName = Text.pack $ symbolVal (Proxy @name)
instance (GenericExtractable a, KnownSymbol name) => GenericExtractable (M1 tag (MetaSel (Just name) isUnpacked isStrictSource isStrictCompiler) a) where
genericExtract = M1 <$> traceField fieldName genericExtract
where
fieldName = Text.pack $ symbolVal (Proxy @name)
instance (GenericExtractable a) => GenericExtractable (M1 tag (MetaSel Nothing isUnpacked isStrictSource isStrictCompiler) a) where
genericExtract = M1 <$> genericExtract genericExtract = M1 <$> genericExtract
-- fields !! yay -- fields !! yay

View file

@ -12,6 +12,7 @@ import GHC.TypeLits (Natural, natVal, KnownNat)
import Data.Kind (Type) import Data.Kind (Type)
import Control.Monad (void) import Control.Monad (void)
import Data.Proxy (Proxy(Proxy)) import Data.Proxy (Proxy(Proxy))
import Data.Typeable ( Typeable )
import Data.Word (Word8) import Data.Word (Word8)
type Word8Tag value a = WithNumericTag value Word8 a type Word8Tag value a = WithNumericTag value Word8 a
@ -20,7 +21,7 @@ type WithNumericTag :: Natural -> Type -> Type -> Type
newtype WithNumericTag value tagType a = Tagged a newtype WithNumericTag value tagType a = Tagged a
deriving newtype Show deriving newtype Show
instance (KnownNat value, Extractable a, Extractable tagType, Eq tagType, Num tagType) => Extractable (WithNumericTag value tagType a) where instance (KnownNat value, Extractable a, Extractable tagType, Eq tagType, Num tagType, Show tagType, Typeable tagType) => Extractable (WithNumericTag value tagType a) where
extract :: Extract (WithNumericTag value tagType a) extract :: Extract (WithNumericTag value tagType a)
extract = do extract = do
void $ expectConstant @tagType (fromIntegral . natVal $ Proxy @value) void $ expectConstant @tagType (fromIntegral . natVal $ Proxy @value)

View file

@ -5,10 +5,15 @@ import Language.Java.Classfile.Extractable (Extractable, extract, expectConstant
import Language.Java.Classfile.Extract (Extract) import Language.Java.Classfile.Extract (Extract)
import Data.Hex (Hex (Hex)) import Data.Hex (Hex (Hex))
data Magic = Magic (Hex Word32) newtype Magic = Magic (Hex Word32)
deriving Show deriving Show
instance Extractable Magic where instance Extractable Magic where
extract :: Extract Magic extract :: Extract Magic
extract = Magic . Hex <$> expectConstant 0xCAFEBABE extract = Magic . Hex <$> expectConstant 0xCAFEBABE
-- >>> :set -XOverloadedLists
-- >>> import Language.Java.Classfile.Extract (runExtract)
-- >>> runExtract [0xCA, 0xFE, 0xBA, 0xBE] (Hex <$> (extract :: Extract (Word32)))
-- Just ("",Hex 0xbebafeca)