diff --git a/javahc.cabal b/javahc.cabal index 724094a..9966236 100644 --- a/javahc.cabal +++ b/javahc.cabal @@ -27,8 +27,10 @@ library JavaClassModifier JavaExpression JavaFile + JavaLanguage JavaVariableInitializer Lib + LiteralExpression PrimitiveTypes Syntax other-modules: diff --git a/src/Annotation.hs b/src/Annotation.hs new file mode 100644 index 0000000..9156c39 --- /dev/null +++ b/src/Annotation.hs @@ -0,0 +1,8 @@ +module Annotation (Annotation(..) ) +where + +import JavaExpression (JavaExpression) + +import Data.Map (Map) + +data Annotation = Normal String (Map String JavaExpression) diff --git a/src/ClassLiteral.hs b/src/ClassLiteral.hs new file mode 100644 index 0000000..998bc46 --- /dev/null +++ b/src/ClassLiteral.hs @@ -0,0 +1,31 @@ +module ClassLiteral (ClassLiteral(..), parseClassLiteral ) where + +import qualified Data.List as List +import Numeric.Natural (Natural) + +import PrimitiveTypes (PrimitiveType, primitiveType) +import JavaLanguage (m_reserved, m_lexeme, m_identifier) + +import Text.Parsec (choice, string', many, sepBy, char, manyTill, lookAhead) +import Text.Parsec.String (Parser) + +data ClassLiteral = Void + | Primitive PrimitiveType Natural -- type, array dimension + | Type String Natural -- type, array dimension + deriving Show + +parseClassLiteral :: Parser ClassLiteral +parseClassLiteral = choice + [ m_reserved "void" *> return Void + , primitiveType >>= parseArrayDims . Primitive + , typeName >>= parseArrayDims . Type + ] <* string' ".class" + +typeName :: Parser String +typeName = do + start <- m_identifier + paths <- manyTill (char '.' *> m_identifier) (choice [ lookAhead $ string' ".class" , lookAhead $ string' "[]" ]) + return . List.intercalate "." $ (start:paths) + +parseArrayDims :: (Natural -> ClassLiteral) -> Parser ClassLiteral +parseArrayDims f = m_lexeme $ many (string' "[]") >>= return . f . fromInteger . toInteger . List.length diff --git a/src/JavaClassField.hs b/src/JavaClassField.hs new file mode 100644 index 0000000..37cc5d9 --- /dev/null +++ b/src/JavaClassField.hs @@ -0,0 +1,28 @@ +module JavaClassField (module JavaClassField) where + +import JavaClassFieldVisibility (JavaClassFieldVisibility) +import PrimitiveTypes (PrimitiveType) +import ClassTypeParameter (ClassType, ArrayType) +import JavaVariableInitializer (JavaVariableInitializer) + +import Data.Set (Set) + +data JavaClassField = JavaClassField { name :: String + , visiblity :: JavaClassFieldVisibility + , modifiers :: Set JavaClassFieldModifier + , type_ :: JavaClassFieldType + , initializer :: Maybe JavaVariableInitializer + } + deriving Show + +data JavaClassFieldModifier = Static + | Final + | Transient + | Volatile + deriving (Show, Ord, Eq) + +data JavaClassFieldType = PrimitiveFieldType PrimitiveType + | ClassFieldType ClassType + | VariableFieldType String + | ArrayFieldType ArrayType + deriving Show diff --git a/src/JavaClassFieldVisibility.hs b/src/JavaClassFieldVisibility.hs new file mode 100644 index 0000000..f22162a --- /dev/null +++ b/src/JavaClassFieldVisibility.hs @@ -0,0 +1,4 @@ +module JavaClassFieldVisibility (JavaClassFieldVisibility(..) ) where + +data JavaClassFieldVisibility = Public | Protected | Private | Package + deriving Show diff --git a/src/JavaLanguage.hs b/src/JavaLanguage.hs new file mode 100644 index 0000000..7dad1b2 --- /dev/null +++ b/src/JavaLanguage.hs @@ -0,0 +1,65 @@ +module JavaLanguage +( javaLanguageDef +, m_paren +, m_identifier +, m_reserved +, m_lexeme +, m_symbol +, m_commaSep1 +, m_commaSep +, m_angles +, m_braces +, m_reservedOp +) where + +import Text.Parsec +import Text.Parsec.Language +import Text.Parsec.Token + +import Data.Functor.Identity + +javaLanguageDef :: GenLanguageDef String u Identity +javaLanguageDef = emptyDef{ commentStart = "/*" + , commentEnd = "*/" + , commentLine = "//" + , nestedComments = False + , identStart = letter <|> char '_' + , identLetter = alphaNum <|> char '_' + , opStart = oneOf ".+-*/;" + , opLetter = oneOf ".+-*/;" + , reservedOpNames = [";"] + , reservedNames = [ "true" + , "false" + , "package" + , "class" + , "interface" + , "void" + , "boolean" + , "char" + , "short" + , "int" + , "float" + , "long" + , "double" + , "instanceof" + , "public" + , "private" + , "protected" + , "static" + , "final" + , "volatile" + , "transient" + ] + , caseSensitive = True + } + +TokenParser{ parens = m_paren + , identifier = m_identifier + , reserved = m_reserved + , lexeme = m_lexeme + , symbol = m_symbol + , commaSep1 = m_commaSep1 + , commaSep = m_commaSep + , angles = m_angles + , braces = m_braces + , reservedOp = m_reservedOp } = makeTokenParser javaLanguageDef diff --git a/src/JavaVariableInitializer.hs b/src/JavaVariableInitializer.hs new file mode 100644 index 0000000..2e3ef15 --- /dev/null +++ b/src/JavaVariableInitializer.hs @@ -0,0 +1,7 @@ +module JavaVariableInitializer (JavaVariableInitializer(..) ) where + +import JavaExpression (JavaExpression) + +data JavaVariableInitializer = ArrayVariableInitializer [JavaVariableInitializer] + | ExpressionVariableInitializer JavaExpression + deriving Show diff --git a/src/LiteralExpression.hs b/src/LiteralExpression.hs new file mode 100644 index 0000000..8354078 --- /dev/null +++ b/src/LiteralExpression.hs @@ -0,0 +1,140 @@ +module LiteralExpression (LiteralExpression(..)) +where + +import Data.Ratio ((%)) +import Data.Int (Int16) +import qualified Data.Char as Char + +import JavaLanguage (m_lexeme, m_reserved) + +import Text.Parsec (many1, digit, oneOf, option, char, choice, many, try, satisfy, manyTill, string', endOfLine, space) +import Text.Parsec.String (Parser) + +data LiteralExpression = NullLiteral + | IntegerLiteral Integer IntegerLiteralType + | FloatingLiteral Rational DecimalLiteralType + | BooleanLiteral Bool + | CharacterLiteral Char + | StringLiteral String + | TextLiteral String + deriving Show + +data IntegerLiteralType = IntLiteral | LongLiteral + deriving Show +data DecimalLiteralType = DoubleLiteral | FloatLiteral + deriving Show + +signedInteger :: Parser Integer +signedInteger = do + signFactor <- option 1 (char '-' *> return (-1)) + digits <- many1 digit + return (signFactor * read digits) + +parseNullLiteral :: Parser LiteralExpression +parseNullLiteral = m_reserved "null" *> return NullLiteral + +-- TODO: Hex, Oct, Bin +parseDecimalIntegerLiteral :: Parser LiteralExpression +parseDecimalIntegerLiteral = m_lexeme $ do + negative <- option False (char '-' *> return True) + digitString <- many1 digit + literalType <- option IntLiteral (oneOf "Ll" *> return LongLiteral) + let positiveNumber = read digitString :: Integer + let number = (if (negative) then negate else id) positiveNumber + return (IntegerLiteral number literalType) + +-- TODO: Hex +parseDecimalFloatingLiteral :: Parser LiteralExpression +parseDecimalFloatingLiteral = do + prefix <- option "0" $ many1 digit + _ <- option '.' (char '.') + suffix <- many digit + exponent <- option 0 (char 'e' *> signedInteger) + literalType <- option FloatLiteral $ choice [ + oneOf "fF" *> return FloatLiteral + , oneOf "dD" *> return DoubleLiteral + ] + let suffixLength = length suffix + let prefixPower = 10 ^ suffixLength :: Integer + let iprefix = read prefix :: Integer + let isuffix = if (suffixLength == 0) then 0 else read suffix :: Integer + let exponentPower = (if exponent > 0 then 10 ^ exponent else 1) % (if exponent < 0 then 10^(-exponent) else 1) + let rational = ((iprefix * prefixPower + isuffix) % prefixPower) * exponentPower + return (FloatingLiteral rational literalType) + + +parseBooleanLiteral :: Parser LiteralExpression +parseBooleanLiteral = choice + [ m_reserved "true" *> return (BooleanLiteral True) + , m_reserved "false" *> return (BooleanLiteral False) + ] + +parseCharLiteral :: Parser LiteralExpression +parseCharLiteral = (char '\'' *> choice [ + parseEscapedChar + , satisfy (not . flip elem "'\\") + ] <* char '\'') >>= return . CharacterLiteral + +-- TODO: Support double line terminator +parseEscapedChar :: Parser Char +parseEscapedChar = char '\\' *> choice [ + char 'b' *> return '\b' + , char 's' *> return ' ' + , char 't' *> return '\t' + , char 'n' *> return '\n' + , char 'f' *> return '\f' + , char 'r' *> return '\r' + , char '"' *> return '"' + , char '\'' *> return '\'' + , char '\\' *> return '\\' + , parseOctalEscapedChar + ] + +parseOctalEscapedChar :: Parser Char +parseOctalEscapedChar = choice [ + try $ tripleOctalEscapedChar + , try $ doubleOctalEscapedChar + , singleOctalEscapedChar + ] + +singleOctalEscapedChar :: Parser Char +singleOctalEscapedChar = do + char1 <- oneOf "01234567" + let num1 = read [char1] :: Int + return . Char.chr $ num1 + +doubleOctalEscapedChar :: Parser Char +doubleOctalEscapedChar = do + char1 <- oneOf "01234567" + char2 <- oneOf "01234567" + let num1 = read [char1] :: Int + let num2 = read [char2] :: Int + let num = num2 + num1 * 8 + return . Char.chr $ num + +tripleOctalEscapedChar :: Parser Char +tripleOctalEscapedChar = do + char1 <- oneOf "0123" + char2 <- oneOf "01234567" + char3 <- oneOf "01234567" + let num1 = read [char1] :: Int + let num2 = read [char2] :: Int + let num3 = read [char3] :: Int + let num = num3 + num2 * 8 + num1 * 64 + return . Char.chr $ num + +parseStringLiteral :: Parser LiteralExpression +parseStringLiteral = do + _ <- char '"' + content <- manyTill (choice + [ parseEscapedChar + , satisfy (not . flip elem "\r\n\"\\") + ]) (char '"') + return (StringLiteral content) + +parseTextLiteral :: Parser LiteralExpression +parseTextLiteral = do + _ <- string' "\"\"\"" + _ <- space `manyTill` endOfLine + content <- (choice [ parseEscapedChar , satisfy (not . flip elem "\\") ] ) `manyTill` (string' "\"\"\"") + return . TextLiteral $ content diff --git a/src/PrimaryExpression.hs b/src/PrimaryExpression.hs new file mode 100644 index 0000000..454060c --- /dev/null +++ b/src/PrimaryExpression.hs @@ -0,0 +1,26 @@ +module PrimaryExpression (PrimaryExpression(..) ) where + +import qualified Data.List as List + +import ClassLiteral (ClassLiteral) +import JavaLanguage (m_identifier) +import LiteralExpression (LiteralExpression) + +import Text.Parsec (manyTill, char, string') +import Text.Parsec.String (Parser) + +data PrimaryExpression = Literal LiteralExpression + | ClassLiteral ClassLiteral + | This + | TypedThis String + | InstanceCreation + deriving Show + +parseThis :: Parser PrimaryExpression +parseThis = string' "this" *> return This + +parseTypedThis :: Parser PrimaryExpression +parseTypedThis = do + start <- m_identifier + paths <- manyTill (char '.' *> m_identifier) (string' ".this") + return . TypedThis . List.intercalate "." $ (start:paths) diff --git a/src/PrimitiveTypes.hs b/src/PrimitiveTypes.hs index 1b958ec..489074e 100644 --- a/src/PrimitiveTypes.hs +++ b/src/PrimitiveTypes.hs @@ -1,6 +1,25 @@ -module PrimitiveTypes ( - PrimitiveType(..) +module PrimitiveTypes + ( PrimitiveType(..) + , primitiveType ) where +import JavaLanguage (m_reserved) + +import Text.Parsec (choice) +import Text.Parsec.String (Parser) + data PrimitiveType = PrimitiveBoolean | PrimitiveByte | PrimitiveShort | PrimitiveInt | PrimitiveLong | PrimitiveChar | PrimitiveFloat | PrimitiveDouble deriving Show + +primitiveType :: Parser PrimitiveType +primitiveType = choice [ + m_reserved "boolean" *> return PrimitiveTypes.PrimitiveBoolean + , m_reserved "byte" *> return PrimitiveTypes.PrimitiveByte + , m_reserved "char" *> return PrimitiveTypes.PrimitiveChar + , m_reserved "short" *> return PrimitiveTypes.PrimitiveShort + , m_reserved "int" *> return PrimitiveTypes.PrimitiveInt + , m_reserved "float" *> return PrimitiveTypes.PrimitiveFloat + , m_reserved "long" *> return PrimitiveTypes.PrimitiveLong + , m_reserved "double" *> return PrimitiveTypes.PrimitiveDouble + ] + diff --git a/src/Syntax.hs b/src/Syntax.hs index 30782d3..5507c3d 100644 --- a/src/Syntax.hs +++ b/src/Syntax.hs @@ -16,60 +16,14 @@ import qualified ClassTypeParameter as ClassTypeParameter import qualified JavaClassFieldVisibility as JavaClassFieldVisibility import qualified JavaClassField as JavaClassField import PrimitiveTypes +import JavaLanguage -import Data.Functor.Identity import qualified Data.List as List import qualified Data.Maybe as Maybe import qualified Data.Set as Set import Numeric.Natural -javaLanguageDef :: GenLanguageDef String u Identity -javaLanguageDef = emptyDef{ commentStart = "/*" - , commentEnd = "*/" - , commentLine = "//" - , nestedComments = False - , identStart = letter <|> char '_' - , identLetter = alphaNum <|> char '_' - , opStart = oneOf ".+-*/;" - , opLetter = oneOf ".+-*/;" - , reservedOpNames = [";"] - , reservedNames = [ "true" - , "false" - , "package" - , "class" - , "interface" - , "void" - , "boolean" - , "char" - , "short" - , "int" - , "float" - , "long" - , "double" - , "instanceof" - , "public" - , "private" - , "protected" - , "static" - , "final" - , "volatile" - , "transient" - ] - , caseSensitive = True - } - -TokenParser{ parens = m_paren - , identifier = m_identifier - , reserved = m_reserved - , lexeme = m_lexeme - , symbol = m_symbol - , commaSep1 = m_commaSep1 - , commaSep = m_commaSep - , angles = m_angles - , braces = m_braces - , reservedOp = m_reservedOp } = makeTokenParser javaLanguageDef - javaPathParser = m_identifier `sepBy1` (string ".") semicolon = (m_lexeme . char $ ';') "semicolon" @@ -136,17 +90,6 @@ arrayType = do arrayDepth <- m_lexeme $ (many1 $ string "[]") >>= return . List.length return (partialConstructor (fromInteger . toInteger $ arrayDepth)) -primitiveType = choice [ - m_reserved "boolean" *> return PrimitiveTypes.PrimitiveBoolean - , m_reserved "byte" *> return PrimitiveTypes.PrimitiveByte - , m_reserved "char" *> return PrimitiveTypes.PrimitiveChar - , m_reserved "short" *> return PrimitiveTypes.PrimitiveShort - , m_reserved "int" *> return PrimitiveTypes.PrimitiveInt - , m_reserved "float" *> return PrimitiveTypes.PrimitiveFloat - , m_reserved "long" *> return PrimitiveTypes.PrimitiveLong - , m_reserved "double" *> return PrimitiveTypes.PrimitiveDouble - ] - qualifiedClassType = do qualifiedType <- typeIdentifier `sepBy1` (string' ".") >>= return . List.intercalate "." maybeTypeArguments <- optionMaybe $ m_angles (m_commaSep1 typeArgument)