179 lines
7.3 KiB
Haskell
179 lines
7.3 KiB
Haskell
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE NoMonomorphismRestriction #-}
|
|
module Ubc.Parse.Syntax.Language
|
|
( languageDef
|
|
, typeName
|
|
, Ubc.Parse.Syntax.Language.identifier
|
|
, Ubc.Parse.Syntax.Language.reserved
|
|
, Ubc.Parse.Syntax.Language.operator
|
|
, Ubc.Parse.Syntax.Language.reservedOperator
|
|
, Ubc.Parse.Syntax.Language.characterLiteral
|
|
, Ubc.Parse.Syntax.Language.stringLiteral
|
|
, Ubc.Parse.Syntax.Language.natural
|
|
, Ubc.Parse.Syntax.Language.integer
|
|
, Ubc.Parse.Syntax.Language.float
|
|
, Ubc.Parse.Syntax.Language.naturalOrFloat
|
|
, Ubc.Parse.Syntax.Language.decimal
|
|
, Ubc.Parse.Syntax.Language.hexadecimal
|
|
, Ubc.Parse.Syntax.Language.octal
|
|
, Ubc.Parse.Syntax.Language.symbol
|
|
, Ubc.Parse.Syntax.Language.lexeme
|
|
, Ubc.Parse.Syntax.Language.whiteSpace
|
|
, Ubc.Parse.Syntax.Language.parens
|
|
, Ubc.Parse.Syntax.Language.braces
|
|
, Ubc.Parse.Syntax.Language.angles
|
|
, Ubc.Parse.Syntax.Language.brackets
|
|
, Ubc.Parse.Syntax.Language.semicolon
|
|
, Ubc.Parse.Syntax.Language.comma
|
|
, Ubc.Parse.Syntax.Language.colon
|
|
, Ubc.Parse.Syntax.Language.dot
|
|
, Ubc.Parse.Syntax.Language.semicolonSeparated
|
|
, Ubc.Parse.Syntax.Language.semicolonSeparated1
|
|
, Ubc.Parse.Syntax.Language.commaSeparated
|
|
, Ubc.Parse.Syntax.Language.commaSeparated1
|
|
)
|
|
where
|
|
|
|
import Data.Functor ( ($>), (<&>) )
|
|
import qualified Data.Text as Text
|
|
|
|
import Text.Parsec
|
|
( alphaNum, char, letter, oneOf, choice, (<|>), ParsecT )
|
|
import Text.Parsec.Token
|
|
( makeTokenParser,
|
|
GenLanguageDef(..),
|
|
GenTokenParser(TokenParser,
|
|
charLiteral, natural, integer, float,
|
|
naturalOrFloat, decimal, hexadecimal, octal, lexeme,
|
|
whiteSpace, parens, braces, angles, brackets,
|
|
semiSep, semiSep1, commaSep, commaSep1) )
|
|
import qualified Text.Parsec.Token as Token
|
|
|
|
import Ubc.Parse.Syntax (Token)
|
|
|
|
languageDef :: Monad m => GenLanguageDef Token u m
|
|
languageDef = LanguageDef {
|
|
commentStart = "/*"
|
|
, commentEnd = "*/"
|
|
, commentLine = "//"
|
|
, nestedComments = True
|
|
, identStart = letter <|> char '_'
|
|
, identLetter = alphaNum <|> char '_'
|
|
, opStart = oneOf "+-*/%"
|
|
, opLetter = oneOf "+-*/%"
|
|
, reservedNames = [ "struct"
|
|
, "enum"
|
|
, "u32"
|
|
, "i32"
|
|
, "f32"
|
|
, "if"
|
|
, "unless"
|
|
, "then"
|
|
, "else"
|
|
, "while"
|
|
, "until"
|
|
, "type"
|
|
, "import"
|
|
]
|
|
, reservedOpNames = [ "+"
|
|
, "-"
|
|
, "*"
|
|
, "/"
|
|
, "%"
|
|
, "<<"
|
|
, ">>"
|
|
, ">="
|
|
, "<="
|
|
, "<"
|
|
, ">"
|
|
, "=="
|
|
, "!="
|
|
, "&"
|
|
, "^"
|
|
, "|"
|
|
, "&&"
|
|
, "||"
|
|
, "="
|
|
]
|
|
, caseSensitive = True
|
|
}
|
|
|
|
tokenParser :: Monad m => GenTokenParser Token u m
|
|
tokenParser = makeTokenParser languageDef
|
|
|
|
characterLiteral :: Monad m => ParsecT Token u m Char
|
|
natural :: Monad m => ParsecT Token u m Integer
|
|
integer :: Monad m => ParsecT Token u m Integer
|
|
float :: Monad m => ParsecT Token u m Double
|
|
naturalOrFloat :: Monad m => ParsecT Token u m (Either Integer Double)
|
|
decimal :: Monad m => ParsecT Token u m Integer
|
|
hexadecimal :: Monad m => ParsecT Token u m Integer
|
|
octal :: Monad m => ParsecT Token u m Integer
|
|
lexeme :: Monad m => ParsecT Token u m a -> ParsecT Token u m a
|
|
whiteSpace :: Monad m => ParsecT Token u m ()
|
|
parens :: Monad m => ParsecT Token u m a -> ParsecT Token u m a
|
|
braces :: Monad m => ParsecT Token u m a -> ParsecT Token u m a
|
|
angles :: Monad m => ParsecT Token u m a -> ParsecT Token u m a
|
|
brackets :: Monad m => ParsecT Token u m a -> ParsecT Token u m a
|
|
semicolonSeparated :: Monad m => ParsecT Token u m a -> ParsecT Token u m [a]
|
|
semicolonSeparated1 :: Monad m => ParsecT Token u m a -> ParsecT Token u m [a]
|
|
commaSeparated :: Monad m => ParsecT Token u m a -> ParsecT Token u m [a]
|
|
commaSeparated1 :: Monad m => ParsecT Token u m a -> ParsecT Token u m [a]
|
|
TokenParser{
|
|
charLiteral = characterLiteral
|
|
, natural = natural -- decimal, hexadecimal or octal
|
|
, integer = integer -- decimal, hexadecimal or octal
|
|
, float = float
|
|
, naturalOrFloat = naturalOrFloat
|
|
, decimal = decimal
|
|
, hexadecimal = hexadecimal
|
|
, octal = octal
|
|
, lexeme = lexeme
|
|
, whiteSpace = whiteSpace
|
|
, parens = parens
|
|
, braces = braces
|
|
, angles = angles
|
|
, brackets = brackets
|
|
, semiSep = semicolonSeparated
|
|
, semiSep1 = semicolonSeparated1
|
|
, commaSep = commaSeparated
|
|
, commaSep1 = commaSeparated1
|
|
} = tokenParser
|
|
|
|
semicolon :: Monad m => ParsecT Token u m Token
|
|
semicolon = Token.semi tokenParser <&> Text.pack
|
|
|
|
comma :: Monad m => ParsecT Token u m Token
|
|
comma = Token.comma tokenParser <&> Text.pack
|
|
|
|
colon :: Monad m => ParsecT Token u m Token
|
|
colon = Token.colon tokenParser <&> Text.pack
|
|
|
|
dot :: Monad m => ParsecT Token u m Token
|
|
dot = Token.dot tokenParser <&> Text.pack
|
|
|
|
symbol :: Monad m => String -> ParsecT Token u m Token
|
|
symbol = (<&> Text.pack) . Token.symbol tokenParser
|
|
|
|
stringLiteral :: Monad m => ParsecT Token u m Token
|
|
stringLiteral = Token.stringLiteral tokenParser <&> Text.pack
|
|
|
|
identifier :: Monad m => ParsecT Token u m Token
|
|
identifier = Token.identifier tokenParser <&> Text.pack
|
|
|
|
reserved :: Monad m => String -> ParsecT Token u m ()
|
|
reserved = Token.reserved tokenParser
|
|
|
|
operator :: Monad m => ParsecT Token u m Token
|
|
operator = Token.operator tokenParser <&> Text.pack
|
|
|
|
reservedOperator :: Monad m => String -> ParsecT Token u m ()
|
|
reservedOperator = Token.reservedOp tokenParser
|
|
|
|
typeName :: Monad m => ParsecT Token u m Token
|
|
typeName = choice
|
|
[ Ubc.Parse.Syntax.Language.reserved "i32" $> "i32"
|
|
, Ubc.Parse.Syntax.Language.reserved "u32" $> "u32"
|
|
, Ubc.Parse.Syntax.Language.reserved "f32" $> "f32"
|
|
, Ubc.Parse.Syntax.Language.identifier
|
|
]
|