Refactoring into Syntax Parser, also float parsing

This commit is contained in:
VegOwOtenks 2025-01-20 19:53:28 +01:00
parent 212eecfdf7
commit 52e04d28cf
18 changed files with 287 additions and 176 deletions

View file

@ -5,8 +5,8 @@ where
import Text.Parsec ( runPT, ParseError, SourceName, ParsecT )
import qualified Ubc.Parse.ParserState as ParserState
import qualified Ubc.Parse.Config as Config
import qualified Ubc.Parse.Syntax.ParserState as ParserState
import qualified Ubc.Parse.Syntax.Config as Config
parseScript :: Monad m => SourceName -> String -> m (Either ParseError ())
parseScript = runPT topLevelParser (ParserState.initialState Config.defaultConfig)

View file

@ -1,95 +0,0 @@
module Ubc.Parse.Expression
( Expression(..)
, BinaryOperator(..)
, expressionParser
)
where
import Control.Monad ( (<$!>) )
import Data.Functor
import Text.Parsec.Expr (Operator(Infix), Assoc (AssocLeft), buildExpressionParser)
import qualified Ubc.Parse.Language as UbcLanguage
import Text.Parsec (ParsecT, (<|>), (<?>), choice, option)
data Expression = Binary BinaryOperator Expression Expression
| ConstantInteger Integer
| FunctionCall String [Expression]
| Variable String
| If Expression Expression (Maybe Expression)
deriving (Show)
data BinaryOperator = Plus
| Minus
| Multiply
| Divide
| Modulo
| ShiftLeft
| ShiftRight
| LessThan
| GreaterThan
| LessEqual
| GreaterEqual
| Equal
| NotEqual
| BitAnd
| BitOr
| BitXor
deriving (Show)
operatorTable :: Monad m => [[Operator String u m Expression]]
operatorTable =
[
[ Infix (UbcLanguage.reservedOperator "*" >> return (Binary Multiply)) AssocLeft
, Infix (UbcLanguage.reservedOperator "/" >> return (Binary Divide)) AssocLeft
, Infix (UbcLanguage.reservedOperator "%" >> return (Binary Modulo)) AssocLeft
]
,
[ Infix (UbcLanguage.reservedOperator "+" >> return (Binary Plus)) AssocLeft
, Infix (UbcLanguage.reservedOperator "-" >> return (Binary Minus)) AssocLeft
]
,
[ Infix (UbcLanguage.reservedOperator "<<" >> return (Binary ShiftLeft)) AssocLeft
, Infix (UbcLanguage.reservedOperator ">>" >> return (Binary ShiftRight)) AssocLeft
]
,
[ Infix (UbcLanguage.reservedOperator ">=" >> return (Binary GreaterEqual)) AssocLeft
, Infix (UbcLanguage.reservedOperator "<=" >> return (Binary LessEqual)) AssocLeft
, Infix (UbcLanguage.reservedOperator "<" >> return (Binary LessThan)) AssocLeft
, Infix (UbcLanguage.reservedOperator ">" >> return (Binary GreaterThan)) AssocLeft
]
,
[ Infix (UbcLanguage.reservedOperator "==" >> return (Binary Equal)) AssocLeft
, Infix (UbcLanguage.reservedOperator "!=" >> return (Binary NotEqual)) AssocLeft
]
,
[ Infix (UbcLanguage.reservedOperator "&" >> return (Binary BitAnd)) AssocLeft
, Infix (UbcLanguage.reservedOperator "|" >> return (Binary BitOr)) AssocLeft
, Infix (UbcLanguage.reservedOperator "^" >> return (Binary BitXor)) AssocLeft
]
]
expressionParser :: (Monad m) => ParsecT String u m Expression
expressionParser = buildExpressionParser operatorTable termParser <?> "expression"
termParser :: Monad m => ParsecT String u m Expression
termParser = UbcLanguage.parens expressionParser
<|> fmap ConstantInteger UbcLanguage.integer
<|> expressionIf
<|> functionCallOrVariable
expressionIf :: Monad m => ParsecT String u m Expression
expressionIf = do
_ <- UbcLanguage.reserved "if"
condition <- expressionParser
_ <- UbcLanguage.reserved "then"
then_ <- expressionParser
else_ <- option Nothing (UbcLanguage.reserved "else" >> expressionParser <&> Just)
return $ If condition then_ else_
functionCallOrVariable :: Monad m => ParsecT String u m Expression
functionCallOrVariable = do
name <- UbcLanguage.identifier
choice
[ FunctionCall name <$!> UbcLanguage.parens (UbcLanguage.commaSeparated expressionParser)
, return $ Variable name
]

View file

@ -1,5 +0,0 @@
module Ubc.Parse.File
(
)
where

View file

@ -1,4 +1,4 @@
module Ubc.Parse.Config
module Ubc.Parse.Syntax.Config
( Config(..)
, defaultConfig
)

View file

@ -1,9 +1,9 @@
module Ubc.Parse.Data.Struct
module Ubc.Parse.Syntax.Data.Struct
( Struct(..)
)
where
import Ubc.Parse.VariableType (VariableType)
import Ubc.Parse.Syntax.VariableType (VariableType)
type VariableName = String

View file

@ -0,0 +1,147 @@
module Ubc.Parse.Syntax.Expression
( Expression(..)
, BinaryOperator(..)
, expressionParser
)
where
import Control.Monad ( (<$!>) )
import Data.Functor ( (<&>), ($>) )
import Text.Parsec.Expr (Operator(Infix), Assoc (AssocLeft, AssocRight), buildExpressionParser)
import Text.Parsec (ParsecT, (<|>), (<?>), choice, option, many, many1, digit, lookAhead, oneOf, char, notFollowedBy)
import qualified Ubc.Parse.Syntax.Language as UbcLanguage
import Data.Ratio ((%))
data Expression = Binary BinaryOperator Expression Expression
| Unary UnaryOperator Expression
| ConstantInteger Integer
| ConstantFraction Rational
| FunctionCall String [Expression]
| Variable String
| If Expression Expression (Maybe Expression) -- if then else
| Loop Expression Expression -- condition body
| Block [Expression]
deriving (Show)
data UnaryOperator = LogicNot
deriving (Show)
data BinaryOperator = Plus
| Minus
| Multiply
| Divide
| Modulo
| ShiftLeft
| ShiftRight
| LessThan
| GreaterThan
| LessEqual
| GreaterEqual
| Equal
| NotEqual
| BitAnd
| BitOr
| BitXor
| LogicAnd
| LogicOr
| Assign
deriving (Show)
operatorTable :: Monad m => [[Operator String u m Expression]]
operatorTable =
[
[ Infix (UbcLanguage.reservedOperator "*" $> Binary Multiply) AssocLeft
, Infix (UbcLanguage.reservedOperator "/" $> Binary Divide) AssocLeft
, Infix (UbcLanguage.reservedOperator "%" $> Binary Modulo) AssocLeft
]
,
[ Infix (UbcLanguage.reservedOperator "+" $> Binary Plus) AssocLeft
, Infix (UbcLanguage.reservedOperator "-" $> Binary Minus) AssocLeft
]
,
[ Infix (UbcLanguage.reservedOperator "<<" $> Binary ShiftLeft) AssocLeft
, Infix (UbcLanguage.reservedOperator ">>" $> Binary ShiftRight) AssocLeft
]
,
[ Infix (UbcLanguage.reservedOperator ">=" $> Binary GreaterEqual) AssocLeft
, Infix (UbcLanguage.reservedOperator "<=" $> Binary LessEqual) AssocLeft
, Infix (UbcLanguage.reservedOperator "<" $> Binary LessThan) AssocLeft
, Infix (UbcLanguage.reservedOperator ">" $> Binary GreaterThan) AssocLeft
]
,
[ Infix (UbcLanguage.reservedOperator "==" $> Binary Equal) AssocLeft
, Infix (UbcLanguage.reservedOperator "!=" $> Binary NotEqual) AssocLeft
]
, [ Infix (UbcLanguage.reservedOperator "&" $> Binary BitAnd) AssocLeft ]
, [ Infix (UbcLanguage.reservedOperator "^" $> Binary BitXor) AssocLeft ]
, [ Infix (UbcLanguage.reservedOperator "|" $> Binary BitOr) AssocLeft ]
, [ Infix (UbcLanguage.reservedOperator "&&" $> Binary LogicAnd) AssocLeft ]
, [ Infix (UbcLanguage.reservedOperator "||" $> Binary LogicOr) AssocLeft ]
, [ Infix (UbcLanguage.reservedOperator "=" $> Binary Assign) AssocRight ]
]
expressionParser :: (Monad m) => ParsecT String u m Expression
expressionParser = buildExpressionParser operatorTable termParser <?> "expression"
numberTerm :: Monad m => ParsecT String u m Expression
numberTerm = do
notFollowedBy $ char '0'
integerDigits <- many1 digit
choice
[ decimalTerm integerDigits
, return $ ConstantInteger (read integerDigits)
]
<* UbcLanguage.whiteSpace
decimalTerm :: Monad m => [Char] -> ParsecT String u m Expression
decimalTerm integerDigits = do
_ <- lookAhead $ oneOf "e."
fractionalDigits <- option "" $ char '.' *> many1 digit
powerSuffix <- option 1 $ char 'e' *> UbcLanguage.integer
let numeratorPower = if powerSuffix > 0 then powerSuffix else 1
let denominatorPower = if powerSuffix < 0 then powerSuffix else 1
let numeratorInteger = read $ integerDigits ++ fractionalDigits
let denominatorInteger = 10 ^ length fractionalDigits
return $ ConstantFraction $ numeratorInteger ^ numeratorPower % denominatorInteger ^ denominatorPower
termParser :: Monad m => ParsecT String u m Expression
termParser = UbcLanguage.parens expressionParser
<|> numberTerm
<|> fmap ConstantInteger UbcLanguage.integer
<|> conditionalExpression "if" id
<|> conditionalExpression "unless" (Unary LogicNot)
<|> loopExpression "while" id
<|> loopExpression "until" (Unary LogicNot)
<|> blockExpression
<|> functionCallOrVariable
<?> "term"
loopExpression :: Monad m => String -> (Expression -> Expression) -> ParsecT String u m Expression
loopExpression startKeyword conditionWrapper = do
_ <- UbcLanguage.reserved startKeyword
condition <- conditionWrapper <$!> expressionParser
Loop condition <$> expressionParser
blockExpression :: Monad m => ParsecT String u m Expression
blockExpression = UbcLanguage.braces (many expressionParser) <&> Block
conditionalExpression :: Monad m => String -> (Expression -> Expression) -> ParsecT String u m Expression
conditionalExpression startKeyword conditionWrapper = do
_ <- UbcLanguage.reserved startKeyword
condition <- conditionWrapper <$!> expressionParser
_ <- UbcLanguage.reserved "then"
then_ <- expressionParser
else_ <- option Nothing (UbcLanguage.reserved "else" >> expressionParser <&> Just)
return $ If condition then_ else_
functionCallOrVariable :: Monad m => ParsecT String u m Expression
functionCallOrVariable = do
name <- UbcLanguage.identifier
choice
[ FunctionCall name <$!> UbcLanguage.parens (UbcLanguage.commaSeparated expressionParser)
, return $ Variable name
]

View file

@ -0,0 +1,5 @@
module Ubc.Parse.Syntax.File
(
)
where

View file

@ -0,0 +1,45 @@
module Ubc.Parse.Syntax.Function
( Function(..)
, parseFunction
)
where
import Control.Monad ((<$!>))
import Text.Parsec (lookAhead, try, ParsecT)
import Ubc.Parse.Syntax.VariableType (VariableType)
import Ubc.Parse.Syntax.Expression (Expression, expressionParser)
import qualified Ubc.Parse.Syntax.Language as UbcLanguage
import qualified Ubc.Parse.Syntax.VariableType as VariableType
data Function = Function
{ identifier :: String
, returnType :: VariableType
, body :: Expression
, arguments :: [(VariableType, String)]
}
deriving (Show)
parseFunction :: Monad m => ParsecT String u m Function
parseFunction = do
(resultType, name) <- try $ do
resultType <- UbcLanguage.typeName
name <- UbcLanguage.identifier
_ <- lookAhead $ UbcLanguage.symbol "("
return (VariableType.fromString resultType, name)
argumentList <- UbcLanguage.parens (UbcLanguage.commaSeparated argumentDefinition)
expressionBody <- expressionParser
return $ Function name resultType expressionBody argumentList
argumentDefinition :: Monad m => ParsecT String u m (VariableType, String)
argumentDefinition = do
argumentType <- VariableType.fromString <$!> UbcLanguage.typeName
argumentName <- UbcLanguage.identifier
return (argumentType, argumentName)

View file

@ -1,35 +1,35 @@
{-# LANGUAGE NoMonomorphismRestriction #-}
module Ubc.Parse.Language
module Ubc.Parse.Syntax.Language
( languageDef
, typeName
, Ubc.Parse.Language.identifier
, Ubc.Parse.Language.reserved
, Ubc.Parse.Language.operator
, Ubc.Parse.Language.reservedOperator
, Ubc.Parse.Language.characterLiteral
, Ubc.Parse.Language.stringLiteral
, Ubc.Parse.Language.natural
, Ubc.Parse.Language.integer
, Ubc.Parse.Language.float
, Ubc.Parse.Language.naturalOrFloat
, Ubc.Parse.Language.decimal
, Ubc.Parse.Language.hexadecimal
, Ubc.Parse.Language.octal
, Ubc.Parse.Language.symbol
, Ubc.Parse.Language.lexeme
, Ubc.Parse.Language.whiteSpace
, Ubc.Parse.Language.parens
, Ubc.Parse.Language.braces
, Ubc.Parse.Language.angles
, Ubc.Parse.Language.brackets
, Ubc.Parse.Language.semicolon
, Ubc.Parse.Language.comma
, Ubc.Parse.Language.colon
, Ubc.Parse.Language.dot
, Ubc.Parse.Language.semicolonSeparated
, Ubc.Parse.Language.semicolonSeparated1
, Ubc.Parse.Language.commaSeparated
, Ubc.Parse.Language.commaSeparated1
, Ubc.Parse.Syntax.Language.identifier
, Ubc.Parse.Syntax.Language.reserved
, Ubc.Parse.Syntax.Language.operator
, Ubc.Parse.Syntax.Language.reservedOperator
, Ubc.Parse.Syntax.Language.characterLiteral
, Ubc.Parse.Syntax.Language.stringLiteral
, Ubc.Parse.Syntax.Language.natural
, Ubc.Parse.Syntax.Language.integer
, Ubc.Parse.Syntax.Language.float
, Ubc.Parse.Syntax.Language.naturalOrFloat
, Ubc.Parse.Syntax.Language.decimal
, Ubc.Parse.Syntax.Language.hexadecimal
, Ubc.Parse.Syntax.Language.octal
, Ubc.Parse.Syntax.Language.symbol
, Ubc.Parse.Syntax.Language.lexeme
, Ubc.Parse.Syntax.Language.whiteSpace
, Ubc.Parse.Syntax.Language.parens
, Ubc.Parse.Syntax.Language.braces
, Ubc.Parse.Syntax.Language.angles
, Ubc.Parse.Syntax.Language.brackets
, Ubc.Parse.Syntax.Language.semicolon
, Ubc.Parse.Syntax.Language.comma
, Ubc.Parse.Syntax.Language.colon
, Ubc.Parse.Syntax.Language.dot
, Ubc.Parse.Syntax.Language.semicolonSeparated
, Ubc.Parse.Syntax.Language.semicolonSeparated1
, Ubc.Parse.Syntax.Language.commaSeparated
, Ubc.Parse.Syntax.Language.commaSeparated1
)
where
@ -124,8 +124,8 @@ TokenParser{
typeName :: Monad m => ParsecT String u m String
typeName = choice
[ Ubc.Parse.Language.reserved "i32" $> "i32"
, Ubc.Parse.Language.reserved "u32" $> "u32"
, Ubc.Parse.Language.reserved "f32" $> "f32"
, Ubc.Parse.Language.identifier
[ Ubc.Parse.Syntax.Language.reserved "i32" $> "i32"
, Ubc.Parse.Syntax.Language.reserved "u32" $> "u32"
, Ubc.Parse.Syntax.Language.reserved "f32" $> "f32"
, Ubc.Parse.Syntax.Language.identifier
]

View file

@ -1,4 +1,4 @@
module Ubc.Parse.ParserState
module Ubc.Parse.Syntax.ParserState
( ParserState(..)
, initialState
, pushScope
@ -7,8 +7,8 @@ module Ubc.Parse.ParserState
)
where
import Ubc.Parse.Scope (Scope)
import Ubc.Parse.Config (Config)
import Ubc.Parse.Syntax.Scope (Scope)
import Ubc.Parse.Syntax.Config (Config)
data ParserState = ParserState
{ scopes :: [Scope]

View file

@ -1,11 +1,11 @@
module Ubc.Parse.Scope
module Ubc.Parse.Syntax.Scope
( Scope(..)
, expectScopeStruct
)
where
import Ubc.Parse.Scope.FileScope (FileScope)
import Ubc.Parse.Scope.StructScope (StructScope)
import Ubc.Parse.Syntax.Scope.FileScope (FileScope)
import Ubc.Parse.Syntax.Scope.StructScope (StructScope)
data Scope =
ScopeFile FileScope

View file

@ -1,10 +1,10 @@
module Ubc.Parse.Scope.FileScope
module Ubc.Parse.Syntax.Scope.FileScope
( FileScope(..)
)
where
import Data.Map (Map)
import Ubc.Parse.Data.Struct (Struct)
import Ubc.Parse.Syntax.Data.Struct (Struct)
data FileScope = FileScope
{ structs :: Map String Struct

View file

@ -1,10 +1,10 @@
module Ubc.Parse.Scope.StructScope
module Ubc.Parse.Syntax.Scope.StructScope
( StructScope(..)
, modifyVariables
)
where
import Ubc.Parse.VariableType (VariableType)
import Ubc.Parse.Syntax.VariableType (VariableType)
type VariableName = String

View file

@ -0,0 +1,11 @@
module Ubc.Parse.Syntax.Statement
( Statement(..)
)
where
import Ubc.Parse.Syntax.VariableType (VariableType)
import Ubc.Parse.Syntax.Expression (Expression)
type VariableName = String
data Statement = VariableDefinition VariableType VariableName Expression

View file

@ -1,4 +1,4 @@
module Ubc.Parse.Struct
module Ubc.Parse.Syntax.Struct
( Struct(..)
, parseStruct
)
@ -17,16 +17,16 @@ import Text.Parsec
try,
ParsecT )
import Ubc.Parse.ParserState (ParserState)
import Ubc.Parse.Scope.StructScope (StructScope(..))
import Ubc.Parse.Scope (Scope(..))
import Ubc.Parse.Data.Struct (Struct(..))
import Ubc.Parse.Syntax.ParserState (ParserState)
import Ubc.Parse.Syntax.Scope.StructScope (StructScope(..))
import Ubc.Parse.Syntax.Scope (Scope(..))
import Ubc.Parse.Syntax.Data.Struct (Struct(..))
import qualified Ubc.Parse.Language as UbcLanguage
import qualified Ubc.Parse.Scope.StructScope as StructScope
import qualified Ubc.Parse.ParserState as ParserState
import qualified Ubc.Parse.Scope as Scope
import qualified Ubc.Parse.VariableType as VariableType
import qualified Ubc.Parse.Syntax.Language as UbcLanguage
import qualified Ubc.Parse.Syntax.Scope.StructScope as StructScope
import qualified Ubc.Parse.Syntax.ParserState as ParserState
import qualified Ubc.Parse.Syntax.Scope as Scope
import qualified Ubc.Parse.Syntax.VariableType as VariableType
parseStruct :: Monad m => ParsecT String ParserState m Struct

View file

@ -1,18 +1,19 @@
module Ubc.Parse.Types
module Ubc.Parse.Syntax.Types
( checkTypeValidity
, getTypeNames
)
where
import Text.Parsec
import qualified Data.Set as Set
import Ubc.Parse.ParserState (ParserState)
import Text.Parsec ( ParsecT )
import Ubc.Parse.Syntax.ParserState (ParserState)
import Control.Monad ((<$!>))
checkTypeValidity :: Monad m => String -> ParsecT String ParserState m Bool
checkTypeValidity typeName = do
typeNames <- getTypeNames >>= return . Set.union (Set.fromList ["i32", "u32", "f32"]) . Set.fromList
typeNames <- Set.union (Set.fromList ["i32", "u32", "f32"]) . Set.fromList <$!> getTypeNames
return $ typeName `Set.member` typeNames
getTypeNames :: Monad m => ParsecT String ParserState m [String]

View file

@ -1,4 +1,4 @@
module Ubc.Parse.VariableType
module Ubc.Parse.Syntax.VariableType
(VariableType(..)
, fromString
)

View file

@ -26,18 +26,20 @@ source-repository head
library
exposed-modules:
Ubc.Parse
Ubc.Parse.Config
Ubc.Parse.Data.Struct
Ubc.Parse.Expression
Ubc.Parse.File
Ubc.Parse.Language
Ubc.Parse.ParserState
Ubc.Parse.Scope
Ubc.Parse.Scope.FileScope
Ubc.Parse.Scope.StructScope
Ubc.Parse.Struct
Ubc.Parse.Types
Ubc.Parse.VariableType
Ubc.Parse.Syntax.Config
Ubc.Parse.Syntax.Data.Struct
Ubc.Parse.Syntax.Expression
Ubc.Parse.Syntax.File
Ubc.Parse.Syntax.Function
Ubc.Parse.Syntax.Language
Ubc.Parse.Syntax.ParserState
Ubc.Parse.Syntax.Scope
Ubc.Parse.Syntax.Scope.FileScope
Ubc.Parse.Syntax.Scope.StructScope
Ubc.Parse.Syntax.Statement
Ubc.Parse.Syntax.Struct
Ubc.Parse.Syntax.Types
Ubc.Parse.Syntax.VariableType
other-modules:
Paths_ubcc
autogen-modules: