Everything is now Data.Text instead of String

This commit is contained in:
VegOwOtenks 2025-02-21 18:17:46 +01:00
parent 753f429ec8
commit cbfd729795
18 changed files with 178 additions and 137 deletions

View file

@ -6,6 +6,7 @@ import System.Directory as Directory
import qualified Control.Monad.Trans.Reader as Reader import qualified Control.Monad.Trans.Reader as Reader
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Text.IO as Text.IO
import qualified Text.Parsec as Parsec import qualified Text.Parsec as Parsec
@ -22,7 +23,7 @@ minimalConfig = do
main :: IO () main :: IO ()
main = do main = do
text <- getContents text <- Text.IO.getContents
-- fixme: handle errors? -- fixme: handle errors?
parseConfig <- minimalConfig parseConfig <- minimalConfig

View file

@ -29,6 +29,7 @@ dependencies:
- os-string - os-string
- path - path
- parsec - parsec
- text
- transformers - transformers
ghc-options: ghc-options:

View file

@ -1,7 +1,12 @@
module Ubc.Parse.Syntax module Ubc.Parse.Syntax
(Transformer) (Transformer, Token)
where where
import Control.Monad.Reader (ReaderT) import Control.Monad.Reader (ReaderT)
import Data.Text ( Text )
import Ubc.Parse.Syntax.Config (Config) import Ubc.Parse.Syntax.Config (Config)
type Transformer = ReaderT Config IO type Transformer = ReaderT Config IO
type Token = Text

View file

@ -1,3 +1,4 @@
{-# LANGUAGE OverloadedStrings #-}
module Ubc.Parse.Syntax.Enumeration module Ubc.Parse.Syntax.Enumeration
( Enumeration(..) ( Enumeration(..)
, parse , parse
@ -7,15 +8,17 @@ where
import Text.Parsec (ParsecT, many, (<?>)) import Text.Parsec (ParsecT, many, (<?>))
import qualified Ubc.Parse.Syntax.Language as UbcLanguage import qualified Ubc.Parse.Syntax.Language as UbcLanguage
import Ubc.Parse.Syntax (Token)
import Data.Text (Text)
type EnumerationMember = String type EnumerationMember = Token
data Enumeration = Enumeration data Enumeration = Enumeration
{ name :: String { name :: Text
, members :: [EnumerationMember] , members :: [EnumerationMember]
} }
deriving (Show) deriving (Show)
parse :: Monad m => ParsecT String u m Enumeration parse :: Monad m => ParsecT Token u m Enumeration
parse = do parse = do
UbcLanguage.reserved "enum" UbcLanguage.reserved "enum"
identifier <- UbcLanguage.identifier <?> "enum identifier" identifier <- UbcLanguage.identifier <?> "enum identifier"

View file

@ -1,6 +1,8 @@
{-# LANGUAGE OverloadedStrings #-}
module Ubc.Parse.Syntax.Expression module Ubc.Parse.Syntax.Expression
( Expression(..) ( Expression(..)
, expressionParser , expressionParser
, blockExpression
) )
where where
@ -9,25 +11,27 @@ import Data.Functor ( (<&>), ($>) )
import Data.Ratio ((%)) import Data.Ratio ((%))
import Text.Parsec.Expr (Operator(Infix, Prefix), Assoc (AssocLeft, AssocRight), buildExpressionParser) import Text.Parsec.Expr (Operator(Infix, Prefix), Assoc (AssocLeft, AssocRight), buildExpressionParser)
import Text.Parsec (ParsecT, (<|>), (<?>), choice, option, many1, digit, lookAhead, oneOf, char, notFollowedBy) import Text.Parsec (ParsecT, (<|>), (<?>), choice, option, many1, digit, lookAhead, oneOf, char, notFollowedBy, many)
import Ubc.Parse.Syntax.Statement (blockExpression, Statement) import Ubc.Parse.Syntax.Statement (Statement)
import Ubc.Parse.Syntax.Operators (BinaryOperator(..), UnaryOperator (..)) import Ubc.Parse.Syntax.Operators (BinaryOperator(..), UnaryOperator (..))
import qualified Ubc.Parse.Syntax.Language as UbcLanguage import qualified Ubc.Parse.Syntax.Language as UbcLanguage
import Ubc.Parse.Syntax (Token)
import qualified Ubc.Parse.Syntax.Statement as Statement
data Expression = Binary BinaryOperator Expression Expression data Expression = Binary BinaryOperator Expression Expression
| Unary UnaryOperator Expression | Unary UnaryOperator Expression
| ConstantInteger Integer | ConstantInteger Integer
| ConstantFraction Rational | ConstantFraction Rational
| FunctionCall String [Expression] | FunctionCall Token [Expression]
| Variable String | Variable Token
| If Expression Expression (Maybe Expression) -- if then else | If Expression Expression (Maybe Expression) -- if then else
| Loop Expression Expression -- condition body | Loop Expression Expression -- condition body
| Block [Statement] | Block [Statement]
deriving (Show) deriving (Show)
operatorTable :: Monad m => [[Operator String u m Expression]] operatorTable :: Monad m => [[Operator Token u m Expression]]
operatorTable = operatorTable =
[ [
[ Infix (UbcLanguage.reservedOperator "." $> Binary StructureAccess) AssocLeft [ Infix (UbcLanguage.reservedOperator "." $> Binary StructureAccess) AssocLeft
@ -66,10 +70,10 @@ operatorTable =
, [ Infix (UbcLanguage.reservedOperator "=" $> Binary Assign) AssocRight ] , [ Infix (UbcLanguage.reservedOperator "=" $> Binary Assign) AssocRight ]
] ]
expressionParser :: (Monad m) => ParsecT String u m Expression expressionParser :: (Monad m) => ParsecT Token u m Expression
expressionParser = buildExpressionParser operatorTable termParser <?> "expression" expressionParser = buildExpressionParser operatorTable termParser <?> "expression"
numberTerm :: Monad m => ParsecT String u m Expression numberTerm :: Monad m => ParsecT Token u m Expression
numberTerm = do numberTerm = do
notFollowedBy $ char '0' notFollowedBy $ char '0'
integerDigits <- many1 digit integerDigits <- many1 digit
@ -79,7 +83,7 @@ numberTerm = do
] ]
<* UbcLanguage.whiteSpace <* UbcLanguage.whiteSpace
decimalTerm :: Monad m => [Char] -> ParsecT String u m Expression decimalTerm :: Monad m => [Char] -> ParsecT Token u m Expression
decimalTerm integerDigits = do decimalTerm integerDigits = do
_ <- lookAhead $ oneOf "e." _ <- lookAhead $ oneOf "e."
fractionalDigits <- option "" $ char '.' *> many1 digit fractionalDigits <- option "" $ char '.' *> many1 digit
@ -92,7 +96,7 @@ decimalTerm integerDigits = do
return $ ConstantFraction $ numeratorInteger ^ numeratorPower % denominatorInteger ^ denominatorPower return $ ConstantFraction $ numeratorInteger ^ numeratorPower % denominatorInteger ^ denominatorPower
termParser :: Monad m => ParsecT String u m Expression termParser :: Monad m => ParsecT Token u m Expression
termParser = UbcLanguage.parens expressionParser termParser = UbcLanguage.parens expressionParser
<|> numberTerm <|> numberTerm
<|> fmap ConstantInteger UbcLanguage.integer <|> fmap ConstantInteger UbcLanguage.integer
@ -104,13 +108,13 @@ termParser = UbcLanguage.parens expressionParser
<|> functionCallOrVariable <|> functionCallOrVariable
<?> "term" <?> "term"
loopExpression :: Monad m => String -> (Expression -> Expression) -> ParsecT String u m Expression loopExpression :: Monad m => String -> (Expression -> Expression) -> ParsecT Token u m Expression
loopExpression startKeyword conditionWrapper = do loopExpression startKeyword conditionWrapper = do
_ <- UbcLanguage.reserved startKeyword _ <- UbcLanguage.reserved startKeyword
condition <- conditionWrapper <$!> expressionParser condition <- conditionWrapper <$!> expressionParser
Loop condition <$> expressionParser Loop condition <$> expressionParser
conditionalExpression :: Monad m => String -> (Expression -> Expression) -> ParsecT String u m Expression conditionalExpression :: Monad m => String -> (Expression -> Expression) -> ParsecT Token u m Expression
conditionalExpression startKeyword conditionWrapper = do conditionalExpression startKeyword conditionWrapper = do
_ <- UbcLanguage.reserved startKeyword _ <- UbcLanguage.reserved startKeyword
condition <- conditionWrapper <$!> expressionParser condition <- conditionWrapper <$!> expressionParser
@ -120,7 +124,7 @@ conditionalExpression startKeyword conditionWrapper = do
return $ If condition then_ else_ return $ If condition then_ else_
functionCallOrVariable :: Monad m => ParsecT String u m Expression functionCallOrVariable :: Monad m => ParsecT Token u m Expression
functionCallOrVariable = do functionCallOrVariable = do
name <- UbcLanguage.identifier name <- UbcLanguage.identifier
choice choice
@ -128,3 +132,5 @@ functionCallOrVariable = do
, return $ Variable name , return $ Variable name
] ]
blockExpression :: Monad m => ParsecT Token u m Expression
blockExpression = UbcLanguage.braces (many Statement.parse) <&> Block

View file

@ -1,21 +1,14 @@
module Ubc.Parse.Syntax.Expression module Ubc.Parse.Syntax.Expression
( Expression(..) ( Expression
, expressionParser , expressionParser
, blockExpression
) )
where where
import Text.Parsec (ParsecT) import Text.Parsec (ParsecT)
import Ubc.Parse.Syntax.Operators (BinaryOperator, UnaryOperator) import Ubc.Parse.Syntax (Token)
import {-# SOURCE #-} Ubc.Parse.Syntax.Statement (Statement)
data Expression = Binary BinaryOperator Expression Expression data Expression
| Unary UnaryOperator Expression
| ConstantInteger Integer
| ConstantFraction Rational
| FunctionCall String [Expression]
| Variable String
| If Expression Expression (Maybe Expression) -- if then else
| Loop Expression Expression -- condition body
| Block [Statement]
instance Show Expression instance Show Expression
expressionParser :: Monad m => ParsecT String u m Expression expressionParser :: Monad m => ParsecT Token u m Expression
blockExpression :: Monad m => ParsecT Token u m Expression

View file

@ -11,9 +11,13 @@ import GHC.Generics (Generic, Generically(..))
import Control.Monad ((<$!>)) import Control.Monad ((<$!>))
import Data.Functor ((<&>)) import Data.Functor ((<&>))
import qualified Data.Text as Text
import Text.Parsec (choice, ParsecT, many) import Text.Parsec (choice, ParsecT, many)
import Ubc.Parse.Syntax (Transformer) import qualified Text.Parsec as Parsec
import Ubc.Parse.Syntax (Transformer, Token)
import {-# SOURCE #-} Ubc.Parse.Syntax.Import (Import) import {-# SOURCE #-} Ubc.Parse.Syntax.Import (Import)
import Ubc.Parse.Syntax.Struct (Struct) import Ubc.Parse.Syntax.Struct (Struct)
@ -26,10 +30,9 @@ import qualified Ubc.Parse.Syntax.Struct as Struct
import qualified Ubc.Parse.Syntax.Function as Function import qualified Ubc.Parse.Syntax.Function as Function
import qualified Ubc.Parse.Syntax.Statement as Statement import qualified Ubc.Parse.Syntax.Statement as Statement
import qualified Ubc.Parse.Syntax.Enumeration as Enumeration import qualified Ubc.Parse.Syntax.Enumeration as Enumeration
import qualified Text.Parsec as Parsec
data File = File data File = File
{ name :: String { name :: Token
, body :: FileBody , body :: FileBody
} }
deriving (Show) deriving (Show)
@ -45,13 +48,13 @@ data FileBody = FileBody
deriving (Semigroup, Monoid) via Generically FileBody deriving (Semigroup, Monoid) via Generically FileBody
-- dont use `deriving ... via FileBody` because that leads to a loop, somehow -- dont use `deriving ... via FileBody` because that leads to a loop, somehow
parse :: ParsecT String u Transformer File parse :: ParsecT Token u Transformer File
parse = do parse = do
fileName <- Parsec.sourceName <$!> Parsec.getPosition fileName <- Text.pack . Parsec.sourceName <$!> Parsec.getPosition
fileBody <- mconcat <$!> many fileMember fileBody <- mconcat <$!> many fileMember
pure $ File fileName fileBody pure $ File fileName fileBody
fileMember :: ParsecT String u Transformer FileBody fileMember :: ParsecT Token u Transformer FileBody
fileMember = choice fileMember = choice
[ Struct.parse <&> \s -> mempty { structs = [s] } [ Struct.parse <&> \s -> mempty { structs = [s] }
, Import.parse <&> \i -> mempty { imports = [i] } , Import.parse <&> \i -> mempty { imports = [i] }

View file

@ -6,10 +6,10 @@ where
import Text.Parsec (ParsecT) import Text.Parsec (ParsecT)
import Ubc.Parse.Syntax (Transformer) import Ubc.Parse.Syntax (Transformer, Token)
data File data File
instance Show File instance Show File
parse :: ParsecT String u Transformer File parse :: ParsecT Token u Transformer File

View file

@ -1,3 +1,4 @@
{-# LANGUAGE OverloadedStrings #-}
module Ubc.Parse.Syntax.Function module Ubc.Parse.Syntax.Function
( Function(..) ( Function(..)
, parse , parse
@ -16,17 +17,18 @@ import Ubc.Parse.Syntax.Generic (Generic)
import qualified Ubc.Parse.Syntax.Language as UbcLanguage import qualified Ubc.Parse.Syntax.Language as UbcLanguage
import qualified Ubc.Parse.Syntax.VariableType as VariableType import qualified Ubc.Parse.Syntax.VariableType as VariableType
import qualified Ubc.Parse.Syntax.Generic as Generic import qualified Ubc.Parse.Syntax.Generic as Generic
import Ubc.Parse.Syntax (Token)
data Function = Function data Function = Function
{ returnType :: VariableType { returnType :: VariableType
, identifier :: String , identifier :: Token
, generics :: [Generic] , generics :: [Generic]
, body :: Expression , body :: Expression
, arguments :: [(VariableType, String)] , arguments :: [(VariableType, Token)]
} }
deriving (Show) deriving (Show)
parsePrefixed :: Monad m => VariableType -> String -> ParsecT String u m Function parsePrefixed :: Monad m => VariableType -> Token -> ParsecT Token u m Function
parsePrefixed ftype fname = do parsePrefixed ftype fname = do
genericList <- UbcLanguage.angles $ UbcLanguage.commaSeparated1 Generic.parse genericList <- UbcLanguage.angles $ UbcLanguage.commaSeparated1 Generic.parse
@ -36,7 +38,7 @@ parsePrefixed ftype fname = do
return $ Function ftype fname genericList expressionBody argumentList return $ Function ftype fname genericList expressionBody argumentList
parse :: Monad m => ParsecT String u m Function parse :: Monad m => ParsecT Token u m Function
parse = do parse = do
(resultType, name) <- try $ do (resultType, name) <- try $ do
resultType <- UbcLanguage.typeName resultType <- UbcLanguage.typeName
@ -46,7 +48,7 @@ parse = do
parsePrefixed resultType name parsePrefixed resultType name
argumentDefinition :: Monad m => ParsecT String u m (VariableType, String) argumentDefinition :: Monad m => ParsecT Token u m (VariableType, Token)
argumentDefinition = do argumentDefinition = do
argumentType <- VariableType.fromString <$!> UbcLanguage.typeName argumentType <- VariableType.fromString <$!> UbcLanguage.typeName
argumentName <- UbcLanguage.identifier argumentName <- UbcLanguage.identifier

View file

@ -1,17 +1,19 @@
{-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE DerivingStrategies #-}
module Ubc.Parse.Syntax.Generic module Ubc.Parse.Syntax.Generic
( Generic(..) ( Generic(..)
, parse) , parse
)
where where
import qualified Ubc.Parse.Syntax.Language as Language import qualified Ubc.Parse.Syntax.Language as Language
import Control.Monad ((<$!>)) import Control.Monad ((<$!>))
import Text.Parsec (ParsecT) import Text.Parsec (ParsecT)
import Ubc.Parse.Syntax (Token)
data Generic = Generic data Generic = Generic
{ name :: String { name :: Token
} }
deriving stock (Show) deriving stock (Show)
parse :: Monad m => ParsecT String u m Generic parse :: Monad m => ParsecT Token u m Generic
parse = Generic <$!> Language.identifier parse = Generic <$!> Language.identifier

View file

@ -1,5 +1,6 @@
{-# LANGUAGE LambdaCase #-} {-# LANGUAGE LambdaCase #-}
{-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE OverloadedStrings #-}
module Ubc.Parse.Syntax.Import module Ubc.Parse.Syntax.Import
( parse ( parse
, Import(..) , Import(..)
@ -18,8 +19,10 @@ import Data.Functor ((<&>))
import Data.List.NonEmpty (NonEmpty) import Data.List.NonEmpty (NonEmpty)
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Set as Set
import qualified Data.List.NonEmpty as NonEmpty import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Data.Text.IO as Text.IO
import qualified System.Directory as IO import qualified System.Directory as IO
@ -28,7 +31,7 @@ import qualified Text.Parsec as Parsec
import Path as Path ( Path, File, parseRelFile, Rel, parseRelDir, Dir, Abs, (</>), fromAbsFile, fromRelFile ) import Path as Path ( Path, File, parseRelFile, Rel, parseRelDir, Dir, Abs, (</>), fromAbsFile, fromRelFile )
import Ubc.Parse.Syntax (Transformer) import Ubc.Parse.Syntax (Transformer, Token)
import Ubc.Parse.Syntax.Config (Config) import Ubc.Parse.Syntax.Config (Config)
import qualified Ubc.Parse.Syntax.Config as Config import qualified Ubc.Parse.Syntax.Config as Config
import qualified Ubc.Parse.Syntax.File as File import qualified Ubc.Parse.Syntax.File as File
@ -36,22 +39,22 @@ import qualified Ubc.Parse.Syntax.Language as UbcLanguage
data Import = Import data Import = Import
{ file :: File.File { file :: File.File
, alias :: Maybe String , alias :: Maybe Token
, list :: [String] , list :: [Token]
} }
deriving stock (Show) deriving stock (Show)
importPath :: Monad m => ParsecT String u m (NonEmpty String) importPath :: Monad m => ParsecT Token u m (NonEmpty Token)
importPath = UbcLanguage.angles (many importChar `sepBy1` char '/') importPath = UbcLanguage.angles (many importChar `sepBy1` char '/')
<&> NonEmpty.fromList <&> NonEmpty.fromList . List.map Text.pack
importChar :: (Monad m) => ParsecT String u m Char importChar :: (Monad m) => ParsecT Token u m Char
importChar = choice importChar = choice
[ char '\\' >> oneOf ">/\\" [ char '\\' >> oneOf ">/\\"
, notFollowedBy (oneOf ">/") >> anyChar , notFollowedBy (oneOf ">/") >> anyChar
] ]
parse :: ParsecT String u Transformer Import parse :: ParsecT Token u Transformer Import
parse = do parse = do
UbcLanguage.reserved "import" UbcLanguage.reserved "import"
fragments <- importPath fragments <- importPath
@ -68,9 +71,9 @@ parse = do
-- fail or parse -- fail or parse
importedFile <- case existingFiles of importedFile <- case existingFiles of
[] -> fail $ notFoundMessage relFile possiblePaths [] -> fail . Text.unpack $ notFoundMessage relFile possiblePaths
[path] -> parseFile path [path] -> parseFile path
fs@_ -> fail $ multipleFoundMessage relFile fs fs@_ -> fail . Text.unpack $ multipleFoundMessage relFile fs
importAs <- Parsec.optionMaybe importAlias importAs <- Parsec.optionMaybe importAlias
@ -78,24 +81,24 @@ parse = do
return $ Import importedFile importAs importList return $ Import importedFile importAs importList
importAlias :: Monad m => ParsecT String u m String importAlias :: Monad m => ParsecT Token u m Token
importAlias = UbcLanguage.reserved "as" importAlias = UbcLanguage.reserved "as"
*> UbcLanguage.identifier *> UbcLanguage.identifier
multipleFoundMessage :: Path Rel File -> [Path Abs File] -> String multipleFoundMessage :: Path Rel File -> [Path Abs File] -> Token
multipleFoundMessage relFile foundFiles = "Could not identify imported file " <> Path.fromRelFile relFile <> "\n" multipleFoundMessage relFile foundFiles = "Could not identify imported file " <> Text.pack (Path.fromRelFile relFile) <> "\n"
<> "Found multiple files in the search path:" <> "Found multiple files in the search path:"
<> (unlines . List.map (Path.fromAbsFile >>> ('\t':)) $ foundFiles) <> (Text.unlines . List.map (Path.fromAbsFile >>> ('\t':) >>> Text.pack) $ foundFiles)
notFoundMessage :: Path Rel File -> [Path Abs File] -> String notFoundMessage :: Path Rel File -> [Path Abs File] -> Token
notFoundMessage relFile searchedLocations = "Could not locate import file <" <> Path.fromRelFile relFile <> ">\n" notFoundMessage relFile searchedLocations = "Could not locate import file <" <> Text.pack (Path.fromRelFile relFile) <> ">\n"
<> "Searched locations were:\n" <> "Searched locations were:\n"
<> (unlines . List.map (('\t':) . Path.fromAbsFile) $ searchedLocations) <> (Text.unlines . List.map (Path.fromAbsFile >>> ('\t':) >>> Text.pack) $ searchedLocations)
parseFile :: Path Abs File -> ParsecT String u Transformer File.File parseFile :: Path Abs File -> ParsecT Token u Transformer File.File
parseFile path = do parseFile path = do
let stringPath = Path.fromAbsFile path let stringPath = Path.fromAbsFile path
contents <- liftIO . readFile $ stringPath contents <- liftIO . Text.IO.readFile $ stringPath
-- save old state -- save old state
oldInput <- Parsec.getInput oldInput <- Parsec.getInput
@ -123,10 +126,10 @@ parseFile path = do
pure importedFile pure importedFile
parseRelPath :: MonadThrow m => NonEmpty FilePath -> m (Path Rel File) parseRelPath :: MonadThrow m => NonEmpty Token -> m (Path Rel File)
parseRelPath fragments = do parseRelPath fragments = do
name <- Path.parseRelFile $ NonEmpty.last fragments name <- Path.parseRelFile . Text.unpack $ NonEmpty.last fragments
dirPath <- mapM Path.parseRelDir . NonEmpty.init $ fragments dirPath <- mapM (Path.parseRelDir . Text.unpack) . NonEmpty.init $ fragments
pure $ constructRelative name dirPath pure $ constructRelative name dirPath
constructRelative :: Path Rel t -> [Path Rel Dir] -> Path Rel t constructRelative :: Path Rel t -> [Path Rel Dir] -> Path Rel t

View file

@ -6,10 +6,10 @@ where
import Text.Parsec (ParsecT) import Text.Parsec (ParsecT)
import Ubc.Parse.Syntax (Transformer) import Ubc.Parse.Syntax (Transformer, Token)
data Import data Import
instance Show Import instance Show Import
parse :: ParsecT String u Transformer Import parse :: ParsecT Token u Transformer Import

View file

@ -1,3 +1,4 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoMonomorphismRestriction #-} {-# LANGUAGE NoMonomorphismRestriction #-}
module Ubc.Parse.Syntax.Language module Ubc.Parse.Syntax.Language
( languageDef ( languageDef
@ -33,19 +34,24 @@ module Ubc.Parse.Syntax.Language
) )
where where
import Data.Functor ( ($>) ) import Data.Functor ( ($>), (<&>) )
import qualified Data.Text as Text
import Text.Parsec import Text.Parsec
( alphaNum, char, letter, oneOf, choice, (<|>), ParsecT ) ( alphaNum, char, letter, oneOf, choice, (<|>), ParsecT )
import Text.Parsec.Token import Text.Parsec.Token
( makeTokenParser, ( makeTokenParser,
GenLanguageDef(..), GenLanguageDef(..),
GenTokenParser(TokenParser, identifier, reserved, operator, GenTokenParser(TokenParser,
reservedOp, charLiteral, stringLiteral, natural, integer, float, charLiteral, natural, integer, float,
naturalOrFloat, decimal, hexadecimal, octal, symbol, lexeme, naturalOrFloat, decimal, hexadecimal, octal, lexeme,
whiteSpace, parens, braces, angles, brackets, semi, comma, colon, whiteSpace, parens, braces, angles, brackets,
dot, semiSep, semiSep1, commaSep, commaSep1) ) semiSep, semiSep1, commaSep, commaSep1) )
import qualified Text.Parsec.Token as Token
languageDef :: Monad m => GenLanguageDef String u m import Ubc.Parse.Syntax (Token)
languageDef :: Monad m => GenLanguageDef Token u m
languageDef = LanguageDef { languageDef = LanguageDef {
commentStart = "/*" commentStart = "/*"
, commentEnd = "*/" , commentEnd = "*/"
@ -92,44 +98,29 @@ languageDef = LanguageDef {
, caseSensitive = True , caseSensitive = True
} }
tokenParser :: Monad m => GenTokenParser String u m tokenParser :: Monad m => GenTokenParser Token u m
tokenParser = makeTokenParser languageDef tokenParser = makeTokenParser languageDef
identifier :: Monad m => ParsecT String u m String characterLiteral :: Monad m => ParsecT Token u m Char
reserved :: Monad m => String -> ParsecT String u m () natural :: Monad m => ParsecT Token u m Integer
operator :: Monad m => ParsecT String u m String integer :: Monad m => ParsecT Token u m Integer
reservedOperator :: Monad m => String -> ParsecT String u m () float :: Monad m => ParsecT Token u m Double
characterLiteral :: Monad m => ParsecT String u m Char naturalOrFloat :: Monad m => ParsecT Token u m (Either Integer Double)
stringLiteral :: Monad m => ParsecT String u m String decimal :: Monad m => ParsecT Token u m Integer
natural :: Monad m => ParsecT String u m Integer hexadecimal :: Monad m => ParsecT Token u m Integer
integer :: Monad m => ParsecT String u m Integer octal :: Monad m => ParsecT Token u m Integer
float :: Monad m => ParsecT String u m Double lexeme :: Monad m => ParsecT Token u m a -> ParsecT Token u m a
naturalOrFloat :: Monad m => ParsecT String u m (Either Integer Double) whiteSpace :: Monad m => ParsecT Token u m ()
decimal :: Monad m => ParsecT String u m Integer parens :: Monad m => ParsecT Token u m a -> ParsecT Token u m a
hexadecimal :: Monad m => ParsecT String u m Integer braces :: Monad m => ParsecT Token u m a -> ParsecT Token u m a
octal :: Monad m => ParsecT String u m Integer angles :: Monad m => ParsecT Token u m a -> ParsecT Token u m a
symbol :: Monad m => String -> ParsecT String u m String brackets :: Monad m => ParsecT Token u m a -> ParsecT Token u m a
lexeme :: Monad m => ParsecT String u m a -> ParsecT String u m a semicolonSeparated :: Monad m => ParsecT Token u m a -> ParsecT Token u m [a]
whiteSpace :: Monad m => ParsecT String u m () semicolonSeparated1 :: Monad m => ParsecT Token u m a -> ParsecT Token u m [a]
parens :: Monad m => ParsecT String u m a -> ParsecT String u m a commaSeparated :: Monad m => ParsecT Token u m a -> ParsecT Token u m [a]
braces :: Monad m => ParsecT String u m a -> ParsecT String u m a commaSeparated1 :: Monad m => ParsecT Token u m a -> ParsecT Token u m [a]
angles :: Monad m => ParsecT String u m a -> ParsecT String u m a
brackets :: Monad m => ParsecT String u m a -> ParsecT String u m a
semicolon :: Monad m => ParsecT String u m String
comma :: Monad m => ParsecT String u m String
colon :: Monad m => ParsecT String u m String
dot :: Monad m => ParsecT String u m String
semicolonSeparated :: Monad m => ParsecT String u m a -> ParsecT String u m [a]
semicolonSeparated1 :: Monad m => ParsecT String u m a -> ParsecT String u m [a]
commaSeparated :: Monad m => ParsecT String u m a -> ParsecT String u m [a]
commaSeparated1 :: Monad m => ParsecT String u m a -> ParsecT String u m [a]
TokenParser{ TokenParser{
identifier = identifier charLiteral = characterLiteral
, reserved = reserved
, operator = operator
, reservedOp = reservedOperator
, charLiteral = characterLiteral
, stringLiteral = stringLiteral
, natural = natural -- decimal, hexadecimal or octal , natural = natural -- decimal, hexadecimal or octal
, integer = integer -- decimal, hexadecimal or octal , integer = integer -- decimal, hexadecimal or octal
, float = float , float = float
@ -137,24 +128,49 @@ TokenParser{
, decimal = decimal , decimal = decimal
, hexadecimal = hexadecimal , hexadecimal = hexadecimal
, octal = octal , octal = octal
, symbol = symbol
, lexeme = lexeme , lexeme = lexeme
, whiteSpace = whiteSpace , whiteSpace = whiteSpace
, parens = parens , parens = parens
, braces = braces , braces = braces
, angles = angles , angles = angles
, brackets = brackets , brackets = brackets
, semi = semicolon
, comma = comma
, colon = colon
, dot = dot
, semiSep = semicolonSeparated , semiSep = semicolonSeparated
, semiSep1 = semicolonSeparated1 , semiSep1 = semicolonSeparated1
, commaSep = commaSeparated , commaSep = commaSeparated
, commaSep1 = commaSeparated1 , commaSep1 = commaSeparated1
} = tokenParser } = tokenParser
typeName :: Monad m => ParsecT String u m String semicolon :: Monad m => ParsecT Token u m Token
semicolon = Token.semi tokenParser <&> Text.pack
comma :: Monad m => ParsecT Token u m Token
comma = Token.comma tokenParser <&> Text.pack
colon :: Monad m => ParsecT Token u m Token
colon = Token.colon tokenParser <&> Text.pack
dot :: Monad m => ParsecT Token u m Token
dot = Token.dot tokenParser <&> Text.pack
symbol :: Monad m => String -> ParsecT Token u m Token
symbol = (<&> Text.pack) . Token.symbol tokenParser
stringLiteral :: Monad m => ParsecT Token u m Token
stringLiteral = Token.stringLiteral tokenParser <&> Text.pack
identifier :: Monad m => ParsecT Token u m Token
identifier = Token.identifier tokenParser <&> Text.pack
reserved :: Monad m => String -> ParsecT Token u m ()
reserved = Token.reserved tokenParser
operator :: Monad m => ParsecT Token u m Token
operator = Token.operator tokenParser <&> Text.pack
reservedOperator :: Monad m => String -> ParsecT Token u m ()
reservedOperator = Token.reservedOp tokenParser
typeName :: Monad m => ParsecT Token u m Token
typeName = choice typeName = choice
[ Ubc.Parse.Syntax.Language.reserved "i32" $> "i32" [ Ubc.Parse.Syntax.Language.reserved "i32" $> "i32"
, Ubc.Parse.Syntax.Language.reserved "u32" $> "u32" , Ubc.Parse.Syntax.Language.reserved "u32" $> "u32"

View file

@ -1,12 +1,12 @@
{-# LANGUAGE OverloadedStrings #-}
module Ubc.Parse.Syntax.Statement module Ubc.Parse.Syntax.Statement
( Statement(..) ( Statement(..)
, parse , parse
, blockExpression
) )
where where
import Ubc.Parse.Syntax.VariableType (VariableType) import Ubc.Parse.Syntax.VariableType (VariableType)
import {-# SOURCE #-} Ubc.Parse.Syntax.Expression (expressionParser, Expression (Block)) import {-# SOURCE #-} Ubc.Parse.Syntax.Expression (expressionParser, Expression,)
import Ubc.Parse.Syntax.TypeExpression (TypeExpression) import Ubc.Parse.Syntax.TypeExpression (TypeExpression)
import Text.Parsec (choice, ParsecT, try, many) import Text.Parsec (choice, ParsecT, try, many)
import qualified Ubc.Parse.Syntax.Language as UbcLanguage import qualified Ubc.Parse.Syntax.Language as UbcLanguage
@ -14,22 +14,23 @@ import qualified Ubc.Parse.Syntax.VariableType as VariableType
import qualified Ubc.Parse.Syntax.TypeExpression as TypeExpression import qualified Ubc.Parse.Syntax.TypeExpression as TypeExpression
import Control.Monad ((<$!>)) import Control.Monad ((<$!>))
import Data.Functor ((<&>)) import Data.Functor ((<&>))
import Ubc.Parse.Syntax (Token)
type VariableName = String type VariableName = Token
type TypeName = String type TypeName = Token
data Statement = VariableDefinition VariableType VariableName Expression data Statement = VariableDefinition VariableType VariableName Expression
| TypeDefinition TypeName TypeExpression | TypeDefinition TypeName TypeExpression
| ExpressionStatement Expression | ExpressionStatement Expression
deriving (Show) deriving (Show)
parse :: Monad m => ParsecT String u m Statement parse :: Monad m => ParsecT Token u m Statement
parse = choice [ variableDefinition parse = choice [ variableDefinition
, typeDefinition , typeDefinition
, ExpressionStatement <$!> expressionParser , ExpressionStatement <$!> expressionParser
] ]
typeDefinition :: Monad m => ParsecT String u m Statement typeDefinition :: Monad m => ParsecT Token u m Statement
typeDefinition = do typeDefinition = do
UbcLanguage.reserved "type" UbcLanguage.reserved "type"
@ -40,7 +41,7 @@ typeDefinition = do
TypeDefinition name <$> TypeExpression.parseTypeExpression TypeDefinition name <$> TypeExpression.parseTypeExpression
variableDefinition :: Monad m => ParsecT String u m Statement variableDefinition :: Monad m => ParsecT Token u m Statement
variableDefinition = do variableDefinition = do
(variableType, variableName) <- try $ do (variableType, variableName) <- try $ do
variableType <- UbcLanguage.typeName variableType <- UbcLanguage.typeName
@ -49,6 +50,3 @@ variableDefinition = do
return (VariableType.fromString variableType, variableName) return (VariableType.fromString variableType, variableName)
VariableDefinition variableType variableName <$> expressionParser VariableDefinition variableType variableName <$> expressionParser
blockExpression :: Monad m => ParsecT String u m Expression
blockExpression = UbcLanguage.braces (many parse) <&> Block

View file

@ -1,5 +1,6 @@
{-# LANGUAGE DerivingVia #-} {-# LANGUAGE DerivingVia #-}
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
module Ubc.Parse.Syntax.Struct module Ubc.Parse.Syntax.Struct
( Struct(..) ( Struct(..)
, parse , parse
@ -26,11 +27,12 @@ import qualified Ubc.Parse.Syntax.Language as UbcLanguage
import qualified Ubc.Parse.Syntax.VariableType as VariableType import qualified Ubc.Parse.Syntax.VariableType as VariableType
import qualified Ubc.Parse.Syntax.Function as Function import qualified Ubc.Parse.Syntax.Function as Function
import qualified Ubc.Parse.Syntax.Generic as Syntax.Generic import qualified Ubc.Parse.Syntax.Generic as Syntax.Generic
import Ubc.Parse.Syntax (Token)
type VariableName = String type VariableName = Token
data Struct = Struct data Struct = Struct
{ name :: String { name :: Token
, generics :: [Syntax.Generic.Generic] , generics :: [Syntax.Generic.Generic]
, body :: StructBody , body :: StructBody
} }
@ -43,7 +45,7 @@ data StructBody = StructBody
deriving stock (Generic, Show) deriving stock (Generic, Show)
deriving (Semigroup, Monoid) via Generically StructBody deriving (Semigroup, Monoid) via Generically StructBody
parse :: Monad m => ParsecT String u m Struct parse :: Monad m => ParsecT Token u m Struct
parse = do parse = do
_ <- UbcLanguage.reserved "struct" _ <- UbcLanguage.reserved "struct"
structIdentifier <- UbcLanguage.identifier structIdentifier <- UbcLanguage.identifier
@ -52,10 +54,10 @@ parse = do
pure $ Struct structIdentifier structGenerics structBody pure $ Struct structIdentifier structGenerics structBody
structMember :: Monad m => ParsecT String u m StructBody structMember :: Monad m => ParsecT Token u m StructBody
structMember = choice [ structVariableOrFunction ] structMember = choice [ structVariableOrFunction ]
structVariableOrFunction :: Monad m => ParsecT String u m StructBody structVariableOrFunction :: Monad m => ParsecT Token u m StructBody
structVariableOrFunction = do structVariableOrFunction = do
(typeName, identifier) <- try $ do (typeName, identifier) <- try $ do
typeName <- UbcLanguage.typeName typeName <- UbcLanguage.typeName
@ -66,7 +68,7 @@ structVariableOrFunction = do
, (\ f -> mempty { functions = [f] }) <$!> Function.parsePrefixed typeName identifier , (\ f -> mempty { functions = [f] }) <$!> Function.parsePrefixed typeName identifier
] ]
parseVariable :: Monad m => VariableType -> String -> ParsecT String u m StructBody parseVariable :: Monad m => VariableType -> Token -> ParsecT Token u m StructBody
parseVariable variableType variableName = do parseVariable variableType variableName = do
_ <- UbcLanguage.semicolon _ <- UbcLanguage.semicolon
return $ mempty { variables = [(variableName, variableType)] } return $ mempty { variables = [(variableName, variableType)] }

View file

@ -14,13 +14,14 @@ import Ubc.Parse.Syntax.Struct (Struct)
import qualified Ubc.Parse.Syntax.Language as UbcLanguage import qualified Ubc.Parse.Syntax.Language as UbcLanguage
import qualified Ubc.Parse.Syntax.VariableType as VariableType import qualified Ubc.Parse.Syntax.VariableType as VariableType
import qualified Ubc.Parse.Syntax.Struct as Struct import qualified Ubc.Parse.Syntax.Struct as Struct
import Ubc.Parse.Syntax (Token)
data TypeExpression = TypeAlias VariableType data TypeExpression = TypeAlias VariableType
| StructExpression Struct | StructExpression Struct
deriving (Show) deriving (Show)
parseTypeExpression :: Monad m => ParsecT String u m TypeExpression parseTypeExpression :: Monad m => ParsecT Token u m TypeExpression
parseTypeExpression = choice parseTypeExpression = choice
[ TypeAlias . VariableType.fromString <$!> UbcLanguage.typeName [ TypeAlias . VariableType.fromString <$!> UbcLanguage.typeName
, StructExpression <$!> Struct.parse , StructExpression <$!> Struct.parse

View file

@ -1,16 +1,18 @@
{-# LANGUAGE OverloadedStrings #-}
module Ubc.Parse.Syntax.VariableType module Ubc.Parse.Syntax.VariableType
(VariableType(..) (VariableType(..)
, fromString , fromString
) )
where where
import Ubc.Parse.Syntax (Token)
data VariableType = BuiltInI32 data VariableType = BuiltInI32
| BuiltInU32 | BuiltInU32
| BuiltInF32 | BuiltInF32
| UserStruct String | UserStruct Token
deriving (Show, Eq) deriving (Show, Eq)
fromString :: String -> VariableType fromString :: Token -> VariableType
fromString "i32" = BuiltInI32 fromString "i32" = BuiltInI32
fromString "u32" = BuiltInU32 fromString "u32" = BuiltInU32
fromString "f32" = BuiltInF32 fromString "f32" = BuiltInF32

View file

@ -56,6 +56,7 @@ library
, os-string , os-string
, parsec , parsec
, path , path
, text
, transformers , transformers
default-language: Haskell2010 default-language: Haskell2010
@ -78,6 +79,7 @@ executable ubcc-exe
, os-string , os-string
, parsec , parsec
, path , path
, text
, transformers , transformers
, ubcc , ubcc
default-language: Haskell2010 default-language: Haskell2010
@ -102,6 +104,7 @@ test-suite ubcc-test
, os-string , os-string
, parsec , parsec
, path , path
, text
, transformers , transformers
, ubcc , ubcc
default-language: Haskell2010 default-language: Haskell2010