From cbfd729795507f7cf0d5f13792ee5b6877bdaed4 Mon Sep 17 00:00:00 2001 From: VegOwOtenks Date: Fri, 21 Feb 2025 18:17:46 +0100 Subject: [PATCH] Everything is now Data.Text instead of String --- app/Main.hs | 3 +- package.yaml | 1 + src/Ubc/Parse/Syntax.hs | 7 +- src/Ubc/Parse/Syntax/Enumeration.hs | 9 +- src/Ubc/Parse/Syntax/Expression.hs | 30 ++++--- src/Ubc/Parse/Syntax/Expression.hs-boot | 19 ++-- src/Ubc/Parse/Syntax/File.hs | 15 ++-- src/Ubc/Parse/Syntax/File.hs-boot | 4 +- src/Ubc/Parse/Syntax/Function.hs | 12 +-- src/Ubc/Parse/Syntax/Generic.hs | 8 +- src/Ubc/Parse/Syntax/Import.hs | 47 +++++----- src/Ubc/Parse/Syntax/Import.hs-boot | 4 +- src/Ubc/Parse/Syntax/Language.hs | 112 ++++++++++++++---------- src/Ubc/Parse/Syntax/Statement.hs | 18 ++-- src/Ubc/Parse/Syntax/Struct.hs | 14 +-- src/Ubc/Parse/Syntax/TypeExpression.hs | 3 +- src/Ubc/Parse/Syntax/VariableType.hs | 6 +- ubcc.cabal | 3 + 18 files changed, 178 insertions(+), 137 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index 0f4e265..98700ca 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -6,6 +6,7 @@ import System.Directory as Directory import qualified Control.Monad.Trans.Reader as Reader import qualified Data.Set as Set +import qualified Data.Text.IO as Text.IO import qualified Text.Parsec as Parsec @@ -22,7 +23,7 @@ minimalConfig = do main :: IO () main = do - text <- getContents + text <- Text.IO.getContents -- fixme: handle errors? parseConfig <- minimalConfig diff --git a/package.yaml b/package.yaml index 113d1ab..47a2844 100644 --- a/package.yaml +++ b/package.yaml @@ -29,6 +29,7 @@ dependencies: - os-string - path - parsec +- text - transformers ghc-options: diff --git a/src/Ubc/Parse/Syntax.hs b/src/Ubc/Parse/Syntax.hs index 9afeb5a..2225c9e 100644 --- a/src/Ubc/Parse/Syntax.hs +++ b/src/Ubc/Parse/Syntax.hs @@ -1,7 +1,12 @@ module Ubc.Parse.Syntax -(Transformer) +(Transformer, Token) where + import Control.Monad.Reader (ReaderT) + +import Data.Text ( Text ) + import Ubc.Parse.Syntax.Config (Config) type Transformer = ReaderT Config IO +type Token = Text diff --git a/src/Ubc/Parse/Syntax/Enumeration.hs b/src/Ubc/Parse/Syntax/Enumeration.hs index bcba92a..3ac1b53 100644 --- a/src/Ubc/Parse/Syntax/Enumeration.hs +++ b/src/Ubc/Parse/Syntax/Enumeration.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} module Ubc.Parse.Syntax.Enumeration ( Enumeration(..) , parse @@ -7,15 +8,17 @@ where import Text.Parsec (ParsecT, many, ()) import qualified Ubc.Parse.Syntax.Language as UbcLanguage +import Ubc.Parse.Syntax (Token) +import Data.Text (Text) -type EnumerationMember = String +type EnumerationMember = Token data Enumeration = Enumeration - { name :: String + { name :: Text , members :: [EnumerationMember] } deriving (Show) -parse :: Monad m => ParsecT String u m Enumeration +parse :: Monad m => ParsecT Token u m Enumeration parse = do UbcLanguage.reserved "enum" identifier <- UbcLanguage.identifier "enum identifier" diff --git a/src/Ubc/Parse/Syntax/Expression.hs b/src/Ubc/Parse/Syntax/Expression.hs index 3bff8ad..4de46a8 100644 --- a/src/Ubc/Parse/Syntax/Expression.hs +++ b/src/Ubc/Parse/Syntax/Expression.hs @@ -1,6 +1,8 @@ +{-# LANGUAGE OverloadedStrings #-} module Ubc.Parse.Syntax.Expression ( Expression(..) , expressionParser +, blockExpression ) where @@ -9,25 +11,27 @@ import Data.Functor ( (<&>), ($>) ) import Data.Ratio ((%)) import Text.Parsec.Expr (Operator(Infix, Prefix), Assoc (AssocLeft, AssocRight), buildExpressionParser) -import Text.Parsec (ParsecT, (<|>), (), choice, option, many1, digit, lookAhead, oneOf, char, notFollowedBy) +import Text.Parsec (ParsecT, (<|>), (), choice, option, many1, digit, lookAhead, oneOf, char, notFollowedBy, many) -import Ubc.Parse.Syntax.Statement (blockExpression, Statement) +import Ubc.Parse.Syntax.Statement (Statement) import Ubc.Parse.Syntax.Operators (BinaryOperator(..), UnaryOperator (..)) import qualified Ubc.Parse.Syntax.Language as UbcLanguage +import Ubc.Parse.Syntax (Token) +import qualified Ubc.Parse.Syntax.Statement as Statement data Expression = Binary BinaryOperator Expression Expression | Unary UnaryOperator Expression | ConstantInteger Integer | ConstantFraction Rational - | FunctionCall String [Expression] - | Variable String + | FunctionCall Token [Expression] + | Variable Token | If Expression Expression (Maybe Expression) -- if then else | Loop Expression Expression -- condition body | Block [Statement] deriving (Show) -operatorTable :: Monad m => [[Operator String u m Expression]] +operatorTable :: Monad m => [[Operator Token u m Expression]] operatorTable = [ [ Infix (UbcLanguage.reservedOperator "." $> Binary StructureAccess) AssocLeft @@ -66,10 +70,10 @@ operatorTable = , [ Infix (UbcLanguage.reservedOperator "=" $> Binary Assign) AssocRight ] ] -expressionParser :: (Monad m) => ParsecT String u m Expression +expressionParser :: (Monad m) => ParsecT Token u m Expression expressionParser = buildExpressionParser operatorTable termParser "expression" -numberTerm :: Monad m => ParsecT String u m Expression +numberTerm :: Monad m => ParsecT Token u m Expression numberTerm = do notFollowedBy $ char '0' integerDigits <- many1 digit @@ -79,7 +83,7 @@ numberTerm = do ] <* UbcLanguage.whiteSpace -decimalTerm :: Monad m => [Char] -> ParsecT String u m Expression +decimalTerm :: Monad m => [Char] -> ParsecT Token u m Expression decimalTerm integerDigits = do _ <- lookAhead $ oneOf "e." fractionalDigits <- option "" $ char '.' *> many1 digit @@ -92,7 +96,7 @@ decimalTerm integerDigits = do return $ ConstantFraction $ numeratorInteger ^ numeratorPower % denominatorInteger ^ denominatorPower -termParser :: Monad m => ParsecT String u m Expression +termParser :: Monad m => ParsecT Token u m Expression termParser = UbcLanguage.parens expressionParser <|> numberTerm <|> fmap ConstantInteger UbcLanguage.integer @@ -104,13 +108,13 @@ termParser = UbcLanguage.parens expressionParser <|> functionCallOrVariable "term" -loopExpression :: Monad m => String -> (Expression -> Expression) -> ParsecT String u m Expression +loopExpression :: Monad m => String -> (Expression -> Expression) -> ParsecT Token u m Expression loopExpression startKeyword conditionWrapper = do _ <- UbcLanguage.reserved startKeyword condition <- conditionWrapper <$!> expressionParser Loop condition <$> expressionParser -conditionalExpression :: Monad m => String -> (Expression -> Expression) -> ParsecT String u m Expression +conditionalExpression :: Monad m => String -> (Expression -> Expression) -> ParsecT Token u m Expression conditionalExpression startKeyword conditionWrapper = do _ <- UbcLanguage.reserved startKeyword condition <- conditionWrapper <$!> expressionParser @@ -120,7 +124,7 @@ conditionalExpression startKeyword conditionWrapper = do return $ If condition then_ else_ -functionCallOrVariable :: Monad m => ParsecT String u m Expression +functionCallOrVariable :: Monad m => ParsecT Token u m Expression functionCallOrVariable = do name <- UbcLanguage.identifier choice @@ -128,3 +132,5 @@ functionCallOrVariable = do , return $ Variable name ] +blockExpression :: Monad m => ParsecT Token u m Expression +blockExpression = UbcLanguage.braces (many Statement.parse) <&> Block diff --git a/src/Ubc/Parse/Syntax/Expression.hs-boot b/src/Ubc/Parse/Syntax/Expression.hs-boot index 6375669..a9b7022 100644 --- a/src/Ubc/Parse/Syntax/Expression.hs-boot +++ b/src/Ubc/Parse/Syntax/Expression.hs-boot @@ -1,21 +1,14 @@ module Ubc.Parse.Syntax.Expression -( Expression(..) +( Expression , expressionParser +, blockExpression ) where import Text.Parsec (ParsecT) -import Ubc.Parse.Syntax.Operators (BinaryOperator, UnaryOperator) -import {-# SOURCE #-} Ubc.Parse.Syntax.Statement (Statement) +import Ubc.Parse.Syntax (Token) -data Expression = Binary BinaryOperator Expression Expression - | Unary UnaryOperator Expression - | ConstantInteger Integer - | ConstantFraction Rational - | FunctionCall String [Expression] - | Variable String - | If Expression Expression (Maybe Expression) -- if then else - | Loop Expression Expression -- condition body - | Block [Statement] +data Expression instance Show Expression -expressionParser :: Monad m => ParsecT String u m Expression +expressionParser :: Monad m => ParsecT Token u m Expression +blockExpression :: Monad m => ParsecT Token u m Expression diff --git a/src/Ubc/Parse/Syntax/File.hs b/src/Ubc/Parse/Syntax/File.hs index db7d116..000f663 100644 --- a/src/Ubc/Parse/Syntax/File.hs +++ b/src/Ubc/Parse/Syntax/File.hs @@ -11,9 +11,13 @@ import GHC.Generics (Generic, Generically(..)) import Control.Monad ((<$!>)) import Data.Functor ((<&>)) +import qualified Data.Text as Text + import Text.Parsec (choice, ParsecT, many) -import Ubc.Parse.Syntax (Transformer) +import qualified Text.Parsec as Parsec + +import Ubc.Parse.Syntax (Transformer, Token) import {-# SOURCE #-} Ubc.Parse.Syntax.Import (Import) import Ubc.Parse.Syntax.Struct (Struct) @@ -26,10 +30,9 @@ import qualified Ubc.Parse.Syntax.Struct as Struct import qualified Ubc.Parse.Syntax.Function as Function import qualified Ubc.Parse.Syntax.Statement as Statement import qualified Ubc.Parse.Syntax.Enumeration as Enumeration -import qualified Text.Parsec as Parsec data File = File - { name :: String + { name :: Token , body :: FileBody } deriving (Show) @@ -45,13 +48,13 @@ data FileBody = FileBody deriving (Semigroup, Monoid) via Generically FileBody -- dont use `deriving ... via FileBody` because that leads to a loop, somehow -parse :: ParsecT String u Transformer File +parse :: ParsecT Token u Transformer File parse = do - fileName <- Parsec.sourceName <$!> Parsec.getPosition + fileName <- Text.pack . Parsec.sourceName <$!> Parsec.getPosition fileBody <- mconcat <$!> many fileMember pure $ File fileName fileBody -fileMember :: ParsecT String u Transformer FileBody +fileMember :: ParsecT Token u Transformer FileBody fileMember = choice [ Struct.parse <&> \s -> mempty { structs = [s] } , Import.parse <&> \i -> mempty { imports = [i] } diff --git a/src/Ubc/Parse/Syntax/File.hs-boot b/src/Ubc/Parse/Syntax/File.hs-boot index 72b1dd6..108dc4c 100644 --- a/src/Ubc/Parse/Syntax/File.hs-boot +++ b/src/Ubc/Parse/Syntax/File.hs-boot @@ -6,10 +6,10 @@ where import Text.Parsec (ParsecT) -import Ubc.Parse.Syntax (Transformer) +import Ubc.Parse.Syntax (Transformer, Token) data File instance Show File -parse :: ParsecT String u Transformer File +parse :: ParsecT Token u Transformer File diff --git a/src/Ubc/Parse/Syntax/Function.hs b/src/Ubc/Parse/Syntax/Function.hs index 5001fce..770a900 100644 --- a/src/Ubc/Parse/Syntax/Function.hs +++ b/src/Ubc/Parse/Syntax/Function.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} module Ubc.Parse.Syntax.Function ( Function(..) , parse @@ -16,17 +17,18 @@ import Ubc.Parse.Syntax.Generic (Generic) import qualified Ubc.Parse.Syntax.Language as UbcLanguage import qualified Ubc.Parse.Syntax.VariableType as VariableType import qualified Ubc.Parse.Syntax.Generic as Generic +import Ubc.Parse.Syntax (Token) data Function = Function { returnType :: VariableType - , identifier :: String + , identifier :: Token , generics :: [Generic] , body :: Expression - , arguments :: [(VariableType, String)] + , arguments :: [(VariableType, Token)] } deriving (Show) -parsePrefixed :: Monad m => VariableType -> String -> ParsecT String u m Function +parsePrefixed :: Monad m => VariableType -> Token -> ParsecT Token u m Function parsePrefixed ftype fname = do genericList <- UbcLanguage.angles $ UbcLanguage.commaSeparated1 Generic.parse @@ -36,7 +38,7 @@ parsePrefixed ftype fname = do return $ Function ftype fname genericList expressionBody argumentList -parse :: Monad m => ParsecT String u m Function +parse :: Monad m => ParsecT Token u m Function parse = do (resultType, name) <- try $ do resultType <- UbcLanguage.typeName @@ -46,7 +48,7 @@ parse = do parsePrefixed resultType name -argumentDefinition :: Monad m => ParsecT String u m (VariableType, String) +argumentDefinition :: Monad m => ParsecT Token u m (VariableType, Token) argumentDefinition = do argumentType <- VariableType.fromString <$!> UbcLanguage.typeName argumentName <- UbcLanguage.identifier diff --git a/src/Ubc/Parse/Syntax/Generic.hs b/src/Ubc/Parse/Syntax/Generic.hs index 0dc4268..9cfb465 100644 --- a/src/Ubc/Parse/Syntax/Generic.hs +++ b/src/Ubc/Parse/Syntax/Generic.hs @@ -1,17 +1,19 @@ {-# LANGUAGE DerivingStrategies #-} module Ubc.Parse.Syntax.Generic ( Generic(..) -, parse) +, parse +) where import qualified Ubc.Parse.Syntax.Language as Language import Control.Monad ((<$!>)) import Text.Parsec (ParsecT) +import Ubc.Parse.Syntax (Token) data Generic = Generic - { name :: String + { name :: Token } deriving stock (Show) -parse :: Monad m => ParsecT String u m Generic +parse :: Monad m => ParsecT Token u m Generic parse = Generic <$!> Language.identifier diff --git a/src/Ubc/Parse/Syntax/Import.hs b/src/Ubc/Parse/Syntax/Import.hs index 12a4d8f..a92703c 100644 --- a/src/Ubc/Parse/Syntax/Import.hs +++ b/src/Ubc/Parse/Syntax/Import.hs @@ -1,5 +1,6 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE OverloadedStrings #-} module Ubc.Parse.Syntax.Import ( parse , Import(..) @@ -18,8 +19,10 @@ import Data.Functor ((<&>)) import Data.List.NonEmpty (NonEmpty) import qualified Data.List as List -import qualified Data.Set as Set import qualified Data.List.NonEmpty as NonEmpty +import qualified Data.Set as Set +import qualified Data.Text as Text +import qualified Data.Text.IO as Text.IO import qualified System.Directory as IO @@ -28,7 +31,7 @@ import qualified Text.Parsec as Parsec import Path as Path ( Path, File, parseRelFile, Rel, parseRelDir, Dir, Abs, (), fromAbsFile, fromRelFile ) -import Ubc.Parse.Syntax (Transformer) +import Ubc.Parse.Syntax (Transformer, Token) import Ubc.Parse.Syntax.Config (Config) import qualified Ubc.Parse.Syntax.Config as Config import qualified Ubc.Parse.Syntax.File as File @@ -36,22 +39,22 @@ import qualified Ubc.Parse.Syntax.Language as UbcLanguage data Import = Import { file :: File.File - , alias :: Maybe String - , list :: [String] + , alias :: Maybe Token + , list :: [Token] } deriving stock (Show) -importPath :: Monad m => ParsecT String u m (NonEmpty String) +importPath :: Monad m => ParsecT Token u m (NonEmpty Token) importPath = UbcLanguage.angles (many importChar `sepBy1` char '/') - <&> NonEmpty.fromList + <&> NonEmpty.fromList . List.map Text.pack -importChar :: (Monad m) => ParsecT String u m Char +importChar :: (Monad m) => ParsecT Token u m Char importChar = choice [ char '\\' >> oneOf ">/\\" , notFollowedBy (oneOf ">/") >> anyChar ] -parse :: ParsecT String u Transformer Import +parse :: ParsecT Token u Transformer Import parse = do UbcLanguage.reserved "import" fragments <- importPath @@ -68,9 +71,9 @@ parse = do -- fail or parse importedFile <- case existingFiles of - [] -> fail $ notFoundMessage relFile possiblePaths + [] -> fail . Text.unpack $ notFoundMessage relFile possiblePaths [path] -> parseFile path - fs@_ -> fail $ multipleFoundMessage relFile fs + fs@_ -> fail . Text.unpack $ multipleFoundMessage relFile fs importAs <- Parsec.optionMaybe importAlias @@ -78,24 +81,24 @@ parse = do return $ Import importedFile importAs importList -importAlias :: Monad m => ParsecT String u m String +importAlias :: Monad m => ParsecT Token u m Token importAlias = UbcLanguage.reserved "as" *> UbcLanguage.identifier -multipleFoundMessage :: Path Rel File -> [Path Abs File] -> String -multipleFoundMessage relFile foundFiles = "Could not identify imported file " <> Path.fromRelFile relFile <> "\n" +multipleFoundMessage :: Path Rel File -> [Path Abs File] -> Token +multipleFoundMessage relFile foundFiles = "Could not identify imported file " <> Text.pack (Path.fromRelFile relFile) <> "\n" <> "Found multiple files in the search path:" - <> (unlines . List.map (Path.fromAbsFile >>> ('\t':)) $ foundFiles) + <> (Text.unlines . List.map (Path.fromAbsFile >>> ('\t':) >>> Text.pack) $ foundFiles) -notFoundMessage :: Path Rel File -> [Path Abs File] -> String -notFoundMessage relFile searchedLocations = "Could not locate import file <" <> Path.fromRelFile relFile <> ">\n" +notFoundMessage :: Path Rel File -> [Path Abs File] -> Token +notFoundMessage relFile searchedLocations = "Could not locate import file <" <> Text.pack (Path.fromRelFile relFile) <> ">\n" <> "Searched locations were:\n" - <> (unlines . List.map (('\t':) . Path.fromAbsFile) $ searchedLocations) + <> (Text.unlines . List.map (Path.fromAbsFile >>> ('\t':) >>> Text.pack) $ searchedLocations) -parseFile :: Path Abs File -> ParsecT String u Transformer File.File +parseFile :: Path Abs File -> ParsecT Token u Transformer File.File parseFile path = do let stringPath = Path.fromAbsFile path - contents <- liftIO . readFile $ stringPath + contents <- liftIO . Text.IO.readFile $ stringPath -- save old state oldInput <- Parsec.getInput @@ -123,10 +126,10 @@ parseFile path = do pure importedFile -parseRelPath :: MonadThrow m => NonEmpty FilePath -> m (Path Rel File) +parseRelPath :: MonadThrow m => NonEmpty Token -> m (Path Rel File) parseRelPath fragments = do - name <- Path.parseRelFile $ NonEmpty.last fragments - dirPath <- mapM Path.parseRelDir . NonEmpty.init $ fragments + name <- Path.parseRelFile . Text.unpack $ NonEmpty.last fragments + dirPath <- mapM (Path.parseRelDir . Text.unpack) . NonEmpty.init $ fragments pure $ constructRelative name dirPath constructRelative :: Path Rel t -> [Path Rel Dir] -> Path Rel t diff --git a/src/Ubc/Parse/Syntax/Import.hs-boot b/src/Ubc/Parse/Syntax/Import.hs-boot index 2878d0a..fd7b1ac 100644 --- a/src/Ubc/Parse/Syntax/Import.hs-boot +++ b/src/Ubc/Parse/Syntax/Import.hs-boot @@ -6,10 +6,10 @@ where import Text.Parsec (ParsecT) -import Ubc.Parse.Syntax (Transformer) +import Ubc.Parse.Syntax (Transformer, Token) data Import instance Show Import -parse :: ParsecT String u Transformer Import +parse :: ParsecT Token u Transformer Import diff --git a/src/Ubc/Parse/Syntax/Language.hs b/src/Ubc/Parse/Syntax/Language.hs index b82eb97..bc9b679 100644 --- a/src/Ubc/Parse/Syntax/Language.hs +++ b/src/Ubc/Parse/Syntax/Language.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NoMonomorphismRestriction #-} module Ubc.Parse.Syntax.Language ( languageDef @@ -33,19 +34,24 @@ module Ubc.Parse.Syntax.Language ) where -import Data.Functor ( ($>) ) +import Data.Functor ( ($>), (<&>) ) +import qualified Data.Text as Text + import Text.Parsec ( alphaNum, char, letter, oneOf, choice, (<|>), ParsecT ) import Text.Parsec.Token ( makeTokenParser, GenLanguageDef(..), - GenTokenParser(TokenParser, identifier, reserved, operator, - reservedOp, charLiteral, stringLiteral, natural, integer, float, - naturalOrFloat, decimal, hexadecimal, octal, symbol, lexeme, - whiteSpace, parens, braces, angles, brackets, semi, comma, colon, - dot, semiSep, semiSep1, commaSep, commaSep1) ) + GenTokenParser(TokenParser, + charLiteral, natural, integer, float, + naturalOrFloat, decimal, hexadecimal, octal, lexeme, + whiteSpace, parens, braces, angles, brackets, + semiSep, semiSep1, commaSep, commaSep1) ) +import qualified Text.Parsec.Token as Token -languageDef :: Monad m => GenLanguageDef String u m +import Ubc.Parse.Syntax (Token) + +languageDef :: Monad m => GenLanguageDef Token u m languageDef = LanguageDef { commentStart = "/*" , commentEnd = "*/" @@ -92,44 +98,29 @@ languageDef = LanguageDef { , caseSensitive = True } -tokenParser :: Monad m => GenTokenParser String u m +tokenParser :: Monad m => GenTokenParser Token u m tokenParser = makeTokenParser languageDef -identifier :: Monad m => ParsecT String u m String -reserved :: Monad m => String -> ParsecT String u m () -operator :: Monad m => ParsecT String u m String -reservedOperator :: Monad m => String -> ParsecT String u m () -characterLiteral :: Monad m => ParsecT String u m Char -stringLiteral :: Monad m => ParsecT String u m String -natural :: Monad m => ParsecT String u m Integer -integer :: Monad m => ParsecT String u m Integer -float :: Monad m => ParsecT String u m Double -naturalOrFloat :: Monad m => ParsecT String u m (Either Integer Double) -decimal :: Monad m => ParsecT String u m Integer -hexadecimal :: Monad m => ParsecT String u m Integer -octal :: Monad m => ParsecT String u m Integer -symbol :: Monad m => String -> ParsecT String u m String -lexeme :: Monad m => ParsecT String u m a -> ParsecT String u m a -whiteSpace :: Monad m => ParsecT String u m () -parens :: Monad m => ParsecT String u m a -> ParsecT String u m a -braces :: Monad m => ParsecT String u m a -> ParsecT String u m a -angles :: Monad m => ParsecT String u m a -> ParsecT String u m a -brackets :: Monad m => ParsecT String u m a -> ParsecT String u m a -semicolon :: Monad m => ParsecT String u m String -comma :: Monad m => ParsecT String u m String -colon :: Monad m => ParsecT String u m String -dot :: Monad m => ParsecT String u m String -semicolonSeparated :: Monad m => ParsecT String u m a -> ParsecT String u m [a] -semicolonSeparated1 :: Monad m => ParsecT String u m a -> ParsecT String u m [a] -commaSeparated :: Monad m => ParsecT String u m a -> ParsecT String u m [a] -commaSeparated1 :: Monad m => ParsecT String u m a -> ParsecT String u m [a] +characterLiteral :: Monad m => ParsecT Token u m Char +natural :: Monad m => ParsecT Token u m Integer +integer :: Monad m => ParsecT Token u m Integer +float :: Monad m => ParsecT Token u m Double +naturalOrFloat :: Monad m => ParsecT Token u m (Either Integer Double) +decimal :: Monad m => ParsecT Token u m Integer +hexadecimal :: Monad m => ParsecT Token u m Integer +octal :: Monad m => ParsecT Token u m Integer +lexeme :: Monad m => ParsecT Token u m a -> ParsecT Token u m a +whiteSpace :: Monad m => ParsecT Token u m () +parens :: Monad m => ParsecT Token u m a -> ParsecT Token u m a +braces :: Monad m => ParsecT Token u m a -> ParsecT Token u m a +angles :: Monad m => ParsecT Token u m a -> ParsecT Token u m a +brackets :: Monad m => ParsecT Token u m a -> ParsecT Token u m a +semicolonSeparated :: Monad m => ParsecT Token u m a -> ParsecT Token u m [a] +semicolonSeparated1 :: Monad m => ParsecT Token u m a -> ParsecT Token u m [a] +commaSeparated :: Monad m => ParsecT Token u m a -> ParsecT Token u m [a] +commaSeparated1 :: Monad m => ParsecT Token u m a -> ParsecT Token u m [a] TokenParser{ - identifier = identifier - , reserved = reserved - , operator = operator - , reservedOp = reservedOperator - , charLiteral = characterLiteral - , stringLiteral = stringLiteral + charLiteral = characterLiteral , natural = natural -- decimal, hexadecimal or octal , integer = integer -- decimal, hexadecimal or octal , float = float @@ -137,24 +128,49 @@ TokenParser{ , decimal = decimal , hexadecimal = hexadecimal , octal = octal - , symbol = symbol , lexeme = lexeme , whiteSpace = whiteSpace , parens = parens , braces = braces , angles = angles , brackets = brackets - , semi = semicolon - , comma = comma - , colon = colon - , dot = dot , semiSep = semicolonSeparated , semiSep1 = semicolonSeparated1 , commaSep = commaSeparated , commaSep1 = commaSeparated1 } = tokenParser -typeName :: Monad m => ParsecT String u m String +semicolon :: Monad m => ParsecT Token u m Token +semicolon = Token.semi tokenParser <&> Text.pack + +comma :: Monad m => ParsecT Token u m Token +comma = Token.comma tokenParser <&> Text.pack + +colon :: Monad m => ParsecT Token u m Token +colon = Token.colon tokenParser <&> Text.pack + +dot :: Monad m => ParsecT Token u m Token +dot = Token.dot tokenParser <&> Text.pack + +symbol :: Monad m => String -> ParsecT Token u m Token +symbol = (<&> Text.pack) . Token.symbol tokenParser + +stringLiteral :: Monad m => ParsecT Token u m Token +stringLiteral = Token.stringLiteral tokenParser <&> Text.pack + +identifier :: Monad m => ParsecT Token u m Token +identifier = Token.identifier tokenParser <&> Text.pack + +reserved :: Monad m => String -> ParsecT Token u m () +reserved = Token.reserved tokenParser + +operator :: Monad m => ParsecT Token u m Token +operator = Token.operator tokenParser <&> Text.pack + +reservedOperator :: Monad m => String -> ParsecT Token u m () +reservedOperator = Token.reservedOp tokenParser + +typeName :: Monad m => ParsecT Token u m Token typeName = choice [ Ubc.Parse.Syntax.Language.reserved "i32" $> "i32" , Ubc.Parse.Syntax.Language.reserved "u32" $> "u32" diff --git a/src/Ubc/Parse/Syntax/Statement.hs b/src/Ubc/Parse/Syntax/Statement.hs index 20a3231..9f08952 100644 --- a/src/Ubc/Parse/Syntax/Statement.hs +++ b/src/Ubc/Parse/Syntax/Statement.hs @@ -1,12 +1,12 @@ +{-# LANGUAGE OverloadedStrings #-} module Ubc.Parse.Syntax.Statement ( Statement(..) , parse -, blockExpression ) where import Ubc.Parse.Syntax.VariableType (VariableType) -import {-# SOURCE #-} Ubc.Parse.Syntax.Expression (expressionParser, Expression (Block)) +import {-# SOURCE #-} Ubc.Parse.Syntax.Expression (expressionParser, Expression,) import Ubc.Parse.Syntax.TypeExpression (TypeExpression) import Text.Parsec (choice, ParsecT, try, many) import qualified Ubc.Parse.Syntax.Language as UbcLanguage @@ -14,22 +14,23 @@ import qualified Ubc.Parse.Syntax.VariableType as VariableType import qualified Ubc.Parse.Syntax.TypeExpression as TypeExpression import Control.Monad ((<$!>)) import Data.Functor ((<&>)) +import Ubc.Parse.Syntax (Token) -type VariableName = String -type TypeName = String +type VariableName = Token +type TypeName = Token data Statement = VariableDefinition VariableType VariableName Expression | TypeDefinition TypeName TypeExpression | ExpressionStatement Expression deriving (Show) -parse :: Monad m => ParsecT String u m Statement +parse :: Monad m => ParsecT Token u m Statement parse = choice [ variableDefinition , typeDefinition , ExpressionStatement <$!> expressionParser ] -typeDefinition :: Monad m => ParsecT String u m Statement +typeDefinition :: Monad m => ParsecT Token u m Statement typeDefinition = do UbcLanguage.reserved "type" @@ -40,7 +41,7 @@ typeDefinition = do TypeDefinition name <$> TypeExpression.parseTypeExpression -variableDefinition :: Monad m => ParsecT String u m Statement +variableDefinition :: Monad m => ParsecT Token u m Statement variableDefinition = do (variableType, variableName) <- try $ do variableType <- UbcLanguage.typeName @@ -49,6 +50,3 @@ variableDefinition = do return (VariableType.fromString variableType, variableName) VariableDefinition variableType variableName <$> expressionParser - -blockExpression :: Monad m => ParsecT String u m Expression -blockExpression = UbcLanguage.braces (many parse) <&> Block diff --git a/src/Ubc/Parse/Syntax/Struct.hs b/src/Ubc/Parse/Syntax/Struct.hs index a3d1556..5631351 100644 --- a/src/Ubc/Parse/Syntax/Struct.hs +++ b/src/Ubc/Parse/Syntax/Struct.hs @@ -1,5 +1,6 @@ {-# LANGUAGE DerivingVia #-} {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} module Ubc.Parse.Syntax.Struct ( Struct(..) , parse @@ -26,11 +27,12 @@ import qualified Ubc.Parse.Syntax.Language as UbcLanguage import qualified Ubc.Parse.Syntax.VariableType as VariableType import qualified Ubc.Parse.Syntax.Function as Function import qualified Ubc.Parse.Syntax.Generic as Syntax.Generic +import Ubc.Parse.Syntax (Token) -type VariableName = String +type VariableName = Token data Struct = Struct - { name :: String + { name :: Token , generics :: [Syntax.Generic.Generic] , body :: StructBody } @@ -43,7 +45,7 @@ data StructBody = StructBody deriving stock (Generic, Show) deriving (Semigroup, Monoid) via Generically StructBody -parse :: Monad m => ParsecT String u m Struct +parse :: Monad m => ParsecT Token u m Struct parse = do _ <- UbcLanguage.reserved "struct" structIdentifier <- UbcLanguage.identifier @@ -52,10 +54,10 @@ parse = do pure $ Struct structIdentifier structGenerics structBody -structMember :: Monad m => ParsecT String u m StructBody +structMember :: Monad m => ParsecT Token u m StructBody structMember = choice [ structVariableOrFunction ] -structVariableOrFunction :: Monad m => ParsecT String u m StructBody +structVariableOrFunction :: Monad m => ParsecT Token u m StructBody structVariableOrFunction = do (typeName, identifier) <- try $ do typeName <- UbcLanguage.typeName @@ -66,7 +68,7 @@ structVariableOrFunction = do , (\ f -> mempty { functions = [f] }) <$!> Function.parsePrefixed typeName identifier ] -parseVariable :: Monad m => VariableType -> String -> ParsecT String u m StructBody +parseVariable :: Monad m => VariableType -> Token -> ParsecT Token u m StructBody parseVariable variableType variableName = do _ <- UbcLanguage.semicolon return $ mempty { variables = [(variableName, variableType)] } diff --git a/src/Ubc/Parse/Syntax/TypeExpression.hs b/src/Ubc/Parse/Syntax/TypeExpression.hs index ad0db90..ce6219d 100644 --- a/src/Ubc/Parse/Syntax/TypeExpression.hs +++ b/src/Ubc/Parse/Syntax/TypeExpression.hs @@ -14,13 +14,14 @@ import Ubc.Parse.Syntax.Struct (Struct) import qualified Ubc.Parse.Syntax.Language as UbcLanguage import qualified Ubc.Parse.Syntax.VariableType as VariableType import qualified Ubc.Parse.Syntax.Struct as Struct +import Ubc.Parse.Syntax (Token) data TypeExpression = TypeAlias VariableType | StructExpression Struct deriving (Show) -parseTypeExpression :: Monad m => ParsecT String u m TypeExpression +parseTypeExpression :: Monad m => ParsecT Token u m TypeExpression parseTypeExpression = choice [ TypeAlias . VariableType.fromString <$!> UbcLanguage.typeName , StructExpression <$!> Struct.parse diff --git a/src/Ubc/Parse/Syntax/VariableType.hs b/src/Ubc/Parse/Syntax/VariableType.hs index bc71b95..273d2ec 100644 --- a/src/Ubc/Parse/Syntax/VariableType.hs +++ b/src/Ubc/Parse/Syntax/VariableType.hs @@ -1,16 +1,18 @@ +{-# LANGUAGE OverloadedStrings #-} module Ubc.Parse.Syntax.VariableType (VariableType(..) , fromString ) where +import Ubc.Parse.Syntax (Token) data VariableType = BuiltInI32 | BuiltInU32 | BuiltInF32 - | UserStruct String + | UserStruct Token deriving (Show, Eq) -fromString :: String -> VariableType +fromString :: Token -> VariableType fromString "i32" = BuiltInI32 fromString "u32" = BuiltInU32 fromString "f32" = BuiltInF32 diff --git a/ubcc.cabal b/ubcc.cabal index 565ed48..ee308a7 100644 --- a/ubcc.cabal +++ b/ubcc.cabal @@ -56,6 +56,7 @@ library , os-string , parsec , path + , text , transformers default-language: Haskell2010 @@ -78,6 +79,7 @@ executable ubcc-exe , os-string , parsec , path + , text , transformers , ubcc default-language: Haskell2010 @@ -102,6 +104,7 @@ test-suite ubcc-test , os-string , parsec , path + , text , transformers , ubcc default-language: Haskell2010