Parser combinators, JSON Parser
This commit is contained in:
commit
b078a62ffa
13 changed files with 777 additions and 0 deletions
210
src/Language/Json.hs
Normal file
210
src/Language/Json.hs
Normal file
|
@ -0,0 +1,210 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE LambdaCase #-}
|
||||
{-# LANGUAGE TypeApplications #-}
|
||||
module Language.Json (Value(..), null, true, false, boolean, buildInteger, number, document) where
|
||||
import Data.Text (Text)
|
||||
import Data.Array.IArray (Array)
|
||||
import Numeric.Natural (Natural)
|
||||
import Data.Map.Strict (Map)
|
||||
import Language.Json.Parser (Parser)
|
||||
import qualified Language.Json.Parser as Parser
|
||||
|
||||
import Prelude hiding (exponent, null)
|
||||
import Control.Applicative ((<|>), Alternative (many), some)
|
||||
import qualified Data.Char as Char
|
||||
import qualified Data.List as List
|
||||
import Data.Functor (($>))
|
||||
import Data.Ratio ((%))
|
||||
import qualified Data.Maybe as Maybe
|
||||
import qualified Data.Text as Text
|
||||
import qualified Data.Text.Internal.Read as Text
|
||||
import qualified Debug.Trace as Debug
|
||||
import qualified Data.Array.IArray as Array
|
||||
import qualified Data.Map.Strict as Map
|
||||
|
||||
|
||||
data Value
|
||||
= Null
|
||||
| String Text
|
||||
| Boolean Bool
|
||||
| Number Rational
|
||||
| Array (Array Natural Value)
|
||||
| Object (Map Text Value)
|
||||
deriving Show
|
||||
|
||||
null :: Parser Value
|
||||
null = do
|
||||
Parser.exact "null"
|
||||
pure Null
|
||||
|
||||
true :: Parser Bool
|
||||
true = do
|
||||
Parser.exact "true"
|
||||
pure True
|
||||
|
||||
false :: Parser Bool
|
||||
false = do
|
||||
Parser.exact "false"
|
||||
pure False
|
||||
|
||||
boolean :: Parser Bool
|
||||
boolean = true <|> false
|
||||
|
||||
digit :: Parser Int
|
||||
digit = Char.digitToInt <$> Parser.oneOf (Parser.char <$> ['0'..'9'])
|
||||
|
||||
onenine :: Parser Int
|
||||
onenine = Char.digitToInt <$> Parser.oneOf (Parser.char <$> ['1'..'9'])
|
||||
|
||||
multiDigitNumber :: Parser Natural
|
||||
multiDigitNumber = do
|
||||
firstDigit <- onenine
|
||||
rest <- many digit
|
||||
let allDigits = firstDigit : rest
|
||||
pure $! buildInteger allDigits
|
||||
|
||||
buildInteger :: [Int] -> Natural
|
||||
buildInteger = List.foldl (\ acc x -> acc * 10 + fromIntegral x) 0
|
||||
|
||||
fraction :: Parser (Maybe Natural)
|
||||
fraction = Parser.optional (buildInteger <$> (Parser.char '.' *> some digit))
|
||||
|
||||
data Sign
|
||||
= Positive
|
||||
| Negative
|
||||
|
||||
applySign :: Integral a => Sign -> a -> a
|
||||
applySign = \case
|
||||
Positive -> id
|
||||
Negative -> negate
|
||||
|
||||
sign :: Parser Sign
|
||||
sign = Parser.oneOf
|
||||
[ Parser.char '+' $> Positive
|
||||
, Parser.char '-' $> Negative
|
||||
, pure Positive
|
||||
]
|
||||
|
||||
exponent :: Parser (Maybe Integer)
|
||||
exponent = Parser.optional $ do
|
||||
_ <- Parser.oneOf (Parser.char <$> ['e', 'E'])
|
||||
applySign <$> sign <*> (fromIntegral <$> multiDigitNumber)
|
||||
|
||||
number :: Parser Rational
|
||||
number = do
|
||||
factor <- sign
|
||||
integerPart <- multiDigitNumber <|> fromIntegral <$> digit
|
||||
decimalPart <- fraction
|
||||
power <- exponent
|
||||
|
||||
let concatRational = case decimalPart of
|
||||
Nothing -> fromIntegral integerPart
|
||||
Just part -> let
|
||||
decimalPartLength = length . show $ part
|
||||
in fromIntegral (integerPart * 10 ^ decimalPartLength + part) % 10 ^ decimalPartLength
|
||||
|
||||
pure $ concatRational * fromIntegral @Integer (applySign factor (10 ^ Maybe.fromMaybe 0 power))
|
||||
|
||||
-- >>> import Language.Json.Parser
|
||||
-- >>> runParser number "1"
|
||||
-- Just ("",Number (1 % 1))
|
||||
-- >>> runParser number "1.5"
|
||||
-- Just ("",Number (3 % 5))
|
||||
-- >>> runParser number "1.5e1"
|
||||
-- Just ("",Number (6 % 1))
|
||||
|
||||
string :: Parser Text
|
||||
string = do
|
||||
_ <- Parser.exact "\""
|
||||
str <- many stringChar
|
||||
_ <- Parser.exact "\""
|
||||
|
||||
pure $ Text.pack str
|
||||
|
||||
stringChar :: Parser Char
|
||||
stringChar = Parser.oneOf
|
||||
[ Parser.char '\\' *> Parser.oneOf
|
||||
[ Parser.char '"'
|
||||
, Parser.char '\\'
|
||||
, Parser.char '/'
|
||||
, Parser.char 'b' $> '\b'
|
||||
, Parser.char 'f' $> '\f'
|
||||
, Parser.char 'n' $> '\b'
|
||||
, Parser.char 'r' $> '\r'
|
||||
, Parser.char 't' $> '\t'
|
||||
, Parser.char 'u' *> hexChar
|
||||
]
|
||||
, Parser.satisfies isAllowedCodepoint
|
||||
]
|
||||
|
||||
-- >>> import Language.Json.Parser
|
||||
-- >>> runParser string "\"\\u000A\""
|
||||
-- Just ("",String "\n")
|
||||
|
||||
buildBase16 :: String -> Int
|
||||
buildBase16 = List.foldl' (\ acc x -> acc * 16 + Text.hexDigitToInt x) 0
|
||||
|
||||
hexChar :: Parser Char
|
||||
hexChar = do
|
||||
digits <- 4 `Parser.times` Parser.satisfies Char.isHexDigit
|
||||
let base16 = buildBase16 $ Debug.traceShowId digits
|
||||
pure . toEnum $ Debug.traceShowId base16
|
||||
|
||||
isAllowedCodepoint :: Char -> Bool
|
||||
isAllowedCodepoint = \case
|
||||
'"' -> False
|
||||
'\\' -> False
|
||||
x -> not . Char.isControl $ x
|
||||
|
||||
-- >>> fromEnum 'A'
|
||||
-- 65
|
||||
-- >>> toEnum 10 :: Char
|
||||
-- '\n'
|
||||
|
||||
value :: Parser Value
|
||||
value = Parser.oneOf
|
||||
[ null
|
||||
, Array <$> array
|
||||
, Object <$> object
|
||||
, Number <$> number
|
||||
, String <$> string
|
||||
, Boolean <$> boolean
|
||||
]
|
||||
|
||||
document :: Parser Value
|
||||
document = whitespace *> value <* whitespace <* Parser.eof
|
||||
|
||||
keyvalue :: Parser (Text, Value)
|
||||
keyvalue = do
|
||||
name <- string
|
||||
|
||||
whitespace
|
||||
_ <- Parser.char ':'
|
||||
whitespace
|
||||
|
||||
val <- value
|
||||
pure (name, val)
|
||||
|
||||
object :: Parser (Map Text Value)
|
||||
object = do
|
||||
_ <- Parser.char '{'
|
||||
whitespace
|
||||
|
||||
members <- (keyvalue <* whitespace) `Parser.sepBy` (Parser.char ',' <* whitespace)
|
||||
|
||||
_ <- Parser.char '}'
|
||||
pure $ Map.fromList members
|
||||
|
||||
whitespace :: Parser ()
|
||||
whitespace = many (Parser.satisfies Char.isSpace) $> ()
|
||||
|
||||
array :: Parser (Array Natural Value)
|
||||
array = do
|
||||
_ <- Parser.char '['
|
||||
|
||||
whitespace
|
||||
elements <- (value <* whitespace) `Parser.sepBy` (Parser.char ',' <* whitespace)
|
||||
|
||||
_ <- Parser.char ']'
|
||||
pure $ Array.listArray (0, fromIntegral $ length elements - 1) elements
|
||||
|
97
src/Language/Json/Parser.hs
Normal file
97
src/Language/Json/Parser.hs
Normal file
|
@ -0,0 +1,97 @@
|
|||
{-# LANGUAGE InstanceSigs #-}
|
||||
{-# LANGUAGE LambdaCase #-}
|
||||
module Language.Json.Parser (runParser, Parser, exact, char, oneOf, optional, anyChar, satisfies, times, sepBy, eof) where
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as Text
|
||||
import Control.Applicative (Alternative (..), asum)
|
||||
import Data.Functor (($>))
|
||||
|
||||
newtype Parser a = Parser (Text -> Reply a)
|
||||
|
||||
data Reply a
|
||||
= Done Text a -- rest, result
|
||||
| Fail
|
||||
|
||||
instance Functor Reply where
|
||||
fmap :: (a -> b) -> Reply a -> Reply b
|
||||
fmap f = \case
|
||||
Done rest result -> Done rest (f result)
|
||||
Fail -> Fail
|
||||
|
||||
runParser :: Parser a -> Text -> Maybe (Text, a)
|
||||
runParser (Parser computeA) input = case computeA input of
|
||||
Done rest result -> Just (rest, result)
|
||||
Fail -> Nothing
|
||||
|
||||
instance Functor Parser where
|
||||
fmap :: (a -> b) -> Parser a -> Parser b
|
||||
fmap func (Parser computeA) = Parser (fmap func . computeA)
|
||||
|
||||
instance Applicative Parser where
|
||||
pure :: a -> Parser a
|
||||
pure a = Parser $ \ input -> Done input a
|
||||
(<*>) :: Parser (a -> b) -> Parser a -> Parser b
|
||||
(<*>) (Parser computeF) (Parser computeA) = Parser $ \ input -> case computeF input of
|
||||
Done rest f -> f <$> computeA rest
|
||||
Fail -> Fail
|
||||
|
||||
instance Monad Parser where
|
||||
(>>=) :: Parser a -> (a -> Parser b) -> Parser b
|
||||
(>>=) (Parser computeA) f = Parser $ \ input -> case computeA input of
|
||||
Done rest a -> let
|
||||
(Parser computeB) = f a
|
||||
in computeB rest
|
||||
Fail -> Fail
|
||||
|
||||
exact :: Text -> Parser ()
|
||||
exact prefix = Parser $ \ input -> case Text.stripPrefix prefix input of
|
||||
Just rest -> Done rest ()
|
||||
Nothing -> Fail
|
||||
|
||||
instance Alternative Parser where
|
||||
empty :: Parser a
|
||||
empty = Parser $ const Fail
|
||||
(<|>) :: Parser a -> Parser a -> Parser a
|
||||
(<|>) (Parser first) (Parser second) = Parser $ \ input -> case first input of
|
||||
Fail -> second input
|
||||
x -> x
|
||||
|
||||
char :: Char -> Parser Char
|
||||
char c = exact (Text.singleton c) $> c
|
||||
|
||||
oneOf :: [Parser a] -> Parser a
|
||||
oneOf = asum
|
||||
|
||||
optional :: Parser a -> Parser (Maybe a)
|
||||
optional parser = fmap Just parser <|> pure Nothing
|
||||
|
||||
anyChar :: Parser Char
|
||||
anyChar = Parser $ \ input -> case Text.uncons input of
|
||||
Just (c, rest) -> Done rest c
|
||||
Nothing -> Fail
|
||||
|
||||
satisfies :: (Char -> Bool) -> Parser Char
|
||||
satisfies f = do
|
||||
c <- anyChar
|
||||
if f c
|
||||
then pure c
|
||||
else empty
|
||||
|
||||
times :: Int -> Parser a -> Parser [a]
|
||||
times n body = go n
|
||||
where
|
||||
go 0 = pure []
|
||||
go i = do
|
||||
x <- body
|
||||
xs <- go $ pred i
|
||||
pure $ x:xs
|
||||
|
||||
sepBy :: Parser a -> Parser b -> Parser [a]
|
||||
sepBy p sep = do
|
||||
first <- p
|
||||
rest <- sep *> sepBy p sep <|> pure []
|
||||
|
||||
pure $ first : rest
|
||||
|
||||
eof :: Parser Bool
|
||||
eof = anyChar *> empty <|> pure True
|
Loading…
Add table
Add a link
Reference in a new issue