Parser combinators, JSON Parser

This commit is contained in:
vegowotenks 2025-08-21 19:30:00 +02:00
commit b078a62ffa
13 changed files with 777 additions and 0 deletions

210
src/Language/Json.hs Normal file
View file

@ -0,0 +1,210 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TypeApplications #-}
module Language.Json (Value(..), null, true, false, boolean, buildInteger, number, document) where
import Data.Text (Text)
import Data.Array.IArray (Array)
import Numeric.Natural (Natural)
import Data.Map.Strict (Map)
import Language.Json.Parser (Parser)
import qualified Language.Json.Parser as Parser
import Prelude hiding (exponent, null)
import Control.Applicative ((<|>), Alternative (many), some)
import qualified Data.Char as Char
import qualified Data.List as List
import Data.Functor (($>))
import Data.Ratio ((%))
import qualified Data.Maybe as Maybe
import qualified Data.Text as Text
import qualified Data.Text.Internal.Read as Text
import qualified Debug.Trace as Debug
import qualified Data.Array.IArray as Array
import qualified Data.Map.Strict as Map
data Value
= Null
| String Text
| Boolean Bool
| Number Rational
| Array (Array Natural Value)
| Object (Map Text Value)
deriving Show
null :: Parser Value
null = do
Parser.exact "null"
pure Null
true :: Parser Bool
true = do
Parser.exact "true"
pure True
false :: Parser Bool
false = do
Parser.exact "false"
pure False
boolean :: Parser Bool
boolean = true <|> false
digit :: Parser Int
digit = Char.digitToInt <$> Parser.oneOf (Parser.char <$> ['0'..'9'])
onenine :: Parser Int
onenine = Char.digitToInt <$> Parser.oneOf (Parser.char <$> ['1'..'9'])
multiDigitNumber :: Parser Natural
multiDigitNumber = do
firstDigit <- onenine
rest <- many digit
let allDigits = firstDigit : rest
pure $! buildInteger allDigits
buildInteger :: [Int] -> Natural
buildInteger = List.foldl (\ acc x -> acc * 10 + fromIntegral x) 0
fraction :: Parser (Maybe Natural)
fraction = Parser.optional (buildInteger <$> (Parser.char '.' *> some digit))
data Sign
= Positive
| Negative
applySign :: Integral a => Sign -> a -> a
applySign = \case
Positive -> id
Negative -> negate
sign :: Parser Sign
sign = Parser.oneOf
[ Parser.char '+' $> Positive
, Parser.char '-' $> Negative
, pure Positive
]
exponent :: Parser (Maybe Integer)
exponent = Parser.optional $ do
_ <- Parser.oneOf (Parser.char <$> ['e', 'E'])
applySign <$> sign <*> (fromIntegral <$> multiDigitNumber)
number :: Parser Rational
number = do
factor <- sign
integerPart <- multiDigitNumber <|> fromIntegral <$> digit
decimalPart <- fraction
power <- exponent
let concatRational = case decimalPart of
Nothing -> fromIntegral integerPart
Just part -> let
decimalPartLength = length . show $ part
in fromIntegral (integerPart * 10 ^ decimalPartLength + part) % 10 ^ decimalPartLength
pure $ concatRational * fromIntegral @Integer (applySign factor (10 ^ Maybe.fromMaybe 0 power))
-- >>> import Language.Json.Parser
-- >>> runParser number "1"
-- Just ("",Number (1 % 1))
-- >>> runParser number "1.5"
-- Just ("",Number (3 % 5))
-- >>> runParser number "1.5e1"
-- Just ("",Number (6 % 1))
string :: Parser Text
string = do
_ <- Parser.exact "\""
str <- many stringChar
_ <- Parser.exact "\""
pure $ Text.pack str
stringChar :: Parser Char
stringChar = Parser.oneOf
[ Parser.char '\\' *> Parser.oneOf
[ Parser.char '"'
, Parser.char '\\'
, Parser.char '/'
, Parser.char 'b' $> '\b'
, Parser.char 'f' $> '\f'
, Parser.char 'n' $> '\b'
, Parser.char 'r' $> '\r'
, Parser.char 't' $> '\t'
, Parser.char 'u' *> hexChar
]
, Parser.satisfies isAllowedCodepoint
]
-- >>> import Language.Json.Parser
-- >>> runParser string "\"\\u000A\""
-- Just ("",String "\n")
buildBase16 :: String -> Int
buildBase16 = List.foldl' (\ acc x -> acc * 16 + Text.hexDigitToInt x) 0
hexChar :: Parser Char
hexChar = do
digits <- 4 `Parser.times` Parser.satisfies Char.isHexDigit
let base16 = buildBase16 $ Debug.traceShowId digits
pure . toEnum $ Debug.traceShowId base16
isAllowedCodepoint :: Char -> Bool
isAllowedCodepoint = \case
'"' -> False
'\\' -> False
x -> not . Char.isControl $ x
-- >>> fromEnum 'A'
-- 65
-- >>> toEnum 10 :: Char
-- '\n'
value :: Parser Value
value = Parser.oneOf
[ null
, Array <$> array
, Object <$> object
, Number <$> number
, String <$> string
, Boolean <$> boolean
]
document :: Parser Value
document = whitespace *> value <* whitespace <* Parser.eof
keyvalue :: Parser (Text, Value)
keyvalue = do
name <- string
whitespace
_ <- Parser.char ':'
whitespace
val <- value
pure (name, val)
object :: Parser (Map Text Value)
object = do
_ <- Parser.char '{'
whitespace
members <- (keyvalue <* whitespace) `Parser.sepBy` (Parser.char ',' <* whitespace)
_ <- Parser.char '}'
pure $ Map.fromList members
whitespace :: Parser ()
whitespace = many (Parser.satisfies Char.isSpace) $> ()
array :: Parser (Array Natural Value)
array = do
_ <- Parser.char '['
whitespace
elements <- (value <* whitespace) `Parser.sepBy` (Parser.char ',' <* whitespace)
_ <- Parser.char ']'
pure $ Array.listArray (0, fromIntegral $ length elements - 1) elements

View file

@ -0,0 +1,97 @@
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE LambdaCase #-}
module Language.Json.Parser (runParser, Parser, exact, char, oneOf, optional, anyChar, satisfies, times, sepBy, eof) where
import Data.Text (Text)
import qualified Data.Text as Text
import Control.Applicative (Alternative (..), asum)
import Data.Functor (($>))
newtype Parser a = Parser (Text -> Reply a)
data Reply a
= Done Text a -- rest, result
| Fail
instance Functor Reply where
fmap :: (a -> b) -> Reply a -> Reply b
fmap f = \case
Done rest result -> Done rest (f result)
Fail -> Fail
runParser :: Parser a -> Text -> Maybe (Text, a)
runParser (Parser computeA) input = case computeA input of
Done rest result -> Just (rest, result)
Fail -> Nothing
instance Functor Parser where
fmap :: (a -> b) -> Parser a -> Parser b
fmap func (Parser computeA) = Parser (fmap func . computeA)
instance Applicative Parser where
pure :: a -> Parser a
pure a = Parser $ \ input -> Done input a
(<*>) :: Parser (a -> b) -> Parser a -> Parser b
(<*>) (Parser computeF) (Parser computeA) = Parser $ \ input -> case computeF input of
Done rest f -> f <$> computeA rest
Fail -> Fail
instance Monad Parser where
(>>=) :: Parser a -> (a -> Parser b) -> Parser b
(>>=) (Parser computeA) f = Parser $ \ input -> case computeA input of
Done rest a -> let
(Parser computeB) = f a
in computeB rest
Fail -> Fail
exact :: Text -> Parser ()
exact prefix = Parser $ \ input -> case Text.stripPrefix prefix input of
Just rest -> Done rest ()
Nothing -> Fail
instance Alternative Parser where
empty :: Parser a
empty = Parser $ const Fail
(<|>) :: Parser a -> Parser a -> Parser a
(<|>) (Parser first) (Parser second) = Parser $ \ input -> case first input of
Fail -> second input
x -> x
char :: Char -> Parser Char
char c = exact (Text.singleton c) $> c
oneOf :: [Parser a] -> Parser a
oneOf = asum
optional :: Parser a -> Parser (Maybe a)
optional parser = fmap Just parser <|> pure Nothing
anyChar :: Parser Char
anyChar = Parser $ \ input -> case Text.uncons input of
Just (c, rest) -> Done rest c
Nothing -> Fail
satisfies :: (Char -> Bool) -> Parser Char
satisfies f = do
c <- anyChar
if f c
then pure c
else empty
times :: Int -> Parser a -> Parser [a]
times n body = go n
where
go 0 = pure []
go i = do
x <- body
xs <- go $ pred i
pure $ x:xs
sepBy :: Parser a -> Parser b -> Parser [a]
sepBy p sep = do
first <- p
rest <- sep *> sepBy p sep <|> pure []
pure $ first : rest
eof :: Parser Bool
eof = anyChar *> empty <|> pure True