2023-10-24 20:08:10 +00:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
|
|
|
|
module Language.Yanais.Parser (
|
|
|
|
Text,
|
|
|
|
Parser(..),
|
|
|
|
ParserEnv(..),
|
|
|
|
makeParseEnv,
|
|
|
|
takeUntil,
|
|
|
|
takeWhile,
|
2023-10-24 20:21:23 +00:00
|
|
|
takeIdent,
|
|
|
|
takeWithProperty,
|
|
|
|
skipWhiteSpace,
|
|
|
|
tryOne
|
2023-10-24 20:08:10 +00:00
|
|
|
) where
|
|
|
|
|
|
|
|
import Prelude hiding (takeWhile, span, splitAt)
|
|
|
|
import Control.Monad.Trans.State.Strict
|
|
|
|
import qualified Data.ByteString.Char8 as C
|
|
|
|
import qualified Data.ByteString.UTF8 as B
|
|
|
|
import qualified Data.String.UTF8 as U
|
|
|
|
import qualified Data.Text as T
|
|
|
|
import qualified Data.Text.ICU.Char as IC
|
|
|
|
import qualified Data.Text.ICU.Normalize2 as IN
|
|
|
|
-- import Language.Yanais.Fusion
|
|
|
|
|
|
|
|
-- a generic parser monad
|
|
|
|
newtype Parser s e a = Parser { runP :: StateT s (Either e) a }
|
|
|
|
|
|
|
|
instance Functor (Parser s e) where
|
|
|
|
fmap f = Parser . (fmap f) . runP
|
|
|
|
{-# INLINE fmap #-}
|
|
|
|
|
|
|
|
instance Applicative (Parser s e) where
|
|
|
|
pure = Parser . pure
|
|
|
|
{-# INLINE pure #-}
|
|
|
|
Parser a <*> Parser b = Parser $ a <*> b
|
|
|
|
{-# INLINE (<*>) #-}
|
|
|
|
|
|
|
|
instance Monad (Parser s e) where
|
|
|
|
return = pure
|
|
|
|
Parser a >>= fb = Parser $ a >>= (runP . fb)
|
|
|
|
{-# INLINE (>>=) #-}
|
|
|
|
|
|
|
|
type Text = U.UTF8 B.ByteString
|
|
|
|
|
|
|
|
data ParserEnv = ParserEnv {
|
|
|
|
peOffset :: !Int,
|
|
|
|
peText :: Text
|
|
|
|
}
|
|
|
|
|
|
|
|
makeParseEnv :: B.ByteString -> ParserEnv
|
|
|
|
makeParseEnv bs = ParserEnv { peOffset = 0, peText = U.fromRep bs }
|
|
|
|
|
|
|
|
-- some simple parser combinators
|
|
|
|
|
|
|
|
shiftEnv :: (Int, Text) -> Parser ParserEnv e ()
|
|
|
|
shiftEnv (ll, r) = Parser $ modify go
|
|
|
|
where
|
|
|
|
go :: ParserEnv -> ParserEnv
|
|
|
|
go st = st { peOffset = (peOffset st) + ll, peText = r }
|
|
|
|
|
|
|
|
slen :: Text -> Int
|
|
|
|
slen = C.length . U.toRep
|
|
|
|
|
|
|
|
takeUntil :: (Char -> Bool) -> Parser ParserEnv e Text
|
|
|
|
takeUntil f = takeWhile (not . f)
|
|
|
|
|
|
|
|
takeWhile :: (Char -> Bool) -> Parser ParserEnv e Text
|
|
|
|
takeWhile f = do
|
|
|
|
env <- Parser $ get
|
|
|
|
let (l, r) = U.span f (peText env)
|
|
|
|
shiftEnv (slen l, r)
|
|
|
|
return l
|
|
|
|
|
2023-10-24 20:21:23 +00:00
|
|
|
takeWithProperty :: IC.Bool_ -> Parser ParserEnv e Text
|
|
|
|
takeWithProperty p = takeWhile $ IC.property p
|
|
|
|
|
2023-10-24 20:08:10 +00:00
|
|
|
takeIdent :: Parser ParserEnv e (Maybe Text)
|
|
|
|
takeIdent = do
|
|
|
|
env <- Parser $ get
|
|
|
|
let (l, r) = U.splitAt 1 (peText env)
|
|
|
|
case U.uncons l of
|
|
|
|
Nothing -> return Nothing
|
|
|
|
Just (fi, r2) -> (
|
|
|
|
(if not ((slen r2) == 0) then error "takeIdent : unable to iterator over characters" else ())
|
|
|
|
`seq`
|
|
|
|
(if IC.property IC.XidStart fi then (
|
|
|
|
do
|
|
|
|
shiftEnv (slen l, r)
|
2023-10-24 20:21:23 +00:00
|
|
|
rest <- takeWithProperty IC.XidContinue
|
2023-10-24 20:08:10 +00:00
|
|
|
-- I hate this...
|
|
|
|
return (Just . U.fromRep . B.fromString . T.unpack . IN.nfc . T.pack $ fi:(B.toString $ U.toRep rest))
|
|
|
|
) else return Nothing))
|
2023-10-24 20:21:23 +00:00
|
|
|
|
|
|
|
skipWhiteSpace :: Parser ParserEnv e ()
|
|
|
|
skipWhiteSpace = takeWithProperty IC.WhiteSpace >> pure ()
|
|
|
|
|
|
|
|
-- try to parse the first character as a token
|
|
|
|
tryOne :: (Char -> Maybe tok) -> Parser ParserEnv e (Maybe tok)
|
|
|
|
tryOne f = do
|
|
|
|
env <- Parser $ get
|
|
|
|
let (l, r) = U.splitAt 1 (peText env)
|
|
|
|
case U.uncons l of
|
|
|
|
Nothing -> return Nothing
|
|
|
|
Just (fi, r2) -> (
|
|
|
|
(if not ((slen r2) == 0) then error "tryOne : unable to iterator over characters" else ())
|
|
|
|
`seq`
|
|
|
|
(shiftEnv (slen l, r) >> return (f fi)))
|