{-# LANGUAGE OverloadedStrings #-} module Language.Yanais.Parser ( Text, Parser(..), ParserEnv(..), makeParseEnv, takeUntil, takeWhile, takeIdent ) where import Prelude hiding (takeWhile, span, splitAt) import Control.Monad.Trans.State.Strict import qualified Data.ByteString.Char8 as C import qualified Data.ByteString.UTF8 as B import qualified Data.String.UTF8 as U import qualified Data.Text as T import qualified Data.Text.ICU.Char as IC import qualified Data.Text.ICU.Normalize2 as IN -- import Language.Yanais.Fusion -- a generic parser monad newtype Parser s e a = Parser { runP :: StateT s (Either e) a } instance Functor (Parser s e) where fmap f = Parser . (fmap f) . runP {-# INLINE fmap #-} instance Applicative (Parser s e) where pure = Parser . pure {-# INLINE pure #-} Parser a <*> Parser b = Parser $ a <*> b {-# INLINE (<*>) #-} instance Monad (Parser s e) where return = pure Parser a >>= fb = Parser $ a >>= (runP . fb) {-# INLINE (>>=) #-} type Text = U.UTF8 B.ByteString data ParserEnv = ParserEnv { peOffset :: !Int, peText :: Text } makeParseEnv :: B.ByteString -> ParserEnv makeParseEnv bs = ParserEnv { peOffset = 0, peText = U.fromRep bs } -- some simple parser combinators shiftEnv :: (Int, Text) -> Parser ParserEnv e () shiftEnv (ll, r) = Parser $ modify go where go :: ParserEnv -> ParserEnv go st = st { peOffset = (peOffset st) + ll, peText = r } slen :: Text -> Int slen = C.length . U.toRep takeUntil :: (Char -> Bool) -> Parser ParserEnv e Text takeUntil f = takeWhile (not . f) takeWhile :: (Char -> Bool) -> Parser ParserEnv e Text takeWhile f = do env <- Parser $ get let (l, r) = U.span f (peText env) shiftEnv (slen l, r) return l takeIdent :: Parser ParserEnv e (Maybe Text) takeIdent = do env <- Parser $ get let (l, r) = U.splitAt 1 (peText env) case U.uncons l of Nothing -> return Nothing Just (fi, r2) -> ( (if not ((slen r2) == 0) then error "takeIdent : unable to iterator over characters" else ()) `seq` (if IC.property IC.XidStart fi then ( do shiftEnv (slen l, r) rest <- takeWhile (IC.property IC.XidContinue) -- I hate this... return (Just . U.fromRep . B.fromString . T.unpack . IN.nfc . T.pack $ fi:(B.toString $ U.toRep rest)) ) else return Nothing))