yanais/haskell/nfker7h/lib/Language/Yanais/Parser.hs

184 lines
5 KiB
Haskell

{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
module Language.Yanais.Parser (
Text,
Ident(..),
Parser(..),
Parser',
ParserEnv(..),
Span(..),
HandleTree(..),
makeParseEnv,
runParser,
takeUntil,
takeWhile,
takeIdent,
takeWithProperty,
skipWhiteSpace,
tryOne,
eats,
recordSpan,
) where
import Prelude hiding (takeWhile, span, splitAt)
import Control.Applicative ((<|>))
import Control.DeepSeq (NFData)
import Control.Monad.State.Strict
import Data.Hashable (Hashable)
import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.UTF8 as B
import qualified Data.HashMap.Strict as H
import qualified Data.String.UTF8 as U
import qualified Data.Text as T
import qualified Data.Text.ICU.Char as IC
import qualified Data.Text.ICU.Normalize2 as IN
-- import Language.Yanais.Fusion
import Generic.Data
-- A 0-indexed, half-open interval of integers, defined by start & end indices
data Span = Span
{ start :: {-# UNPACK #-} !Int
, end :: {-# UNPACK #-} !Int
}
deriving (Eq, Generic, Ord, Show)
instance Hashable Span
instance NFData Span
instance Semigroup Span where
Span start1 end1 <> Span start2 end2 = Span (min start1 start2) (max end1 end2)
-- a generic parser monad
newtype Parser s e a = Parser { runP :: StateT s (Either e) a }
instance Functor (Parser s e) where
fmap f = Parser . (fmap f) . runP
{-# INLINE fmap #-}
instance Applicative (Parser s e) where
pure = Parser . pure
{-# INLINE pure #-}
Parser a <*> Parser b = Parser $ a <*> b
{-# INLINE (<*>) #-}
instance Monad (Parser s e) where
return = pure
Parser a >>= fb = Parser $ a >>= (runP . fb)
{-# INLINE (>>=) #-}
instance MonadState s (Parser s e) where
state = Parser . state
{-# INLINE state #-}
type Text = U.UTF8 B.ByteString
data Ident = Ident !Span !Text
instance Show Ident where
show (Ident _ t) = C.unpack (U.toRep t)
data ParserEnv = ParserEnv {
peOffset :: !Int,
peText :: Text
}
type Parser' e a = Parser ParserEnv e a
makeParseEnv :: B.ByteString -> ParserEnv
makeParseEnv bs = ParserEnv { peOffset = 0, peText = U.fromRep bs }
runParser :: s -> Parser s e a -> Either e (a, s)
runParser st (Parser p) = runStateT p st
-- some simple parser combinators
shiftEnv :: (Int, Text) -> Parser' e ()
shiftEnv (ll, r) = Parser $ modify go
where
go :: ParserEnv -> ParserEnv
go st = st { peOffset = (peOffset st) + ll, peText = r }
slen :: Text -> Int
slen = C.length . U.toRep
takeUntil :: (Char -> Bool) -> Parser' e Text
takeUntil f = takeWhile (not . f)
takeWhile :: (Char -> Bool) -> Parser' e Text
takeWhile f = do
env <- get
let (l, r) = U.span f (peText env)
shiftEnv (slen l, r)
return l
takeWithProperty :: IC.Bool_ -> Parser' e Text
takeWithProperty p = takeWhile $ IC.property p
takeIdent :: Parser' e (Maybe Ident)
takeIdent = do
env <- get
let start_ = peOffset env
let (l, r) = U.splitAt 1 (peText env)
case U.uncons l of
Nothing -> return Nothing
Just (fi, r2) -> (
(if not ((slen r2) == 0) then error "takeIdent : unable to iterator over characters" else ())
`seq`
(if IC.property IC.XidStart fi then (
do
shiftEnv (slen l, r)
rest <- takeWithProperty IC.XidContinue
end_ <- gets peOffset
-- I hate this...
return (Just . (\s -> Ident (Span { start = start_, end = end_ }) s) . U.fromRep . B.fromString . T.unpack . IN.nfc . T.pack $ fi:(B.toString $ U.toRep rest))
) else return Nothing))
skipWhiteSpace :: Parser' e ()
skipWhiteSpace = takeWithProperty IC.WhiteSpace >> pure ()
-- | try to parse the first character as a token
tryOne :: (Char -> Maybe tok) -> Parser' e (Maybe tok)
tryOne f = do
env <- get
let (l, r) = U.splitAt 1 (peText env)
case U.uncons l of
Nothing -> return Nothing
Just (fi, r2) -> (
(if not ((slen r2) == 0) then error "tryOne : unable to iterator over characters" else ())
`seq`
(shiftEnv (slen l, r) >> return (f fi)))
-- | A case tree for `eats`
data HandleTree x =
HandleTree (H.HashMap Char (HandleTree x)) (Maybe x)
| Htleaf !x
-- | try to recognize specific tokens
eats :: HandleTree x -> Parser' e (Maybe x)
eats ht = Parser . StateT $ \st ->
-- do backtracking in case of failure
let Parser (StateT eatsfun) = eats_ ht in
-- fmap @ Either
fmap (\val -> case val of
(Nothing, _) -> (Nothing, st)
(Just x, st2) -> (Just x, st2)
) $ eatsfun st
where
eatOne hm xc = tryOne $ \c -> (H.lookup c hm) <|> (fmap Htleaf xc)
eats_ :: HandleTree x -> Parser' e (Maybe x)
eats_ (Htleaf x) = pure (Just x)
eats_ (HandleTree hm xc) = (eatOne hm xc) >>= \x -> case x of
Nothing -> pure Nothing
Just y -> eats_ y
-- | record the span of the inner parser
recordSpan :: Parser' e a -> Parser' e (Span, a)
recordSpan inner = do
start_ <- gets peOffset
x <- inner
end_ <- gets peOffset
pure (Span { start = start_, end = end_ }, x)