yanais/haskell/nfker7h/lib/Language/Yanais/Parser.hs

136 lines
3.7 KiB
Haskell
Raw Normal View History

{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
module Language.Yanais.Parser (
Text,
Ident(..),
Parser(..),
ParserEnv(..),
Span(..),
makeParseEnv,
takeUntil,
takeWhile,
2023-10-24 20:21:23 +00:00
takeIdent,
takeWithProperty,
skipWhiteSpace,
tryOne
) where
import Prelude hiding (takeWhile, span, splitAt)
import Control.DeepSeq (NFData)
import Data.Hashable (Hashable)
import Control.Monad.Trans.State.Strict
import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.UTF8 as B
import qualified Data.String.UTF8 as U
import qualified Data.Text as T
import qualified Data.Text.ICU.Char as IC
import qualified Data.Text.ICU.Normalize2 as IN
-- import Language.Yanais.Fusion
import Generic.Data
-- A 0-indexed, half-open interval of integers, defined by start & end indices
data Span = Span
{ start :: {-# UNPACK #-} !Int
, end :: {-# UNPACK #-} !Int
}
deriving (Eq, Generic, Ord, Show)
instance Hashable Span
instance NFData Span
instance Semigroup Span where
Span start1 end1 <> Span start2 end2 = Span (min start1 start2) (max end1 end2)
-- a generic parser monad
newtype Parser s e a = Parser { runP :: StateT s (Either e) a }
instance Functor (Parser s e) where
fmap f = Parser . (fmap f) . runP
{-# INLINE fmap #-}
instance Applicative (Parser s e) where
pure = Parser . pure
{-# INLINE pure #-}
Parser a <*> Parser b = Parser $ a <*> b
{-# INLINE (<*>) #-}
instance Monad (Parser s e) where
return = pure
Parser a >>= fb = Parser $ a >>= (runP . fb)
{-# INLINE (>>=) #-}
type Text = U.UTF8 B.ByteString
data Ident = Ident !Span !Text
instance Show Ident where
show (Ident _ t) = show t
data ParserEnv = ParserEnv {
peOffset :: !Int,
peText :: Text
}
makeParseEnv :: B.ByteString -> ParserEnv
makeParseEnv bs = ParserEnv { peOffset = 0, peText = U.fromRep bs }
-- some simple parser combinators
shiftEnv :: (Int, Text) -> Parser ParserEnv e ()
shiftEnv (ll, r) = Parser $ modify go
where
go :: ParserEnv -> ParserEnv
go st = st { peOffset = (peOffset st) + ll, peText = r }
slen :: Text -> Int
slen = C.length . U.toRep
takeUntil :: (Char -> Bool) -> Parser ParserEnv e Text
takeUntil f = takeWhile (not . f)
takeWhile :: (Char -> Bool) -> Parser ParserEnv e Text
takeWhile f = do
env <- Parser $ get
let (l, r) = U.span f (peText env)
shiftEnv (slen l, r)
return l
2023-10-24 20:21:23 +00:00
takeWithProperty :: IC.Bool_ -> Parser ParserEnv e Text
takeWithProperty p = takeWhile $ IC.property p
takeIdent :: Parser ParserEnv e (Maybe Ident)
takeIdent = do
env <- Parser $ get
let start_ = peOffset env
let (l, r) = U.splitAt 1 (peText env)
case U.uncons l of
Nothing -> return Nothing
Just (fi, r2) -> (
(if not ((slen r2) == 0) then error "takeIdent : unable to iterator over characters" else ())
`seq`
(if IC.property IC.XidStart fi then (
do
shiftEnv (slen l, r)
2023-10-24 20:21:23 +00:00
rest <- takeWithProperty IC.XidContinue
env2 <- Parser $ get
let end_ = peOffset env2
-- I hate this...
return (Just . (\s -> Ident (Span { start = start_, end = end_ }) s) . U.fromRep . B.fromString . T.unpack . IN.nfc . T.pack $ fi:(B.toString $ U.toRep rest))
) else return Nothing))
2023-10-24 20:21:23 +00:00
skipWhiteSpace :: Parser ParserEnv e ()
skipWhiteSpace = takeWithProperty IC.WhiteSpace >> pure ()
-- try to parse the first character as a token
tryOne :: (Char -> Maybe tok) -> Parser ParserEnv e (Maybe tok)
tryOne f = do
env <- Parser $ get
let (l, r) = U.splitAt 1 (peText env)
case U.uncons l of
Nothing -> return Nothing
Just (fi, r2) -> (
(if not ((slen r2) == 0) then error "tryOne : unable to iterator over characters" else ())
`seq`
(shiftEnv (slen l, r) >> return (f fi)))