yanais/haskell/nfker7h/lib/Language/Yanais/Nfker7h/Parser/Lex.hs
2023-10-25 16:10:15 +02:00

136 lines
4.1 KiB
Haskell

{-# LANGUAGE OverloadedStrings #-}
module Language.Yanais.Nfker7h.Parser.Lex (
Token,
TokenKind(..),
Keyword(..),
lexe,
) where
-- import Debug.Trace
import Control.Monad.State.Strict
import qualified Data.HashMap.Strict as H
import Language.Yanais.Parser
import Language.Yanais.Nfker7h.Parser.Types
data Keyword = KwLambda | KwType deriving (Show)
data Token = Token !Span TokenKind deriving (Show)
instance MaybeGetSpan Token where
mgSpan (Token s _) = Just s
data TokenKind = TIdent Ident
| TPatOut Ident -- this ident might be empty
| TDotIdent Ident
| TString Text
| TInteger Integer
| TCaret
| TLParen
| TRParen
| TLBrace
| TRBrace
| TLArr
| TRArr
| TDot
| TDubColon
| TSemiColon
| TAssign
| TKeyword Keyword
deriving (Show)
data IdentPrefix = IpDot | IpPatOut
lexeIdentPrefixTree :: HandleTree IdentPrefix
lexeIdentPrefixTree = HandleTree (H.fromList l1) Nothing
where
l1 = [('.', Htleaf IpDot), ('$', Htleaf IpPatOut)]
eitherToParser :: Either Error x -> Parser' Error x
eitherToParser e = (Parser . StateT $ \st -> fmap (\x -> (x, st)) e)
handleIdentsI :: Parser' Error (Maybe TokenKind)
handleIdentsI = do
pfx <- eats lexeIdentPrefixTree
marker <- gets peOffset
ident <- takeIdent
case tmp pfx ident marker of
Just x -> pure . Just $ x
Nothing -> do
gtc <- tryOne Just
-- usually this is an error, but not at EOF
eitherToParser $ case gtc of
Just gtc_ -> Left $ UnexpectedChar marker gtc_
Nothing -> Right Nothing
where
tmp (Just IpDot) (Just x) _ = Just $ TDotIdent x
tmp (Just IpDot) Nothing _ = Just $ TDot
tmp (Just IpPatOut) x m = Just $ TPatOut (maybe (Ident (Span {start = m, end = m}) "") (\z -> z) x)
tmp Nothing (Just x) _ = Just $ TIdent x
tmp Nothing Nothing _ = Nothing
lexe :: Parser' Error [Token]
lexe = do
-- handle whitespace and comments
lexeComments 0
-- handle symbols
symb <- lexeSymbols
case symb of
Just tok -> fmap (tok:) lexe
Nothing -> recordSpan handleIdentsI >>= \(sp, mtk) ->
case mtk of
Nothing -> pure []
Just tk -> fmap ((Token sp tk):) lexe
data LevelDelta = LevelIncr | LevelDecr deriving (Show)
lvladj :: LevelDelta -> Integer -> Integer
lvladj LevelDecr lvl = lvl - 1
lvladj LevelIncr lvl = lvl + 1
lexeCommentsTreeInit :: HandleTree LevelDelta
lexeCommentsTreeInit = HandleTree (H.fromList l1) Nothing
where
l1 = [('(' {- ) -}, HandleTree (H.fromList l2) Nothing)]
l2 = [('*', Htleaf LevelIncr)]
lexeCommentsTree :: HandleTree LevelDelta
lexeCommentsTree = HandleTree (H.fromList l1) Nothing
where
-- (* ... comment start; *) ... comment end
l1 = [('(' {- ) -}, HandleTree (H.fromList l2) Nothing)
,('*', HandleTree (H.fromList l3) Nothing)]
l2 = [('*', Htleaf LevelIncr)]
l3 = [({- ( -} ')', Htleaf LevelDecr)]
lexeComments :: Integer -> Parser' e ()
lexeComments (-1) = error "lexeComments invalid index"
lexeComments lvl = do
skipWhiteSpace
fi <- eats (if lvl <= 0 then lexeCommentsTreeInit else lexeCommentsTree)
case fi of
Just ld -> lexeComments $ lvladj ld lvl
-- we either can abort searching for comment contents (lvl == 0)
-- or we skip over a single character and continue
Nothing -> if lvl <= 0 then pure () else tryOne (\_ -> Just ()) >> lexeComments lvl
lexeSymbolsTree :: HandleTree TokenKind
lexeSymbolsTree = HandleTree (H.fromList l1) Nothing
where
-- NOTE: we can't handle '.' and '$' here (they might be followed by identifiers)
l1 = [('^', Htleaf TCaret ), ('(', Htleaf TLParen), (')', Htleaf TRParen)
,('{', Htleaf TLBrace ), ('}', Htleaf TRBrace), (':', Htleaf TDubColon)
,(';', Htleaf TSemiColon), ('=', Htleaf TAssign)
,('←', Htleaf TLArr ), ('→', Htleaf TRArr )
]
recordToken :: Functor f => Parser' e (f TokenKind) -> Parser' e (f Token)
recordToken tkp = recordSpan tkp >>=
\(sp, tk) -> pure $ fmap (\y -> Token sp y) tk
lexeSymbols :: Parser' e (Maybe Token)
lexeSymbols = recordToken (eats lexeSymbolsTree)