diff --git a/haskell/nfker7h/app/Main.hs b/haskell/nfker7h/app/Main.hs index f7a2f50..12eef24 100644 --- a/haskell/nfker7h/app/Main.hs +++ b/haskell/nfker7h/app/Main.hs @@ -2,7 +2,6 @@ module Main where import System.Environment import qualified Language.Yanais.Parser as P -import qualified Language.Yanais.Parser.IO as Pio import qualified Language.Yanais.Nfker7h.Parser.Lex as L main :: IO () @@ -11,8 +10,8 @@ main = do mainA args mainA :: [String] -> IO () -mainA [x] = Pio.parseFile x L.lexe >>= putStrLn . (\x -> case x of - Right y -> fmtList y +mainA [x] = P.parseFile x L.lexe >>= putStrLn . (\y -> case y of + Right z -> fmtList z Left e -> show e) mainA _ = putStrLn "Hello World!" diff --git a/haskell/nfker7h/lib/Language/Yanais/Nfker7h/Parser/Lex.hs b/haskell/nfker7h/lib/Language/Yanais/Nfker7h/Parser/Lex.hs index c1ad01e..bfce7bd 100644 --- a/haskell/nfker7h/lib/Language/Yanais/Nfker7h/Parser/Lex.hs +++ b/haskell/nfker7h/lib/Language/Yanais/Nfker7h/Parser/Lex.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE OverloadedStrings #-} - module Language.Yanais.Nfker7h.Parser.Lex ( Token, TokenKind(..), @@ -47,15 +45,14 @@ lexeIdentPrefixTree = HandleTree (H.fromList l1) Nothing where l1 = [('.', Htleaf IpDot), ('$', Htleaf IpPatOut)] -eitherToParser :: Either Error x -> Parser' Error x -eitherToParser e = (Parser . StateT $ \st -> fmap (\x -> (x, st)) e) - handleIdentsI :: Parser' Error (Maybe TokenKind) handleIdentsI = do pfx <- eats lexeIdentPrefixTree marker <- gets peOffset - ident <- takeIdent - case tmp pfx ident marker of + ident' <- takeIdent >>= (\ident -> eitherToParser $ case ident of + Left exc -> Left $ DteUnicodeExc exc + Right x -> Right x) + case tmp pfx ident' marker of Just x -> pure . Just $ x Nothing -> do gtc <- tryOne Just @@ -67,7 +64,7 @@ handleIdentsI = do where tmp (Just IpDot) (Just x) _ = Just $ TDotIdent x tmp (Just IpDot) Nothing _ = Just $ TDot - tmp (Just IpPatOut) x m = Just $ TPatOut (maybe (Ident (Span {start = m, end = m}) "") (\z -> z) x) + tmp (Just IpPatOut) x m = Just $ TPatOut (maybe (emptyIdent m) (\z -> z) x) tmp Nothing (Just x) _ = Just $ TIdent x tmp Nothing Nothing _ = Nothing diff --git a/haskell/nfker7h/lib/Language/Yanais/Nfker7h/Parser/Types.hs b/haskell/nfker7h/lib/Language/Yanais/Nfker7h/Parser/Types.hs index dc687a6..973c95d 100644 --- a/haskell/nfker7h/lib/Language/Yanais/Nfker7h/Parser/Types.hs +++ b/haskell/nfker7h/lib/Language/Yanais/Nfker7h/Parser/Types.hs @@ -7,6 +7,7 @@ module Language.Yanais.Nfker7h.Parser.Types ( MaybeGetSpan(..) ) where +import Data.Text.Encoding.Error (UnicodeException) import qualified Language.Yanais.Parser as P -- a "classic" tristate result @@ -16,6 +17,7 @@ data Context = CtxModule data Error = UnexpectedEof Context | UnexpectedChar !Int !Char + | DteUnicodeExc UnicodeException | UnknownIdent !P.Ident instance Functor Result where @@ -52,3 +54,4 @@ instance Show Error where show (UnexpectedEof ctx) = "end of file encountered inside " <> show ctx show (UnknownIdent x) = "unknown identifier: " <> show x show (UnexpectedChar pos c) = "unexpected / unknown character at " <> show pos <> ": " <> show c <> " (" <> (show $ fromEnum c) <> ")" + show (DteUnicodeExc exc) = "unicode decoding error: " <> show exc diff --git a/haskell/nfker7h/lib/Language/Yanais/Parser.hs b/haskell/nfker7h/lib/Language/Yanais/Parser.hs index 5e481cf..5716375 100644 --- a/haskell/nfker7h/lib/Language/Yanais/Parser.hs +++ b/haskell/nfker7h/lib/Language/Yanais/Parser.hs @@ -11,8 +11,15 @@ module Language.Yanais.Parser ( ParserEnv(..), Span(..), HandleTree(..), + emptyIdent, + eitherToParser, + + -- creating and running parsers makeParseEnv, runParser, + parseFile, + + -- parser combinators takeUntil, takeWhile, takeIdent, @@ -33,8 +40,8 @@ import qualified Data.ByteString.Char8 as C import qualified Data.ByteString.UTF8 as B import qualified Data.HashMap.Strict as H import qualified Data.String.UTF8 as U -import qualified Data.Text as T import qualified Data.Text.Encoding as TE +import Data.Text.Encoding.Error (UnicodeException) import qualified Data.Text.ICU.Char as IC import qualified Data.Text.ICU.Normalize2 as IN -- import Language.Yanais.Fusion @@ -79,6 +86,9 @@ type Text = U.UTF8 B.ByteString data Ident = Ident !Span !Text +emptyIdent :: Int -> Ident +emptyIdent m = Ident (Span { start = m, end = m }) (U.fromRep Bb.empty) + instance Show Ident where show (Ident _ t) = B.toString $ U.toRep t @@ -89,12 +99,21 @@ data ParserEnv = ParserEnv { type Parser' e a = Parser ParserEnv e a +-- | injects an `Either` into the parser (used to conditionally throw errors) +eitherToParser :: Either e a -> Parser s e a +eitherToParser e = (Parser . StateT $ \st -> fmap (\x -> (x, st)) e) + makeParseEnv :: B.ByteString -> ParserEnv makeParseEnv bs = ParserEnv { peOffset = 0, peText = U.fromRep bs } runParser :: s -> Parser s e a -> Either e (a, s) runParser st (Parser p) = runStateT p st +parseFile :: String -> Parser' e a -> IO (Either e a) +parseFile f p = do + contents <- C.readFile f + return (fmap (\(r, _) -> r) $ runParser (makeParseEnv contents) p) + -- some simple parser combinators shiftEnv :: (Int, Text) -> Parser' e () @@ -119,13 +138,13 @@ takeWhile f = do takeWithProperty :: IC.Bool_ -> Parser' e Text takeWithProperty p = takeWhile $ IC.property p -takeIdent :: Parser' e (Maybe Ident) +takeIdent :: Parser' e (Either UnicodeException (Maybe Ident)) takeIdent = do env <- get let start_ = peOffset env let (l, r) = U.splitAt 1 (peText env) case U.uncons l of - Nothing -> return Nothing + Nothing -> okNone Just (fi, r2) -> ( (if not ((slen r2) == 0) then error "takeIdent : unable to iterator over characters" else ()) `seq` @@ -134,12 +153,24 @@ takeIdent = do shiftEnv (slen l, r) rest <- takeWithProperty IC.XidContinue end_ <- gets peOffset - return (Just . (\s -> Ident (Span { start = start_, end = end_ }) s) $ theHorrorsLieWithin fi rest) - ) else return Nothing)) + let thlw = theHorrorsLieWithin fi rest + let mkident = \s -> Ident (Span { start = start_, end = end_ }) s + return (fmap (Just . mkident) thlw) + ) else okNone)) where + okNone :: Parser' e (Either e2 (Maybe dt)) + okNone = Parser . StateT $ \st -> Right (Right Nothing, st) + -- I hate this... - theHorrorsLieWithin fi rest = U.fromRep . TE.encodeUtf8 . IN.nfc . TE.decodeUtf8Lenient $ (Bb.append (B.fromChar fi) (U.toRep rest)) + theHorrorsLieWithin :: Char -> Text -> Either UnicodeException Text + theHorrorsLieWithin fi rest = + -- make a byte string from the parts of the identifyer + let iabs = Bb.append (B.fromString [fi]) (U.toRep rest) in + -- finish a UTF-8 decoded identifier + let fini = U.fromRep . TE.encodeUtf8 . IN.nfc in + -- handle decoding + fmap fini $ TE.decodeUtf8' iabs skipWhiteSpace :: Parser' e () skipWhiteSpace = takeWithProperty IC.WhiteSpace >> pure () diff --git a/haskell/nfker7h/lib/Language/Yanais/Parser/IO.hs b/haskell/nfker7h/lib/Language/Yanais/Parser/IO.hs deleted file mode 100644 index a9a90a1..0000000 --- a/haskell/nfker7h/lib/Language/Yanais/Parser/IO.hs +++ /dev/null @@ -1,13 +0,0 @@ -{-# LANGUAGE ScopedTypeVariables #-} -module Language.Yanais.Parser.IO ( - parseFile -) where - -import qualified Data.ByteString.Char8 as C -import qualified Language.Yanais.Parser as P - -parseFile :: String -> P.Parser' e a -> IO (Either e a) -parseFile f p = do - contents <- C.readFile f - let pe = P.makeParseEnv contents - return (fmap (\(r, _) -> r) $ P.runParser pe p) diff --git a/haskell/nfker7h/yanais-nfker7h.cabal b/haskell/nfker7h/yanais-nfker7h.cabal index afbf37f..d5a09eb 100644 --- a/haskell/nfker7h/yanais-nfker7h.cabal +++ b/haskell/nfker7h/yanais-nfker7h.cabal @@ -62,7 +62,6 @@ library exposed-modules: Language.Yanais.Fusion , Language.Yanais.Parser - , Language.Yanais.Parser.IO , Language.Yanais.Nfker7h.Parser.Types , Language.Yanais.Nfker7h.Parser.Lex @@ -77,7 +76,6 @@ library MultiParamTypeClasses OverloadedStrings RankNTypes - ScopedTypeVariables -- Other library packages from which modules are imported. build-depends: