improve partial parse

This commit is contained in:
Alain Zscheile 2023-09-25 14:33:56 +02:00
parent 995711578c
commit f11c3afa28
5 changed files with 129 additions and 55 deletions

View file

@ -232,8 +232,8 @@ impl_functor_for_collection!(LinkedList);
#[cfg(test)]
mod test {
use alloc::vec;
use crate::Functor;
use alloc::vec;
#[test]
fn option_functor() {

View file

@ -1,6 +1,8 @@
//use bitflags::bitflags;
pub mod parser;
use parser::{Env as ParseEnv, Error as Perr, ErrorKind as Pek, Parse, Token, TokenKind as Tok};
use parser::{
Env as ParseEnv, Error as Perr, ErrorKind as Pek, MaybeParse, Parse, Token, TokenKind as Tok,
};
#[derive(Clone, Debug)]
pub enum Pattern {
@ -83,35 +85,33 @@ impl Pattern {
}
}
impl Parse for Pattern {
fn parse(env: &mut ParseEnv<'_>) -> Result<Self, Perr> {
impl MaybeParse for Pattern {
const DFL_CTX: &'static str = "pattern";
fn maybe_parse(env: &mut ParseEnv<'_>) -> Result<Option<Self>, Perr> {
let backup = env.lxr.clone();
let Token { offset, kind } = env.lxr.next().unwrap_or_else(|| {
Err(Perr {
offset: env.lxr.offset(),
kind: Pek::UnexpectedEof("pattern"),
})
})?;
let Token { kind, .. } = match env.lxr.next().transpose()? {
None => return Ok(None),
Some(x) => x,
};
Ok(match kind {
Tok::PatOut(i) => Pattern::Ident(i),
Tok::PatIgnore => Pattern::Ignore,
Tok::PatOut(i) => Some(Pattern::Ident(i)),
Tok::PatIgnore => Some(Pattern::Ignore),
_ => {
env.lxr = backup;
return Err(Perr {
offset: offset.try_into().unwrap(),
kind: Pek::UnexpectedToken {
kind,
ctx: "pattern",
},
});
None
}
})
}
}
impl Parse for Record {
fn parse(env: &mut ParseEnv<'_>) -> Result<Self, Perr> {
env.lxr.expect(Tok::LBrace, "record")?;
impl MaybeParse for Record {
const DFL_CTX: &'static str = "record";
fn maybe_parse(env: &mut ParseEnv<'_>) -> Result<Option<Self>, Perr> {
if env.lxr.got(Tok::LBrace).is_none() {
return Ok(None);
}
let mut fields = Vec::new();
@ -121,7 +121,7 @@ impl Parse for Record {
let Token { kind, .. } = env.lxr.next_in_noeof("record")?;
if let Tok::DotIdent(i) = kind {
if env.lxr.expect(Tok::Assign, "record").is_ok() {
if env.lxr.expect(Tok::Assign, "record =").is_ok() {
Some(i)
} else {
None
@ -139,12 +139,12 @@ impl Parse for Record {
}
let expr = Expr::parse(env)?;
env.lxr.expect(Tok::SemiColon, "record")?;
env.lxr.expect(Tok::SemiColon, "record ;")?;
fields.push((name, expr));
}
env.lxr.expect(Tok::RBrace, "record")?;
Ok(Record { fields })
Ok(Some(Record { fields }))
}
}
@ -187,19 +187,19 @@ fn parse_minexpr(env: &mut ParseEnv<'_>) -> Result<Expr, Perr> {
return Ok(inner);
}
Tok::Caret => {
return if let Ok(r) = Record::parse(env) {
return if let Some(r) = Record::maybe_parse(env)? {
Ok(Expr::TyRecord(r))
} else {
let Token { kind, offset } = env.lxr.next_in_noeof("expression")?;
Err(parser::unexpected_token(offset, kind, "expression"))
let Token { kind, offset } = env.lxr.next_in_noeof("^expression")?;
Err(parser::unexpected_token(offset, kind, "^expression"))
}
}
Tok::Dot => {
return if let Ok(r) = Record::parse(env) {
return if let Some(r) = Record::maybe_parse(env)? {
Ok(Expr::Record(r))
} else {
let Token { kind, offset } = env.lxr.next_in_noeof("expression")?;
Err(parser::unexpected_token(offset, kind, "expression"))
let Token { kind, offset } = env.lxr.next_in_noeof(".expression")?;
Err(parser::unexpected_token(offset, kind, ".expression"))
}
}
/*
@ -216,10 +216,8 @@ fn parse_minexpr(env: &mut ParseEnv<'_>) -> Result<Expr, Perr> {
let lxrbak = env.lxr.clone();
let Token { kind, .. } = match env.lxr.next() {
Some(Ok(x)) => x,
_ => {
env.lxr = lxrbak;
break;
}
Some(Err(e)) => return Err(e),
None => break,
};
match kind {
Tok::DotIdent(i) => {
@ -243,25 +241,63 @@ fn parse_minexpr(env: &mut ParseEnv<'_>) -> Result<Expr, Perr> {
impl Parse for Expr {
fn parse(env: &mut ParseEnv<'_>) -> Result<Self, Perr> {
let base = parse_minexpr(env)?;
let mut args = Vec::new();
fn parse_env_inner(env: &mut ParseEnv<'_>) -> Result<Expr, Perr> {
let mut lxprs = Vec::new();
let knamcnt = env.names.len();
loop {
let mut nxtenv = env.clone();
args.push(match parse_minexpr(&mut nxtenv) {
Ok(x) => x,
Err(_) => break,
});
*env = nxtenv;
while env.lxr.got(Tok::Let).is_some() {
let Token { kind, offset } = env.lxr.next_in_noeof("let expression (ident)")?;
let key = match kind {
Tok::Ident(i) => i,
_ => {
return Err(parser::unexpected_token(
offset,
kind,
"let expression (ident)",
))
}
};
env.lxr.expect(Tok::Assign, "let expression =")?;
lxprs.push(Expr::parse(env)?);
assert_eq!(env.names.len(), knamcnt + lxprs.len());
env.lxr.expect(Tok::SemiColon, "let expression ;")?;
env.names.push(key);
}
let base = parse_minexpr(env)?;
let mut args = Vec::new();
let reside_knamcnt = env.names.len();
loop {
let lxrbak = env.lxr.clone();
let arg = parse_minexpr(env);
assert_eq!(env.names.len(), reside_knamcnt);
args.push(match arg {
Ok(x) => x,
Err(_) => {
// do not eat errors without backtracking the lexer
env.lxr = lxrbak;
break;
}
});
}
Ok(if args.is_empty() {
base
} else {
Expr::Apply {
lam: Box::new(base),
args,
}
})
}
Ok(if args.is_empty() {
base
} else {
Expr::Apply {
lam: Box::new(base),
args,
}
})
// make sure we don't leak names
// (the assert_eq checks serve the same purpose)
let knamcnt = env.names.len();
let res = parse_env_inner(env);
assert!(env.names.len() >= knamcnt);
env.names.truncate(knamcnt);
res
}
}

View file

@ -66,6 +66,15 @@ impl<'a> Lexer<'a> {
self.inner.offset
}
/// Reports the offset at which the next item would be lexed, without
/// advancing this lexer.
///
/// The lookahead runs on a clone of the lexer:
/// * if a token is produced, its offset is returned,
/// * if lexing fails, the offset stored in the error is returned,
/// * if the input is exhausted, the clone's own offset after that
///   attempt is returned.
///
/// # Panics
///
/// Panics if a token's offset cannot be converted to `usize`.
pub fn peek_offset(&self) -> usize {
    let mut lookahead = self.clone();
    let upcoming = lookahead.next();
    match upcoming {
        None => lookahead.inner.offset,
        Some(Err(Error { offset, .. })) => offset,
        Some(Ok(Token { offset, .. })) => offset.try_into().unwrap(),
    }
}
/// Returns whatever `next()` would yield at the current position, while
/// leaving this lexer untouched (the work is done on a clone).
pub fn peek(&self) -> Option<Result<Token, Error>> {
    let mut lookahead = self.clone();
    lookahead.next()
}

View file

@ -55,6 +55,10 @@ pub enum ErrorKind {
ctx: &'static str,
},
#[error("expected {0}")]
#[diagnostic(code(yanais::parser::expected))]
Expected(&'static str),
#[error("unknown identifier {0:?}")]
UnknownIdent(Box<str>),
}
@ -96,3 +100,24 @@ pub fn unexpected_token(offset: u32, kind: TokenKind<Box<str>>, ctx: &'static st
/// Types that can be parsed out of a parser [`Env`].
pub trait Parse: Sized {
/// Parses one `Self` from `env`.
///
/// # Errors
///
/// Returns the [`Error`] that prevented a value from being parsed.
fn parse(env: &mut Env<'_>) -> Result<Self, Error>;
}
/// Parsing that can answer "nothing of this kind here" separately from a
/// hard error.
///
/// Every implementor automatically gets [`Parse`] through the blanket impl
/// in this module, which turns an `Ok(None)` into an
/// `ErrorKind::Expected(DFL_CTX)` error.
pub trait MaybeParse: Sized {
/// Default context string; the blanket [`Parse`] impl reports it inside
/// `ErrorKind::Expected` when [`maybe_parse`](Self::maybe_parse) yields
/// `Ok(None)`.
const DFL_CTX: &'static str;
/// This function makes it possible to clearly differentiate between
/// recoverable failures and unrecoverable ones:
///
/// * `Ok(Some(value))` - a value was parsed,
/// * `Ok(None)` - recoverable failure: no value of this type was found,
/// * `Err(error)` - unrecoverable failure.
///
/// The blanket [`Parse`] impl asserts that `env.names` has the same
/// length after this call as before it.
// NOTE(review): implementors presumably leave the lexer where it was when
// returning `Ok(None)` - confirm against each impl.
fn maybe_parse(env: &mut Env<'_>) -> Result<Option<Self>, Error>;
}
impl<T: MaybeParse> Parse for T {
fn parse(env: &mut Env<'_>) -> Result<Self, Error> {
let knamcnt = env.names.len();
let offset = env.lxr.peek_offset();
let mres = T::maybe_parse(env);
assert_eq!(env.names.len(), knamcnt);
mres?.ok_or_else(|| Error {
offset,
kind: ErrorKind::Expected(<T as MaybeParse>::DFL_CTX),
})
}
}

View file

@ -1,11 +1,15 @@
use readfilez::read_from_file;
use yn_qgy4hbz_core::parser::{Lexer, Parse, Env as ParseEnv};
use yn_qgy4hbz_core::parser::{Env as ParseEnv, Lexer, Parse};
fn do_parse(f: &str) {
let fh = read_from_file(std::fs::File::open(f)).expect("unable to open example file");
let mut penv = ParseEnv::new(Lexer::new(core::str::from_utf8(&*fh).expect("unable to parse example file (UTF-8)")));
let mut penv = ParseEnv::new(Lexer::new(
core::str::from_utf8(&*fh).expect("unable to parse example file (UTF-8)"),
));
yn_qgy4hbz_core::Expr::parse(&mut penv).expect("unable to parse example file (yanais)");
}
#[test]
fn ex00() { do_parse("tests/ex00.yns"); }
fn ex00() {
do_parse("tests/ex00.yns");
}