rust: implement expression parsing

This commit is contained in:
Alain Zscheile 2023-11-03 20:22:47 +01:00
parent b2c7935968
commit f098db4e9c
6 changed files with 269 additions and 33 deletions

View file

@ -101,6 +101,9 @@ pub enum ErrorKind {
#[diagnostic(code(yanais::parser::unexpected_eof))]
UnexpectedEof(ErrorCtx),
#[error("unexpected token in context {0:?}: {1:?}")]
UnexpectedToken(ErrorCtx, crate::lex::TokenKind<crate::Kw>),
#[error("unhandled character '{0}'")]
#[diagnostic(code(yanais::lexer::unhandled_char))]
UnhandledChar(char),
@ -117,6 +120,10 @@ pub enum ErrorKind {
#[diagnostic(code(yanais::parser::record_dup_ident))]
RecordDupIdent(Arc<str>),
#[error("duplicated pattern field identifier {0:?}")]
#[diagnostic(code(yanais::parser::pattern_dup_ident))]
PatternDupIdent(Arc<str>),
#[error("unknown identifer {0:?}")]
#[diagnostic(code(yanais::parser::unknown_ident))]
UnknownIdent(Arc<str>),
@ -125,7 +132,11 @@ pub enum ErrorKind {
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ErrorCtx {
Comment,
Expression,
Lambda,
Let,
Literal,
Parentheses,
Pattern,
Record,
Select,
@ -137,7 +148,11 @@ impl fmt::Display for ErrorCtx {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(match self {
ErrorCtx::Comment => "comment",
ErrorCtx::Expression => "expression",
ErrorCtx::Lambda => "(ty)lambda",
ErrorCtx::Let => "let expression",
ErrorCtx::Literal => "literal",
ErrorCtx::Parentheses => "parentheses",
ErrorCtx::Pattern => "pattern",
ErrorCtx::Record => "record",
ErrorCtx::Select => "selection",

View file

@ -1,17 +1,38 @@
use crate::{pat::Pattern, record::Record, EvEqSourceSpan};
use crate::lex::{Token, TokenKind as Tok};
use crate::{
pat::PatternMT, record::Record, Env as ParseEnv, Error, ErrorCtx as Ectx, ErrorKind as Pek,
EvEqSourceSpan, Kw, Parse, Result, SelIdent,
};
use yanais_literal::Literal;
#[derive(Clone, Debug)]
pub struct Lambda {
pub pat: Pattern,
pub ty: Option<Box<Expr>>,
pub res: Box<Expr>,
pub pat: PatternMT,
pub body_span: EvEqSourceSpan,
pub body: Box<Expr>,
}
impl Parse for Lambda {
fn parse(env: &mut ParseEnv<'_>) -> Result<Self> {
let pat = PatternMT::parse(env)?;
env.lxr.expect(Tok::RArr, Ectx::Lambda)?;
let body_start = env.lxr.offset();
let body = Box::new(Expr::parse(env)?);
let body_end = env.lxr.offset();
Ok(Self {
pat,
body_span: (body_start..body_end).into(),
body,
})
}
}
#[derive(Clone, Debug)]
pub enum Expr {
Literal(Literal),
Use(SelIdent),
Lambda(Lambda),
TyLambda(Lambda),
TyLambdaLin(Lambda),
@ -20,3 +41,113 @@ pub enum Expr {
Record(Record<Expr>),
TyRecord(Record<Expr>),
}
/// parse a "minimal expression", meaning it can be a lambda or argument for an apply expression
/// so it shouldn't "snap up" stuff like an apply expression
fn parse_minexpr(env: &mut ParseEnv<'_>) -> Result<Expr> {
let Token {
span: fi_span,
kind: fi_kind,
} = env.lxr.next_in_noeof(Ectx::Expression)?;
match fi_kind {
Tok::Ident(i) => {
if let Some(x) = env.lookup(&i) {
Ok(Expr::Use(SelIdent {
span: fi_span,
dbidx: x,
}))
} else {
Err(Error {
span: fi_span,
kind: Pek::UnknownIdent(i),
})
}
}
Tok::Kw(Kw::Literal(lit)) => Ok(Expr::Literal(lit)),
Tok::Kw(Kw::Lambda) => Lambda::parse(env).map(Expr::Lambda),
Tok::Kw(Kw::TyLambda) => Lambda::parse(env).map(Expr::TyLambda),
Tok::Kw(Kw::TyLambdaLin) => Lambda::parse(env).map(Expr::TyLambdaLin),
Tok::LParen => {
let inner = Expr::parse(env)?;
env.lxr.expect(Tok::RParen, Ectx::Parentheses)?;
Ok(inner)
}
_ => Err(Error {
span: fi_span,
kind: Pek::UnexpectedToken(Ectx::Expression, fi_kind),
}),
}
// TODO: support record destructuring when dealing with references to records...
}
impl Parse for Expr {
/// Parses a full expression: zero or more `let` bindings, followed by a minimal expression
/// and any number of argument minimal expressions applied to it.
///
/// `let` bindings are rewritten into applications of a lambda to the bound value. Names
/// pushed onto `env.names` while parsing are removed again before returning, on both the
/// success and the error path.
fn parse(env: &mut ParseEnv<'_>) -> Result<Self> {
// Does the actual parsing; may leave extra entries on `env.names`, which the outer
// function truncates away afterwards.
fn parse_inner(env: &mut ParseEnv<'_>) -> Result<Expr> {
use crate::pat::PatternTrait;
// each entry: (pattern, span of the value, value, start offset of what follows the `;`)
let mut letbinds = Vec::<(_, EvEqSourceSpan, _, usize)>::new();
while env.lxr.got(Tok::Kw(Kw::Let)).is_some() {
let key = PatternMT::parse(env)?;
env.lxr.expect(Tok::Assign, Ectx::Let)?;
let value_start = env.lxr.offset();
let value = Expr::parse(env)?;
let value_end = env.lxr.offset();
// the names are pushed only after the value has been parsed,
// so the value itself is parsed without them in `env.names`
key.push_to_penv(env);
env.lxr.expect(Tok::SemiColon, Ectx::Let)?;
// offset of the next token after the `;`; used below as the start of the lambda body span
let next_start = env.lxr.peek_span().offset();
letbinds.push((key, (value_start..value_end).into(), value, next_start));
}
let mut args = Vec::new();
let mut base = parse_minexpr(env)?;
let reside_knamcnt = env.names.len();
// greedily collect arguments until a minimal expression fails to parse
loop {
// snapshot of the lexer, restored if the attempt below fails
let lxrbak = env.lxr.clone();
let arg_start = env.lxr.offset();
let arg = parse_minexpr(env);
let arg_end = env.lxr.offset();
// parsing a minimal expression must leave the number of names unchanged
assert_eq!(env.names.len(), reside_knamcnt);
match arg {
Ok(x) => args.push(((arg_start..arg_end).into(), x)),
Err(_) => {
// do not eat errors without backtracking the lexer
// (the error itself is discarded; it only ends the argument list)
env.lxr = lxrbak;
break;
}
}
}
if !args.is_empty() {
base = Expr::Apply(Box::new(base), args);
}
if !letbinds.is_empty() {
// turn let bindings into lambda invocations...
// every generated lambda body ends at the same offset: the end of the whole expression
let body_span_end = env.lxr.offset();
// reverse order: the last binding wraps the body first, the first binding ends up outermost
for (pat, value_span, value, next_start) in letbinds.into_iter().rev() {
base = Expr::Apply(
Box::new(Expr::Lambda(Lambda {
pat,
body_span: (next_start..body_span_end).into(),
body: Box::new(base),
})),
vec![(value_span, value)],
);
}
}
Ok(base)
}
// make sure we don't leak names
// (the assert_eq checks serve the same purpose)
let knamcnt = env.names.len();
let res = parse_inner(env);
// parsing may only have added names, never removed ones that were present before
assert!(env.names.len() >= knamcnt);
env.names.truncate(knamcnt);
res
}
}

View file

@ -8,9 +8,10 @@ use crate::{lex, Env as ParseEnv, Error, ErrorCtx, EvEqSourceSpan, MaybeParse};
pub use yanais_literal::{IntSize, Literal, TyLit};
#[derive(Clone, Copy, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Kw {
Literal(Literal),
Let,
Lambda,
TyLambda,
TyLambdaLin,
@ -51,6 +52,7 @@ impl core::str::FromStr for Kw {
"λ" => Kw::Lambda,
"Λ" => Kw::TyLambda,
"Λlin" => Kw::TyLambdaLin,
"let" => Kw::Let,
_ => return Err(()),
})
}

View file

@ -13,7 +13,7 @@ pub use error::{Error, ErrorCtx, ErrorKind, FullError, Result};
pub mod expr;
mod kw;
pub use kw::Kw;
pub use kw::{Kw, Literal};
pub mod lex;
pub mod pat;
pub mod record;

View file

@ -4,30 +4,79 @@
* SPDX-License-Identifier: Apache-2.0
*/
use crate::{
lex, record::Record, Env as ParseEnv, Error, ErrorCtx, EvEqSourceSpan, MaybeParse,
Parse
};
use core::convert::Infallible;
use std::sync::Arc;
use crate::lex::{self, TokenKind as Tok};
use crate::{
expr::Expr, none_up, record::Record, Env as ParseEnv, Error, ErrorCtx, ErrorKind as Pek,
EvEqSourceSpan, MaybeParse, Parse, TaggedIdent,
};
// infallible patterns
/// Behaviour shared by patterns that export (bind) identifiers.
///
/// Implementors only provide [`foreach_exports`](PatternTrait::foreach_exports); every other
/// method is derived from it.
pub trait PatternTrait {
    /// Invokes `f` once for every identifier exported by this pattern.
    ///
    /// The error type `E` is chosen by the caller. Implementations are expected to hand an
    /// `Err` returned by `f` back to the caller.
    fn foreach_exports<'i, E, F>(&'i self, f: &mut F) -> Result<(), E>
    where
        F: FnMut(&'i TaggedIdent) -> Result<(), E>;

    /// Pushes the name of every exported identifier onto `env.names`, in visiting order.
    fn push_to_penv(&self, env: &mut ParseEnv<'_>) {
        self.foreach_exports::<Infallible, _>(&mut |ident| {
            env.names.push(Arc::clone(&ident.name));
            Ok(())
        })
        // the closure never returns `Err`
        .unwrap();
    }

    /// Returns the number of identifiers this pattern exports.
    fn count_exports(&self) -> usize {
        let mut counter = 0;
        self.foreach_exports::<Infallible, _>(&mut |_| {
            counter += 1;
            Ok(())
        })
        // the closure never returns `Err`
        .unwrap();
        counter
    }

    /// Removes as many trailing entries from `env.names` as this pattern exports.
    ///
    /// # Panics
    ///
    /// Panics if `env.names` holds fewer entries than the pattern exports. A plain
    /// subtraction would wrap around in builds without overflow checks and turn the
    /// `truncate` into a silent no-op, leaving stale names behind.
    fn pop_from_penv(&self, env: &mut ParseEnv<'_>) {
        let exported = self.count_exports();
        let remaining = env
            .names
            .len()
            .checked_sub(exported)
            .expect("pattern exports more names than the parser environment holds");
        env.names.truncate(remaining);
    }

    /// Appends the exported names to `outp`, rejecting duplicates.
    ///
    /// # Errors
    ///
    /// Returns `PatternDupIdent`, located at the offending identifier's span, as soon as an
    /// exported name is already present in `outp`. Names visited before the duplicate
    /// remain in `outp`.
    fn extract_exports(&self, outp: &mut Vec<Arc<str>>) -> Result<(), Error> {
        self.foreach_exports::<Error, _>(&mut |i| {
            let i2 = Arc::clone(&i.name);
            if outp.iter().any(|j| i.name == *j) {
                Err(Error {
                    span: i.span,
                    kind: Pek::PatternDupIdent(i2),
                })
            } else {
                outp.push(i2);
                Ok(())
            }
        })
    }
}
#[derive(Clone, Debug, PartialEq)]
pub enum Pattern {
Ignore(EvEqSourceSpan),
Name(EvEqSourceSpan, Arc<str>),
Name(TaggedIdent),
Record(Record<Pattern>),
}
impl Pattern {
pub fn alloc_slots(&self) -> usize {
impl PatternTrait for Pattern {
fn foreach_exports<'i, E, F>(&'i self, f: &mut F) -> Result<(), E>
where
F: FnMut(&'i TaggedIdent) -> Result<(), E>,
{
match self {
Pattern::Ignore(_) => 0,
Pattern::Name(_, x) => {
assert!(!x.is_empty());
1
}
Pattern::Record(rcd) => rcd.fields.iter().map(|(_, i)| i.alloc_slots()).sum(),
Pattern::Ignore(_) => Ok(()),
Pattern::Name(i) => f(i),
Pattern::Record(xs) => xs.0[..].iter().try_for_each(|i| i.2.foreach_exports(f)),
}
}
}
@ -48,7 +97,10 @@ impl MaybeParse for Pattern {
use lex::TokenKind as Tk;
let ret = match kind {
Tk::PatOut(nam) if nam.is_empty() => Pattern::Ignore(tok_span),
Tk::PatOut(nam) => Pattern::Name(tok_span, nam),
Tk::PatOut(name) => Pattern::Name(TaggedIdent {
span: tok_span,
name,
}),
Tk::LBrace => return Record::parse(env).map(|i| Some(Pattern::Record(i))),
_ => return Ok(None),
};
@ -56,3 +108,41 @@ impl MaybeParse for Pattern {
Ok(Some(ret))
}
}
/// A pattern together with an optional type annotation.
// NOTE(review): "MT" presumably stands for "maybe typed" — confirm.
#[derive(Clone, Debug)]
pub struct PatternMT {
/// The pattern itself.
pub pat: Pattern,
/// The annotation expression and its source span; `None` when the pattern is not
/// followed by a `DubColon` token.
pub pty: Option<Box<(EvEqSourceSpan, Expr)>>,
}
impl PatternTrait for PatternMT {
#[inline(always)]
fn foreach_exports<'i, E, F>(&'i self, f: &mut F) -> Result<(), E>
where
F: FnMut(&'i TaggedIdent) -> Result<(), E>,
{
self.pat.foreach_exports(f)
}
}
impl MaybeParse for PatternMT {
    const DFL_CTX: ErrorCtx = ErrorCtx::Pattern;

    /// Parses a pattern and, if a `DubColon` token follows, the type expression after it.
    ///
    /// Yields `Ok(None)` when no pattern starts at the current position. Fails when the
    /// pattern exports the same identifier twice or when the type expression does not parse.
    fn maybe_parse(env: &mut ParseEnv<'_>) -> Result<Option<Self>, Error> {
        let pat = none_up!(Pattern::maybe_parse(env)?);

        // check for name collisions; the collected names themselves are not needed
        let mut seen_names = Vec::new();
        pat.extract_exports(&mut seen_names)?;

        let pty = match env.lxr.got(Tok::DubColon) {
            Some(_) => {
                let ty_start = env.lxr.offset();
                let ty_expr = Expr::parse(env)?;
                let ty_end = env.lxr.offset();
                Some(Box::new(((ty_start..ty_end).into(), ty_expr)))
            }
            None => None,
        };

        Ok(Some(PatternMT { pat, pty }))
    }
}

View file

@ -13,19 +13,18 @@ use crate::{
};
#[derive(Clone, Debug, PartialEq)]
pub struct Record<V> {
pub span: EvEqSourceSpan,
pub fields: Vec<(Option<Arc<str>>, V)>,
}
pub struct Record<V>(pub Vec<(EvEqSourceSpan, Option<Arc<str>>, V)>);
impl<V: Parse> MaybeParse for Record<V> {
const DFL_CTX: PeCtx = PeCtx::Record;
fn maybe_parse(env: &mut ParseEnv<'_>) -> Pres<Option<Self>> {
let start_span = none_up!(env.lxr.got(Tok::LBrace));
let mut fields: Vec<(Option<Arc<str>>, _)> = Vec::new();
none_up!(env.lxr.got(Tok::LBrace));
let mut fields: Vec<(_, Option<Arc<str>>, _)> = Vec::new();
// warning: this code executes in O(|fields|²)
loop {
let span_start;
let name = {
let mut lxrnxt = env.lxr.clone();
let Token { kind, span } = match lxrnxt.next_in_noeof(PeCtx::Record) {
@ -35,11 +34,12 @@ impl<V: Parse> MaybeParse for Record<V> {
}
Ok(x) => x,
};
span_start = span.offset();
match kind {
Tok::DotIdent(i) => {
if env.lxr.expect(Tok::Assign, PeCtx::Record).is_ok() {
if fields.iter().any(|(j, _)| j.as_ref() == Some(&i)) {
if fields.iter().any(|(_, j, _)| j.as_ref() == Some(&i)) {
return Err(Perr {
span,
kind: Pek::RecordDupIdent(i),
@ -57,14 +57,12 @@ impl<V: Parse> MaybeParse for Record<V> {
}
};
let expr = V::parse(env)?;
let span_end = env.lxr.offset();
env.lxr.expect(Tok::SemiColon, PeCtx::Record)?;
fields.push((name, expr));
fields.push(((span_start..span_end).into(), name, expr));
}
let end_span = env.lxr.expect(Tok::RBrace, PeCtx::Record)?;
Ok(Some(Record {
span: (start_span.offset()..end_span.offset() + end_span.len()).into(),
fields,
}))
env.lxr.expect(Tok::RBrace, PeCtx::Record)?;
Ok(Some(Record(fields)))
}
}