// WaFlesh-v1/crates/wafl-parser/src/parser.rs

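//! Recursive-descent parser that turns the token stream produced by [`crate::lex::Lexer`]
//! into a [`Module`] AST.
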
use alloc::collections::BTreeMap;
use bitflags::bitflags;
use core::iter::Peekable;
use peeking_take_while::PeekableExt as _;
pub use string_cache::DefaultAtom as Atom;
use crate::lex::{Lexer, Location, Token};
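/// A parsed module: its entries, keyed by name.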
#[derive(Clone, Debug)]
pub struct Module {
    pub entries: BTreeMap<Atom, Entry>,
}
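// Attribute flags attached to an entry.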
bitflags! {
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub struct EntryFlags: u8 {
        const PUBLIC = 0x01;
        const FINAL = 0x02;
        const CTRLFE = 0x04;
    }
}
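/// A single named entry of a [`Module`].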
#[derive(Clone, Debug)]
pub struct Entry {
    /// Location of the entry's name.
    pub nloc: Location,
    /// Argument names. Technically this is a set, but that would just waste space here.
    pub args: Box<[Atom]>,
    /// The attached expression; `None` if the entry is only a dummy declaration.
    pub obj: Option<Expression>,
    /// Attribute flags, kept as a bitset to conserve memory.
    pub flags: EntryFlags,
}
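/// An expression: the value side of an entry definition or of an invocation argument.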
#[derive(Clone, Debug)]
pub enum Expression {
    Code {
        cfe: bool,
        data: Box<[Expression]>,
        ret: Option<Box<Expression>>,
    },
    Module(Module),
    Alias(FullIdentifier),
    Integer(i64),
    InvocationSingle(Box<(Expression, Expression)>),
    InvocationMulti {
        obj: Box<Expression>,
        args: BTreeMap<Box<[Atom]>, (Location, Expression)>,
    },
}
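/// A dot-separated identifier, optionally prefixed by a scope reference (`lscoperef`).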
#[derive(Clone, Debug)]
pub struct FullIdentifier {
    pub loc: Location,
    pub lscoperef: u32,
    pub idents: Box<[Atom]>,
}
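/// A dot-separated identifier path.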
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Path {
    pub loc: Location,
    pub idents: Box<[Atom]>,
}
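/// A parse error together with the source location at which it occurred.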
pub struct Error {
    pub loc: Location,
    pub kind: ErrorKind,
}
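/// The ways in which parsing can fail.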
pub enum ErrorKind {
    UnexpectedEof,
    UnexpectedTrail,
    InvalidIdentifier,
    DuplicateIdentifier,
    Unexpected(&'static str),
    Unknown,
}
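/// Internal parser state: a peekable lexer plus the location reported for end-of-input errors.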
#[derive(Clone)]
struct ParserContext<'a> {
    pklx: Peekable<Lexer<'a>>,
    eof_loc: Location,
}
impl<'a> ParserContext<'a> {
    /// Returns the location of the next token, or the end-of-input location if none remain.
    /// This is idempotent: it only peeks and never consumes a token.
    fn peek_loc(&mut self) -> Location {
        self.pklx.peek().map_or(self.eof_loc, |&(loc, _)| loc)
    }
    fn make_eof(&self) -> Error {
        Error {
            loc: self.eof_loc,
            kind: ErrorKind::UnexpectedEof,
        }
    }
    fn expect_token(&mut self, tok_exp: Token<'a>, descr: &'static str) -> Result<Location, Error> {
        let (loc, tok) = self.pklx.next().ok_or_else(|| self.make_eof())?;
        if let Token::Unknown(_) = tok {
            Err(Error {
                loc,
                kind: ErrorKind::Unknown,
            })
        } else if tok == tok_exp {
            Ok(loc)
        } else {
            Err(Error {
                loc,
                kind: ErrorKind::Unexpected(descr),
            })
        }
    }
    fn maybe_eat_token(&mut self, tok: Token<'a>) -> Option<Location> {
        self.pklx.next_if(|(_, t)| t == &tok).map(|(loc, _)| loc)
    }
}
impl Path {
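    /// Parses a dot-separated identifier path; each identifier is only consumed from the
    /// lexer once it has been accepted.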
    fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
        let pathloc = ctx.peek_loc();
        let mut ret = Vec::new();
        loop {
            // this contraption makes this parser particularly nice, as it only consumes
            // lexer tokens when it can actually use them.
            let mut tmpctx = (*ctx).clone();
            let (loc, tok) = tmpctx.pklx.next().ok_or_else(|| tmpctx.make_eof())?;
            ret.push(match tok {
                Token::Unknown(_) => Err(Error {
                    loc,
                    kind: ErrorKind::Unknown,
                }),
                Token::Identifier(ident) => Ok(Atom::from(ident)),
                _ => Err(Error {
                    loc,
                    kind: ErrorKind::Unexpected("identifier"),
                }),
            }?);
            // commit the temporary context only now that the identifier was accepted,
            // so the caller's context stays untouched on error and the parser can
            // somewhat recover.
            *ctx = tmpctx;
            if ctx.maybe_eat_token(Token::Dot).is_none() {
                break Ok(Path {
                    loc: pathloc,
                    idents: ret.into_boxed_slice(),
                });
            }
        }
    }
}
impl FullIdentifier {
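    /// Parses an optional scope reference followed by an identifier path, consuming tokens
    /// only if the whole identifier parses successfully.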
    fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
        // this contraption makes this parser particularly nice, as it only consumes
        // lexer tokens when it can actually use them.
        let mut tmpctx = (*ctx).clone();
        let loc = tmpctx.peek_loc();
        let lscoperef = if let Some((_, Token::ScopeRef(lscoperef))) = tmpctx
            .pklx
            .next_if(|(_, t)| matches!(t, Token::ScopeRef(_)))
        {
            lscoperef.get()
        } else {
            0
        };
        let Path { idents, .. } = Path::parse_high(&mut tmpctx)?;
        *ctx = tmpctx;
        Ok(FullIdentifier {
            loc,
            lscoperef,
            idents,
        })
    }
}
impl Expression {
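    /// Parses one expression: a braced code block, an integer, a nested module, or an alias,
    /// optionally followed by invocation arguments.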
    fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
        let mut cfe = false;
        for (_, i) in ctx.pklx.peeking_take_while(|(_, t)| t.is_cobj_attr()) {
            match i {
                Token::CtrlFlowEdit => cfe = true,
                _ => unimplemented!(),
            }
        }
        let obj = match ctx.pklx.peek() {
            None => Err(ctx.make_eof()),
            Some((_, Token::OpenBrace)) => {
                ctx.pklx.next();
                let mut codata = Vec::new();
                let mut ret = None;
                while ctx.maybe_eat_token(Token::CloseBrace).is_none() {
                    let expr = Expression::parse_high(ctx)?;
                    if ctx.maybe_eat_token(Token::SemiColon).is_some() {
                        codata.push(expr);
                    } else {
                        ret = Some(Box::new(expr));
                        // we won't reach the loop header again, so eat the closing brace here.
                        ctx.expect_token(Token::CloseBrace, /* { */ "}")?;
                        break;
                    }
                }
                Ok(Expression::Code {
                    cfe,
                    data: codata.into_boxed_slice(),
                    ret,
                })
            }
            Some(&(loc, _)) if cfe => Err(Error {
                loc,
                kind: ErrorKind::Unexpected("braced statement set"),
            }),
            // we have no proper recursive delimiting, so fall back to the parsing we also
            // use for other such items and expect a single code object descriptor:
            // an integer, a module, or an alias.
            Some(&(_, Token::Integer(i))) => {
                ctx.pklx.next();
                Ok(Expression::Integer(i))
            }
            Some((_, Token::Module)) => {
                ctx.pklx.next();
                ctx.expect_token(Token::OpenBrace, "{")?;
                let m = Module::parse_high(ctx)?;
                ctx.expect_token(Token::CloseBrace, "}")?;
                Ok(Expression::Module(m))
            }
            _ => FullIdentifier::parse_high(ctx).map(Expression::Alias),
        }?;
        Ok(if let Some(&(_, Token::Comma)) = ctx.pklx.peek() {
            let mut args = BTreeMap::new();
            while ctx.maybe_eat_token(Token::Comma).is_some() {
                let Path { loc, idents } = Path::parse_high(ctx)?;
                ctx.expect_token(Token::Assign, "=")?;
                let obj = Expression::parse_high(ctx)?;
                if args.insert(idents, (loc, obj)).is_some() {
                    return Err(Error {
                        loc,
                        kind: ErrorKind::DuplicateIdentifier,
                    });
                }
            }
            Expression::InvocationMulti {
                obj: Box::new(obj),
                args,
            }
        } else if matches!(ctx.pklx.peek(), Some(&(_, Token::SemiColon)) | None) {
            obj
        } else {
            Expression::InvocationSingle(Box::new((obj, Expression::parse_high(ctx)?)))
        })
    }
}
impl Module {
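    /// Parses entry definitions until end of input or a token that cannot start an entry
    /// (e.g. the closing brace of a surrounding nested module).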
    fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
        let mut ret = Module {
            entries: Default::default(),
        };
        loop {
            // parse entry
            // entry attributes
            let mut flags = EntryFlags::empty();
            for (_, i) in ctx.pklx.peeking_take_while(|(_, t)| t.is_def_attr()) {
                flags |= match i {
                    Token::Final => EntryFlags::FINAL,
                    Token::Public => EntryFlags::PUBLIC,
                    Token::CtrlFlowEdit => EntryFlags::CTRLFE,
                    _ => unimplemented!(),
                };
            }
            // entry name
            let (nloc, name) = match ctx.pklx.next_if(|(_, t)| matches!(t, Token::Identifier(_))) {
                Some((loc, Token::Identifier(ident))) => {
                    let ident = Atom::from(ident);
                    if ret.entries.contains_key(&ident) {
                        return Err(Error {
                            loc,
                            kind: ErrorKind::DuplicateIdentifier,
                        });
                    }
                    (loc, ident)
                }
                Some(_) => unreachable!(),
                None => {
                    if flags.is_empty() {
                        break;
                    }
                    // attributes were parsed, so an entry name must follow
                    return Err(Error {
                        loc: ctx.peek_loc(),
                        kind: if ctx.pklx.peek().is_some() {
                            ErrorKind::Unexpected("entry name")
                        } else {
                            ErrorKind::UnexpectedEof
                        },
                    });
                }
            };
            // optional: arguments
            let mut args = Vec::new();
            if ctx.maybe_eat_token(Token::OpenBrace).is_some() {
                loop {
                    let (loc, tok) = ctx.pklx.next().ok_or_else(|| ctx.make_eof())?;
                    let ident = match tok {
                        Token::Unknown(_) => Err(Error {
                            loc,
                            kind: ErrorKind::Unknown,
                        }),
                        Token::Identifier(ident) => Ok(Atom::from(ident)),
                        Token::CloseBrace => break,
                        _ => Err(Error {
                            loc,
                            kind: ErrorKind::Unexpected("argument name"),
                        }),
                    }?;
                    if args.contains(&ident) {
                        return Err(Error {
                            loc,
                            kind: ErrorKind::DuplicateIdentifier,
                        });
                    }
                    args.push(ident);
                }
            }
            let obj = if ctx.maybe_eat_token(Token::Assign).is_some() {
                let obj = Expression::parse_high(ctx)?;
                if let Expression::Code { cfe: true, .. } = obj {
                    flags |= EntryFlags::CTRLFE;
                }
                Some(obj)
            } else {
                None
            };
            ret.entries.insert(
                name,
                Entry {
                    nloc,
                    args: args.into_boxed_slice(),
                    obj,
                    flags,
                },
            );
            ctx.expect_token(Token::SemiColon, ";")?;
        }
        Ok(ret)
    }
}
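/// Parses a whole source document into a [`Module`], erroring out if any tokens remain
/// after the top-level module.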
pub fn parse_s2doc(fileid: u32, s: &str) -> Result<Module, Error> {
    let mut ctx = ParserContext {
        pklx: (Lexer {
            loc: Location { fileid, offset: 0 },
            s,
        })
        .peekable(),
        eof_loc: Location {
            fileid,
            offset: s.len().try_into().expect("file too big"),
        },
    };
    let ret = Module::parse_high(&mut ctx)?;
    if let Some((loc, _)) = ctx.pklx.next() {
        Err(Error {
            loc,
            kind: ErrorKind::UnexpectedTrail,
        })
    } else {
        Ok(ret)
    }
}