WaFlesh-v1/crates/wafl-parser/src/parser.rs
2023-05-20 00:52:51 +02:00

367 lines
11 KiB
Rust

use alloc::collections::BTreeMap;
use bitflags::bitflags;
use core::iter::Peekable;
use peeking_take_while::PeekableExt as _;
pub use string_cache::DefaultAtom as Atom;
use crate::lex::{Lexer, Location, Token};
/// A parsed document: the set of top-level entries, keyed by entry name.
#[derive(Clone, Debug)]
pub struct Document {
/// All entries of the document, indexed by their (unique) name.
pub entries: BTreeMap<Atom, Entry>,
}
bitflags! {
/// Attributes which can be attached to an entry definition.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct EntryFlags: u8 {
/// Set when the entry definition is prefixed with a `Token::Public` attribute.
const PUBLIC = 0x01;
/// Set when the entry definition is prefixed with a `Token::Final` attribute.
const FINAL = 0x02;
}
}
/// A single top-level definition of a document.
#[derive(Clone, Debug)]
pub struct Entry {
/// Location of the entry's name in the source.
pub nloc: Location,
/// Names of the entry's arguments, in declaration order.
///
/// Technically this is a set (the parser rejects duplicate names),
/// but a dedicated set type would just waste space here.
pub args: Box<[Atom]>,
/// The code object assigned to the entry (the part after the `=`).
pub cobj: CodeObject,
/// Attributes of the entry; a bitflag is used here to conserve memory.
pub flags: EntryFlags,
}
/// The value side of an entry or of a statement argument.
#[derive(Clone, Debug)]
pub enum CodeObject {
/// A brace-delimited set of statements.
Normal {
/// Whether the block was prefixed with a `Token::CtrlFlowEdit` attribute.
cfe: bool,
/// The statements which were terminated by a semicolon, in source order.
data: Box<[Statement]>,
/// The trailing statement which was *not* terminated by a semicolon, if any.
ret: Option<Statement>,
},
/// A single identifier used in place of a code object.
Alias(Identifier),
/// An integer literal.
Integer(i64),
}
/// An identifier as it appeared in the source.
#[derive(Clone, Debug)]
pub struct Identifier {
/// Location of the identifier token.
pub loc: Location,
/// The `lscoperef` value of the underlying `Token::Identifier`.
///
/// The parser requires this to be `0` for entry names, entry argument names
/// and named statement arguments.
// NOTE(review): presumably the number of scopes the reference walks upwards;
// confirm against the lexer.
pub lscoperef: u8,
/// The interned identifier text.
pub ident: Atom,
}
/// The arguments given to a statement.
#[derive(Clone, Debug)]
pub enum StmtArgs {
/// Exactly one, unnamed code object.
Single(Box<CodeObject>),
/// Named arguments (each introduced by a comma), keyed by argument name;
/// the stored location is that of the argument's name.
/// An empty map is also used when a statement has no arguments at all.
Multi(BTreeMap<Atom, (Location, CodeObject)>),
}
impl Default for StmtArgs {
#[inline]
fn default() -> Self {
StmtArgs::Multi(BTreeMap::new())
}
}
/// A single statement: a selector followed by its arguments.
#[derive(Clone, Debug)]
pub struct Statement {
/// The identifier the statement starts with.
pub sel: Identifier,
/// The arguments which follow the selector.
pub args: StmtArgs,
}
/// An error produced while parsing a document.
///
/// `Debug` is derived so that callers can use `unwrap`/`expect` on parse
/// results and print errors with `{:?}`, like every other public type here.
#[derive(Clone, Debug)]
pub struct Error {
    /// Location of the offending token, or the end of the input for
    /// [`ErrorKind::UnexpectedEof`].
    pub loc: Location,
    /// What went wrong.
    pub kind: ErrorKind,
}

/// The reason why parsing failed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ErrorKind {
    /// The input ended although more tokens were required.
    UnexpectedEof,
    /// An identifier was used in a position where its form is not allowed
    /// (a non-zero scope reference, or a `:` inside an entry or entry argument name).
    InvalidIdentifier,
    /// A name was defined more than once (entry name, entry argument,
    /// or named statement argument).
    DuplicateIdentifier,
    /// A different token than the expected one was found;
    /// the payload describes what was expected.
    Unexpected(&'static str),
    /// The lexer produced a `Token::Unknown`.
    Unknown,
}
/// The state shared by all parsing routines.
#[derive(Clone)]
struct ParserContext<'a> {
/// The token stream, wrapped so that the next token can be inspected without consuming it.
pklx: Peekable<Lexer<'a>>,
/// The location reported for errors caused by running out of tokens.
eof_loc: Location,
}
impl<'a> ParserContext<'a> {
fn make_eof(&self) -> Error {
Error {
loc: self.eof_loc,
kind: ErrorKind::UnexpectedEof,
}
}
fn expect_token(&mut self, tok_exp: Token<'a>, descr: &'static str) -> Result<Location, Error> {
let (loc, tok) = self.pklx.next().ok_or_else(|| self.make_eof())?;
if let Token::Unknown(_) = tok {
Err(Error {
loc,
kind: ErrorKind::Unknown,
})
} else if tok == tok_exp {
Ok(loc)
} else {
Err(Error {
loc,
kind: ErrorKind::Unexpected(descr),
})
}
}
}
impl Identifier {
    /// Parses a single identifier token.
    ///
    /// The token is only consumed when it actually is an identifier; in the
    /// error case the token stream is left untouched, and the error describes
    /// the token (or the end of input) which was found instead.
    fn parse_step_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
        let taken = ctx
            .pklx
            .next_if(|(_, t)| matches!(t, Token::Identifier { .. }));
        match taken {
            Some((loc, Token::Identifier { lscoperef, ident })) => Ok(Identifier {
                loc,
                lscoperef,
                ident: Atom::from(ident),
            }),
            // `next_if` only hands out tokens accepted by the predicate above
            Some(_) => unreachable!(),
            // nothing was consumed: inspect the upcoming token to build a fitting error
            None => Err(match ctx.pklx.peek() {
                Some(&(loc, Token::Unknown(_))) => Error {
                    loc,
                    kind: ErrorKind::Unknown,
                },
                Some(&(loc, _)) => Error {
                    loc,
                    kind: ErrorKind::Unexpected("identifier"),
                },
                None => ctx.make_eof(),
            }),
        }
    }
}
impl Statement {
    /// Parses one statement: a selector identifier followed by its arguments.
    ///
    /// The shape of the arguments is decided by the token after the selector:
    /// * a comma starts a list of named arguments (`, name value` repeated),
    /// * a semicolon or the end of input means "no arguments",
    /// * anything else is parsed as a single, unnamed code object.
    fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
        let sel = Identifier::parse_step_high(ctx)?;
        let args = match ctx.pklx.peek() {
            Some(&(_, Token::Comma)) => {
                let mut named = BTreeMap::new();
                while ctx.pklx.next_if(|(_, t)| t == &Token::Comma).is_some() {
                    let key = Identifier::parse_step_high(ctx)?;
                    let key_loc = key.loc;
                    // argument names must not carry a scope reference
                    if key.lscoperef != 0 {
                        return Err(Error {
                            loc: key_loc,
                            kind: ErrorKind::InvalidIdentifier,
                        });
                    }
                    let value = CodeObject::parse_high(ctx)?;
                    let previous = named.insert(key.ident, (key_loc, value));
                    if previous.is_some() {
                        return Err(Error {
                            loc: key_loc,
                            kind: ErrorKind::DuplicateIdentifier,
                        });
                    }
                }
                StmtArgs::Multi(named)
            }
            // the statement ends right here, so there won't be an argument
            Some(&(_, Token::SemiColon)) | None => StmtArgs::default(),
            Some(_) => {
                let single = CodeObject::parse_high(ctx)?;
                StmtArgs::Single(Box::new(single))
            }
        };
        Ok(Statement { sel, args })
    }
}
impl CodeObject {
    /// Parses a code object: optional attributes, followed by either a
    /// brace-delimited set of statements, an integer, or an alias identifier.
    ///
    /// When a `Token::CtrlFlowEdit` attribute was given, only the braced form
    /// is accepted; otherwise the next token is consumed and reported as
    /// `Unexpected("braced statement set")` (or `UnexpectedEof`).
    ///
    /// Panics via `unimplemented!()` if `Token::is_cobj_attr` accepts a token
    /// other than `Token::CtrlFlowEdit`.
    fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
        // leading attributes
        let mut cfe = false;
        for (_, attr) in ctx.pklx.peeking_take_while(|(_, t)| t.is_cobj_attr()) {
            match attr {
                Token::CtrlFlowEdit => cfe = true,
                _ => unimplemented!(),
            }
        }

        let braced = ctx.pklx.next_if(|(_, t)| t == &Token::OpenBrace).is_some();
        if !braced {
            if cfe {
                // attributes are only meaningful on a braced statement set
                return Err(match ctx.pklx.next() {
                    None => ctx.make_eof(),
                    Some((loc, _)) => Error {
                        loc,
                        kind: ErrorKind::Unexpected("braced statement set"),
                    },
                });
            }
            // there is no proper recursive delimiting here, so expect a single
            // code object descriptor: a number, or else an alias.
            return match ctx.pklx.next_if(|(_, t)| matches!(t, Token::Integer(_))) {
                Some((_, Token::Integer(value))) => Ok(CodeObject::Integer(value)),
                _ => Identifier::parse_step_high(ctx).map(CodeObject::Alias),
            };
        }

        // a bunch of statements, up to the closing brace
        let mut stmts = Vec::new();
        let mut ret = None;
        loop {
            if ctx.pklx.next_if(|(_, t)| t == &Token::CloseBrace).is_some() {
                break;
            }
            let stmt = Statement::parse_high(ctx)?;
            if ctx.pklx.next_if(|(_, t)| t == &Token::SemiColon).is_none() {
                // a statement without a terminating semicolon must be the last one
                ret = Some(stmt);
                ctx.expect_token(Token::CloseBrace, "}")?;
                break;
            }
            stmts.push(stmt);
        }
        Ok(CodeObject::Normal {
            cfe,
            data: stmts.into_boxed_slice(),
            ret,
        })
    }
}
pub fn parse_s2doc(fileid: u32, s: &str) -> Result<Document, Error> {
let mut ctx = ParserContext {
pklx: (Lexer {
loc: Location { fileid, offset: 0 },
s,
})
.peekable(),
eof_loc: Location {
fileid,
offset: s.len().try_into().expect("file too big"),
},
};
let mut ret = Document {
entries: Default::default(),
};
loop {
// parse entry
// entry attributes
let mut flags = EntryFlags::empty();
for (_, i) in ctx.pklx.peeking_take_while(|(_, t)| t.is_def_attr()) {
match i {
Token::Final => flags |= EntryFlags::FINAL,
Token::Public => flags |= EntryFlags::PUBLIC,
_ => unimplemented!(),
}
}
let flags = flags;
// entry name
let (nloc, name) = match ctx.pklx.next() {
Some((loc, Token::Identifier { lscoperef, ident })) => {
if lscoperef != 0 || ident.contains(':') {
return Err(Error {
loc,
kind: ErrorKind::InvalidIdentifier,
});
}
let tmp = Atom::from(ident);
if ret.entries.contains_key(&tmp) {
return Err(Error {
loc,
kind: ErrorKind::DuplicateIdentifier,
});
}
(loc, tmp)
}
Some((loc, Token::Unknown(_))) => {
return Err(Error {
loc,
kind: ErrorKind::Unknown,
})
}
Some((loc, _)) => {
return Err(Error {
loc,
kind: ErrorKind::Unexpected("name"),
})
}
None if flags.is_empty() => break,
None => {
return Err(Error {
loc: ctx.eof_loc,
kind: ErrorKind::UnexpectedEof,
})
}
};
// optional: arguments
let mut args = Vec::new();
if ctx.pklx.next_if(|(_, t)| t == &Token::OpenBrace).is_some() {
loop {
let (loc, tok) = ctx.pklx.next().ok_or(Error {
loc: ctx.eof_loc,
kind: ErrorKind::UnexpectedEof,
})?;
args.push(match tok {
Token::Unknown(_) => {
return Err(Error {
loc,
kind: ErrorKind::Unknown,
})
}
Token::Identifier { lscoperef, ident } => {
if lscoperef != 0 || ident.contains(':') {
return Err(Error {
loc,
kind: ErrorKind::InvalidIdentifier,
});
}
let tmp = Atom::from(ident);
if args.contains(&tmp) {
return Err(Error {
loc,
kind: ErrorKind::DuplicateIdentifier,
});
}
tmp
}
Token::CloseBrace => break,
_ => {
return Err(Error {
loc,
kind: ErrorKind::Unexpected("argument name"),
})
}
});
}
}
ctx.expect_token(Token::Assign, "=")?;
// code object
ret.entries.insert(
name,
Entry {
nloc,
args: args.into_boxed_slice(),
cobj: CodeObject::parse_high(&mut ctx)?,
flags,
},
);
ctx.expect_token(Token::SemiColon, ";")?;
}
return Ok(ret);
}