398 lines
12 KiB
Rust
398 lines
12 KiB
Rust
use alloc::collections::BTreeMap;
|
|
use bitflags::bitflags;
|
|
use core::iter::Peekable;
|
|
use peeking_take_while::PeekableExt as _;
|
|
pub use string_cache::DefaultAtom as Atom;
|
|
|
|
use crate::lex::{Lexer, Location, Token};
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub struct Module {
|
|
pub entries: BTreeMap<Atom, Entry>,
|
|
}
|
|
|
|
bitflags! {
    /// Attribute flags attached to a module entry.
    #[derive(Debug, Clone, Copy, Eq, PartialEq)]
    pub struct EntryFlags: u8 {
        /// Set when the entry carries the `Public` attribute token.
        const PUBLIC = 1 << 0;
        /// Set when the entry carries the `Final` attribute token.
        const FINAL = 1 << 1;
        /// Set when the entry (or its code object) carries the
        /// control-flow-edit attribute token.
        const CTRLFE = 1 << 2;
    }
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub struct Entry {
|
|
// location of the name
|
|
pub nloc: Location,
|
|
// technically this is a set, but that would just waste space here...
|
|
pub args: Box<[Atom]>,
|
|
pub obj: Object,
|
|
// we use a bitflag here to conserve memory
|
|
pub flags: EntryFlags,
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub enum Object {
|
|
Code {
|
|
cfe: bool,
|
|
data: Box<[Statement]>,
|
|
ret: Option<Statement>,
|
|
},
|
|
Module(Module),
|
|
Alias(FullIdentifier),
|
|
Integer(i64),
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub struct FullIdentifier {
|
|
pub loc: Location,
|
|
pub lscoperef: u32,
|
|
pub idents: Box<[Atom]>,
|
|
}
|
|
|
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
|
pub struct Path {
|
|
pub loc: Location,
|
|
pub idents: Box<[Atom]>,
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub enum StmtArgs {
|
|
Single(Box<Object>),
|
|
Multi(BTreeMap<Box<[Atom]>, (Location, Object)>),
|
|
}
|
|
|
|
impl Default for StmtArgs {
|
|
#[inline]
|
|
fn default() -> Self {
|
|
StmtArgs::Multi(BTreeMap::new())
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub struct Statement {
|
|
pub sel: FullIdentifier,
|
|
pub args: StmtArgs,
|
|
}
|
|
|
|
pub struct Error {
|
|
pub loc: Location,
|
|
pub kind: ErrorKind,
|
|
}
|
|
|
|
pub enum ErrorKind {
|
|
UnexpectedEof,
|
|
UnexpectedTrail,
|
|
InvalidIdentifier,
|
|
DuplicateIdentifier,
|
|
Unexpected(&'static str),
|
|
Unknown,
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
struct ParserContext<'a> {
|
|
pklx: Peekable<Lexer<'a>>,
|
|
eof_loc: Location,
|
|
}
|
|
|
|
impl<'a> ParserContext<'a> {
|
|
// this function is idempotent
|
|
fn peek_loc(&mut self) -> Location {
|
|
self.pklx.peek().map_or(self.eof_loc, |&(loc, _)| loc)
|
|
}
|
|
|
|
fn make_eof(&self) -> Error {
|
|
Error {
|
|
loc: self.eof_loc,
|
|
kind: ErrorKind::UnexpectedEof,
|
|
}
|
|
}
|
|
|
|
fn expect_token(&mut self, tok_exp: Token<'a>, descr: &'static str) -> Result<Location, Error> {
|
|
let (loc, tok) = self.pklx.next().ok_or_else(|| self.make_eof())?;
|
|
if let Token::Unknown(_) = tok {
|
|
Err(Error {
|
|
loc,
|
|
kind: ErrorKind::Unknown,
|
|
})
|
|
} else if tok == tok_exp {
|
|
Ok(loc)
|
|
} else {
|
|
Err(Error {
|
|
loc,
|
|
kind: ErrorKind::Unexpected(descr),
|
|
})
|
|
}
|
|
}
|
|
|
|
fn maybe_eat_token(&mut self, tok: Token<'a>) -> Option<Location> {
|
|
self.pklx.next_if(|(_, t)| t == &tok).map(|(loc, _)| loc)
|
|
}
|
|
}
|
|
|
|
impl Path {
|
|
fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
|
|
let pathloc = ctx.peek_loc();
|
|
let mut ret = Vec::new();
|
|
loop {
|
|
// this contraption makes this parser particularly nice, as it only consumes lexer tokens
|
|
// when it can actually use them.
|
|
let mut tmpctx = (*ctx).clone();
|
|
let (loc, tok) = tmpctx.pklx.next().ok_or_else(|| tmpctx.make_eof())?;
|
|
ret.push(match tok {
|
|
Token::Unknown(_) => Err(Error {
|
|
loc,
|
|
kind: ErrorKind::Unknown,
|
|
}),
|
|
Token::Identifier(ident) => Ok(Atom::from(ident)),
|
|
_ => Err(Error {
|
|
loc,
|
|
kind: ErrorKind::Unexpected("identifier"),
|
|
}),
|
|
}?);
|
|
// make sure the parser can somewhat recover...
|
|
*ctx = tmpctx;
|
|
|
|
if ctx.maybe_eat_token(Token::Dot).is_none() {
|
|
break Ok(Path {
|
|
loc: pathloc,
|
|
idents: ret.into_boxed_slice(),
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl FullIdentifier {
|
|
fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
|
|
// this contraption makes this parser particularly nice, as it only consumes lexer tokens
|
|
// when it can actually use them.
|
|
let mut tmpctx = (*ctx).clone();
|
|
let loc = tmpctx.peek_loc();
|
|
let lscoperef = if let Some((_, Token::ScopeRef(lscoperef))) = tmpctx
|
|
.pklx
|
|
.next_if(|(_, t)| matches!(t, Token::ScopeRef(_)))
|
|
{
|
|
lscoperef.get()
|
|
} else {
|
|
0
|
|
};
|
|
let Path { idents, .. } = Path::parse_high(&mut tmpctx)?;
|
|
*ctx = tmpctx;
|
|
Ok(FullIdentifier {
|
|
loc,
|
|
lscoperef,
|
|
idents,
|
|
})
|
|
}
|
|
}
|
|
|
|
impl Statement {
|
|
fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
|
|
let sel = FullIdentifier::parse_high(ctx)?;
|
|
|
|
let args = if let Some(&(_, Token::Comma)) = ctx.pklx.peek() {
|
|
let mut args = BTreeMap::new();
|
|
while ctx.maybe_eat_token(Token::Comma).is_some() {
|
|
let Path { loc, idents } = Path::parse_high(ctx)?;
|
|
ctx.expect_token(Token::Assign, "=")?;
|
|
let obj = Object::parse_high(ctx)?;
|
|
if args.insert(idents, (loc, obj)).is_some() {
|
|
return Err(Error {
|
|
loc,
|
|
kind: ErrorKind::DuplicateIdentifier,
|
|
});
|
|
}
|
|
}
|
|
StmtArgs::Multi(args)
|
|
} else if matches!(ctx.pklx.peek(), Some(&(_, Token::SemiColon)) | None) {
|
|
// do nothing, there won't be an argument
|
|
// this escape hatch is a bit ugly, idk...
|
|
StmtArgs::default()
|
|
} else {
|
|
StmtArgs::Single(Box::new(Object::parse_high(ctx)?))
|
|
};
|
|
|
|
Ok(Statement { sel, args })
|
|
}
|
|
}
|
|
|
|
impl Object {
|
|
fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
|
|
let mut cfe = false;
|
|
for (_, i) in ctx.pklx.peeking_take_while(|(_, t)| t.is_cobj_attr()) {
|
|
match i {
|
|
Token::CtrlFlowEdit => cfe = true,
|
|
_ => unimplemented!(),
|
|
}
|
|
}
|
|
|
|
match ctx.pklx.peek() {
|
|
None => Err(ctx.make_eof()),
|
|
Some((_, Token::OpenBrace)) => {
|
|
ctx.pklx.next();
|
|
let mut codata = Vec::new();
|
|
let mut ret = None;
|
|
while ctx.maybe_eat_token(Token::CloseBrace).is_none() {
|
|
let stmt = Statement::parse_high(ctx)?;
|
|
if ctx.maybe_eat_token(Token::SemiColon).is_some() {
|
|
codata.push(stmt);
|
|
} else {
|
|
ret = Some(stmt);
|
|
// we don't go back to the loop header, so do it here.
|
|
ctx.expect_token(Token::CloseBrace, /* { */ "}")?;
|
|
break;
|
|
}
|
|
}
|
|
Ok(Object::Code {
|
|
cfe,
|
|
data: codata.into_boxed_slice(),
|
|
ret,
|
|
})
|
|
}
|
|
Some(&(loc, _)) if cfe => Err(Error {
|
|
loc,
|
|
kind: ErrorKind::Unexpected("braced statement set"),
|
|
}),
|
|
// we have no proper recursive delimiting,
|
|
// so just resort to the parsing which we also do for other such items,
|
|
// expect a single code object descriptor => alias, or number.
|
|
Some(&(_, Token::Integer(i))) => {
|
|
ctx.pklx.next();
|
|
Ok(Object::Integer(i))
|
|
}
|
|
Some((_, Token::Module)) => {
|
|
ctx.pklx.next();
|
|
ctx.expect_token(Token::OpenBrace, "{")?;
|
|
let m = Module::parse_high(ctx)?;
|
|
ctx.expect_token(Token::CloseBrace, "}")?;
|
|
Ok(Object::Module(m))
|
|
}
|
|
_ => FullIdentifier::parse_high(ctx).map(Object::Alias),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Module {
|
|
fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
|
|
let mut ret = Module {
|
|
entries: Default::default(),
|
|
};
|
|
|
|
loop {
|
|
// parse entry
|
|
|
|
// entry attributes
|
|
let mut flags = EntryFlags::empty();
|
|
for (_, i) in ctx.pklx.peeking_take_while(|(_, t)| t.is_def_attr()) {
|
|
flags |= match i {
|
|
Token::Final => EntryFlags::FINAL,
|
|
Token::Public => EntryFlags::PUBLIC,
|
|
Token::CtrlFlowEdit => EntryFlags::CTRLFE,
|
|
_ => unimplemented!(),
|
|
}
|
|
}
|
|
|
|
// entry name
|
|
let (nloc, name) = match ctx.pklx.next_if(|(_, t)| matches!(t, Token::Identifier(_))) {
|
|
Some((loc, Token::Identifier(ident))) => {
|
|
let ident = Atom::from(ident);
|
|
if ret.entries.contains_key(&ident) {
|
|
return Err(Error {
|
|
loc,
|
|
kind: ErrorKind::DuplicateIdentifier,
|
|
});
|
|
}
|
|
(loc, ident)
|
|
}
|
|
Some(_) => unreachable!(),
|
|
None => {
|
|
if flags.is_empty() {
|
|
break;
|
|
}
|
|
return Err(Error {
|
|
loc: ctx.peek_loc(),
|
|
kind: ErrorKind::UnexpectedEof,
|
|
});
|
|
}
|
|
};
|
|
|
|
// optional: arguments
|
|
let mut args = Vec::new();
|
|
if ctx.maybe_eat_token(Token::OpenBrace).is_some() {
|
|
loop {
|
|
let (loc, tok) = ctx.pklx.next().ok_or(Error {
|
|
loc: ctx.eof_loc,
|
|
kind: ErrorKind::UnexpectedEof,
|
|
})?;
|
|
let ident = match tok {
|
|
Token::Unknown(_) => Err(Error {
|
|
loc,
|
|
kind: ErrorKind::Unknown,
|
|
}),
|
|
Token::Identifier(ident) => Ok(Atom::from(ident)),
|
|
Token::CloseBrace => break,
|
|
_ => Err(Error {
|
|
loc,
|
|
kind: ErrorKind::Unexpected("argument name"),
|
|
}),
|
|
}?;
|
|
if args.contains(&ident) {
|
|
return Err(Error {
|
|
loc,
|
|
kind: ErrorKind::DuplicateIdentifier,
|
|
});
|
|
}
|
|
args.push(ident);
|
|
}
|
|
}
|
|
|
|
ctx.expect_token(Token::Assign, "=")?;
|
|
|
|
let obj = Object::parse_high(ctx)?;
|
|
if let Object::Code { cfe: true, .. } = obj {
|
|
flags |= EntryFlags::CTRLFE;
|
|
}
|
|
|
|
// code object
|
|
ret.entries.insert(
|
|
name,
|
|
Entry {
|
|
nloc,
|
|
args: args.into_boxed_slice(),
|
|
obj,
|
|
flags,
|
|
},
|
|
);
|
|
|
|
ctx.expect_token(Token::SemiColon, ";")?;
|
|
}
|
|
Ok(ret)
|
|
}
|
|
}
|
|
|
|
pub fn parse_s2doc(fileid: u32, s: &str) -> Result<Module, Error> {
|
|
let mut ctx = ParserContext {
|
|
pklx: (Lexer {
|
|
loc: Location { fileid, offset: 0 },
|
|
s,
|
|
})
|
|
.peekable(),
|
|
eof_loc: Location {
|
|
fileid,
|
|
offset: s.len().try_into().expect("file too big"),
|
|
},
|
|
};
|
|
|
|
let ret = Module::parse_high(&mut ctx)?;
|
|
|
|
if let Some((loc, _)) = ctx.pklx.next() {
|
|
Err(Error {
|
|
loc,
|
|
kind: ErrorKind::UnexpectedTrail,
|
|
})
|
|
} else {
|
|
Ok(ret)
|
|
}
|
|
}
|