use alloc::collections::BTreeMap; use bitflags::bitflags; use core::iter::Peekable; use peeking_take_while::PeekableExt as _; pub use string_cache::DefaultAtom as Atom; use crate::lex::{Lexer, Location, Token}; #[derive(Clone, Debug)] pub struct Document { pub entries: BTreeMap, } bitflags! { #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct EntryFlags: u8 { const PUBLIC = 0x01; const FINAL = 0x02; } } #[derive(Clone, Debug)] pub struct Entry { // location of the name pub nloc: Location, // technically this is a set, but that would just waste space here... pub args: Box<[Atom]>, pub cobj: CodeObject, // we use a bitflag here to conserve memory pub flags: EntryFlags, } #[derive(Clone, Debug)] pub enum CodeObject { Normal { cfe: bool, data: Box<[Statement]>, ret: Option, }, Alias(Identifier), Integer(i64), } #[derive(Clone, Debug)] pub struct Identifier { pub loc: Location, pub lscoperef: u8, pub ident: Atom, } #[derive(Clone, Debug)] pub enum StmtArgs { Single(Box), Multi(BTreeMap), } impl Default for StmtArgs { #[inline] fn default() -> Self { StmtArgs::Multi(BTreeMap::new()) } } #[derive(Clone, Debug)] pub struct Statement { pub sel: Identifier, pub args: StmtArgs, } pub struct Error { pub loc: Location, pub kind: ErrorKind, } pub enum ErrorKind { UnexpectedEof, InvalidIdentifier, DuplicateIdentifier, Unexpected(&'static str), Unknown, } #[derive(Clone)] struct ParserContext<'a> { pklx: Peekable>, eof_loc: Location, } impl<'a> ParserContext<'a> { fn make_eof(&self) -> Error { Error { loc: self.eof_loc, kind: ErrorKind::UnexpectedEof, } } fn expect_token(&mut self, tok_exp: Token<'a>, descr: &'static str) -> Result { let (loc, tok) = self.pklx.next().ok_or_else(|| self.make_eof())?; if let Token::Unknown(_) = tok { Err(Error { loc, kind: ErrorKind::Unknown, }) } else if tok == tok_exp { Ok(loc) } else { Err(Error { loc, kind: ErrorKind::Unexpected(descr), }) } } } impl Identifier { fn parse_step_high(ctx: &mut ParserContext<'_>) -> Result { // this contraption makes this parser particularly nice, as it only consumes lexer tokens // when it can actually use them. let (loc, tok) = ctx .pklx .next_if(|(_, t)| matches!(t, Token::Identifier { .. })) .ok_or_else(|| match ctx.pklx.peek() { None => ctx.make_eof(), Some(&(loc, Token::Unknown(_))) => Error { loc, kind: ErrorKind::Unknown, }, Some(&(loc, _)) => Error { loc, kind: ErrorKind::Unexpected("identifier"), }, })?; if let Token::Identifier { lscoperef, ident } = tok { Ok(Identifier { loc, lscoperef, ident: Atom::from(ident), }) } else { unreachable!(); } } } impl Statement { fn parse_high(ctx: &mut ParserContext<'_>) -> Result { let sel = Identifier::parse_step_high(ctx)?; let args = if let Some(&(_, Token::Comma)) = ctx.pklx.peek() { let mut args = BTreeMap::new(); while ctx.pklx.next_if(|(_, t)| t == &Token::Comma).is_some() { let Identifier { loc, lscoperef, ident, } = Identifier::parse_step_high(ctx)?; if lscoperef != 0 { return Err(Error { loc, kind: ErrorKind::InvalidIdentifier, }); } let cobj = CodeObject::parse_high(ctx)?; if args.insert(ident, (loc, cobj)).is_some() { return Err(Error { loc, kind: ErrorKind::DuplicateIdentifier, }); } } StmtArgs::Multi(args) } else if matches!(ctx.pklx.peek(), Some(&(_, Token::SemiColon)) | None) { // do nothing, there won't be an argument // this escape hatch is a bit ugly, idk... StmtArgs::default() } else { StmtArgs::Single(Box::new(CodeObject::parse_high(ctx)?)) }; Ok(Statement { sel, args }) } } impl CodeObject { fn parse_high(ctx: &mut ParserContext<'_>) -> Result { let mut cfe = false; for (_, i) in ctx.pklx.peeking_take_while(|(_, t)| t.is_cobj_attr()) { match i { Token::CtrlFlowEdit => cfe = true, _ => unimplemented!(), } } // either we need to deal with a bunch of stmts, or just a single one (basically alias) if ctx.pklx.next_if(|(_, t)| t == &Token::OpenBrace).is_some() { let mut codata = Vec::new(); let mut ret = None; while ctx.pklx.next_if(|(_, t)| t == &Token::CloseBrace).is_none() { let stmt = Statement::parse_high(ctx)?; if ctx.pklx.next_if(|(_, t)| t == &Token::SemiColon).is_some() { codata.push(stmt); } else { ret = Some(stmt); // we don't go back to the loop header, so do it here. ctx.expect_token(Token::CloseBrace, /* { */ "}")?; break; } } Ok(CodeObject::Normal { cfe, data: codata.into_boxed_slice(), ret, }) } else if !cfe { // we have no proper recursive delimiting, // so just resort to the parsing which we also do for other such items, // expect a single code object descriptor => alias, or number. if let Some((_, Token::Integer(i))) = ctx.pklx.next_if(|(_, t)| matches!(t, Token::Integer(_))) { Ok(CodeObject::Integer(i)) } else { Identifier::parse_step_high(ctx).map(CodeObject::Alias) } } else { Err(match ctx.pklx.next() { Some((loc, _)) => Error { loc, kind: ErrorKind::Unexpected("braced statement set"), }, None => ctx.make_eof(), }) } } } pub fn parse_s2doc(fileid: u32, s: &str) -> Result { let mut ctx = ParserContext { pklx: (Lexer { loc: Location { fileid, offset: 0 }, s, }) .peekable(), eof_loc: Location { fileid, offset: s.len().try_into().expect("file too big"), }, }; let mut ret = Document { entries: Default::default(), }; loop { // parse entry // entry attributes let mut flags = EntryFlags::empty(); for (_, i) in ctx.pklx.peeking_take_while(|(_, t)| t.is_def_attr()) { match i { Token::Final => flags |= EntryFlags::FINAL, Token::Public => flags |= EntryFlags::PUBLIC, _ => unimplemented!(), } } let flags = flags; // entry name let (nloc, name) = match ctx.pklx.next() { Some((loc, Token::Identifier { lscoperef, ident })) => { if lscoperef != 0 || ident.contains(':') { return Err(Error { loc, kind: ErrorKind::InvalidIdentifier, }); } let tmp = Atom::from(ident); if ret.entries.contains_key(&tmp) { return Err(Error { loc, kind: ErrorKind::DuplicateIdentifier, }); } (loc, tmp) } Some((loc, Token::Unknown(_))) => { return Err(Error { loc, kind: ErrorKind::Unknown, }) } Some((loc, _)) => { return Err(Error { loc, kind: ErrorKind::Unexpected("name"), }) } None if flags.is_empty() => break, None => { return Err(Error { loc: ctx.eof_loc, kind: ErrorKind::UnexpectedEof, }) } }; // optional: arguments let mut args = Vec::new(); if ctx.pklx.next_if(|(_, t)| t == &Token::OpenBrace).is_some() { loop { let (loc, tok) = ctx.pklx.next().ok_or(Error { loc: ctx.eof_loc, kind: ErrorKind::UnexpectedEof, })?; args.push(match tok { Token::Unknown(_) => { return Err(Error { loc, kind: ErrorKind::Unknown, }) } Token::Identifier { lscoperef, ident } => { if lscoperef != 0 || ident.contains(':') { return Err(Error { loc, kind: ErrorKind::InvalidIdentifier, }); } let tmp = Atom::from(ident); if args.contains(&tmp) { return Err(Error { loc, kind: ErrorKind::DuplicateIdentifier, }); } tmp } Token::CloseBrace => break, _ => { return Err(Error { loc, kind: ErrorKind::Unexpected("argument name"), }) } }); } } ctx.expect_token(Token::Assign, "=")?; // code object ret.entries.insert( name, Entry { nloc, args: args.into_boxed_slice(), cobj: CodeObject::parse_high(&mut ctx)?, flags, }, ); ctx.expect_token(Token::SemiColon, ";")?; } return Ok(ret); }