From f7d5e53a013fd1d02f8e74035480355805e809ee Mon Sep 17 00:00:00 2001 From: Alain Zscheile Date: Sat, 20 May 2023 13:59:20 +0200 Subject: [PATCH] allow inline modules in places where code objects can be used --- README.md | 14 +- crates/wafl-parser/src/lex.rs | 41 ++-- crates/wafl-parser/src/parser.rs | 398 ++++++++++++++++--------------- 3 files changed, 236 insertions(+), 217 deletions(-) diff --git a/README.md b/README.md index 0f56612..3855c40 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,9 @@ as an argument, prefix the block with `'`. ## Control flow builtins -- `cflow::if, cond ..., then ..., else ...` -- `cflow::loop ...` -- `cflow::return ...` (implicit when last expr in a block is not terminated via semicolon) +- `cflow.if, cond = ..., then = ..., else = ...` +- `cflow.loop ...` +- `cflow.return ...` (implicit when last expr in a block is not terminated via semicolon) ## Modules @@ -61,7 +61,7 @@ with package-lock files or such. Packages (packaged module trees) would then be # 1. @final main { args env } = { - std:io:writeln "Hello World!"; + std.io.writeln "Hello World!"; 0 }; @@ -70,9 +70,9 @@ with package-lock files or such. Packages (packaged module trees) would then be business_in_the_front = party_in_the_back; what_now = { - self:business_in_the_front; - }, self:party_in_the_back { - std:io:writeln "It works"; + self.business_in_the_front; + }, self.party_in_the_back { + std.io.writeln "It works"; }; # running `what_now` results in "It works" being printed. diff --git a/crates/wafl-parser/src/lex.rs b/crates/wafl-parser/src/lex.rs index b3a131e..31c64e8 100644 --- a/crates/wafl-parser/src/lex.rs +++ b/crates/wafl-parser/src/lex.rs @@ -1,8 +1,12 @@ +use core::num::NonZeroU32; + #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Token<'a> { // keywords CtrlFlowEdit, + Defer, Final, + Module, Public, // single-char keywords @@ -11,13 +15,12 @@ pub enum Token<'a> { CloseBrace, Comma, SemiColon, + Dot, // dynamic stuff - Identifier { - /// amount of prefixed `$` to force immediate application - lscoperef: u8, - ident: &'a str, - }, + /// amount of prefixed `$` to force immediate application + ScopeRef(NonZeroU32), + Identifier(&'a str), Integer(i64), Unknown(&'a str), } @@ -101,7 +104,7 @@ impl<'a> Iterator for Lexer<'a> { let x = self.s.chars().next()?; let loc = self.loc; match x { - '{' | '}' | '=' | ',' | ';' => { + '{' | '}' | '=' | ',' | ';' | '.' => { self.eat(x.len_utf8()); return Some(( loc, @@ -111,6 +114,7 @@ impl<'a> Iterator for Lexer<'a> { '=' => Token::Assign, ',' => Token::Comma, ';' => Token::SemiColon, + '.' => Token::Dot, _ => unreachable!(), }, )); @@ -134,7 +138,9 @@ impl<'a> Iterator for Lexer<'a> { loc, match &ident[1..] { "cfe" => Token::CtrlFlowEdit, + "defer" => Token::Defer, "final" => Token::Final, + "module" => Token::Module, "pub" => Token::Public, _ => Token::Unknown(ident), }, @@ -178,23 +184,14 @@ impl<'a> Iterator for Lexer<'a> { }; return Some((loc, tok)); } - _ if (unicode_ident::is_xid_start(x) || x == ':' || x == '$') => { - let lscoperef = u8::try_from(self.select_text(0, |i| i != '$').chars().count()) + '$' => { + let scoperef = u32::try_from(self.select_text(0, |i| i != '$').chars().count()) .expect("too many scope ref uppers"); - let ident = - self.select_text(0, |i| !unicode_ident::is_xid_continue(i) && i != ':'); - // now, lets check that the identifier is valid - return Some(( - loc, - if ident.split(':').any(|i| { - i.is_empty() || !unicode_ident::is_xid_start(i.chars().next().unwrap()) - }) { - // this drops the leading '$'s, but I don't think they'll matter much in that case - Token::Unknown(ident) - } else { - Token::Identifier { lscoperef, ident } - }, - )); + return Some((loc, Token::ScopeRef(NonZeroU32::new(scoperef).unwrap()))); + } + _ if unicode_ident::is_xid_start(x) => { + let ident = self.select_text(0, |i| !unicode_ident::is_xid_continue(i)); + return Some((loc, Token::Identifier(ident))); } _ if x.is_whitespace() => { self.eat(x.len_utf8()); diff --git a/crates/wafl-parser/src/parser.rs b/crates/wafl-parser/src/parser.rs index 3b72f90..e15a9a9 100644 --- a/crates/wafl-parser/src/parser.rs +++ b/crates/wafl-parser/src/parser.rs @@ -7,7 +7,7 @@ pub use string_cache::DefaultAtom as Atom; use crate::lex::{Lexer, Location, Token}; #[derive(Clone, Debug)] -pub struct Document { +pub struct Module { pub entries: BTreeMap, } @@ -37,21 +37,28 @@ pub enum CodeObject { data: Box<[Statement]>, ret: Option, }, - Alias(Identifier), + Module(Module), + Alias(FullIdentifier), Integer(i64), } #[derive(Clone, Debug)] -pub struct Identifier { +pub struct FullIdentifier { pub loc: Location, - pub lscoperef: u8, - pub ident: Atom, + pub lscoperef: u32, + pub idents: Box<[Atom]>, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Path { + pub loc: Location, + pub idents: Box<[Atom]>, } #[derive(Clone, Debug)] pub enum StmtArgs { Single(Box), - Multi(BTreeMap), + Multi(BTreeMap, (Location, CodeObject)>), } impl Default for StmtArgs { @@ -63,7 +70,7 @@ impl Default for StmtArgs { #[derive(Clone, Debug)] pub struct Statement { - pub sel: Identifier, + pub sel: FullIdentifier, pub args: StmtArgs, } @@ -74,6 +81,7 @@ pub struct Error { pub enum ErrorKind { UnexpectedEof, + UnexpectedTrail, InvalidIdentifier, DuplicateIdentifier, Unexpected(&'static str), @@ -87,6 +95,11 @@ struct ParserContext<'a> { } impl<'a> ParserContext<'a> { + // this function is idempotent + fn peek_loc(&mut self) -> Location { + self.pklx.peek().map_or(self.eof_loc, |&(loc, _)| loc) + } + fn make_eof(&self) -> Error { Error { loc: self.eof_loc, @@ -112,56 +125,74 @@ impl<'a> ParserContext<'a> { } } -impl Identifier { - fn parse_step_high(ctx: &mut ParserContext<'_>) -> Result { - // this contraption makes this parser particularly nice, as it only consumes lexer tokens - // when it can actually use them. - let (loc, tok) = ctx - .pklx - .next_if(|(_, t)| matches!(t, Token::Identifier { .. })) - .ok_or_else(|| match ctx.pklx.peek() { - None => ctx.make_eof(), - Some(&(loc, Token::Unknown(_))) => Error { +impl Path { + fn parse_high(ctx: &mut ParserContext<'_>) -> Result { + let pathloc = ctx.peek_loc(); + let mut ret = Vec::new(); + loop { + // this contraption makes this parser particularly nice, as it only consumes lexer tokens + // when it can actually use them. + let mut tmpctx = (*ctx).clone(); + let (loc, tok) = tmpctx.pklx.next().ok_or_else(|| tmpctx.make_eof())?; + ret.push(match tok { + Token::Unknown(_) => Err(Error { loc, kind: ErrorKind::Unknown, - }, - Some(&(loc, _)) => Error { + }), + Token::Identifier(ident) => Ok(Atom::from(ident)), + _ => Err(Error { loc, kind: ErrorKind::Unexpected("identifier"), - }, - })?; - if let Token::Identifier { lscoperef, ident } = tok { - Ok(Identifier { - loc, - lscoperef, - ident: Atom::from(ident), - }) - } else { - unreachable!(); + }), + }?); + // make sure the parser can somewhat recover... + *ctx = tmpctx; + + if ctx.pklx.next_if(|(_, t)| t == &Token::Dot).is_none() { + break Ok(Path { + loc: pathloc, + idents: ret.into_boxed_slice(), + }); + } } } } +impl FullIdentifier { + fn parse_high(ctx: &mut ParserContext<'_>) -> Result { + // this contraption makes this parser particularly nice, as it only consumes lexer tokens + // when it can actually use them. + let mut tmpctx = (*ctx).clone(); + let loc = tmpctx.peek_loc(); + let lscoperef = if let Some((_, Token::ScopeRef(lscoperef))) = tmpctx + .pklx + .next_if(|(_, t)| matches!(t, Token::ScopeRef(_))) + { + lscoperef.get() + } else { + 0 + }; + let Path { idents, .. } = Path::parse_high(&mut tmpctx)?; + *ctx = tmpctx; + Ok(FullIdentifier { + loc, + lscoperef, + idents, + }) + } +} + impl Statement { fn parse_high(ctx: &mut ParserContext<'_>) -> Result { - let sel = Identifier::parse_step_high(ctx)?; + let sel = FullIdentifier::parse_high(ctx)?; let args = if let Some(&(_, Token::Comma)) = ctx.pklx.peek() { let mut args = BTreeMap::new(); while ctx.pklx.next_if(|(_, t)| t == &Token::Comma).is_some() { - let Identifier { - loc, - lscoperef, - ident, - } = Identifier::parse_step_high(ctx)?; - if lscoperef != 0 { - return Err(Error { - loc, - kind: ErrorKind::InvalidIdentifier, - }); - } + let Path { loc, idents } = Path::parse_high(ctx)?; + ctx.expect_token(Token::Assign, "=")?; let cobj = CodeObject::parse_high(ctx)?; - if args.insert(ident, (loc, cobj)).is_some() { + if args.insert(idents, (loc, cobj)).is_some() { return Err(Error { loc, kind: ErrorKind::DuplicateIdentifier, @@ -192,49 +223,146 @@ impl CodeObject { } // either we need to deal with a bunch of stmts, or just a single one (basically alias) - if ctx.pklx.next_if(|(_, t)| t == &Token::OpenBrace).is_some() { - let mut codata = Vec::new(); - let mut ret = None; - while ctx.pklx.next_if(|(_, t)| t == &Token::CloseBrace).is_none() { - let stmt = Statement::parse_high(ctx)?; - if ctx.pklx.next_if(|(_, t)| t == &Token::SemiColon).is_some() { - codata.push(stmt); - } else { - ret = Some(stmt); - // we don't go back to the loop header, so do it here. - ctx.expect_token(Token::CloseBrace, /* { */ "}")?; - break; + match ctx.pklx.peek() { + None => Err(ctx.make_eof()), + Some((_, Token::OpenBrace)) => { + ctx.pklx.next(); + let mut codata = Vec::new(); + let mut ret = None; + while ctx.pklx.next_if(|(_, t)| t == &Token::CloseBrace).is_none() { + let stmt = Statement::parse_high(ctx)?; + if ctx.pklx.next_if(|(_, t)| t == &Token::SemiColon).is_some() { + codata.push(stmt); + } else { + ret = Some(stmt); + // we don't go back to the loop header, so do it here. + ctx.expect_token(Token::CloseBrace, /* { */ "}")?; + break; + } } + Ok(CodeObject::Normal { + cfe, + data: codata.into_boxed_slice(), + ret, + }) } - Ok(CodeObject::Normal { - cfe, - data: codata.into_boxed_slice(), - ret, - }) - } else if !cfe { + Some(&(loc, _)) if cfe => Err(Error { + loc, + kind: ErrorKind::Unexpected("braced statement set"), + }), // we have no proper recursive delimiting, // so just resort to the parsing which we also do for other such items, // expect a single code object descriptor => alias, or number. - if let Some((_, Token::Integer(i))) = - ctx.pklx.next_if(|(_, t)| matches!(t, Token::Integer(_))) - { + Some(&(_, Token::Integer(i))) => { + ctx.pklx.next(); Ok(CodeObject::Integer(i)) - } else { - Identifier::parse_step_high(ctx).map(CodeObject::Alias) } - } else { - Err(match ctx.pklx.next() { - Some((loc, _)) => Error { - loc, - kind: ErrorKind::Unexpected("braced statement set"), - }, - None => ctx.make_eof(), - }) + Some((_, Token::Module)) => { + ctx.pklx.next(); + ctx.expect_token(Token::OpenBrace, "{")?; + let m = Module::parse_high(ctx)?; + ctx.expect_token(Token::CloseBrace, "}")?; + Ok(CodeObject::Module(m)) + } + _ => FullIdentifier::parse_high(ctx).map(CodeObject::Alias), } } } -pub fn parse_s2doc(fileid: u32, s: &str) -> Result { +impl Module { + fn parse_high(ctx: &mut ParserContext<'_>) -> Result { + let mut ret = Module { + entries: Default::default(), + }; + + loop { + // parse entry + + // entry attributes + let mut flags = EntryFlags::empty(); + for (_, i) in ctx.pklx.peeking_take_while(|(_, t)| t.is_def_attr()) { + match i { + Token::Final => flags |= EntryFlags::FINAL, + Token::Public => flags |= EntryFlags::PUBLIC, + _ => unimplemented!(), + } + } + let flags = flags; + + // entry name + let (nloc, name) = match ctx.pklx.next_if(|(_, t)| matches!(t, Token::Identifier(_))) { + Some((loc, Token::Identifier(ident))) => { + let ident = Atom::from(ident); + if ret.entries.contains_key(&ident) { + return Err(Error { + loc, + kind: ErrorKind::DuplicateIdentifier, + }); + } + (loc, ident) + } + Some(_) => unreachable!(), + None => { + if flags.is_empty() { + break; + } + return Err(Error { + loc: ctx.peek_loc(), + kind: ErrorKind::UnexpectedEof, + }); + } + }; + + // optional: arguments + let mut args = Vec::new(); + if ctx.pklx.next_if(|(_, t)| t == &Token::OpenBrace).is_some() { + loop { + let (loc, tok) = ctx.pklx.next().ok_or(Error { + loc: ctx.eof_loc, + kind: ErrorKind::UnexpectedEof, + })?; + let ident = match tok { + Token::Unknown(_) => Err(Error { + loc, + kind: ErrorKind::Unknown, + }), + Token::Identifier(ident) => Ok(Atom::from(ident)), + Token::CloseBrace => break, + _ => Err(Error { + loc, + kind: ErrorKind::Unexpected("argument name"), + }), + }?; + if args.contains(&ident) { + return Err(Error { + loc, + kind: ErrorKind::DuplicateIdentifier, + }); + } + args.push(ident); + } + } + + ctx.expect_token(Token::Assign, "=")?; + + // code object + ret.entries.insert( + name, + Entry { + nloc, + args: args.into_boxed_slice(), + cobj: CodeObject::parse_high(ctx)?, + flags, + }, + ); + + ctx.expect_token(Token::SemiColon, ";")?; + } + Ok(ret) + } +} + +pub fn parse_s2doc(fileid: u32, s: &str) -> Result { let mut ctx = ParserContext { pklx: (Lexer { loc: Location { fileid, offset: 0 }, @@ -247,120 +375,14 @@ pub fn parse_s2doc(fileid: u32, s: &str) -> Result { }, }; - let mut ret = Document { - entries: Default::default(), - }; + let ret = Module::parse_high(&mut ctx)?; - loop { - // parse entry - - // entry attributes - let mut flags = EntryFlags::empty(); - for (_, i) in ctx.pklx.peeking_take_while(|(_, t)| t.is_def_attr()) { - match i { - Token::Final => flags |= EntryFlags::FINAL, - Token::Public => flags |= EntryFlags::PUBLIC, - _ => unimplemented!(), - } - } - let flags = flags; - - // entry name - let (nloc, name) = match ctx.pklx.next() { - Some((loc, Token::Identifier { lscoperef, ident })) => { - if lscoperef != 0 || ident.contains(':') { - return Err(Error { - loc, - kind: ErrorKind::InvalidIdentifier, - }); - } - let tmp = Atom::from(ident); - if ret.entries.contains_key(&tmp) { - return Err(Error { - loc, - kind: ErrorKind::DuplicateIdentifier, - }); - } - (loc, tmp) - } - Some((loc, Token::Unknown(_))) => { - return Err(Error { - loc, - kind: ErrorKind::Unknown, - }) - } - Some((loc, _)) => { - return Err(Error { - loc, - kind: ErrorKind::Unexpected("name"), - }) - } - None if flags.is_empty() => break, - None => { - return Err(Error { - loc: ctx.eof_loc, - kind: ErrorKind::UnexpectedEof, - }) - } - }; - - // optional: arguments - let mut args = Vec::new(); - if ctx.pklx.next_if(|(_, t)| t == &Token::OpenBrace).is_some() { - loop { - let (loc, tok) = ctx.pklx.next().ok_or(Error { - loc: ctx.eof_loc, - kind: ErrorKind::UnexpectedEof, - })?; - args.push(match tok { - Token::Unknown(_) => { - return Err(Error { - loc, - kind: ErrorKind::Unknown, - }) - } - Token::Identifier { lscoperef, ident } => { - if lscoperef != 0 || ident.contains(':') { - return Err(Error { - loc, - kind: ErrorKind::InvalidIdentifier, - }); - } - let tmp = Atom::from(ident); - if args.contains(&tmp) { - return Err(Error { - loc, - kind: ErrorKind::DuplicateIdentifier, - }); - } - tmp - } - Token::CloseBrace => break, - _ => { - return Err(Error { - loc, - kind: ErrorKind::Unexpected("argument name"), - }) - } - }); - } - } - - ctx.expect_token(Token::Assign, "=")?; - - // code object - ret.entries.insert( - name, - Entry { - nloc, - args: args.into_boxed_slice(), - cobj: CodeObject::parse_high(&mut ctx)?, - flags, - }, - ); - - ctx.expect_token(Token::SemiColon, ";")?; + if let Some((loc, _)) = ctx.pklx.next() { + Err(Error { + loc, + kind: ErrorKind::UnexpectedTrail, + }) + } else { + Ok(ret) } - - return Ok(ret); }