allow inline modules in places where code objects can be used

Alain Zscheile 2023-05-20 13:59:20 +02:00
parent a76dc04211
commit f7d5e53a01
3 changed files with 236 additions and 217 deletions
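The headline change, sketched below with simplified stand-in Rust types (not the crate's real `CodeObject`/`Module` definitions, and assuming the same `@` keyword sigil as `@final` for the `@module` surface syntax): a module body may now appear anywhere a code object may appear, so module trees can nest inline.

```rust
use std::collections::BTreeMap;

// Simplified stand-ins (not the crate's real types) for what this commit
// enables: a module is just another kind of code object, so it can nest.
enum CodeObject {
    Integer(i64),
    Alias(Vec<String>), // dotted path, e.g. ["std", "io", "writeln"]
    Module(Module),     // new in this commit
}

struct Module {
    entries: BTreeMap<String, CodeObject>,
}

fn main() {
    // roughly `outer = @module { answer = 42; };` (surface syntax assumed)
    let inline = CodeObject::Module(Module {
        entries: BTreeMap::from([("answer".to_string(), CodeObject::Integer(42))]),
    });
    let _top = Module {
        entries: BTreeMap::from([("outer".to_string(), inline)]),
    };
}
```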

View file

@@ -34,9 +34,9 @@ as an argument, prefix the block with `'`.
 ## Control flow builtins
-- `cflow::if, cond ..., then ..., else ...`
-- `cflow::loop ...`
-- `cflow::return ...` (implicit when last expr in a block is not terminated via semicolon)
+- `cflow.if, cond = ..., then = ..., else = ...`
+- `cflow.loop ...`
+- `cflow.return ...` (implicit when last expr in a block is not terminated via semicolon)
 ## Modules
@@ -61,7 +61,7 @@ with package-lock files or such. Packages (packaged module trees) would then be
 # 1.
 @final main { args env } = {
-  std:io:writeln "Hello World!";
+  std.io.writeln "Hello World!";
   0
 };
@@ -70,9 +70,9 @@ with package-lock files or such. Packages (packaged module trees) would then be
 business_in_the_front = party_in_the_back;
 what_now = {
-  self:business_in_the_front;
-}, self:party_in_the_back {
-  std:io:writeln "It works";
+  self.business_in_the_front;
+}, self.party_in_the_back {
+  std.io.writeln "It works";
 };
 # running `what_now` results in "It works" being printed.
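For orientation, here is a minimal standalone sketch (simplified stand-in types, not the crate's actual `Statement`/`StmtArgs`) of how a statement such as `cflow.if, cond = ..., then = ..., else = ...` decomposes under the new syntax: the selector is a dotted path, and each named argument is keyed by a path of its own.

```rust
use std::collections::BTreeMap;

// Simplified stand-ins for the parser's FullIdentifier / StmtArgs::Multi shapes.
type Path = Vec<String>;

struct Stmt {
    sel: Path,                    // e.g. ["cflow", "if"]
    args: BTreeMap<Path, String>, // named arguments, keyed by path
}

fn path(s: &str) -> Path {
    s.split('.').map(str::to_string).collect()
}

fn main() {
    // `cflow.if, cond = c, then = t, else = e`
    let stmt = Stmt {
        sel: path("cflow.if"),
        args: [("cond", "c"), ("then", "t"), ("else", "e")]
            .into_iter()
            .map(|(k, v)| (path(k), v.to_string()))
            .collect(),
    };
    assert_eq!(stmt.sel, ["cflow", "if"]);
    assert_eq!(stmt.args[&path("then")], "t");
}
```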

View file

@@ -1,8 +1,12 @@
+use core::num::NonZeroU32;
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum Token<'a> {
     // keywords
     CtrlFlowEdit,
+    Defer,
     Final,
+    Module,
     Public,
     // single-char keywords
@@ -11,13 +15,12 @@ pub enum Token<'a> {
     CloseBrace,
     Comma,
     SemiColon,
+    Dot,
     // dynamic stuff
-    Identifier {
-        /// amount of prefixed `$` to force immediate application
-        lscoperef: u8,
-        ident: &'a str,
-    },
+    /// amount of prefixed `$` to force immediate application
+    ScopeRef(NonZeroU32),
+    Identifier(&'a str),
     Integer(i64),
     Unknown(&'a str),
 }
@@ -101,7 +104,7 @@ impl<'a> Iterator for Lexer<'a> {
         let x = self.s.chars().next()?;
         let loc = self.loc;
         match x {
-            '{' | '}' | '=' | ',' | ';' => {
+            '{' | '}' | '=' | ',' | ';' | '.' => {
                 self.eat(x.len_utf8());
                 return Some((
                     loc,
@@ -111,6 +114,7 @@ impl<'a> Iterator for Lexer<'a> {
                         '=' => Token::Assign,
                         ',' => Token::Comma,
                         ';' => Token::SemiColon,
+                        '.' => Token::Dot,
                         _ => unreachable!(),
                     },
                 ));
@@ -134,7 +138,9 @@ impl<'a> Iterator for Lexer<'a> {
                     loc,
                     match &ident[1..] {
                         "cfe" => Token::CtrlFlowEdit,
+                        "defer" => Token::Defer,
                         "final" => Token::Final,
+                        "module" => Token::Module,
                         "pub" => Token::Public,
                         _ => Token::Unknown(ident),
                     },
@@ -178,23 +184,14 @@ impl<'a> Iterator for Lexer<'a> {
                 };
                 return Some((loc, tok));
             }
-            _ if (unicode_ident::is_xid_start(x) || x == ':' || x == '$') => {
-                let lscoperef = u8::try_from(self.select_text(0, |i| i != '$').chars().count())
-                    .expect("too many scope ref uppers");
-                let ident =
-                    self.select_text(0, |i| !unicode_ident::is_xid_continue(i) && i != ':');
-                // now, lets check that the identifier is valid
-                return Some((
-                    loc,
-                    if ident.split(':').any(|i| {
-                        i.is_empty() || !unicode_ident::is_xid_start(i.chars().next().unwrap())
-                    }) {
-                        // this drops the leading '$'s, but I don't think they'll matter much in that case
-                        Token::Unknown(ident)
-                    } else {
-                        Token::Identifier { lscoperef, ident }
-                    },
-                ));
+            '$' => {
+                let scoperef = u32::try_from(self.select_text(0, |i| i != '$').chars().count())
+                    .expect("too many scope ref uppers");
+                return Some((loc, Token::ScopeRef(NonZeroU32::new(scoperef).unwrap())));
+            }
+            _ if unicode_ident::is_xid_start(x) => {
+                let ident = self.select_text(0, |i| !unicode_ident::is_xid_continue(i));
+                return Some((loc, Token::Identifier(ident)));
             }
             _ if x.is_whitespace() => {
                 self.eat(x.len_utf8());
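To make the new token shapes concrete, here is a toy tokenizer sketch (not the crate's lexer, which goes through `select_text` and `unicode_ident`; the simple split-on-`.` rule here is an assumption) showing how an input such as `$$foo.bar` now comes out: one `ScopeRef` carrying the `NonZeroU32` count of leading `$`, then `Identifier`/`Dot` tokens instead of a single `:`-joined identifier.

```rust
use core::num::NonZeroU32;

#[derive(Debug, PartialEq, Eq)]
enum Tok<'a> {
    ScopeRef(NonZeroU32), // number of leading `$` (never zero)
    Identifier(&'a str),
    Dot,
}

// Toy lexer for illustration only: count leading `$`, then split on `.`.
fn toy_lex(mut s: &str) -> Vec<Tok<'_>> {
    let mut out = Vec::new();
    let dollars = s.chars().take_while(|&c| c == '$').count() as u32;
    if let Some(n) = NonZeroU32::new(dollars) {
        out.push(Tok::ScopeRef(n));
        s = &s[dollars as usize..];
    }
    for (i, part) in s.split('.').enumerate() {
        if i > 0 {
            out.push(Tok::Dot);
        }
        out.push(Tok::Identifier(part));
    }
    out
}

fn main() {
    let toks = toy_lex("$$foo.bar");
    assert_eq!(
        toks,
        vec![
            Tok::ScopeRef(NonZeroU32::new(2).unwrap()),
            Tok::Identifier("foo"),
            Tok::Dot,
            Tok::Identifier("bar"),
        ]
    );
    println!("{toks:?}");
}
```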

View file

@@ -7,7 +7,7 @@ pub use string_cache::DefaultAtom as Atom;
 use crate::lex::{Lexer, Location, Token};
 #[derive(Clone, Debug)]
-pub struct Document {
+pub struct Module {
     pub entries: BTreeMap<Atom, Entry>,
 }
@@ -37,21 +37,28 @@ pub enum CodeObject {
         data: Box<[Statement]>,
         ret: Option<Statement>,
     },
-    Alias(Identifier),
+    Module(Module),
+    Alias(FullIdentifier),
     Integer(i64),
 }
 #[derive(Clone, Debug)]
-pub struct Identifier {
+pub struct FullIdentifier {
     pub loc: Location,
-    pub lscoperef: u8,
-    pub ident: Atom,
+    pub lscoperef: u32,
+    pub idents: Box<[Atom]>,
+}
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Path {
+    pub loc: Location,
+    pub idents: Box<[Atom]>,
 }
 #[derive(Clone, Debug)]
 pub enum StmtArgs {
     Single(Box<CodeObject>),
-    Multi(BTreeMap<Atom, (Location, CodeObject)>),
+    Multi(BTreeMap<Box<[Atom]>, (Location, CodeObject)>),
 }
 impl Default for StmtArgs {
@@ -63,7 +70,7 @@ impl Default for StmtArgs {
 #[derive(Clone, Debug)]
 pub struct Statement {
-    pub sel: Identifier,
+    pub sel: FullIdentifier,
     pub args: StmtArgs,
 }
@@ -74,6 +81,7 @@ pub struct Error {
 pub enum ErrorKind {
     UnexpectedEof,
+    UnexpectedTrail,
     InvalidIdentifier,
     DuplicateIdentifier,
     Unexpected(&'static str),
@@ -87,6 +95,11 @@ struct ParserContext<'a> {
 }
 impl<'a> ParserContext<'a> {
+    // this function is idempotent
+    fn peek_loc(&mut self) -> Location {
+        self.pklx.peek().map_or(self.eof_loc, |&(loc, _)| loc)
+    }
     fn make_eof(&self) -> Error {
         Error {
             loc: self.eof_loc,
@@ -112,56 +125,74 @@ impl<'a> ParserContext<'a> {
     }
 }
-impl Identifier {
-    fn parse_step_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
-        // this contraption makes this parser particularly nice, as it only consumes lexer tokens
-        // when it can actually use them.
-        let (loc, tok) = ctx
-            .pklx
-            .next_if(|(_, t)| matches!(t, Token::Identifier { .. }))
-            .ok_or_else(|| match ctx.pklx.peek() {
-                None => ctx.make_eof(),
-                Some(&(loc, Token::Unknown(_))) => Error {
-                    loc,
-                    kind: ErrorKind::Unknown,
-                },
-                Some(&(loc, _)) => Error {
-                    loc,
-                    kind: ErrorKind::Unexpected("identifier"),
-                },
-            })?;
-        if let Token::Identifier { lscoperef, ident } = tok {
-            Ok(Identifier {
-                loc,
-                lscoperef,
-                ident: Atom::from(ident),
-            })
-        } else {
-            unreachable!();
-        }
-    }
-}
+impl Path {
+    fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
+        let pathloc = ctx.peek_loc();
+        let mut ret = Vec::new();
+        loop {
+            // this contraption makes this parser particularly nice, as it only consumes lexer tokens
+            // when it can actually use them.
+            let mut tmpctx = (*ctx).clone();
+            let (loc, tok) = tmpctx.pklx.next().ok_or_else(|| tmpctx.make_eof())?;
+            ret.push(match tok {
+                Token::Unknown(_) => Err(Error {
+                    loc,
+                    kind: ErrorKind::Unknown,
+                }),
+                Token::Identifier(ident) => Ok(Atom::from(ident)),
+                _ => Err(Error {
+                    loc,
+                    kind: ErrorKind::Unexpected("identifier"),
+                }),
+            }?);
+            // make sure the parser can somewhat recover...
+            *ctx = tmpctx;
+            if ctx.pklx.next_if(|(_, t)| t == &Token::Dot).is_none() {
+                break Ok(Path {
+                    loc: pathloc,
+                    idents: ret.into_boxed_slice(),
+                });
+            }
+        }
+    }
+}
+impl FullIdentifier {
+    fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
+        // this contraption makes this parser particularly nice, as it only consumes lexer tokens
+        // when it can actually use them.
+        let mut tmpctx = (*ctx).clone();
+        let loc = tmpctx.peek_loc();
+        let lscoperef = if let Some((_, Token::ScopeRef(lscoperef))) = tmpctx
+            .pklx
+            .next_if(|(_, t)| matches!(t, Token::ScopeRef(_)))
+        {
+            lscoperef.get()
+        } else {
+            0
+        };
+        let Path { idents, .. } = Path::parse_high(&mut tmpctx)?;
+        *ctx = tmpctx;
+        Ok(FullIdentifier {
+            loc,
+            lscoperef,
+            idents,
+        })
+    }
+}
 impl Statement {
     fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
-        let sel = Identifier::parse_step_high(ctx)?;
+        let sel = FullIdentifier::parse_high(ctx)?;
         let args = if let Some(&(_, Token::Comma)) = ctx.pklx.peek() {
             let mut args = BTreeMap::new();
             while ctx.pklx.next_if(|(_, t)| t == &Token::Comma).is_some() {
-                let Identifier {
-                    loc,
-                    lscoperef,
-                    ident,
-                } = Identifier::parse_step_high(ctx)?;
-                if lscoperef != 0 {
-                    return Err(Error {
-                        loc,
-                        kind: ErrorKind::InvalidIdentifier,
-                    });
-                }
+                let Path { loc, idents } = Path::parse_high(ctx)?;
+                ctx.expect_token(Token::Assign, "=")?;
                 let cobj = CodeObject::parse_high(ctx)?;
-                if args.insert(ident, (loc, cobj)).is_some() {
+                if args.insert(idents, (loc, cobj)).is_some() {
                     return Err(Error {
                         loc,
                         kind: ErrorKind::DuplicateIdentifier,
@@ -192,49 +223,146 @@ impl CodeObject {
         }
         // either we need to deal with a bunch of stmts, or just a single one (basically alias)
-        if ctx.pklx.next_if(|(_, t)| t == &Token::OpenBrace).is_some() {
-            let mut codata = Vec::new();
-            let mut ret = None;
-            while ctx.pklx.next_if(|(_, t)| t == &Token::CloseBrace).is_none() {
-                let stmt = Statement::parse_high(ctx)?;
-                if ctx.pklx.next_if(|(_, t)| t == &Token::SemiColon).is_some() {
-                    codata.push(stmt);
-                } else {
-                    ret = Some(stmt);
-                    // we don't go back to the loop header, so do it here.
-                    ctx.expect_token(Token::CloseBrace, /* { */ "}")?;
-                    break;
-                }
-            }
-            Ok(CodeObject::Normal {
-                cfe,
-                data: codata.into_boxed_slice(),
-                ret,
-            })
-        } else if !cfe {
-            // we have no proper recursive delimiting,
-            // so just resort to the parsing which we also do for other such items,
-            // expect a single code object descriptor => alias, or number.
-            if let Some((_, Token::Integer(i))) =
-                ctx.pklx.next_if(|(_, t)| matches!(t, Token::Integer(_)))
-            {
-                Ok(CodeObject::Integer(i))
-            } else {
-                Identifier::parse_step_high(ctx).map(CodeObject::Alias)
-            }
-        } else {
-            Err(match ctx.pklx.next() {
-                Some((loc, _)) => Error {
-                    loc,
-                    kind: ErrorKind::Unexpected("braced statement set"),
-                },
-                None => ctx.make_eof(),
-            })
-        }
+        match ctx.pklx.peek() {
+            None => Err(ctx.make_eof()),
+            Some((_, Token::OpenBrace)) => {
+                ctx.pklx.next();
+                let mut codata = Vec::new();
+                let mut ret = None;
+                while ctx.pklx.next_if(|(_, t)| t == &Token::CloseBrace).is_none() {
+                    let stmt = Statement::parse_high(ctx)?;
+                    if ctx.pklx.next_if(|(_, t)| t == &Token::SemiColon).is_some() {
+                        codata.push(stmt);
+                    } else {
+                        ret = Some(stmt);
+                        // we don't go back to the loop header, so do it here.
+                        ctx.expect_token(Token::CloseBrace, /* { */ "}")?;
+                        break;
+                    }
+                }
+                Ok(CodeObject::Normal {
+                    cfe,
+                    data: codata.into_boxed_slice(),
+                    ret,
+                })
+            }
+            Some(&(loc, _)) if cfe => Err(Error {
+                loc,
+                kind: ErrorKind::Unexpected("braced statement set"),
+            }),
+            // we have no proper recursive delimiting,
+            // so just resort to the parsing which we also do for other such items,
+            // expect a single code object descriptor => alias, or number.
+            Some(&(_, Token::Integer(i))) => {
+                ctx.pklx.next();
+                Ok(CodeObject::Integer(i))
+            }
+            Some((_, Token::Module)) => {
+                ctx.pklx.next();
+                ctx.expect_token(Token::OpenBrace, "{")?;
+                let m = Module::parse_high(ctx)?;
+                ctx.expect_token(Token::CloseBrace, "}")?;
+                Ok(CodeObject::Module(m))
+            }
+            _ => FullIdentifier::parse_high(ctx).map(CodeObject::Alias),
+        }
     }
 }
-pub fn parse_s2doc(fileid: u32, s: &str) -> Result<Document, Error> {
+impl Module {
+    fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
+        let mut ret = Module {
+            entries: Default::default(),
+        };
+        loop {
+            // parse entry
+            // entry attributes
+            let mut flags = EntryFlags::empty();
+            for (_, i) in ctx.pklx.peeking_take_while(|(_, t)| t.is_def_attr()) {
+                match i {
+                    Token::Final => flags |= EntryFlags::FINAL,
+                    Token::Public => flags |= EntryFlags::PUBLIC,
+                    _ => unimplemented!(),
+                }
+            }
+            let flags = flags;
+            // entry name
+            let (nloc, name) = match ctx.pklx.next_if(|(_, t)| matches!(t, Token::Identifier(_))) {
+                Some((loc, Token::Identifier(ident))) => {
+                    let ident = Atom::from(ident);
+                    if ret.entries.contains_key(&ident) {
+                        return Err(Error {
+                            loc,
+                            kind: ErrorKind::DuplicateIdentifier,
+                        });
+                    }
+                    (loc, ident)
+                }
+                Some(_) => unreachable!(),
+                None => {
+                    if flags.is_empty() {
+                        break;
+                    }
+                    return Err(Error {
+                        loc: ctx.peek_loc(),
+                        kind: ErrorKind::UnexpectedEof,
+                    });
+                }
+            };
+            // optional: arguments
+            let mut args = Vec::new();
+            if ctx.pklx.next_if(|(_, t)| t == &Token::OpenBrace).is_some() {
+                loop {
+                    let (loc, tok) = ctx.pklx.next().ok_or(Error {
+                        loc: ctx.eof_loc,
+                        kind: ErrorKind::UnexpectedEof,
+                    })?;
+                    let ident = match tok {
+                        Token::Unknown(_) => Err(Error {
+                            loc,
+                            kind: ErrorKind::Unknown,
+                        }),
+                        Token::Identifier(ident) => Ok(Atom::from(ident)),
+                        Token::CloseBrace => break,
+                        _ => Err(Error {
+                            loc,
+                            kind: ErrorKind::Unexpected("argument name"),
+                        }),
+                    }?;
+                    if args.contains(&ident) {
+                        return Err(Error {
+                            loc,
+                            kind: ErrorKind::DuplicateIdentifier,
+                        });
+                    }
+                    args.push(ident);
+                }
+            }
+            ctx.expect_token(Token::Assign, "=")?;
+            // code object
+            ret.entries.insert(
+                name,
+                Entry {
+                    nloc,
+                    args: args.into_boxed_slice(),
+                    cobj: CodeObject::parse_high(ctx)?,
+                    flags,
+                },
+            );
+            ctx.expect_token(Token::SemiColon, ";")?;
+        }
+        Ok(ret)
+    }
+}
+pub fn parse_s2doc(fileid: u32, s: &str) -> Result<Module, Error> {
     let mut ctx = ParserContext {
         pklx: (Lexer {
             loc: Location { fileid, offset: 0 },
@@ -247,120 +375,14 @@ pub fn parse_s2doc(fileid: u32, s: &str) -> Result<Document, Error> {
         },
     };
-    let mut ret = Document {
-        entries: Default::default(),
-    };
-    loop {
-        // parse entry
-        // entry attributes
-        let mut flags = EntryFlags::empty();
-        for (_, i) in ctx.pklx.peeking_take_while(|(_, t)| t.is_def_attr()) {
-            match i {
-                Token::Final => flags |= EntryFlags::FINAL,
-                Token::Public => flags |= EntryFlags::PUBLIC,
-                _ => unimplemented!(),
-            }
-        }
-        let flags = flags;
-        // entry name
-        let (nloc, name) = match ctx.pklx.next() {
-            Some((loc, Token::Identifier { lscoperef, ident })) => {
-                if lscoperef != 0 || ident.contains(':') {
-                    return Err(Error {
-                        loc,
-                        kind: ErrorKind::InvalidIdentifier,
-                    });
-                }
-                let tmp = Atom::from(ident);
-                if ret.entries.contains_key(&tmp) {
-                    return Err(Error {
-                        loc,
-                        kind: ErrorKind::DuplicateIdentifier,
-                    });
-                }
-                (loc, tmp)
-            }
-            Some((loc, Token::Unknown(_))) => {
-                return Err(Error {
-                    loc,
-                    kind: ErrorKind::Unknown,
-                })
-            }
-            Some((loc, _)) => {
-                return Err(Error {
-                    loc,
-                    kind: ErrorKind::Unexpected("name"),
-                })
-            }
-            None if flags.is_empty() => break,
-            None => {
-                return Err(Error {
-                    loc: ctx.eof_loc,
-                    kind: ErrorKind::UnexpectedEof,
-                })
-            }
-        };
-        // optional: arguments
-        let mut args = Vec::new();
-        if ctx.pklx.next_if(|(_, t)| t == &Token::OpenBrace).is_some() {
-            loop {
-                let (loc, tok) = ctx.pklx.next().ok_or(Error {
-                    loc: ctx.eof_loc,
-                    kind: ErrorKind::UnexpectedEof,
-                })?;
-                args.push(match tok {
-                    Token::Unknown(_) => {
-                        return Err(Error {
-                            loc,
-                            kind: ErrorKind::Unknown,
-                        })
-                    }
-                    Token::Identifier { lscoperef, ident } => {
-                        if lscoperef != 0 || ident.contains(':') {
-                            return Err(Error {
-                                loc,
-                                kind: ErrorKind::InvalidIdentifier,
-                            });
-                        }
-                        let tmp = Atom::from(ident);
-                        if args.contains(&tmp) {
-                            return Err(Error {
-                                loc,
-                                kind: ErrorKind::DuplicateIdentifier,
-                            });
-                        }
-                        tmp
-                    }
-                    Token::CloseBrace => break,
-                    _ => {
-                        return Err(Error {
-                            loc,
-                            kind: ErrorKind::Unexpected("argument name"),
-                        })
-                    }
-                });
-            }
-        }
-        ctx.expect_token(Token::Assign, "=")?;
-        // code object
-        ret.entries.insert(
-            name,
-            Entry {
-                nloc,
-                args: args.into_boxed_slice(),
-                cobj: CodeObject::parse_high(&mut ctx)?,
-                flags,
-            },
-        );
-        ctx.expect_token(Token::SemiColon, ";")?;
-    }
-    return Ok(ret);
+    let ret = Module::parse_high(&mut ctx)?;
+    if let Some((loc, _)) = ctx.pklx.next() {
+        Err(Error {
+            loc,
+            kind: ErrorKind::UnexpectedTrail,
+        })
+    } else {
+        Ok(ret)
+    }
 }
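Both `Path::parse_high` and `FullIdentifier::parse_high` above follow a clone-and-commit pattern: they work on a clone of the peekable token stream (`let mut tmpctx = (*ctx).clone();`) and write it back (`*ctx = tmpctx;`) only once the sub-parse succeeds, so a failed attempt does not consume tokens. A standalone sketch of that pattern with illustrative names (not the crate's types):

```rust
use std::iter::Peekable;

// Try to parse a dotted path such as `foo.bar`; on failure the caller's
// cursor is left exactly where it was (illustrative only).
fn parse_path<'a, I>(cur: &mut Peekable<I>) -> Result<Vec<String>, String>
where
    I: Iterator<Item = &'a str> + Clone,
{
    let mut tmp = cur.clone(); // cheap clone of the cursor, not of the input
    let mut idents = Vec::new();
    loop {
        match tmp.next() {
            // crude identifier check, good enough for the sketch
            Some(tok) if tok != "." && tok != ";" => idents.push(tok.to_string()),
            other => return Err(format!("expected identifier, got {other:?}")),
        }
        if tmp.next_if(|&t| t == ".").is_none() {
            *cur = tmp; // commit: only now does the caller's cursor advance
            return Ok(idents);
        }
    }
}

fn main() {
    let toks = ["std", ".", "io", ".", "writeln", ";"];
    let mut cur = toks.iter().copied().peekable();
    assert_eq!(parse_path(&mut cur).unwrap(), ["std", "io", "writeln"]);
    assert_eq!(cur.next(), Some(";")); // the trailing `;` was not consumed
}
```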