allow inline modules in places where code objects can be used

This commit is contained in:
Alain Zscheile 2023-05-20 13:59:20 +02:00
parent a76dc04211
commit f7d5e53a01
3 changed files with 236 additions and 217 deletions

View file

@ -34,9 +34,9 @@ as an argument, prefix the block with `'`.
## Control flow builtins
- `cflow::if, cond ..., then ..., else ...`
- `cflow::loop ...`
- `cflow::return ...` (implicit when last expr in a block is not terminated via semicolon)
- `cflow.if, cond = ..., then = ..., else = ...`
- `cflow.loop ...`
- `cflow.return ...` (implicit when last expr in a block is not terminated via semicolon)
## Modules
@ -61,7 +61,7 @@ with package-lock files or such. Packages (packaged module trees) would then be
# 1.
@final main { args env } = {
std:io:writeln "Hello World!";
std.io.writeln "Hello World!";
0
};
@ -70,9 +70,9 @@ with package-lock files or such. Packages (packaged module trees) would then be
business_in_the_front = party_in_the_back;
what_now = {
self:business_in_the_front;
}, self:party_in_the_back {
std:io:writeln "It works";
self.business_in_the_front;
}, self.party_in_the_back {
std.io.writeln "It works";
};
# running `what_now` results in "It works" being printed.

View file

@ -1,8 +1,12 @@
use core::num::NonZeroU32;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Token<'a> {
// keywords
CtrlFlowEdit,
Defer,
Final,
Module,
Public,
// single-char keywords
@ -11,13 +15,12 @@ pub enum Token<'a> {
CloseBrace,
Comma,
SemiColon,
Dot,
// dynamic stuff
Identifier {
/// amount of prefixed `$` to force immediate application
lscoperef: u8,
ident: &'a str,
},
/// amount of prefixed `$` to force immediate application
ScopeRef(NonZeroU32),
Identifier(&'a str),
Integer(i64),
Unknown(&'a str),
}
@ -101,7 +104,7 @@ impl<'a> Iterator for Lexer<'a> {
let x = self.s.chars().next()?;
let loc = self.loc;
match x {
'{' | '}' | '=' | ',' | ';' => {
'{' | '}' | '=' | ',' | ';' | '.' => {
self.eat(x.len_utf8());
return Some((
loc,
@ -111,6 +114,7 @@ impl<'a> Iterator for Lexer<'a> {
'=' => Token::Assign,
',' => Token::Comma,
';' => Token::SemiColon,
'.' => Token::Dot,
_ => unreachable!(),
},
));
@ -134,7 +138,9 @@ impl<'a> Iterator for Lexer<'a> {
loc,
match &ident[1..] {
"cfe" => Token::CtrlFlowEdit,
"defer" => Token::Defer,
"final" => Token::Final,
"module" => Token::Module,
"pub" => Token::Public,
_ => Token::Unknown(ident),
},
@ -178,23 +184,14 @@ impl<'a> Iterator for Lexer<'a> {
};
return Some((loc, tok));
}
_ if (unicode_ident::is_xid_start(x) || x == ':' || x == '$') => {
let lscoperef = u8::try_from(self.select_text(0, |i| i != '$').chars().count())
'$' => {
let scoperef = u32::try_from(self.select_text(0, |i| i != '$').chars().count())
.expect("too many scope ref uppers");
let ident =
self.select_text(0, |i| !unicode_ident::is_xid_continue(i) && i != ':');
// now, lets check that the identifier is valid
return Some((
loc,
if ident.split(':').any(|i| {
i.is_empty() || !unicode_ident::is_xid_start(i.chars().next().unwrap())
}) {
// this drops the leading '$'s, but I don't think they'll matter much in that case
Token::Unknown(ident)
} else {
Token::Identifier { lscoperef, ident }
},
));
return Some((loc, Token::ScopeRef(NonZeroU32::new(scoperef).unwrap())));
}
_ if unicode_ident::is_xid_start(x) => {
let ident = self.select_text(0, |i| !unicode_ident::is_xid_continue(i));
return Some((loc, Token::Identifier(ident)));
}
_ if x.is_whitespace() => {
self.eat(x.len_utf8());

View file

@ -7,7 +7,7 @@ pub use string_cache::DefaultAtom as Atom;
use crate::lex::{Lexer, Location, Token};
#[derive(Clone, Debug)]
pub struct Document {
pub struct Module {
pub entries: BTreeMap<Atom, Entry>,
}
@ -37,21 +37,28 @@ pub enum CodeObject {
data: Box<[Statement]>,
ret: Option<Statement>,
},
Alias(Identifier),
Module(Module),
Alias(FullIdentifier),
Integer(i64),
}
#[derive(Clone, Debug)]
pub struct Identifier {
pub struct FullIdentifier {
pub loc: Location,
pub lscoperef: u8,
pub ident: Atom,
pub lscoperef: u32,
pub idents: Box<[Atom]>,
}
/// A dotted identifier path: one or more `.`-separated identifiers
/// (e.g. `std.io.writeln`), without any leading `$` scope-ref prefix —
/// see `FullIdentifier` for the prefixed form.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Path {
/// Source location where the first path segment starts.
pub loc: Location,
/// The path segments in order; the parser produces at least one.
pub idents: Box<[Atom]>,
}
#[derive(Clone, Debug)]
pub enum StmtArgs {
Single(Box<CodeObject>),
Multi(BTreeMap<Atom, (Location, CodeObject)>),
Multi(BTreeMap<Box<[Atom]>, (Location, CodeObject)>),
}
impl Default for StmtArgs {
@ -63,7 +70,7 @@ impl Default for StmtArgs {
#[derive(Clone, Debug)]
pub struct Statement {
pub sel: Identifier,
pub sel: FullIdentifier,
pub args: StmtArgs,
}
@ -74,6 +81,7 @@ pub struct Error {
pub enum ErrorKind {
UnexpectedEof,
UnexpectedTrail,
InvalidIdentifier,
DuplicateIdentifier,
Unexpected(&'static str),
@ -87,6 +95,11 @@ struct ParserContext<'a> {
}
impl<'a> ParserContext<'a> {
// this function is idempotent
fn peek_loc(&mut self) -> Location {
self.pklx.peek().map_or(self.eof_loc, |&(loc, _)| loc)
}
fn make_eof(&self) -> Error {
Error {
loc: self.eof_loc,
@ -112,56 +125,74 @@ impl<'a> ParserContext<'a> {
}
}
impl Identifier {
fn parse_step_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
// this contraption makes this parser particularly nice, as it only consumes lexer tokens
// when it can actually use them.
let (loc, tok) = ctx
.pklx
.next_if(|(_, t)| matches!(t, Token::Identifier { .. }))
.ok_or_else(|| match ctx.pklx.peek() {
None => ctx.make_eof(),
Some(&(loc, Token::Unknown(_))) => Error {
impl Path {
fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
let pathloc = ctx.peek_loc();
let mut ret = Vec::new();
loop {
// this contraption makes this parser particularly nice, as it only consumes lexer tokens
// when it can actually use them.
let mut tmpctx = (*ctx).clone();
let (loc, tok) = tmpctx.pklx.next().ok_or_else(|| tmpctx.make_eof())?;
ret.push(match tok {
Token::Unknown(_) => Err(Error {
loc,
kind: ErrorKind::Unknown,
},
Some(&(loc, _)) => Error {
}),
Token::Identifier(ident) => Ok(Atom::from(ident)),
_ => Err(Error {
loc,
kind: ErrorKind::Unexpected("identifier"),
},
})?;
if let Token::Identifier { lscoperef, ident } = tok {
Ok(Identifier {
loc,
lscoperef,
ident: Atom::from(ident),
})
} else {
unreachable!();
}),
}?);
// make sure the parser can somewhat recover...
*ctx = tmpctx;
if ctx.pklx.next_if(|(_, t)| t == &Token::Dot).is_none() {
break Ok(Path {
loc: pathloc,
idents: ret.into_boxed_slice(),
});
}
}
}
}
impl FullIdentifier {
    /// Parse an optional `$`-scope-ref prefix followed by a dotted path.
    ///
    /// All work happens on a clone of the parser context, so no lexer
    /// tokens are consumed unless the whole identifier parses; this keeps
    /// the parser recoverable on failure.
    fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
        let mut tmpctx = (*ctx).clone();
        let loc = tmpctx.peek_loc();
        // a missing `$`-prefix is equivalent to a scope-ref depth of zero
        let lscoperef = tmpctx
            .pklx
            .next_if(|(_, t)| matches!(t, Token::ScopeRef(_)))
            .map_or(0, |(_, t)| match t {
                Token::ScopeRef(n) => n.get(),
                // next_if only matched Token::ScopeRef above
                _ => unreachable!(),
            });
        let Path { idents, .. } = Path::parse_high(&mut tmpctx)?;
        // commit the consumed tokens only after the path parsed cleanly
        *ctx = tmpctx;
        Ok(FullIdentifier {
            loc,
            lscoperef,
            idents,
        })
    }
}
impl Statement {
fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
let sel = Identifier::parse_step_high(ctx)?;
let sel = FullIdentifier::parse_high(ctx)?;
let args = if let Some(&(_, Token::Comma)) = ctx.pklx.peek() {
let mut args = BTreeMap::new();
while ctx.pklx.next_if(|(_, t)| t == &Token::Comma).is_some() {
let Identifier {
loc,
lscoperef,
ident,
} = Identifier::parse_step_high(ctx)?;
if lscoperef != 0 {
return Err(Error {
loc,
kind: ErrorKind::InvalidIdentifier,
});
}
let Path { loc, idents } = Path::parse_high(ctx)?;
ctx.expect_token(Token::Assign, "=")?;
let cobj = CodeObject::parse_high(ctx)?;
if args.insert(ident, (loc, cobj)).is_some() {
if args.insert(idents, (loc, cobj)).is_some() {
return Err(Error {
loc,
kind: ErrorKind::DuplicateIdentifier,
@ -192,49 +223,146 @@ impl CodeObject {
}
// either we need to deal with a bunch of stmts, or just a single one (basically alias)
if ctx.pklx.next_if(|(_, t)| t == &Token::OpenBrace).is_some() {
let mut codata = Vec::new();
let mut ret = None;
while ctx.pklx.next_if(|(_, t)| t == &Token::CloseBrace).is_none() {
let stmt = Statement::parse_high(ctx)?;
if ctx.pklx.next_if(|(_, t)| t == &Token::SemiColon).is_some() {
codata.push(stmt);
} else {
ret = Some(stmt);
// we don't go back to the loop header, so do it here.
ctx.expect_token(Token::CloseBrace, /* { */ "}")?;
break;
match ctx.pklx.peek() {
None => Err(ctx.make_eof()),
Some((_, Token::OpenBrace)) => {
ctx.pklx.next();
let mut codata = Vec::new();
let mut ret = None;
while ctx.pklx.next_if(|(_, t)| t == &Token::CloseBrace).is_none() {
let stmt = Statement::parse_high(ctx)?;
if ctx.pklx.next_if(|(_, t)| t == &Token::SemiColon).is_some() {
codata.push(stmt);
} else {
ret = Some(stmt);
// we don't go back to the loop header, so do it here.
ctx.expect_token(Token::CloseBrace, /* { */ "}")?;
break;
}
}
Ok(CodeObject::Normal {
cfe,
data: codata.into_boxed_slice(),
ret,
})
}
Ok(CodeObject::Normal {
cfe,
data: codata.into_boxed_slice(),
ret,
})
} else if !cfe {
Some(&(loc, _)) if cfe => Err(Error {
loc,
kind: ErrorKind::Unexpected("braced statement set"),
}),
// we have no proper recursive delimiting,
// so just resort to the parsing which we also do for other such items,
// expect a single code object descriptor => alias, or number.
if let Some((_, Token::Integer(i))) =
ctx.pklx.next_if(|(_, t)| matches!(t, Token::Integer(_)))
{
Some(&(_, Token::Integer(i))) => {
ctx.pklx.next();
Ok(CodeObject::Integer(i))
} else {
Identifier::parse_step_high(ctx).map(CodeObject::Alias)
}
} else {
Err(match ctx.pklx.next() {
Some((loc, _)) => Error {
loc,
kind: ErrorKind::Unexpected("braced statement set"),
},
None => ctx.make_eof(),
})
Some((_, Token::Module)) => {
ctx.pklx.next();
ctx.expect_token(Token::OpenBrace, "{")?;
let m = Module::parse_high(ctx)?;
ctx.expect_token(Token::CloseBrace, "}")?;
Ok(CodeObject::Module(m))
}
_ => FullIdentifier::parse_high(ctx).map(CodeObject::Alias),
}
}
}
pub fn parse_s2doc(fileid: u32, s: &str) -> Result<Document, Error> {
impl Module {
fn parse_high(ctx: &mut ParserContext<'_>) -> Result<Self, Error> {
let mut ret = Module {
entries: Default::default(),
};
loop {
// parse entry
// entry attributes
let mut flags = EntryFlags::empty();
for (_, i) in ctx.pklx.peeking_take_while(|(_, t)| t.is_def_attr()) {
match i {
Token::Final => flags |= EntryFlags::FINAL,
Token::Public => flags |= EntryFlags::PUBLIC,
_ => unimplemented!(),
}
}
let flags = flags;
// entry name
let (nloc, name) = match ctx.pklx.next_if(|(_, t)| matches!(t, Token::Identifier(_))) {
Some((loc, Token::Identifier(ident))) => {
let ident = Atom::from(ident);
if ret.entries.contains_key(&ident) {
return Err(Error {
loc,
kind: ErrorKind::DuplicateIdentifier,
});
}
(loc, ident)
}
Some(_) => unreachable!(),
None => {
if flags.is_empty() {
break;
}
return Err(Error {
loc: ctx.peek_loc(),
kind: ErrorKind::UnexpectedEof,
});
}
};
// optional: arguments
let mut args = Vec::new();
if ctx.pklx.next_if(|(_, t)| t == &Token::OpenBrace).is_some() {
loop {
let (loc, tok) = ctx.pklx.next().ok_or(Error {
loc: ctx.eof_loc,
kind: ErrorKind::UnexpectedEof,
})?;
let ident = match tok {
Token::Unknown(_) => Err(Error {
loc,
kind: ErrorKind::Unknown,
}),
Token::Identifier(ident) => Ok(Atom::from(ident)),
Token::CloseBrace => break,
_ => Err(Error {
loc,
kind: ErrorKind::Unexpected("argument name"),
}),
}?;
if args.contains(&ident) {
return Err(Error {
loc,
kind: ErrorKind::DuplicateIdentifier,
});
}
args.push(ident);
}
}
ctx.expect_token(Token::Assign, "=")?;
// code object
ret.entries.insert(
name,
Entry {
nloc,
args: args.into_boxed_slice(),
cobj: CodeObject::parse_high(ctx)?,
flags,
},
);
ctx.expect_token(Token::SemiColon, ";")?;
}
Ok(ret)
}
}
pub fn parse_s2doc(fileid: u32, s: &str) -> Result<Module, Error> {
let mut ctx = ParserContext {
pklx: (Lexer {
loc: Location { fileid, offset: 0 },
@ -247,120 +375,14 @@ pub fn parse_s2doc(fileid: u32, s: &str) -> Result<Document, Error> {
},
};
let mut ret = Document {
entries: Default::default(),
};
let ret = Module::parse_high(&mut ctx)?;
loop {
// parse entry
// entry attributes
let mut flags = EntryFlags::empty();
for (_, i) in ctx.pklx.peeking_take_while(|(_, t)| t.is_def_attr()) {
match i {
Token::Final => flags |= EntryFlags::FINAL,
Token::Public => flags |= EntryFlags::PUBLIC,
_ => unimplemented!(),
}
}
let flags = flags;
// entry name
let (nloc, name) = match ctx.pklx.next() {
Some((loc, Token::Identifier { lscoperef, ident })) => {
if lscoperef != 0 || ident.contains(':') {
return Err(Error {
loc,
kind: ErrorKind::InvalidIdentifier,
});
}
let tmp = Atom::from(ident);
if ret.entries.contains_key(&tmp) {
return Err(Error {
loc,
kind: ErrorKind::DuplicateIdentifier,
});
}
(loc, tmp)
}
Some((loc, Token::Unknown(_))) => {
return Err(Error {
loc,
kind: ErrorKind::Unknown,
})
}
Some((loc, _)) => {
return Err(Error {
loc,
kind: ErrorKind::Unexpected("name"),
})
}
None if flags.is_empty() => break,
None => {
return Err(Error {
loc: ctx.eof_loc,
kind: ErrorKind::UnexpectedEof,
})
}
};
// optional: arguments
let mut args = Vec::new();
if ctx.pklx.next_if(|(_, t)| t == &Token::OpenBrace).is_some() {
loop {
let (loc, tok) = ctx.pklx.next().ok_or(Error {
loc: ctx.eof_loc,
kind: ErrorKind::UnexpectedEof,
})?;
args.push(match tok {
Token::Unknown(_) => {
return Err(Error {
loc,
kind: ErrorKind::Unknown,
})
}
Token::Identifier { lscoperef, ident } => {
if lscoperef != 0 || ident.contains(':') {
return Err(Error {
loc,
kind: ErrorKind::InvalidIdentifier,
});
}
let tmp = Atom::from(ident);
if args.contains(&tmp) {
return Err(Error {
loc,
kind: ErrorKind::DuplicateIdentifier,
});
}
tmp
}
Token::CloseBrace => break,
_ => {
return Err(Error {
loc,
kind: ErrorKind::Unexpected("argument name"),
})
}
});
}
}
ctx.expect_token(Token::Assign, "=")?;
// code object
ret.entries.insert(
name,
Entry {
nloc,
args: args.into_boxed_slice(),
cobj: CodeObject::parse_high(&mut ctx)?,
flags,
},
);
ctx.expect_token(Token::SemiColon, ";")?;
if let Some((loc, _)) = ctx.pklx.next() {
Err(Error {
loc,
kind: ErrorKind::UnexpectedTrail,
})
} else {
Ok(ret)
}
return Ok(ret);
}