add document level parser (inner parts still missing)

This commit is contained in:
Alain Zscheile 2023-05-19 18:07:36 +02:00
parent 6c62538959
commit a16255bff2
6 changed files with 421 additions and 19 deletions

205
Cargo.lock generated
View file

@ -2,6 +2,143 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "libc"
version = "0.2.144"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
[[package]]
name = "lock_api"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
[[package]]
name = "once_cell"
version = "1.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
[[package]]
name = "parking_lot"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-sys",
]
[[package]]
name = "peeking_take_while"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e9ed2178b0575fff8e1b83b58ba6f75e727aafac2e1b6c795169ad3b17eb518"
[[package]]
name = "phf_shared"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
dependencies = [
"siphasher",
]
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "redox_syscall"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
dependencies = [
"bitflags",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "serde"
version = "1.0.163"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2"
[[package]]
name = "siphasher"
version = "0.3.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
[[package]]
name = "smallvec"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
[[package]]
name = "string_cache"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b"
dependencies = [
"new_debug_unreachable",
"once_cell",
"parking_lot",
"phf_shared",
"precomputed-hash",
"serde",
]
[[package]]
name = "unicode-ident"
version = "1.0.8"
@ -12,5 +149,73 @@ checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
name = "wafl-parser"
version = "0.1.0"
dependencies = [
"peeking_take_while",
"string_cache",
"unicode-ident",
]
[[package]]
name = "windows-sys"
version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
[[package]]
name = "windows_i686_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
[[package]]
name = "windows_i686_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"

View file

@ -9,7 +9,7 @@
## Syntax
```
[@cfe] [@final] name ["{" args (separated via commas) "}"] = {
[@final] name ["{" args (separated via commas) "}"] = [@cfe] {
content;
content;
etc;
@ -60,7 +60,7 @@ with package-lock files or such. Packages (packaged module trees) would then be
```
# 1.
@final main { args, env } = {
@final main { args env } = {
std:io:writeln "Hello World!";
0
};

View file

@ -5,4 +5,6 @@ version = "0.1.0"
edition = "2021"
[dependencies]
peeking_take_while = "1.0"
string_cache = "0.8"
unicode-ident = "1.0"

View file

@ -1,3 +1,4 @@
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Token<'a> {
// keywords
CtrlFlowEdit,
@ -16,32 +17,50 @@ pub enum Token<'a> {
// dynamic stuff
Identifier {
/// amount of prefixed `$` to force immediate application
lscoperef: usize,
lscoperef: u8,
ident: &'a str,
},
Integer(i64),
Unknown(&'a str),
}
// Classification helpers used by the parser to decide which tokens may appear
// in attribute position or denote a builtin definition.
impl Token<'_> {
/// Returns `true` if this token is a definition-level attribute
/// (currently only `Token::Final`).
#[inline]
pub fn is_def_attr(&self) -> bool {
matches!(self, Token::Final)
}
/// Returns `true` if this token is a code-object attribute
/// (currently only `Token::CtrlFlowEdit`).
#[inline]
pub fn is_cobj_attr(&self) -> bool {
matches!(self, Token::CtrlFlowEdit)
}
/// Returns `true` if this token is one of the builtin keywords
/// `Token::If`, `Token::Loop` or `Token::Return`.
#[inline]
pub fn is_builtin_def(&self) -> bool {
matches!(self, Token::If | Token::Loop | Token::Return)
}
}
#[derive(Clone, PartialEq, Eq)]
pub struct Lexer<'a> {
pub fileid: u32,
pub offset: u32,
pub loc: Location,
pub s: &'a str,
}
/// A position inside a source file, as attached to every token by the lexer.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Location {
/// Caller-chosen identifier of the file this location refers to.
pub fileid: u32,
/// Offset into the file; the lexer advances it by string lengths, so it is
/// presumably a byte offset -- confirm against the lexer.
pub offset: u32,
}
impl<'a> Lexer<'a> {
pub fn loc(&self) -> Location {
Location {
fileid: self.fileid,
offset: self.offset,
}
impl core::ops::AddAssign<usize> for Location {
fn add_assign(&mut self, rhs: usize) {
let rhs2: u32 = rhs.try_into().unwrap();
self.offset += rhs2;
}
}
impl<'a> Lexer<'a> {
pub fn eat(&mut self, n: usize) -> bool {
let ssl = self.s.len();
let ret = n <= ssl;
@ -50,8 +69,7 @@ impl<'a> Lexer<'a> {
} else {
(n, &self.s[n..])
};
let l2: u32 = l.try_into().unwrap();
self.offset += l2;
self.loc += l;
self.s = r;
ret
}
@ -76,8 +94,7 @@ impl<'a> Lexer<'a> {
txt
}
};
let rl2: u32 = ret.len().try_into().unwrap();
self.offset += rl2;
self.loc += ret.len();
ret
}
}
@ -89,7 +106,7 @@ impl<'a> Iterator for Lexer<'a> {
fn next(&mut self) -> Option<(Location, Token<'a>)> {
loop {
let x = self.s.chars().next()?;
let loc = self.loc();
let loc = self.loc;
match x {
'{' | '}' | '=' | ',' | ';' => {
self.eat(x.len_utf8());
@ -133,7 +150,7 @@ impl<'a> Iterator for Lexer<'a> {
));
}
_ if (unicode_ident::is_xid_start(x) || x == ':' || x == '$') => {
let lscoperef = self.select_text(0, |i| i != '$').chars().count();
let lscoperef = u8::try_from(self.select_text(0, |i| i != '$').chars().count()).expect("too many scope ref uppers");
let ident =
self.select_text(0, |i| !unicode_ident::is_xid_continue(i) && i != ':');
// now, lets check that the identifier is valid

View file

@ -1,3 +1,3 @@
#![no_std]
extern crate alloc;
pub mod lex;
pub mod parser;

View file

@ -0,0 +1,178 @@
use alloc::collections::BTreeMap;
use core::iter::Peekable;
pub use string_cache::DefaultAtom as Atom;
use peeking_take_while::PeekableExt as _;
use crate::lex::{Lexer, Token, Location};
/// The result of parsing one source file at document level.
///
/// `Debug` is derived so that `Result<Document, _>` values can be inspected
/// and used with `unwrap_err()`/`expect_err()`; `Default` yields an empty
/// document.
#[derive(Clone, Debug, Default)]
pub struct Document {
    /// All top-level entries, keyed by their (unique) name.
    pub entries: BTreeMap<Atom, Entry>,
}
/// A single top-level definition of a document: `[attrs] name [{ args }] = code-object`.
#[derive(Clone, Debug)]
pub struct Entry {
// location of the name
pub nloc: Location,
// technically this is a set, but would just waste space here...
// (the parser rejects duplicate argument names before storing them)
pub args: Box<[Atom]>,
// the code object on the right-hand side of the `=`
pub cobj: CodeObject,
// true if the definition was prefixed with the `Token::Final` attribute
pub final_: bool,
}
/// A block of statements together with its attributes.
#[derive(Clone, Debug)]
pub struct CodeObject {
// set when the code object is prefixed with the `Token::CtrlFlowEdit` attribute
pub cfe: bool,
// the statements of this code object
// NOTE(review): presumably in source order -- the inner parser is not implemented yet
pub data: Box<[Statement]>,
}
#[derive(Clone, Debug)]
pub enum ObjectSelector {
// builtins
If,
Loop,
Return,
// other
Custom {
lscoperef: u8,
ident: Atom,
},
}
/// The argument(s) passed to the object selected by a statement.
#[derive(Clone, Debug)]
pub enum StmtArgs {
// exactly one, unnamed code object
Single(CodeObject),
// several code objects, each keyed by an object selector
Multi(BTreeMap<ObjectSelector, CodeObject>),
}
/// One statement inside a code object: a selected object plus its arguments.
#[derive(Clone, Debug)]
pub struct Statement {
// which object (builtin or custom) this statement refers to
pub sel: ObjectSelector,
// the arguments handed to that object
pub args: StmtArgs,
}
/// A parse error together with the location it was detected at.
///
/// `Debug` is required for `Result<_, Error>::unwrap()`/`expect()` to be
/// usable by callers; `PartialEq`/`Eq` allow errors to be compared directly.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Error {
    /// Where the problem was detected (end of input for `UnexpectedEof`).
    pub loc: Location,
    /// What went wrong.
    pub kind: ErrorKind,
}

/// The different ways parsing can fail.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ErrorKind {
    /// The input ended in the middle of a construct.
    UnexpectedEof,
    /// An identifier carried a `$` prefix or contained `:` where a plain name
    /// was required.
    InvalidIdentifier,
    /// A name was defined more than once in the same scope.
    DuplicateIdentifier,
    /// A different token was found; the payload names what was expected.
    Unexpected(&'static str),
    /// The lexer produced a `Token::Unknown`.
    Unknown,
}
/// High-level parsing of a syntax node from an input of type `I`.
pub trait ParseHigh<I>: Sized {
/// Parses one `Self` from `data`.
///
/// The input is taken by value; on success the (advanced) input is handed
/// back together with the parsed value, on failure an `Error` is returned.
fn parse_high(data: I) -> Result<(I, Self), Error>;
}
impl<'a> ParseHigh<Peekable<Lexer<'a>>> for CodeObject {
    /// Parses a code object from the token stream.
    ///
    /// Only the leading attribute tokens are consumed so far; the body of the
    /// code object is not parsed yet, so this currently always panics via
    /// `unimplemented!()`.
    fn parse_high(mut data: Peekable<Lexer<'a>>) -> Result<(Peekable<Lexer<'a>>, Self), Error> {
        let mut cfe = false;
        // consume attribute tokens for as long as the next token is one
        while let Some((_, attr)) = data.next_if(|(_, tok)| tok.is_cobj_attr()) {
            match attr {
                Token::CtrlFlowEdit => cfe = true,
                _ => unimplemented!(),
            }
        }

        unimplemented!();
    }
}
pub fn parse_s2doc(fileid: u32, s: &str) -> Result<Document, Error> {
let eof_loc = Location {
fileid,
offset: s.len().try_into().expect("file too big"),
};
let mut data = (Lexer {
loc: Location {
fileid,
offset: 0,
},
s,
}).peekable();
let mut ret = Document {
entries: Default::default(),
};
loop {
// parse entry
// entry attributes
let mut modified = false;
let mut final_ = false;
for (_, i) in data.peeking_take_while(|(_, t)| t.is_def_attr()) {
modified = true;
match i {
Token::Final => final_ = true,
_ => unimplemented!(),
}
}
// entry name
let (nloc, name) = match data.next() {
Some((loc, Token::Identifier { lscoperef, ident })) => {
if lscoperef != 0 || ident.contains(':') {
return Err(Error { loc, kind: ErrorKind::InvalidIdentifier });
}
let tmp = Atom::from(ident);
if ret.entries.contains_key(&tmp) {
return Err(Error { loc, kind: ErrorKind::DuplicateIdentifier });
}
(loc, tmp)
}
Some((loc, Token::Unknown(_))) => return Err(Error { loc, kind: ErrorKind::Unknown }),
Some((loc, _)) => return Err(Error { loc, kind: ErrorKind::Unexpected("name") }),
None if modified => return Err(Error { loc: eof_loc, kind: ErrorKind::UnexpectedEof }),
None => break,
};
// optional: arguments
let mut args = Vec::new();
if data.next_if(|(_, t)| t == &Token::OpenBrace).is_some() {
loop {
let (loc, tok) = data.next().ok_or(Error { loc: eof_loc, kind: ErrorKind::UnexpectedEof })?;
args.push(match tok {
Token::Unknown(_) => return Err(Error { loc, kind: ErrorKind::Unknown }),
Token::Identifier { lscoperef, ident } => {
if lscoperef != 0 || ident.contains(':') {
return Err(Error { loc, kind: ErrorKind::InvalidIdentifier });
}
let tmp = Atom::from(ident);
if args.contains(&tmp) {
return Err(Error { loc, kind: ErrorKind::DuplicateIdentifier });
}
tmp
}
Token::CloseBrace => break,
_ => return Err(Error { loc, kind: ErrorKind::Unexpected("argument name") }),
});
}
}
// `=`
let (loc, tok) = data.next().ok_or(Error { loc: eof_loc, kind: ErrorKind::UnexpectedEof })?;
match tok {
Token::Assign => Ok(()),
Token::Unknown(_) => Err(Error { loc, kind: ErrorKind::Unknown }),
_ => Err(Error { loc, kind: ErrorKind::Unexpected("=") }),
}?;
// code object
let (data2, cobj) = CodeObject::parse_high(data.clone())?;
data = data2;
ret.entries.insert(name, Entry {
nloc,
args: args.into_boxed_slice(),
cobj,
final_,
});
}
return Ok(ret);
}