add document level parser (inner parts still missing)
This commit is contained in:
parent
6c62538959
commit
a16255bff2
205
Cargo.lock
generated
205
Cargo.lock
generated
|
@ -2,6 +2,143 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.144"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "new_debug_unreachable"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.17.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
|
||||
dependencies = [
|
||||
"lock_api",
|
||||
"parking_lot_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot_core"
|
||||
version = "0.9.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
"smallvec",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "peeking_take_while"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e9ed2178b0575fff8e1b83b58ba6f75e727aafac2e1b6c795169ad3b17eb518"
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
|
||||
dependencies = [
|
||||
"siphasher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "precomputed-hash"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.163"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2"
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.3.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
|
||||
|
||||
[[package]]
|
||||
name = "string_cache"
|
||||
version = "0.8.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b"
|
||||
dependencies = [
|
||||
"new_debug_unreachable",
|
||||
"once_cell",
|
||||
"parking_lot",
|
||||
"phf_shared",
|
||||
"precomputed-hash",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.8"
|
||||
|
@ -12,5 +149,73 @@ checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
|
|||
name = "wafl-parser"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"peeking_take_while",
|
||||
"string_cache",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.45.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
|
||||
dependencies = [
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.42.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.42.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.42.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.42.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.42.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.42.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.42.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.42.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
## Syntax
|
||||
|
||||
```
|
||||
[@cfe] [@final] name ["{" args (separated via commas) "}"] = {
|
||||
[@final] name ["{" args (separated via whitespace) "}"] = [@cfe] {
|
||||
content;
|
||||
content;
|
||||
etc;
|
||||
|
@ -60,7 +60,7 @@ with package-lock files or such. Packages (packaged module trees) would then be
|
|||
```
|
||||
# 1.
|
||||
|
||||
@final main { args, env } = {
|
||||
@final main { args env } = {
|
||||
std:io:writeln "Hello World!";
|
||||
0
|
||||
};
|
||||
|
|
|
@ -5,4 +5,6 @@ version = "0.1.0"
|
|||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
peeking_take_while = "1.0"
|
||||
string_cache = "0.8"
|
||||
unicode-ident = "1.0"
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum Token<'a> {
|
||||
// keywords
|
||||
CtrlFlowEdit,
|
||||
|
@ -16,32 +17,50 @@ pub enum Token<'a> {
|
|||
// dynamic stuff
|
||||
Identifier {
|
||||
/// amount of prefixed `$` to force immediate application
|
||||
lscoperef: usize,
|
||||
lscoperef: u8,
|
||||
ident: &'a str,
|
||||
},
|
||||
Integer(i64),
|
||||
Unknown(&'a str),
|
||||
}
|
||||
|
||||
impl Token<'_> {
|
||||
#[inline]
|
||||
pub fn is_def_attr(&self) -> bool {
|
||||
matches!(self, Token::Final)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_cobj_attr(&self) -> bool {
|
||||
matches!(self, Token::CtrlFlowEdit)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_builtin_def(&self) -> bool {
|
||||
matches!(self, Token::If | Token::Loop | Token::Return)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
pub struct Lexer<'a> {
|
||||
pub fileid: u32,
|
||||
pub offset: u32,
|
||||
pub loc: Location,
|
||||
pub s: &'a str,
|
||||
}
|
||||
|
||||
/// A position inside a source file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Location {
    /// Identifier of the file this location refers to, as supplied by the
    /// caller that created the lexer.
    pub fileid: u32,
    /// Offset into the file's text; the lexer advances it by the length
    /// of the text it consumes.
    pub offset: u32,
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn loc(&self) -> Location {
|
||||
Location {
|
||||
fileid: self.fileid,
|
||||
offset: self.offset,
|
||||
}
|
||||
impl core::ops::AddAssign<usize> for Location {
|
||||
fn add_assign(&mut self, rhs: usize) {
|
||||
let rhs2: u32 = rhs.try_into().unwrap();
|
||||
self.offset += rhs2;
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn eat(&mut self, n: usize) -> bool {
|
||||
let ssl = self.s.len();
|
||||
let ret = n <= ssl;
|
||||
|
@ -50,8 +69,7 @@ impl<'a> Lexer<'a> {
|
|||
} else {
|
||||
(n, &self.s[n..])
|
||||
};
|
||||
let l2: u32 = l.try_into().unwrap();
|
||||
self.offset += l2;
|
||||
self.loc += l;
|
||||
self.s = r;
|
||||
ret
|
||||
}
|
||||
|
@ -76,8 +94,7 @@ impl<'a> Lexer<'a> {
|
|||
txt
|
||||
}
|
||||
};
|
||||
let rl2: u32 = ret.len().try_into().unwrap();
|
||||
self.offset += rl2;
|
||||
self.loc += ret.len();
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
@ -89,7 +106,7 @@ impl<'a> Iterator for Lexer<'a> {
|
|||
fn next(&mut self) -> Option<(Location, Token<'a>)> {
|
||||
loop {
|
||||
let x = self.s.chars().next()?;
|
||||
let loc = self.loc();
|
||||
let loc = self.loc;
|
||||
match x {
|
||||
'{' | '}' | '=' | ',' | ';' => {
|
||||
self.eat(x.len_utf8());
|
||||
|
@ -133,7 +150,7 @@ impl<'a> Iterator for Lexer<'a> {
|
|||
));
|
||||
}
|
||||
_ if (unicode_ident::is_xid_start(x) || x == ':' || x == '$') => {
|
||||
let lscoperef = self.select_text(0, |i| i != '$').chars().count();
|
||||
let lscoperef = u8::try_from(self.select_text(0, |i| i != '$').chars().count()).expect("too many scope ref uppers");
|
||||
let ident =
|
||||
self.select_text(0, |i| !unicode_ident::is_xid_continue(i) && i != ':');
|
||||
// now, lets check that the identifier is valid
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
#![no_std]
|
||||
|
||||
extern crate alloc;
|
||||
pub mod lex;
|
||||
pub mod parser;
|
||||
|
|
178
crates/wafl-parser/src/parser.rs
Normal file
178
crates/wafl-parser/src/parser.rs
Normal file
|
@ -0,0 +1,178 @@
|
|||
use alloc::boxed::Box;
use alloc::collections::BTreeMap;
use alloc::vec::Vec;
use core::iter::Peekable;
pub use string_cache::DefaultAtom as Atom;
use peeking_take_while::PeekableExt as _;

use crate::lex::{Lexer, Token, Location};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Document {
|
||||
pub entries: BTreeMap<Atom, Entry>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Entry {
|
||||
// location of the name
|
||||
pub nloc: Location,
|
||||
// technically this is a set, but would just waste space here...
|
||||
pub args: Box<[Atom]>,
|
||||
pub cobj: CodeObject,
|
||||
pub final_: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CodeObject {
|
||||
pub cfe: bool,
|
||||
pub data: Box<[Statement]>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum ObjectSelector {
|
||||
// builtins
|
||||
If,
|
||||
Loop,
|
||||
Return,
|
||||
|
||||
// other
|
||||
Custom {
|
||||
lscoperef: u8,
|
||||
ident: Atom,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum StmtArgs {
|
||||
Single(CodeObject),
|
||||
Multi(BTreeMap<ObjectSelector, CodeObject>),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Statement {
|
||||
pub sel: ObjectSelector,
|
||||
pub args: StmtArgs,
|
||||
}
|
||||
|
||||
pub struct Error {
|
||||
pub loc: Location,
|
||||
pub kind: ErrorKind,
|
||||
}
|
||||
|
||||
pub enum ErrorKind {
|
||||
UnexpectedEof,
|
||||
InvalidIdentifier,
|
||||
DuplicateIdentifier,
|
||||
Unexpected(&'static str),
|
||||
Unknown,
|
||||
}
|
||||
|
||||
pub trait ParseHigh<I>: Sized {
|
||||
fn parse_high(data: I) -> Result<(I, Self), Error>;
|
||||
}
|
||||
|
||||
impl<'a> ParseHigh<Peekable<Lexer<'a>>> for CodeObject {
|
||||
fn parse_high(mut data: Peekable<Lexer<'a>>) -> Result<(Peekable<Lexer<'a>>, Self), Error> {
|
||||
let mut cfe = false;
|
||||
for (_, i) in data.peeking_take_while(|(_, t)| t.is_cobj_attr()) {
|
||||
match i {
|
||||
Token::CtrlFlowEdit => cfe = true,
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
unimplemented!();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_s2doc(fileid: u32, s: &str) -> Result<Document, Error> {
|
||||
let eof_loc = Location {
|
||||
fileid,
|
||||
offset: s.len().try_into().expect("file too big"),
|
||||
};
|
||||
|
||||
let mut data = (Lexer {
|
||||
loc: Location {
|
||||
fileid,
|
||||
offset: 0,
|
||||
},
|
||||
s,
|
||||
}).peekable();
|
||||
|
||||
let mut ret = Document {
|
||||
entries: Default::default(),
|
||||
};
|
||||
loop {
|
||||
// parse entry
|
||||
|
||||
// entry attributes
|
||||
let mut modified = false;
|
||||
let mut final_ = false;
|
||||
for (_, i) in data.peeking_take_while(|(_, t)| t.is_def_attr()) {
|
||||
modified = true;
|
||||
match i {
|
||||
Token::Final => final_ = true,
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
// entry name
|
||||
let (nloc, name) = match data.next() {
|
||||
Some((loc, Token::Identifier { lscoperef, ident })) => {
|
||||
if lscoperef != 0 || ident.contains(':') {
|
||||
return Err(Error { loc, kind: ErrorKind::InvalidIdentifier });
|
||||
}
|
||||
let tmp = Atom::from(ident);
|
||||
if ret.entries.contains_key(&tmp) {
|
||||
return Err(Error { loc, kind: ErrorKind::DuplicateIdentifier });
|
||||
}
|
||||
(loc, tmp)
|
||||
}
|
||||
Some((loc, Token::Unknown(_))) => return Err(Error { loc, kind: ErrorKind::Unknown }),
|
||||
Some((loc, _)) => return Err(Error { loc, kind: ErrorKind::Unexpected("name") }),
|
||||
None if modified => return Err(Error { loc: eof_loc, kind: ErrorKind::UnexpectedEof }),
|
||||
None => break,
|
||||
};
|
||||
|
||||
// optional: arguments
|
||||
let mut args = Vec::new();
|
||||
if data.next_if(|(_, t)| t == &Token::OpenBrace).is_some() {
|
||||
loop {
|
||||
let (loc, tok) = data.next().ok_or(Error { loc: eof_loc, kind: ErrorKind::UnexpectedEof })?;
|
||||
args.push(match tok {
|
||||
Token::Unknown(_) => return Err(Error { loc, kind: ErrorKind::Unknown }),
|
||||
Token::Identifier { lscoperef, ident } => {
|
||||
if lscoperef != 0 || ident.contains(':') {
|
||||
return Err(Error { loc, kind: ErrorKind::InvalidIdentifier });
|
||||
}
|
||||
let tmp = Atom::from(ident);
|
||||
if args.contains(&tmp) {
|
||||
return Err(Error { loc, kind: ErrorKind::DuplicateIdentifier });
|
||||
}
|
||||
tmp
|
||||
}
|
||||
Token::CloseBrace => break,
|
||||
_ => return Err(Error { loc, kind: ErrorKind::Unexpected("argument name") }),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// `=`
|
||||
let (loc, tok) = data.next().ok_or(Error { loc: eof_loc, kind: ErrorKind::UnexpectedEof })?;
|
||||
match tok {
|
||||
Token::Assign => Ok(()),
|
||||
Token::Unknown(_) => Err(Error { loc, kind: ErrorKind::Unknown }),
|
||||
_ => Err(Error { loc, kind: ErrorKind::Unexpected("=") }),
|
||||
}?;
|
||||
|
||||
// code object
|
||||
let (data2, cobj) = CodeObject::parse_high(data.clone())?;
|
||||
data = data2;
|
||||
ret.entries.insert(name, Entry {
|
||||
nloc,
|
||||
args: args.into_boxed_slice(),
|
||||
cobj,
|
||||
final_,
|
||||
});
|
||||
}
|
||||
|
||||
return Ok(ret);
|
||||
}
|
Loading…
Reference in a new issue