add simple lexer
This commit is contained in:
parent
6554b0f679
commit
8287a96134
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
|
@ -0,0 +1,6 @@
|
|||
.#*
|
||||
/target
|
||||
result
|
||||
result-*
|
||||
perf.data*
|
||||
flamegraph.svg
|
16
Cargo.lock
generated
Normal file
16
Cargo.lock
generated
Normal file
|
@ -0,0 +1,16 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
|
||||
|
||||
[[package]]
|
||||
name = "wafl-parser"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
7
Cargo.toml
Normal file
7
Cargo.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[workspace]
|
||||
members = ["crates/*"]
|
||||
|
||||
[profile.release]
|
||||
codegen-units = 1
|
||||
debug = 1
|
||||
lto = "thin"
|
22
README.md
22
README.md
|
@ -9,7 +9,7 @@
|
|||
## Syntax
|
||||
|
||||
```
|
||||
[cfe] [final] name ["{" args (separated via commas) "}"] = {
|
||||
[@cfe] [@final] name ["{" args (separated via commas) "}"] = {
|
||||
content;
|
||||
content;
|
||||
etc;
|
||||
|
@ -19,8 +19,8 @@
|
|||
|
||||
## Attributes of Code Objects
|
||||
|
||||
- `final` prevents overwriting and basically marks a root
|
||||
- `cfe` allows an object to access the continuation and omits the default return
|
||||
- `@final` prevents overwriting and basically marks a root
|
||||
- `@cfe` allows an object to access the continuation and omits the default return
|
||||
|
||||
## Early vs late binding
|
||||
|
||||
|
@ -34,9 +34,9 @@ as an argument, prefix the block with `'`.
|
|||
|
||||
## Control flow builtins
|
||||
|
||||
- `if cond ... then ... else ...`
|
||||
- `loop ...`
|
||||
- `return ...` (implicit when last expr in a block is not terminated via semicolon)
|
||||
- `@if cond ... then ... else ...`
|
||||
- `@loop ...`
|
||||
- `@return ...` (implicit when last expr in a block is not terminated via semicolon)
|
||||
|
||||
## Modules
|
||||
|
||||
|
@ -60,8 +60,8 @@ with package-lock files or such. Packages (packaged module trees) would then be
|
|||
```
|
||||
# 1.
|
||||
|
||||
final main { args, env } = {
|
||||
std::io::writeln "Hello World!";
|
||||
@final main { args, env } = {
|
||||
std:io:writeln "Hello World!";
|
||||
0
|
||||
};
|
||||
|
||||
|
@ -70,9 +70,9 @@ with package-lock files or such. Packages (packaged module trees) would then be
|
|||
business_in_the_front = party_in_the_back;
|
||||
|
||||
what_now = {
|
||||
self::business_in_the_front;
|
||||
} self::party_in_the_back {
|
||||
std::io::writeln "It works";
|
||||
self:business_in_the_front;
|
||||
}, self:party_in_the_back {
|
||||
std:io:writeln "It works";
|
||||
};
|
||||
|
||||
# running `what_now` results in "It works" being printed.
|
||||
|
|
8
crates/wafl-parser/Cargo.toml
Normal file
8
crates/wafl-parser/Cargo.toml
Normal file
|
@ -0,0 +1,8 @@
|
|||
[package]
|
||||
name = "wafl-parser"
|
||||
description = "Walls of Flesh parser"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
unicode-ident = "1.0"
|
140
crates/wafl-parser/src/lex.rs
Normal file
140
crates/wafl-parser/src/lex.rs
Normal file
|
@ -0,0 +1,140 @@
|
|||
/// A single lexical token; borrowed text slices point into the source string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Token<'a> {
    // keywords (written `@cfe`, `@final`, ... in the source)
    CtrlFlowEdit,
    Final,
    If,
    Loop,
    Return,

    // single-char keywords
    Assign,
    OpenBrace,
    CloseBrace,
    Comma,
    SemiColon,

    // dynamic stuff
    /// Identifier; may be a path containing `:` separators (e.g. `std:io:writeln`).
    Identifier(&'a str),
    /// Integer literal. NOTE(review): no rule in the lexer constructs this
    /// variant yet — confirm whether integer lexing is still to come.
    Integer(i64),
    /// Text the lexer could not classify; carries the raw slice.
    Unknown(&'a str),
}
|
||||
|
||||
/// Streaming lexer over a single source string.
///
/// Tokens are produced through the `Iterator` impl; `s` always holds the
/// not-yet-consumed tail of the input. `Clone` allows cheap lookahead by
/// cloning the lexer state; `Copy` is deliberately not derived so the
/// mutable cursor is never duplicated by accident.
#[derive(Debug, Clone)]
pub struct Lexer<'a> {
    /// Identifier of the file being lexed (only echoed into `Location`s).
    pub fileid: u32,
    /// Byte offset of the start of `s` within the original input.
    pub offset: u32,
    /// Remaining, unconsumed input.
    pub s: &'a str,
}
|
||||
|
||||
/// A position in some source file: file id plus byte offset.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Location {
    /// Identifier of the file this location refers to.
    pub fileid: u32,
    /// Byte offset from the start of that file's input.
    pub offset: u32,
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn loc(&self) -> Location {
|
||||
Location {
|
||||
fileid: self.fileid,
|
||||
offset: self.offset,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn eat(&mut self, n: usize) -> bool {
|
||||
let ssl = self.s.len();
|
||||
let ret = n <= ssl;
|
||||
let (l, r) = if n >= ssl {
|
||||
(ssl, "")
|
||||
} else {
|
||||
(n, &self.s[n..])
|
||||
};
|
||||
let l2: u32 = l.try_into().unwrap();
|
||||
self.offset += l2;
|
||||
self.s = r;
|
||||
ret
|
||||
}
|
||||
|
||||
/// use to determinate the length of a token based upon e.g.
|
||||
/// `lex.eat(loc.offset); toklen = lex.first_token_length();`
|
||||
pub fn first_token_length(&mut self) -> Option<usize> {
|
||||
let start = self.s.len();
|
||||
self.next()?;
|
||||
let stop = self.s.len();
|
||||
assert!(stop >= start);
|
||||
Some(stop - start)
|
||||
}
|
||||
|
||||
fn select_text(&mut self, skip: usize, f: impl Fn(char) -> bool) -> &'a str {
|
||||
let tmp = &self.s[skip..];
|
||||
let ret = match tmp.find(f) {
|
||||
None => core::mem::replace(&mut self.s, ""),
|
||||
Some(nxt) => {
|
||||
let (txt, rest) = tmp.split_at(nxt);
|
||||
self.s = rest;
|
||||
txt
|
||||
}
|
||||
};
|
||||
let rl2: u32 = ret.len().try_into().unwrap();
|
||||
self.offset += rl2;
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Lexer<'a> {
|
||||
// note: we don't need to
|
||||
type Item = (Location, Token<'a>);
|
||||
|
||||
fn next(&mut self) -> Option<(Location, Token<'a>)> {
|
||||
loop {
|
||||
let x = self.s.chars().next()?;
|
||||
let loc = self.loc();
|
||||
match x {
|
||||
'{' | '}' | '=' | ',' | ';' => {
|
||||
self.eat(x.len_utf8());
|
||||
return Some((loc, match x {
|
||||
'{' => Token::OpenBrace,
|
||||
'}' => Token::CloseBrace,
|
||||
'=' => Token::Assign,
|
||||
',' => Token::Comma,
|
||||
';' => Token::SemiColon,
|
||||
_ => unreachable!(),
|
||||
}));
|
||||
}
|
||||
'#' | '⍝' => {
|
||||
// comment until next line
|
||||
match self.s.find('\n') {
|
||||
None => {
|
||||
self.eat(self.s.len());
|
||||
return None;
|
||||
}
|
||||
Some(y) => {
|
||||
self.eat(x.len_utf8() + y);
|
||||
}
|
||||
}
|
||||
}
|
||||
'@' => {
|
||||
// keyword
|
||||
let ident = self.select_text(1, |i| !i.is_alphanumeric());
|
||||
return Some((loc, match &ident[1..] {
|
||||
"cfe" => Token::CtrlFlowEdit,
|
||||
"final" => Token::Final,
|
||||
"if" => Token::If,
|
||||
"loop" => Token::Loop,
|
||||
"return" => Token::Return,
|
||||
_ => Token::Unknown(ident),
|
||||
}));
|
||||
}
|
||||
_ if (unicode_ident::is_xid_start(x) || x == ':') => {
|
||||
let ident = self.select_text(0, |i| !unicode_ident::is_xid_continue(i) && i != ':');
|
||||
return Some((loc, Token::Identifier(ident)));
|
||||
}
|
||||
_ if x.is_whitespace() => {
|
||||
self.eat(x.len_utf8());
|
||||
}
|
||||
_ => {
|
||||
let tok = &self.s[..x.len_utf8()];
|
||||
self.eat(x.len_utf8());
|
||||
return Some((loc, Token::Unknown(tok)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
3
crates/wafl-parser/src/lib.rs
Normal file
3
crates/wafl-parser/src/lib.rs
Normal file
|
@ -0,0 +1,3 @@
|
|||
//! Walls of Flesh (wafl) parser crate.
#![no_std]

// Lexical analysis (tokenizer) for the wafl language.
pub mod lex;
|
Loading…
Reference in a new issue