rust/qwlum2s: yet another rust variant

This commit is contained in:
Alain Zscheile 2023-10-24 22:08:57 +02:00
parent 093d7dcb0b
commit 8942de9c81
7 changed files with 578 additions and 0 deletions

26
examples/qwlum2s/00.yxtw Normal file
View file

@ -0,0 +1,26 @@
(* minimal version *)
(* "99 bottles of beer": common string fragments are bound once and shared
   between the print calls below *)
use std;
A = " bottles of beer";
C = " on the wall";
D = "no more";
(* loop: prints one verse for count B, then recurses with B - 1;
   std.gtz presumably tests "greater than zero" — TODO confirm *)
loop = λ $B : int {
if (std.gtz B) {
Bs = std.int_to_string B;
std.print 7 .{ Bs, A, C, ".\n", Bs, A, ".\nTake one down, pass it around.\n" };
B2 = B - 1;
Bs = std.int_to_string B2;
std.print 4 .{ Bs, A, C, ".\n" };
loop B2;
} else {
std.print 7 .{ D, A, C, ".\n", D, A, ".\nGo to the store, buy some more.\n" };
std.print 4 .{ "99", A, C, ".\n" };
};
};
(* NOTE(review): B is bound but apparently unused (loop is applied to the
   literal 99) — given the README's use-once-by-default claim, verify this
   is legal *)
pub main = {
B = 99;
loop 99;
};

28
examples/qwlum2s/01.yxtw Normal file
View file

@ -0,0 +1,28 @@
(* stack implementation *)
(* what is an enum... a selector *)
(*
Maybe = λ $t : * → gadt {
none = ~
some = π $x : t → ~
}
*)
(* Maybe' t l: record of eliminator arms for Maybe t with result type l —
   a Church/Scott-style encoding; TODO confirm reading of ^{...} and π *)
Maybe' = λ $t : * → λ $l : * → ^{
none = l;
some = π t → l;
};
(* Maybe t: constructors encoded as functions selecting the matching arm
   of an eliminator record m *)
Maybe = λ $t : * → .{
none = λ $l : * → λ $m : (Maybe' t l) → m.none;
some = λ $x : t → λ $l : * → λ $m : (Maybe' t l) → m.some x;
};
(* NOTE(review): push references `null`, which is not defined in this file —
   confirm it exists in the prelude *)
stack = λ $t : * → {
push = λ $this : t → λ $x {
.{
pop = if this { this } else { null };
peek = x;
}
};
};

View file

@ -0,0 +1,19 @@
# SPDX-FileCopyrightText: 2023 Alain Zscheile <fogti+devel@ytrizja.de>
#
# SPDX-License-Identifier: CC0-1.0
[package]
name = "yn-qwlum2s-core"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
[dependencies]
# NOTE(review): the lexer derives `yn_functor::Functor`, but `yn_functor` is
# not listed here — confirm whether it is missing from this view or from the
# manifest.
miette = "5.10"
thiserror = "1.0"
unicode-ident = "1.0"
unicode-normalization = "0.1"
yz-string-utils = "0.3.1"
[dev-dependencies]
readfilez = "0.3.1"

View file

@ -0,0 +1,7 @@
# qwlum2s
This variant is closer to `zxtw` than to `gardswag`.
It also experiments with linearity and uniqueness by default.
That is, every value can and must be used exactly once by default,
but exceptions can be made per type via a trait-like mechanism.

View file

View file

@ -0,0 +1,296 @@
/*
* SPDX-FileCopyrightText: 2023 Alain Zscheile <fogti+devel@ytrizja.de>
*
* SPDX-License-Identifier: Apache-2.0
*/
use super::{Error, ErrorKind};
use core::fmt;
use yz_string_utils::StrLexerBase;
/// Streaming lexer over an input string slice.
///
/// Wraps [`yz_string_utils::StrLexerBase`], which holds the remaining input
/// and the current byte offset. Cloning is used for lookahead
/// (see `peek`, `got`, `expect`).
#[derive(Clone)]
pub struct Lexer<'a> {
    inner: StrLexerBase<'a>,
}
/// Byte offset of a token within the source. Kept at 32 bits to keep
/// `Token` small; larger offsets are reported as `ErrorKind::OffsetOverflow`.
pub type Offset = u32;

/// A single lexed token: its kind plus the byte offset where it starts.
#[derive(Clone, Debug)]
pub struct Token {
    pub kind: TokenKind<Box<str>>,
    pub offset: Offset,
}
impl fmt::Display for Token {
    /// Renders as `at offset <offset>: <kind:?>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { kind, offset } = self;
        write!(f, "at offset {}: {:?}", offset, kind)
    }
}
/// Lexical token kinds, generic over the string payload `S` so the payload
/// representation can be mapped (via the `yn_functor::Functor` derive).
#[derive(Clone, Debug, PartialEq, yn_functor::Functor)]
pub enum TokenKind<S> {
    // payload-carrying tokens
    Ident(S),
    PatOut(S),
    DotIdent(S),
    Symbol(S),
    Integer(usize),
    // punctuation / operators
    LParen,
    RParen,
    LBrace,
    RBrace,
    LArr,
    RArr,
    LdubArr,
    RdubArr,
    Caret,
    Dot,
    DubColon,
    SemiColon,
    Assign,
    PatIgnore,
    // `⎇` — referenced by the lexer but missing from the original enum
    Alternative,
    // keywords
    Data,
    // `repr` — referenced by the lexer but missing from the original enum
    Repr,
    Lambda,
    Let,
    TyLambda,
    Mu,
    Match,
    Mutable,
}
impl<'a> Lexer<'a> {
    /// Creates a lexer positioned at byte offset 0 of `inp`.
    pub fn new(inp: &'a str) -> Self {
        Self {
            inner: StrLexerBase { inp, offset: 0 },
        }
    }

    /// Current byte offset into the original input.
    #[inline(always)]
    pub fn offset(&self) -> usize {
        self.inner.offset
    }

    /// Skips `plus_offset` bytes, lexes one token, and returns the number of
    /// bytes the lexer advanced for it (this includes any whitespace or
    /// comments skipped before the token). Advances `self`.
    pub fn token_len_at(&mut self, plus_offset: usize) -> usize {
        self.inner.consume(plus_offset);
        let origoffs = self.inner.offset;
        self.next();
        self.inner.offset - origoffs
    }

    /// Offset of the next token (or of the next error, or the end-of-input
    /// offset) without advancing this lexer; works on a clone.
    pub fn peek_offset(&self) -> usize {
        let mut this = self.clone();
        match this.next() {
            Some(Ok(Token { offset, .. })) => offset.try_into().unwrap(),
            Some(Err(Error { offset, .. })) => offset,
            None => this.inner.offset,
        }
    }

    /// Returns the next token without advancing this lexer.
    pub fn peek(&self) -> Option<Result<Token, Error>> {
        self.clone().next()
    }

    /// Like `next`, but end of input becomes an `UnexpectedEof(ctx)` error.
    pub fn next_in_noeof(&mut self, ctx: &'static str) -> Result<Token, Error> {
        // record the pre-`next` offset for use in the EOF error
        let offset = self.offset();
        self.next().unwrap_or_else(|| {
            Err(Error {
                offset,
                kind: ErrorKind::UnexpectedEof(ctx),
            })
        })
    }

    /// Consumes the next token iff it equals `xkind`, returning its offset.
    /// On mismatch (or EOF/error) the lexer is left unchanged.
    pub fn got(&mut self, xkind: TokenKind<Box<str>>) -> Option<Offset> {
        let mut nxt = self.clone();
        match nxt.next() {
            Some(Ok(Token { offset, kind })) if xkind == kind => {
                *self = nxt;
                Some(offset)
            }
            _ => None,
        }
    }

    /// Like `got`, but a missing or mismatching token is an error
    /// (`UnexpectedEof` resp. `UnexpectedToken`). On error the lexer
    /// is not advanced.
    pub fn expect(
        &mut self,
        xkind: TokenKind<Box<str>>,
        ctx: &'static str,
    ) -> Result<Offset, Error> {
        let mut nxt = self.clone();
        let Token { offset, kind } = nxt.next_in_noeof(ctx)?;
        if xkind == kind {
            *self = nxt;
            Ok(offset)
        } else {
            Err(Error {
                offset: offset.try_into().unwrap(),
                kind: ErrorKind::UnexpectedToken { kind, ctx },
            })
        }
    }
}
/// Consumes a maximal run of XID-continue characters and returns it
/// NFC-normalized as a boxed string.
///
/// Panics if nothing was consumed — callers must have checked that the
/// input starts with a valid identifier character.
fn consume_ident(slb: &mut StrLexerBase<'_>) -> Box<str> {
    use unicode_normalization::UnicodeNormalization;
    let raw = slb.consume_select(unicode_ident::is_xid_continue);
    assert!(!raw.is_empty());
    raw.nfc().collect::<String>().into()
}
/// Consumes an identifier iff the input begins with an XID-start character;
/// otherwise leaves the input untouched and returns `None`.
fn try_consume_ident(slb: &mut StrLexerBase<'_>) -> Option<Box<str>> {
    match slb.inp.chars().next() {
        Some(c) if unicode_ident::is_xid_start(c) => Some(consume_ident(slb)),
        _ => None,
    }
}
impl<'a> Iterator for Lexer<'a> {
    type Item = Result<Token, Error>;

    /// Produces the next token, skipping whitespace and `(* … *)` comments.
    ///
    /// Returns `None` at end of input. Lex errors are yielded in-stream as
    /// `Some(Err(_))`; after an `OffsetOverflow` the remaining input is
    /// discarded so iteration terminates.
    fn next(&mut self) -> Option<Result<Token, Error>> {
        use TokenKind as Tk;
        let slb = &mut self.inner;
        let mut offset;
        Some(
            'lxl: loop {
                // handle whitespace
                slb.consume_select(|i| i.is_whitespace());
                if slb.inp.is_empty() {
                    return None;
                }
                // token offsets are stored as u32; give up (and drain the
                // input so iteration stops) once the offset no longer fits
                offset = match u32::try_from(slb.offset) {
                    Ok(x) => x,
                    Err(_) => {
                        slb.inp = "";
                        return Some(Err(Error {
                            offset: slb.offset,
                            kind: ErrorKind::OffsetOverflow,
                        }));
                    }
                };
                break match slb.inp.chars().next()? {
                    // integer literal (ASCII digits only); overflow of
                    // `usize` surfaces as ErrorKind::InvalidInt
                    '0'..='9' => {
                        let s = slb.consume_select(|i| i.is_ascii_digit());
                        debug_assert!(!s.is_empty());
                        s.parse().map(TokenKind::Integer).map_err(|e| e.into())
                    }
                    c if unicode_ident::is_xid_start(c) => {
                        // identifier
                        let s = consume_ident(slb);
                        // handle keywords (λ/μ/Λ are XID-start characters,
                        // so they arrive here as identifiers)
                        Ok(match &*s {
                            "data" => Tk::Data,
                            "repr" => Tk::Repr,
                            "let" => Tk::Let,
                            "match" => Tk::Match,
                            "mut" => Tk::Mutable,
                            "λ" => Tk::Lambda,
                            "μ" => Tk::Mu,
                            "Λ" => Tk::TyLambda,
                            _ => Tk::Ident(s),
                        })
                    }
                    c => {
                        // punctuation: single characters and digraphs
                        slb.consume(c.len_utf8());
                        match c {
                            '⎇' => Ok(Tk::Alternative),
                            // `.ident` selects a member; a lone `.` is Dot
                            '.' => Ok(if let Some(s) = try_consume_ident(slb) {
                                Tk::DotIdent(s)
                            } else {
                                Tk::Dot
                            }),
                            ';' => Ok(Tk::SemiColon),
                            '^' => Ok(Tk::Caret),
                            // `$ident` binds a pattern var; lone `$` ignores
                            '$' => {
                                Ok(if let Some(s) = try_consume_ident(slb) {
                                    Tk::PatOut(s)
                                } else {
                                    Tk::PatIgnore
                                })
                            },
                            // `:ident` is a symbol; a lone `:` is DubColon
                            ':' => {
                                Ok(if let Some(s) = try_consume_ident(slb) {
                                    Tk::Symbol(s)
                                } else {
                                    Tk::DubColon
                                })
                            },
                            '=' => Ok(Tk::Assign),
                            '←' => Ok(Tk::LArr),
                            '→' => Ok(Tk::RArr),
                            '⇐' => Ok(Tk::LdubArr),
                            '⇒' => Ok(Tk::RdubArr),
                            // ASCII digraph fallbacks `<-` / `->` for arrows
                            '<' => {
                                if slb.inp.starts_with('-') {
                                    slb.consume(1);
                                    Ok(Tk::LArr)
                                } else {
                                    Err(ErrorKind::UnhandledChar(c))
                                }
                            }
                            '-' => {
                                if slb.inp.starts_with('>') {
                                    slb.consume(1);
                                    Ok(Tk::RArr)
                                } else {
                                    Err(ErrorKind::UnhandledChar(c))
                                }
                            }
                            '{' /* '}' */ => Ok(Tk::LBrace),
                            /* '{' */ '}' => Ok(Tk::RBrace),
                            '(' /* ')' */ => {
                                if slb.inp.starts_with('*') {
                                    // `(*` opens a nestable comment: scan until
                                    // the matching `*)`, tracking depth.
                                    // NOTE(review): a `(` followed by `*` bumps
                                    // the depth without consuming the `*`, so
                                    // the three chars `(*)` count as an opener
                                    // immediately followed by a closer (net
                                    // depth change 0) — confirm intended.
                                    let mut lvl = 1;
                                    let mut it = slb.inp.chars().peekable();
                                    while lvl > 0 {
                                        let c = match it.next() {
                                            Some(c) => c,
                                            None => break 'lxl Err(ErrorKind::EofInComment),
                                        };
                                        slb.consume(c.len_utf8());
                                        match c {
                                            '(' => {
                                                if it.peek() == Some(&'*') {
                                                    lvl += 1;
                                                }
                                            }
                                            '*' => {
                                                if it.peek() == Some(&')') {
                                                    lvl -= 1;
                                                    it.next();
                                                    slb.consume(1);
                                                }
                                            }
                                            _ => {}
                                        }
                                    }
                                    // comment fully skipped; restart the scan
                                    continue;
                                } else {
                                    Ok(Tk::LParen)
                                }
                            }
                            /* '(' */ ')' => Ok(Tk::RParen),
                            _ => Err(ErrorKind::UnhandledChar(c)),
                        }
                    }
                };
            }
            .map(|kind| Token { offset, kind })
            .map_err(|kind| Error {
                offset: offset.try_into().unwrap(),
                kind,
            }),
        )
    }
}

View file

@ -0,0 +1,202 @@
/*
* SPDX-FileCopyrightText: 2023 Alain Zscheile <fogti+devel@ytrizja.de>
*
* SPDX-License-Identifier: Apache-2.0
*/
use core::fmt;
use miette::Diagnostic;
mod lex;
pub use lex::{Lexer, Offset, Token, TokenKind};
/// A lex/parse error: the byte offset it occurred at plus its kind.
#[derive(Clone, Debug)]
pub struct Error {
    // NOTE: the offset might exceed 32bit
    pub offset: usize,
    pub kind: ErrorKind,
}
impl fmt::Display for Error {
    /// With the alternate flag (`{:#}`) the offset is included as a prefix;
    /// plain `{}` prints only the error kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if f.alternate() {
            write!(f, "at offset {}: {}", self.offset, self.kind)
        } else {
            write!(f, "{}", self.kind)
        }
    }
}
impl std::error::Error for Error {
    /// Delegates to the underlying kind (e.g. a wrapped `ParseIntError`).
    #[inline(always)]
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        std::error::Error::source(&self.kind)
    }
}
/// An [`Error`] enriched with the named source code and the span length of
/// the offending token, for rich `miette` diagnostics.
#[derive(Debug)]
pub struct FullError {
    // NOTE: the offset might exceed 32bit
    pub offset: usize,
    pub len: usize,
    pub kind: ErrorKind,
    pub code: miette::NamedSource,
}
impl Error {
    /// Attaches the source code to this error, producing a [`FullError`]
    /// suitable for `miette` reporting.
    ///
    /// Re-lexes `code` at the error offset to determine the byte length of
    /// the offending token (used as the diagnostic label span).
    // Note: the explicit `'a` lifetime in the original tied two unrelated
    // borrowed inputs for no benefit (clippy::needless_lifetimes); both are
    // copied/owned before the function returns, so elision suffices.
    pub fn with_code(self, src_name: &str, code: &str) -> FullError {
        let Error { offset, kind } = self;
        let len = Lexer::new(code).token_len_at(offset);
        FullError {
            offset,
            len,
            kind,
            code: miette::NamedSource::new(src_name, code.to_string()),
        }
    }
}
impl fmt::Display for FullError {
    /// Prints only the error kind; offset/span are conveyed via the
    /// `miette::Diagnostic` label instead.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.kind, f)
    }
}
impl std::error::Error for FullError {
    /// Delegates to the underlying kind, same as [`Error`].
    #[inline(always)]
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        std::error::Error::source(&self.kind)
    }
}
impl miette::Diagnostic for FullError {
    /// Forwards to the diagnostic code derived on [`ErrorKind`].
    #[inline(always)]
    fn code<'a>(&'a self) -> Option<Box<dyn fmt::Display + 'a>> {
        self.kind.code()
    }

    #[inline(always)]
    fn severity(&self) -> Option<miette::Severity> {
        None
    }

    #[inline(always)]
    fn source_code(&self) -> Option<&dyn miette::SourceCode> {
        Some(&self.code)
    }

    /// A single unlabeled span covering the offending token.
    #[inline(always)]
    fn labels(&self) -> Option<Box<dyn Iterator<Item = miette::LabeledSpan> + '_>> {
        let span = miette::LabeledSpan::new(None, self.offset, self.len);
        Some(Box::new(core::iter::once(span)))
    }
}
/// All error conditions produced by the lexer and the parsing layers above
/// it. Display text comes from the `thiserror` attributes; diagnostic codes
/// from the `miette` attributes.
#[derive(Clone, Debug, Diagnostic, thiserror::Error)]
pub enum ErrorKind {
    // lexer errors
    /// The source position no longer fits the 32-bit token `Offset`.
    #[error("offset overflowed")]
    #[diagnostic(code(yanais::parser::offset_overflow))]
    OffsetOverflow,
    /// Input ended inside a `(* … *)` comment.
    #[error("end of file inside comment encountered")]
    #[diagnostic(code(yanais::parser::eof_in_comment))]
    EofInComment,
    /// A character with no token interpretation.
    #[error("unhandled character '{0}'")]
    #[diagnostic(code(yanais::parser::unhandled_char))]
    UnhandledChar(char),
    /// An integer literal that does not fit `usize`.
    #[error(transparent)]
    #[diagnostic(code(yanais::parser::invalid_int))]
    InvalidInt(#[from] core::num::ParseIntError),
    // higher parser errors
    /// Input ended where the named construct required more tokens.
    #[error("end of file encountered inside {0}")]
    #[diagnostic(code(yanais::parser::unexpected_eof))]
    UnexpectedEof(&'static str),
    /// A token that does not fit the named construct.
    #[error("unexpected token {kind:?} inside {ctx}")]
    #[diagnostic(code(yanais::parser::unexpected_token))]
    UnexpectedToken {
        kind: TokenKind<Box<str>>,
        ctx: &'static str,
    },
    /// A required element (see `MaybeParse::DFL_CTX`) was absent.
    #[error("expected {0}")]
    #[diagnostic(code(yanais::parser::expected))]
    Expected(&'static str),
    /// An identifier not found in the current environment.
    #[error("unknown identifier {0:?}")]
    #[diagnostic(code(yanais::parser::unknown_identifier))]
    UnknownIdent(Box<str>),
    /// The same field name used twice in one record.
    #[error("duplicate identifier {0:?} in record")]
    #[diagnostic(code(yanais::parser::rcddup_identifier))]
    RcdDupIdent(Box<str>),
    /// The same binder used twice in one pattern.
    #[error("duplicate identifier {0:?} in pattern")]
    #[diagnostic(code(yanais::parser::patdup_identifier))]
    PatDupIdent(Box<str>),
}
/// Mutable parsing state threaded through all parsers: the token source
/// plus the stack of bound names (`lookup` searches from the most recently
/// pushed end).
#[derive(Clone)]
pub struct Env<'a> {
    //pub fileid: u32,
    pub lxr: lex::Lexer<'a>,
    pub names: Vec<Box<str>>,
}
impl<'a> Env<'a> {
    /// Creates an environment with an empty name stack.
    pub fn new(lxr: lex::Lexer<'a>) -> Self {
        Self {
            lxr,
            names: Vec::new(),
        }
    }

    /// Looks up `name`, returning its distance from the most recently
    /// pushed binding (0 = innermost), or `None` if unbound.
    pub fn lookup(&self, name: &str) -> Option<usize> {
        self.names.iter().rev().position(|i| &**i == name)
    }
}
/// Convenience constructor for an `UnexpectedToken` error at `offset`.
pub fn unexpected_token(offset: u32, kind: TokenKind<Box<str>>, ctx: &'static str) -> Error {
    let offset = usize::try_from(offset).unwrap();
    Error {
        offset,
        kind: ErrorKind::UnexpectedToken { kind, ctx },
    }
}
/// A syntax element that must be present at the current position.
pub trait Parse: Sized {
    /// Parses `Self` from `env`; absence is an error.
    fn parse(env: &mut Env<'_>) -> Result<Self, Error>;
}
/// A syntax element that may or may not begin at the current position.
pub trait MaybeParse: Sized {
    /// Description of the element, used in `Expected` errors when a
    /// required occurrence is missing (see the blanket `Parse` impl).
    const DFL_CTX: &'static str;
    /// This function allows one to clearly differentiate between
    /// recoverable failures (`Ok(None)`: the element does not start here)
    /// and unrecoverable ones (`Err`).
    fn maybe_parse(env: &mut Env<'_>) -> Result<Option<Self>, Error>;
}
/// Blanket impl: any `MaybeParse` type is also `Parse` — a recoverable
/// "not present" (`Ok(None)`) is upgraded to an `Expected(DFL_CTX)` error
/// at the position where the element should have started.
impl<T: MaybeParse> Parse for T {
    fn parse(env: &mut Env<'_>) -> Result<Self, Error> {
        let knamcnt = env.names.len();
        // offset of the would-be element, captured before parsing advances
        let offset = env.lxr.peek_offset();
        let mres = T::maybe_parse(env);
        // invariant: maybe_parse must leave the name stack balanced
        assert_eq!(env.names.len(), knamcnt);
        mres?.ok_or_else(|| Error {
            offset,
            kind: ErrorKind::Expected(<T as MaybeParse>::DFL_CTX),
        })
    }
}