From c5bbf31cf85c6d2d3044a5a86d31dee40ce42f79 Mon Sep 17 00:00:00 2001 From: Alain Zscheile Date: Fri, 23 Sep 2022 11:39:47 +0200 Subject: [PATCH] feat(bytecode): many small things; proper call tables --- crates/fogtix-bytecode/src/consts.rs | 3 + crates/fogtix-bytecode/src/header.rs | 55 +++++++++++++----- crates/fogtix-bytecode/src/instr.rs | 33 +++++++++-- crates/fogtix-bytecode/src/lib.rs | 2 +- crates/fogtix-bytecode/src/parse.rs | 14 +++++ crates/fogtix-bytecode/src/value.rs | 87 +++++++++++++++++++++------- 6 files changed, 151 insertions(+), 43 deletions(-) diff --git a/crates/fogtix-bytecode/src/consts.rs b/crates/fogtix-bytecode/src/consts.rs index fa9564e..6867f57 100644 --- a/crates/fogtix-bytecode/src/consts.rs +++ b/crates/fogtix-bytecode/src/consts.rs @@ -14,11 +14,14 @@ pub enum ValueType { #[repr(u16)] #[derive(Clone, Copy, Debug, PartialEq, Eq, int_enum::IntEnum)] pub enum OpType { + Label = 0x6c62, /* lb */ Call = 0x636c, /* cl */ Jump = 0x6a70, /* jp */ Return = 0x7274, /* rt */ Push = 0x7073, /* ps */ Pop = 0x7071, /* pq */ + Dup = 0x3278, /* 2x */ + Swap = 0x3c3e, /* <> */ } impl Sealed for ValueType {} diff --git a/crates/fogtix-bytecode/src/header.rs b/crates/fogtix-bytecode/src/header.rs index 406d9ae..8ae64a0 100644 --- a/crates/fogtix-bytecode/src/header.rs +++ b/crates/fogtix-bytecode/src/header.rs @@ -1,11 +1,14 @@ -use crate::{consts::MAGIC, Value}; -use alloc::collections::BTreeMap; +use crate::{consts::MAGIC, CallTarget}; +use alloc::{collections::BTreeMap, vec::Vec}; use core::fmt; pub struct Header<'a> { - // if a symbol `Bytes([])` exists, it's the entry point - // only symbols named with a byte string or atom are exported - pub symbols: BTreeMap, u64>, + /// all internal references are resolved through this + /// the first tuple element is the target "name", the second tuple element is the expected arity + pub imports: Vec<(CallTarget<'a>, u8)>, + + /// if a symbol `[]` (empty bytestring) exists, it's the entry point + pub exports: BTreeMap<&'a [u8], u64>, } #[derive(Clone, Copy, Debug)] @@ -45,15 +48,23 @@ impl<'a> crate::Parse<'a> for Header<'a> { if !inp.starts_with(MAGIC) { return Err(ParseError); } - let (mut inp, len) = u32::parse(&inp[4..])?; + let (inp, ilen) = u32::parse(&inp[MAGIC.len()..])?; + let (mut inp, elen) = u32::parse(inp)?; let mut h = Header { - symbols: BTreeMap::new(), + imports: Vec::with_capacity(ilen.try_into()?), + exports: BTreeMap::new(), }; - for _ in 0..len { - let (xinp, exptarget) = u64::parse(inp)?; - let (xinp, expsrc) = Value::parse(xinp)?; + for _ in 0..ilen { + let (xinp, ct) = CallTarget::parse(inp)?; + let (xinp, arity) = u8::parse(xinp)?; inp = xinp; - if h.symbols.insert(expsrc, exptarget).is_some() { + h.imports.push((ct, arity)); + } + for _ in 0..elen { + let (xinp, exptarget) = u64::parse(inp)?; + let (xinp, expsrc) = <&[u8]>::parse(xinp)?; + inp = xinp; + if h.exports.insert(expsrc, exptarget).is_some() { return Err(ParseError); } } @@ -65,15 +76,27 @@ impl<'a> crate::Parse<'a> for Header<'a> { impl Header<'_> { pub fn write_to(&self, mut writer: W) -> std::io::Result<()> { writer.write_all(MAGIC)?; - let len: u32 = self - .symbols + let ilen: u32 = self + .imports .len() .try_into() .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - writer.write_all(&len.to_be_bytes())?; - for (k, v) in &self.symbols { + let elen: u32 = self + .exports + .len() + .try_into() + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + writer.write_all(&ilen.to_be_bytes())?; + writer.write_all(&elen.to_be_bytes())?; + for (ct, arity) in &self.imports { + ct.write_to(&mut writer)?; + writer.write_all(&[*arity])?; + } + for (k, v) in &self.exports { writer.write_all(&v.to_be_bytes())?; - k.write_to(&mut writer)?; + let len: u64 = k.len().try_into().unwrap(); + writer.write_all(&len.to_be_bytes())?; + writer.write_all(k)?; } Ok(()) } diff --git a/crates/fogtix-bytecode/src/instr.rs b/crates/fogtix-bytecode/src/instr.rs index c021f14..05aa23e 100644 --- a/crates/fogtix-bytecode/src/instr.rs +++ b/crates/fogtix-bytecode/src/instr.rs @@ -3,22 +3,44 @@ use core::fmt; #[derive(Clone, Debug)] pub enum Instr<'a> { - Call(Value<'a>), + /// defines a destination label + Label, + + /// pops the destination PID from the stack (should be an atom or pointer), + /// then tries to call the $0 (which is a reference to the `imports` table) on it. + Call(u32), + + /// jumps to the destination $0 given via the local function labels Jump(u32), + + /// pops the current call frame Return, + + /// pushes the given value to the stack Push(Value<'a>), + + /// pops the top value from the stack Pop, + + /// duplicates the top stack value + Dup, + + /// swaps the 2 top-most stack values + Swap, } impl Instr<'_> { #[inline] pub fn typ(&self) -> OpType { match self { + Instr::Label => OpType::Label, Instr::Call(_) => OpType::Call, Instr::Jump(_) => OpType::Jump, Instr::Return => OpType::Return, Instr::Push(_) => OpType::Push, Instr::Pop => OpType::Pop, + Instr::Dup => OpType::Dup, + Instr::Swap => OpType::Swap, } } } @@ -57,7 +79,7 @@ impl<'a> crate::Parse<'a> for Instr<'a> { let (inp, otyp) = OpType::parse(inp)?; Ok(match otyp { OpType::Call => { - let (inp, val) = Value::parse(inp)?; + let (inp, val) = u32::parse(inp)?; (inp, Instr::Call(val)) } OpType::Jump => { @@ -68,8 +90,11 @@ impl<'a> crate::Parse<'a> for Instr<'a> { let (inp, val) = Value::parse(inp)?; (inp, Instr::Push(val)) } + OpType::Label => (inp, Instr::Label), OpType::Return => (inp, Instr::Return), OpType::Pop => (inp, Instr::Pop), + OpType::Dup => (inp, Instr::Dup), + OpType::Swap => (inp, Instr::Swap), }) } } @@ -80,10 +105,10 @@ impl Instr<'_> { use int_enum::IntEnum; writer.write_all(&self.typ().int_value().to_be_bytes())?; match self { - Instr::Call(val) => val.write_to(writer)?, + Instr::Call(val) => writer.write_all(&val.to_be_bytes())?, Instr::Jump(val) => writer.write_all(&val.to_be_bytes())?, Instr::Push(val) => val.write_to(writer)?, - Instr::Return | Instr::Pop => {} + Instr::Label | Instr::Return | Instr::Pop | Instr::Dup | Instr::Swap => {} } Ok(()) } diff --git a/crates/fogtix-bytecode/src/lib.rs b/crates/fogtix-bytecode/src/lib.rs index 745f9fd..81698c8 100644 --- a/crates/fogtix-bytecode/src/lib.rs +++ b/crates/fogtix-bytecode/src/lib.rs @@ -13,4 +13,4 @@ pub use parse::Parse; mod pointer; pub use pointer::{Atom, Pointer}; mod value; -pub use value::{ParseError as ValueParseError, Value}; +pub use value::{CallTarget, ParseError as ValueParseError, Value}; diff --git a/crates/fogtix-bytecode/src/parse.rs b/crates/fogtix-bytecode/src/parse.rs index f576f91..9293fcc 100644 --- a/crates/fogtix-bytecode/src/parse.rs +++ b/crates/fogtix-bytecode/src/parse.rs @@ -63,3 +63,17 @@ where T::from_int(this).map(|this| (inp, this)).map_err(|_| ()) } } + +impl<'a> Parse<'a> for &'a [u8] { + type Err = (); + fn parse(inp: &'a [u8]) -> Result<(&'a [u8], Self), Self::Err> { + let (inp, len) = u64::parse(inp).map_err(|_| ())?; + let len = usize::try_from(len).map_err(|_| ())?; + if inp.len() < len { + Err(()) + } else { + let (data, inp) = inp.split_at(len); + Ok((inp, data)) + } + } +} diff --git a/crates/fogtix-bytecode/src/value.rs b/crates/fogtix-bytecode/src/value.rs index 88c02ad..0f3f968 100644 --- a/crates/fogtix-bytecode/src/value.rs +++ b/crates/fogtix-bytecode/src/value.rs @@ -2,6 +2,22 @@ use crate::{consts::ValueType, Atom, Pointer}; use core::fmt; use int_enum::IntEnum; +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum CallTarget<'a> { + Indirect(&'a [u8]), + Direct(Atom, Pointer), +} + +impl CallTarget<'_> { + #[inline] + pub fn typ(&self) -> ValueType { + match self { + CallTarget::Indirect(_) => ValueType::Bytes, + CallTarget::Direct(_, _) => ValueType::Pointer, + } + } +} + #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum Value<'a> { Bytes(&'a [u8]), @@ -52,6 +68,50 @@ impl From for ParseError { } } +impl<'a> crate::Parse<'a> for CallTarget<'a> { + type Err = ParseError; + + fn parse(inp: &'a [u8]) -> Result<(&'a [u8], Self), ParseError> { + if inp.is_empty() { + return Err(ParseError); + } + let (vtyp, inp) = (ValueType::from_int(inp[0])?, &inp[1..]); + Ok(match vtyp { + ValueType::Bytes => { + let (inp, data) = <&[u8]>::parse(inp)?; + (inp, CallTarget::Indirect(data)) + } + + ValueType::Pointer => { + let (inp, atom) = Atom::parse(inp)?; + let (inp, ptr) = Pointer::parse(inp)?; + (inp, CallTarget::Direct(atom, ptr)) + } + + ValueType::Int_ | ValueType::Atom => return Err(ParseError), + }) + } +} + +#[cfg(any(test, feature = "std"))] +impl CallTarget<'_> { + pub fn write_to(&self, mut writer: W) -> std::io::Result<()> { + writer.write_all(&[self.typ().int_value()])?; + match self { + CallTarget::Indirect(b) => { + let len: u64 = b.len().try_into().unwrap(); + writer.write_all(&len.to_be_bytes())?; + writer.write_all(b)?; + } + CallTarget::Direct(atom, ptr) => { + atom.write_to(&mut writer)?; + ptr.write_to(&mut writer)?; + } + } + Ok(()) + } +} + impl<'a> crate::Parse<'a> for Value<'a> { type Err = ParseError; @@ -61,29 +121,12 @@ impl<'a> crate::Parse<'a> for Value<'a> { } let (vtyp, inp) = (ValueType::from_int(inp[0])?, &inp[1..]); match vtyp { - ValueType::Bytes => { - let (inp, len) = u64::parse(inp)?; - let len = usize::try_from(len)?; - if inp.len() < len { - return Err(ParseError); - } - let (data, inp) = inp.split_at(len); - Ok((inp, Value::Bytes(data))) - } - ValueType::Int_ => { - let (inp, data) = u64::parse(inp)?; - Ok((inp, Value::Int(data))) - } - - ValueType::Pointer => { - let (inp, ptr) = Pointer::parse(inp)?; - Ok((inp, Value::Pointer(ptr))) - } - ValueType::Atom => { - let (inp, ptr) = Atom::parse(inp)?; - Ok((inp, Value::Atom(ptr))) - } + ValueType::Bytes => <&[u8]>::parse(inp).map(|(inp, data)| (inp, Value::Bytes(data))), + ValueType::Int_ => u64::parse(inp).map(|(inp, data)| (inp, Value::Int(data))), + ValueType::Pointer => Pointer::parse(inp).map(|(inp, ptr)| (inp, Value::Pointer(ptr))), + ValueType::Atom => Atom::parse(inp).map(|(inp, atom)| (inp, Value::Atom(atom))), } + .map_err(|()| ParseError) } }