From 7c992a5c84e7556112d928a79cdff2b8b27deeef Mon Sep 17 00:00:00 2001 From: Alain Zscheile Date: Fri, 23 Sep 2022 06:50:42 +0200 Subject: [PATCH] more documentation, some opcodes --- crates/fogtix-bytecode/src/consts.rs | 15 ++++ crates/fogtix-bytecode/src/instr.rs | 104 +++++++++++++++++++++++++++ crates/fogtix-bytecode/src/lib.rs | 64 ++++++++++++++++- crates/fogtix-bytecode/src/value.rs | 41 ++++++----- docs/design.md | 7 ++ 5 files changed, 209 insertions(+), 22 deletions(-) create mode 100644 crates/fogtix-bytecode/src/instr.rs diff --git a/crates/fogtix-bytecode/src/consts.rs b/crates/fogtix-bytecode/src/consts.rs index 30ba545..3bbf8ee 100644 --- a/crates/fogtix-bytecode/src/consts.rs +++ b/crates/fogtix-bytecode/src/consts.rs @@ -1,3 +1,5 @@ +use crate::intern::Sealed; + #[repr(u8)] #[derive(Clone, Copy, Debug, PartialEq, Eq, int_enum::IntEnum)] pub enum ValueType { @@ -6,3 +8,16 @@ pub enum ValueType { Int_ = 0x49, /* I */ Pointer = 0x50, /* P */ } + +#[repr(u16)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, int_enum::IntEnum)] +pub enum OpType { + Call = 0x636c, /* cl */ + Jump = 0x6a70, /* jp */ + Return = 0x7274, /* rt */ + Push = 0x7073, /* ps */ + Pop = 0x7071, /* pq */ +} + +impl Sealed for ValueType {} +impl Sealed for OpType {} diff --git a/crates/fogtix-bytecode/src/instr.rs b/crates/fogtix-bytecode/src/instr.rs new file mode 100644 index 0000000..c51020c --- /dev/null +++ b/crates/fogtix-bytecode/src/instr.rs @@ -0,0 +1,104 @@ +use crate::{consts::OpType, Value}; +use core::fmt; + +pub enum Instr<'a> { + Call(Value<'a>), + Jump(u32), + Return, + Push(Value<'a>), + Pop, +} + +impl Instr<'_> { + #[inline] + pub fn typ(&self) -> OpType { + match self { + Instr::Call(_) => OpType::Call, + Instr::Jump(_) => OpType::Jump, + Instr::Return => OpType::Return, + Instr::Push(_) => OpType::Push, + Instr::Pop => OpType::Pop, + } + } +} + +#[derive(Clone, Copy, Debug)] +pub struct ParseError; + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "unable to parse instruction from buffer") + } +} + +#[cfg(feature = "std")] +impl std::error::Error for ParseError {} + +impl From<()> for ParseError { + fn from(_: ()) -> Self { + Self + } +} + +impl From for ParseError { + fn from(_: crate::ValueParseError) -> Self { + Self + } +} + +impl<'a> crate::Parse<'a> for Instr<'a> { + type Err = ParseError; + + fn parse(inp: &'a [u8]) -> Result<(&'a [u8], Self), ParseError> { + if inp.len() < 2 { + return Err(ParseError); + } + let (inp, otyp) = OpType::parse(inp)?; + Ok(match otyp { + OpType::Call => { + let (inp, val) = Value::parse(inp)?; + (inp, Instr::Call(val)) + } + OpType::Jump => { + let (inp, val) = u32::parse(inp)?; + (inp, Instr::Jump(val)) + } + OpType::Push => { + let (inp, val) = Value::parse(inp)?; + (inp, Instr::Push(val)) + } + OpType::Return => (inp, Instr::Return), + OpType::Pop => (inp, Instr::Pop), + }) + } +} + +#[cfg(any(test, feature = "std"))] +impl Instr<'_> { + pub fn write_to(&self, mut writer: W) -> std::io::Result<()> { + use int_enum::IntEnum; + writer.write_all(&self.typ().int_value().to_be_bytes())?; + match self { + Instr::Call(val) => val.write_to(writer)?, + Instr::Jump(val) => writer.write_all(&val.to_be_bytes())?, + Instr::Push(val) => val.write_to(writer)?, + Instr::Return | Instr::Pop => {} + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + proptest::proptest! { + #[test] + fn doesnt_crash(inp in proptest::collection::vec(0..=u8::MAX, 0..256)) { + if let Ok((_, v)) = >::parse(&inp[..]) { + let mut buf = alloc::vec::Vec::with_capacity(inp.len()); + v.write_to(&mut buf).unwrap(); + } + } + } +} diff --git a/crates/fogtix-bytecode/src/lib.rs b/crates/fogtix-bytecode/src/lib.rs index 46c8473..f2de5da 100644 --- a/crates/fogtix-bytecode/src/lib.rs +++ b/crates/fogtix-bytecode/src/lib.rs @@ -5,6 +5,10 @@ #[cfg(test)] extern crate alloc; +mod intern { + pub trait Sealed {} +} + /// parse an object from a byte stream pub trait Parse<'a>: Sized { type Err: core::fmt::Debug + Send + Sync; @@ -12,7 +16,65 @@ pub trait Parse<'a>: Sized { } pub mod consts; +mod instr; +pub use instr::{Instr, ParseError as InstrParseError}; mod pointer; pub use pointer::*; mod value; -pub use value::*; +pub use value::{ParseError as ValueParseError, Value}; + +impl<'a> Parse<'a> for u8 { + type Err = (); + #[inline] + fn parse(inp: &'a [u8]) -> Result<(&'a [u8], Self), Self::Err> { + if inp.is_empty() { + Err(()) + } else { + Ok((&inp[1..], inp[0])) + } + } +} + +impl<'a> Parse<'a> for u16 { + type Err = (); + fn parse(inp: &'a [u8]) -> Result<(&'a [u8], Self), Self::Err> { + if inp.len() < 2 { + Err(()) + } else { + Ok((&inp[2..], Self::from_be_bytes(inp[..2].try_into().unwrap()))) + } + } +} + +impl<'a> Parse<'a> for u32 { + type Err = (); + fn parse(inp: &'a [u8]) -> Result<(&'a [u8], Self), Self::Err> { + if inp.len() < 4 { + Err(()) + } else { + Ok((&inp[4..], Self::from_be_bytes(inp[..4].try_into().unwrap()))) + } + } +} + +impl<'a> Parse<'a> for u64 { + type Err = (); + fn parse(inp: &'a [u8]) -> Result<(&'a [u8], Self), Self::Err> { + if inp.len() < 8 { + Err(()) + } else { + Ok((&inp[8..], Self::from_be_bytes(inp[..8].try_into().unwrap()))) + } + } +} + +impl<'a, T: int_enum::IntEnum + intern::Sealed> Parse<'a> for T +where + T::Int: Parse<'a, Err = ()>, +{ + type Err = (); + fn parse(inp: &'a [u8]) -> Result<(&'a [u8], Self), Self::Err> { + let (inp, this) = T::Int::parse(inp)?; + T::from_int(this).map(|this| (inp, this)).map_err(|_| ()) + } +} diff --git a/crates/fogtix-bytecode/src/value.rs b/crates/fogtix-bytecode/src/value.rs index f69728c..3efc280 100644 --- a/crates/fogtix-bytecode/src/value.rs +++ b/crates/fogtix-bytecode/src/value.rs @@ -23,65 +23,64 @@ impl Value<'_> { } #[derive(Clone, Copy, Debug)] -pub struct ValueParseError; +pub struct ParseError; -impl fmt::Display for ValueParseError { +impl fmt::Display for ParseError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "unable to parse value from buffer") } } #[cfg(feature = "std")] -impl std::error::Error for ValueParseError {} +impl std::error::Error for ParseError {} -impl From> for ValueParseError { +impl From<()> for ParseError { + fn from(_: ()) -> Self { + Self + } +} + +impl From> for ParseError { fn from(_: int_enum::IntEnumError) -> Self { Self } } -impl From for ValueParseError { +impl From for ParseError { fn from(_: core::num::TryFromIntError) -> Self { Self } } impl<'a> crate::Parse<'a> for Value<'a> { - type Err = ValueParseError; + type Err = ParseError; - fn parse(inp: &'a [u8]) -> Result<(&'a [u8], Self), ValueParseError> { + fn parse(inp: &'a [u8]) -> Result<(&'a [u8], Self), ParseError> { if inp.len() < 2 { - return Err(ValueParseError); + return Err(ParseError); } let (vtyp, inp) = (ValueType::from_int(inp[0])?, &inp[1..]); match vtyp { ValueType::Bytes => { - if inp.len() < 8 { - return Err(ValueParseError); - } - let (len, inp) = inp.split_at(8); - let len: usize = u64::from_be_bytes(len.try_into().unwrap()).try_into()?; + let (inp, len) = u64::parse(inp)?; + let len = usize::try_from(len)?; if inp.len() < len { - return Err(ValueParseError); + return Err(ParseError); } let (data, inp) = inp.split_at(len); Ok((inp, Value::Bytes(data))) } ValueType::Int_ => { - if inp.len() < 8 { - return Err(ValueParseError); - } - let (data, inp) = inp.split_at(8); - let data = u64::from_be_bytes(data.try_into().unwrap()); + let (inp, data) = u64::parse(inp)?; Ok((inp, Value::Int(data))) } ValueType::Pointer => { - let (inp, ptr) = Pointer::parse(inp).map_err(|()| ValueParseError)?; + let (inp, ptr) = Pointer::parse(inp)?; Ok((inp, Value::Pointer(ptr))) } ValueType::Atom => { - let (inp, ptr) = Atom::parse(inp).map_err(|()| ValueParseError)?; + let (inp, ptr) = Atom::parse(inp)?; Ok((inp, Value::Atom(ptr))) } } diff --git a/docs/design.md b/docs/design.md index 41d5710..0592594 100644 --- a/docs/design.md +++ b/docs/design.md @@ -3,3 +3,10 @@ * use tagged, signed pointers for memory addresses * use a separate call stack to prevent ROP * implement message passing + +# values + +* atoms (u128): atomic values, also used as pointer keys to sign pointers +* bytes ([]u8): arbitrary byte strings +* int (u64): the default integer type +* pointer (u128): signed pointer (can only be accessed with matching key)