From 28b60c5dbbf6bae2ad1cf7288f12a658f8f8f253 Mon Sep 17 00:00:00 2001 From: Alain Zscheile Date: Sun, 23 Oct 2022 01:17:34 +0200 Subject: [PATCH] refactor: move verification code from 'vm' to 'bytecode' crate --- crates/fogtix-bytecode/src/instr.rs | 13 ++++ crates/fogtix-bytecode/src/lib.rs | 7 +- crates/fogtix-bytecode/src/verify.rs | 69 ++++++++++++++++++ crates/fogtix-vm-cli/src/bin/fogtix-vm.rs | 2 +- crates/fogtix-vm/src/lib.rs | 86 +---------------------- 5 files changed, 90 insertions(+), 87 deletions(-) create mode 100644 crates/fogtix-bytecode/src/verify.rs diff --git a/crates/fogtix-bytecode/src/instr.rs b/crates/fogtix-bytecode/src/instr.rs index d1ab09d..713be17 100644 --- a/crates/fogtix-bytecode/src/instr.rs +++ b/crates/fogtix-bytecode/src/instr.rs @@ -143,6 +143,19 @@ impl Instr { } } +pub fn next_instr(m: &[u8], pos: usize) -> Option> { + use crate::Parse; + m.get(pos..).map(|nxti_arr| match Instr::parse(nxti_arr) { + Err(_) => Err(&nxti_arr[..core::cmp::min(nxti_arr.len(), 20)]), + Ok((ptr, i)) => Ok((nxti_arr.len() - ptr.len(), i)), + }) +} + +pub fn is_call2jump(m: &[u8], pos: usize) -> bool { + // tail-call optimization (would otherwise require much more opcodes) + matches!(next_instr(m, pos), Some(Ok((_, Instr::Return)))) +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/fogtix-bytecode/src/lib.rs b/crates/fogtix-bytecode/src/lib.rs index 5b15d15..ffa369a 100644 --- a/crates/fogtix-bytecode/src/lib.rs +++ b/crates/fogtix-bytecode/src/lib.rs @@ -1,10 +1,13 @@ #![cfg_attr(not(any(test, feature = "std")), no_std)] #![forbid(unsafe_code, unused_variables)] -/// Fogtix bytecode is structured as type-(length?)-value items, which can be nested. + +#[cfg(test)] extern crate alloc; pub mod consts; mod instr; -pub use instr::{Instr, ParseError as InstrParseError}; +pub use instr::{is_call2jump, next_instr, Instr, ParseError as InstrParseError}; mod parse; pub use parse::{Parse, ParseExt}; +mod verify; +pub use verify::{verify, VerifyError, VerifyErrorKind}; diff --git a/crates/fogtix-bytecode/src/verify.rs b/crates/fogtix-bytecode/src/verify.rs new file mode 100644 index 0000000..2e0b117 --- /dev/null +++ b/crates/fogtix-bytecode/src/verify.rs @@ -0,0 +1,69 @@ +use crate::{next_instr, Instr}; +use core::fmt; + +#[derive(Clone, Debug)] +pub struct VerifyError<'m> { + pub from: usize, + pub kind: VerifyErrorKind<'m>, +} + +#[derive(Clone, Debug)] +pub enum VerifyErrorKind<'m> { + InstrpOutOfBounds, + UnparsableInstruction(&'m [u8]), +} + +impl fmt::Display for VerifyError<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use VerifyErrorKind as K; + write!(f, "at instruction {}: ", self.from)?; + match &self.kind { + K::InstrpOutOfBounds => write!(f, "instruction pointer out-of-bounds"), + K::UnparsableInstruction(x) => write!(f, "reached unparsable instruction: {:x?}", x), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for VerifyError<'_> {} + +/// this should be called before attempting to run an opcode stream to ensure +/// that jump targets are valid (this makes it possible to avoid unnecessary +/// checks during tight loops) +pub fn verify(m: &[u8]) -> Result<(), VerifyError<'_>> { + let mut instrp = 0; + + while let Some(nxti) = next_instr(m, instrp) { + let (nxtidelta, nxti) = nxti.map_err(|code| VerifyError { + from: instrp, + kind: VerifyErrorKind::UnparsableInstruction(code), + })?; + assert_ne!(nxtidelta, 0); + let vfe = VerifyError { + from: instrp, + kind: VerifyErrorKind::InstrpOutOfBounds, + }; + instrp += nxtidelta; + match nxti { + Instr::CallLocal(x) | Instr::CallLDefer(x) | Instr::JumpCond(x) => { + match usize::try_from(x) { + Err(_) => return Err(vfe), + Ok(x) if x >= m.len() => return Err(vfe), + Ok(_) => {} + } + } + Instr::CallRemote(_) + | Instr::Return + | Instr::Push(_) + | Instr::Pop(_) + | Instr::Dup(_) + | Instr::Swap(_) + | Instr::Shift(_) + | Instr::DupFrom + | Instr::DoMath1(_) + | Instr::DoMath2(_) => {} + } + } + + Ok(()) +} diff --git a/crates/fogtix-vm-cli/src/bin/fogtix-vm.rs b/crates/fogtix-vm-cli/src/bin/fogtix-vm.rs index 36abbd3..df8c5b3 100644 --- a/crates/fogtix-vm-cli/src/bin/fogtix-vm.rs +++ b/crates/fogtix-vm-cli/src/bin/fogtix-vm.rs @@ -18,7 +18,7 @@ fn main() { let main_mod = readfilez::read_from_file(std::fs::File::open(main_mod_path)) .expect("unable to open/load main module"); - if let Err(e) = fogtix_vm::verify(&main_mod) { + if let Err(e) = fogtix_bytecode::verify(&main_mod) { eprintln!("ERROR: {}", e); std::process::exit(1); } diff --git a/crates/fogtix-vm/src/lib.rs b/crates/fogtix-vm/src/lib.rs index 0be8dd0..70709d4 100644 --- a/crates/fogtix-vm/src/lib.rs +++ b/crates/fogtix-vm/src/lib.rs @@ -5,19 +5,7 @@ extern crate alloc; use alloc::vec::Vec; use core::fmt; -use fogtix_bytecode::{consts, Instr, Parse}; - -fn next_instr(m: &[u8], pos: usize) -> Option> { - m.get(pos..).map(|nxti_arr| match Instr::parse(nxti_arr) { - Err(_) => Err(&nxti_arr[..core::cmp::min(nxti_arr.len(), 20)]), - Ok((ptr, i)) => Ok((nxti_arr.len() - ptr.len(), i)), - }) -} - -fn is_call2jump(m: &[u8], pos: usize) -> bool { - // tail-call optimization (would otherwise require much more opcodes) - matches!(next_instr(m, pos), Some(Ok((_, Instr::Return)))) -} +use fogtix_bytecode::{consts, is_call2jump, next_instr, Instr}; pub type StackEntValue = u64; @@ -40,76 +28,6 @@ impl fmt::Display for Error<'_> { } } -#[derive(Clone, Debug)] -pub struct VerifyError<'m> { - pub from: usize, - pub kind: VerifyErrorKind<'m>, -} - -#[derive(Clone, Debug)] -pub enum VerifyErrorKind<'m> { - InstrpOutOfBounds, - UnparsableInstruction(&'m [u8]), -} - -impl fmt::Display for VerifyError<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use VerifyErrorKind as K; - write!(f, "at instruction {}: ", self.from)?; - match &self.kind { - K::InstrpOutOfBounds => write!(f, "instruction pointer out-of-bounds"), - K::UnparsableInstruction(x) => write!(f, "reached unparsable instruction: {:x?}", x), - } - } -} - -/// this should be called before attempting to run an opcode stream to ensure -/// that jump targets are valid (this makes it possible to avoid unnecessary -/// checks during tight loops) -pub fn verify(m: &[u8]) -> Result<(), VerifyError<'_>> { - let mut instrp = 0; - - while let Some(nxti) = next_instr(m, instrp) { - let (nxtidelta, nxti) = nxti.map_err(|code| VerifyError { - from: instrp, - kind: VerifyErrorKind::UnparsableInstruction(code), - })?; - assert_ne!(nxtidelta, 0); - instrp += nxtidelta; - match nxti { - Instr::CallLocal(x) | Instr::CallLDefer(x) | Instr::JumpCond(x) => { - match usize::try_from(x) { - Err(_) => { - return Err(VerifyError { - from: usize::MAX, - kind: VerifyErrorKind::InstrpOutOfBounds, - }) - } - Ok(x) if x >= m.len() => { - return Err(VerifyError { - from: x, - kind: VerifyErrorKind::InstrpOutOfBounds, - }) - } - Ok(_) => {} - } - } - Instr::CallRemote(_) - | Instr::Return - | Instr::Push(_) - | Instr::Pop(_) - | Instr::Dup(_) - | Instr::Swap(_) - | Instr::Shift(_) - | Instr::DupFrom - | Instr::DoMath1(_) - | Instr::DoMath2(_) => {} - } - } - - Ok(()) -} - pub struct Process<'m> { pub stack: Vec, pub m: &'m [u8], @@ -264,7 +182,7 @@ mod tests { #[test] fn doesnt_crash(inp in proptest::collection::vec(0..=u8::MAX, 0..1024)) { - if verify(&inp[..]).is_err() { + if fogtix_bytecode::verify(&inp[..]).is_err() { return Ok(()); } let mut p = Process {