From 28b60c5dbbf6bae2ad1cf7288f12a658f8f8f253 Mon Sep 17 00:00:00 2001
From: Alain Zscheile <zseri.devel@ytrizja.de>
Date: Sun, 23 Oct 2022 01:17:34 +0200
Subject: [PATCH] refactor: move verification code from 'vm' to 'bytecode'
 crate

---
 crates/fogtix-bytecode/src/instr.rs       | 13 ++++
 crates/fogtix-bytecode/src/lib.rs         |  7 +-
 crates/fogtix-bytecode/src/verify.rs      | 69 ++++++++++++++++++
 crates/fogtix-vm-cli/src/bin/fogtix-vm.rs |  2 +-
 crates/fogtix-vm/src/lib.rs               | 86 +----------------------
 5 files changed, 90 insertions(+), 87 deletions(-)
 create mode 100644 crates/fogtix-bytecode/src/verify.rs

diff --git a/crates/fogtix-bytecode/src/instr.rs b/crates/fogtix-bytecode/src/instr.rs
index d1ab09d..713be17 100644
--- a/crates/fogtix-bytecode/src/instr.rs
+++ b/crates/fogtix-bytecode/src/instr.rs
@@ -143,6 +143,19 @@ impl Instr {
     }
 }
 
+pub fn next_instr(m: &[u8], pos: usize) -> Option<Result<(usize, Instr), &[u8]>> {
+    use crate::Parse;
+    m.get(pos..).map(|nxti_arr| match Instr::parse(nxti_arr) {
+        Err(_) => Err(&nxti_arr[..core::cmp::min(nxti_arr.len(), 20)]),
+        Ok((ptr, i)) => Ok((nxti_arr.len() - ptr.len(), i)),
+    })
+}
+
+pub fn is_call2jump(m: &[u8], pos: usize) -> bool {
+    // tail-call optimization (would otherwise require much more opcodes)
+    matches!(next_instr(m, pos), Some(Ok((_, Instr::Return))))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/crates/fogtix-bytecode/src/lib.rs b/crates/fogtix-bytecode/src/lib.rs
index 5b15d15..ffa369a 100644
--- a/crates/fogtix-bytecode/src/lib.rs
+++ b/crates/fogtix-bytecode/src/lib.rs
@@ -1,10 +1,13 @@
 #![cfg_attr(not(any(test, feature = "std")), no_std)]
 #![forbid(unsafe_code, unused_variables)]
-/// Fogtix bytecode is structured as type-(length?)-value items, which can be nested.
+
+#[cfg(test)]
 extern crate alloc;
 
 pub mod consts;
 mod instr;
-pub use instr::{Instr, ParseError as InstrParseError};
+pub use instr::{is_call2jump, next_instr, Instr, ParseError as InstrParseError};
 mod parse;
 pub use parse::{Parse, ParseExt};
+mod verify;
+pub use verify::{verify, VerifyError, VerifyErrorKind};
diff --git a/crates/fogtix-bytecode/src/verify.rs b/crates/fogtix-bytecode/src/verify.rs
new file mode 100644
index 0000000..2e0b117
--- /dev/null
+++ b/crates/fogtix-bytecode/src/verify.rs
@@ -0,0 +1,69 @@
+use crate::{next_instr, Instr};
+use core::fmt;
+
+#[derive(Clone, Debug)]
+pub struct VerifyError<'m> {
+    pub from: usize,
+    pub kind: VerifyErrorKind<'m>,
+}
+
+#[derive(Clone, Debug)]
+pub enum VerifyErrorKind<'m> {
+    InstrpOutOfBounds,
+    UnparsableInstruction(&'m [u8]),
+}
+
+impl fmt::Display for VerifyError<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use VerifyErrorKind as K;
+        write!(f, "at instruction {}: ", self.from)?;
+        match &self.kind {
+            K::InstrpOutOfBounds => write!(f, "instruction pointer out-of-bounds"),
+            K::UnparsableInstruction(x) => write!(f, "reached unparsable instruction: {:x?}", x),
+        }
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for VerifyError<'_> {}
+
+/// this should be called before attempting to run an opcode stream to ensure
+/// that jump targets are valid (this makes it possible to avoid unnecessary
+/// checks during tight loops)
+pub fn verify(m: &[u8]) -> Result<(), VerifyError<'_>> {
+    let mut instrp = 0;
+
+    while let Some(nxti) = next_instr(m, instrp) {
+        let (nxtidelta, nxti) = nxti.map_err(|code| VerifyError {
+            from: instrp,
+            kind: VerifyErrorKind::UnparsableInstruction(code),
+        })?;
+        assert_ne!(nxtidelta, 0);
+        let vfe = VerifyError {
+            from: instrp,
+            kind: VerifyErrorKind::InstrpOutOfBounds,
+        };
+        instrp += nxtidelta;
+        match nxti {
+            Instr::CallLocal(x) | Instr::CallLDefer(x) | Instr::JumpCond(x) => {
+                match usize::try_from(x) {
+                    Err(_) => return Err(vfe),
+                    Ok(x) if x >= m.len() => return Err(vfe),
+                    Ok(_) => {}
+                }
+            }
+            Instr::CallRemote(_)
+            | Instr::Return
+            | Instr::Push(_)
+            | Instr::Pop(_)
+            | Instr::Dup(_)
+            | Instr::Swap(_)
+            | Instr::Shift(_)
+            | Instr::DupFrom
+            | Instr::DoMath1(_)
+            | Instr::DoMath2(_) => {}
+        }
+    }
+
+    Ok(())
+}
diff --git a/crates/fogtix-vm-cli/src/bin/fogtix-vm.rs b/crates/fogtix-vm-cli/src/bin/fogtix-vm.rs
index 36abbd3..df8c5b3 100644
--- a/crates/fogtix-vm-cli/src/bin/fogtix-vm.rs
+++ b/crates/fogtix-vm-cli/src/bin/fogtix-vm.rs
@@ -18,7 +18,7 @@ fn main() {
     let main_mod = readfilez::read_from_file(std::fs::File::open(main_mod_path))
         .expect("unable to open/load main module");
 
-    if let Err(e) = fogtix_vm::verify(&main_mod) {
+    if let Err(e) = fogtix_bytecode::verify(&main_mod) {
         eprintln!("ERROR: {}", e);
         std::process::exit(1);
     }
diff --git a/crates/fogtix-vm/src/lib.rs b/crates/fogtix-vm/src/lib.rs
index 0be8dd0..70709d4 100644
--- a/crates/fogtix-vm/src/lib.rs
+++ b/crates/fogtix-vm/src/lib.rs
@@ -5,19 +5,7 @@ extern crate alloc;
 
 use alloc::vec::Vec;
 use core::fmt;
-use fogtix_bytecode::{consts, Instr, Parse};
-
-fn next_instr(m: &[u8], pos: usize) -> Option<Result<(usize, Instr), &[u8]>> {
-    m.get(pos..).map(|nxti_arr| match Instr::parse(nxti_arr) {
-        Err(_) => Err(&nxti_arr[..core::cmp::min(nxti_arr.len(), 20)]),
-        Ok((ptr, i)) => Ok((nxti_arr.len() - ptr.len(), i)),
-    })
-}
-
-fn is_call2jump(m: &[u8], pos: usize) -> bool {
-    // tail-call optimization (would otherwise require much more opcodes)
-    matches!(next_instr(m, pos), Some(Ok((_, Instr::Return))))
-}
+use fogtix_bytecode::{consts, is_call2jump, next_instr, Instr};
 
 pub type StackEntValue = u64;
 
@@ -40,76 +28,6 @@ impl fmt::Display for Error<'_> {
     }
 }
 
-#[derive(Clone, Debug)]
-pub struct VerifyError<'m> {
-    pub from: usize,
-    pub kind: VerifyErrorKind<'m>,
-}
-
-#[derive(Clone, Debug)]
-pub enum VerifyErrorKind<'m> {
-    InstrpOutOfBounds,
-    UnparsableInstruction(&'m [u8]),
-}
-
-impl fmt::Display for VerifyError<'_> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        use VerifyErrorKind as K;
-        write!(f, "at instruction {}: ", self.from)?;
-        match &self.kind {
-            K::InstrpOutOfBounds => write!(f, "instruction pointer out-of-bounds"),
-            K::UnparsableInstruction(x) => write!(f, "reached unparsable instruction: {:x?}", x),
-        }
-    }
-}
-
-/// this should be called before attempting to run an opcode stream to ensure
-/// that jump targets are valid (this makes it possible to avoid unnecessary
-/// checks during tight loops)
-pub fn verify(m: &[u8]) -> Result<(), VerifyError<'_>> {
-    let mut instrp = 0;
-
-    while let Some(nxti) = next_instr(m, instrp) {
-        let (nxtidelta, nxti) = nxti.map_err(|code| VerifyError {
-            from: instrp,
-            kind: VerifyErrorKind::UnparsableInstruction(code),
-        })?;
-        assert_ne!(nxtidelta, 0);
-        instrp += nxtidelta;
-        match nxti {
-            Instr::CallLocal(x) | Instr::CallLDefer(x) | Instr::JumpCond(x) => {
-                match usize::try_from(x) {
-                    Err(_) => {
-                        return Err(VerifyError {
-                            from: usize::MAX,
-                            kind: VerifyErrorKind::InstrpOutOfBounds,
-                        })
-                    }
-                    Ok(x) if x >= m.len() => {
-                        return Err(VerifyError {
-                            from: x,
-                            kind: VerifyErrorKind::InstrpOutOfBounds,
-                        })
-                    }
-                    Ok(_) => {}
-                }
-            }
-            Instr::CallRemote(_)
-            | Instr::Return
-            | Instr::Push(_)
-            | Instr::Pop(_)
-            | Instr::Dup(_)
-            | Instr::Swap(_)
-            | Instr::Shift(_)
-            | Instr::DupFrom
-            | Instr::DoMath1(_)
-            | Instr::DoMath2(_) => {}
-        }
-    }
-
-    Ok(())
-}
-
 pub struct Process<'m> {
     pub stack: Vec<StackEntValue>,
     pub m: &'m [u8],
@@ -264,7 +182,7 @@ mod tests {
 
         #[test]
         fn doesnt_crash(inp in proptest::collection::vec(0..=u8::MAX, 0..1024)) {
-            if verify(&inp[..]).is_err() {
+            if fogtix_bytecode::verify(&inp[..]).is_err() {
                 return Ok(());
             }
             let mut p = Process {