From adef434c20e4e04c634189a813d1d36fc345db44 Mon Sep 17 00:00:00 2001 From: Andy Wortman Date: Wed, 11 Sep 2019 15:48:21 -0700 Subject: [PATCH 1/2] add version info to modules version information is comprised of both the current crate version and the current git commit hash (if available). the current git commit hash is only used in release builds to avoid too much frustration in typical development workflows using tools like "git commit --amend" or "git rebase", or just making non-conflicting spot changes to only one of lucetc or lucet-runtime --- Cargo.lock | 1 + lucet-analyze/src/main.rs | 5 + lucet-module/Cargo.toml | 1 + lucet-module/src/lib.rs | 2 + lucet-module/src/module.rs | 3 + lucet-module/src/signature.rs | 9 +- lucet-module/src/version_info.rs | 92 ++++++++++++++++++ .../lucet-runtime-internals/build.rs | 28 ++++++ .../lucet-runtime-internals/src/module/dl.rs | 18 +++- lucet-runtime/tests/version_checks.rs | 16 +++ .../tests/version_checks/old_module.so | Bin 0 -> 9424 bytes lucetc/build.rs | 29 ++++++ lucetc/src/output.rs | 9 +- 13 files changed, 207 insertions(+), 6 deletions(-) create mode 100644 lucet-module/src/version_info.rs create mode 100644 lucet-runtime/tests/version_checks.rs create mode 100755 lucet-runtime/tests/version_checks/old_module.so create mode 100644 lucetc/build.rs diff --git a/Cargo.lock b/Cargo.lock index b7303214d..58c45b04b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -850,6 +850,7 @@ dependencies = [ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "cranelift-entity 0.41.0", "failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "memoffset 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "minisign 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)", "num-derive 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", "num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/lucet-analyze/src/main.rs
b/lucet-analyze/src/main.rs index 2c20655e8..e1bd17d3b 100644 --- a/lucet-analyze/src/main.rs +++ b/lucet-analyze/src/main.rs @@ -2,6 +2,7 @@ use lucet_module::{ FunctionSpec, Module, ModuleData, SerializedModule, TableElement, TrapManifest, TrapSite, + VersionInfo, }; use byteorder::{LittleEndian, ReadBytesExt}; @@ -102,7 +103,10 @@ impl<'a> ArtifactSummary<'a> { .unwrap(); let mut rdr = Cursor::new(buffer); + let version = VersionInfo::read_from(&mut rdr).unwrap(); + SerializedModule { + version, module_data_ptr: rdr.read_u64::<LittleEndian>().unwrap(), module_data_len: rdr.read_u64::<LittleEndian>().unwrap(), tables_ptr: rdr.read_u64::<LittleEndian>().unwrap(), @@ -211,6 +215,7 @@ fn load_module<'b, 'a: 'b>( ) }; Module { + version: serialized_module.version.clone(), module_data, tables, function_manifest, diff --git a/lucet-module/Cargo.toml b/lucet-module/Cargo.toml index 16078ab0b..41fc62748 100644 --- a/lucet-module/Cargo.toml +++ b/lucet-module/Cargo.toml @@ -20,3 +20,4 @@ num-traits = "0.2" minisign = "0.5.11" object = "0.12" byteorder = "1.3" +memoffset = "0.5.1" diff --git a/lucet-module/src/lib.rs b/lucet-module/src/lib.rs index 8be0f1797..b1259024e 100644 --- a/lucet-module/src/lib.rs +++ b/lucet-module/src/lib.rs @@ -17,6 +17,7 @@ mod signature; mod tables; mod traps; mod types; +mod version_info; pub use crate::error::Error; pub use crate::functions::{ @@ -32,6 +33,7 @@ pub use crate::signature::{ModuleSignature, PublicKey}; pub use crate::tables::TableElement; pub use crate::traps::{TrapCode, TrapManifest, TrapSite}; pub use crate::types::{Signature, ValueType}; +pub use crate::version_info::VersionInfo; /// Owned variants of the module data types, useful for serialization and testing.
pub mod owned { diff --git a/lucet-module/src/module.rs b/lucet-module/src/module.rs index 793a94382..238e2bcb0 100644 --- a/lucet-module/src/module.rs +++ b/lucet-module/src/module.rs @@ -1,12 +1,14 @@ use crate::functions::FunctionSpec; use crate::module_data::ModuleData; use crate::tables::TableElement; +use crate::version_info::VersionInfo; pub const LUCET_MODULE_SYM: &str = "lucet_module"; /// Module is the exposed structure that contains all the data backing a Lucet-compiled object. #[derive(Debug)] pub struct Module<'a> { + pub version: VersionInfo, pub module_data: ModuleData<'a>, pub tables: &'a [&'a [TableElement]], pub function_manifest: &'a [FunctionSpec], @@ -18,6 +20,7 @@ pub struct Module<'a> { #[repr(C)] #[derive(Debug)] pub struct SerializedModule { + pub version: VersionInfo, pub module_data_ptr: u64, pub module_data_len: u64, pub tables_ptr: u64, diff --git a/lucet-module/src/signature.rs b/lucet-module/src/signature.rs index 5e6e52b4f..134bd7256 100644 --- a/lucet-module/src/signature.rs +++ b/lucet-module/src/signature.rs @@ -1,8 +1,9 @@ use crate::error::Error::{self, IOError, ModuleSignatureError}; -use crate::module::LUCET_MODULE_SYM; +use crate::module::{SerializedModule, LUCET_MODULE_SYM}; use crate::module_data::MODULE_DATA_SYM; use crate::ModuleData; use byteorder::{ByteOrder, LittleEndian}; +use memoffset::offset_of; pub use minisign::{PublicKey, SecretKey}; use minisign::{SignatureBones, SignatureBox}; use object::*; @@ -94,8 +95,10 @@ impl RawModuleAndData { format!("`{}` symbol not present", MODULE_DATA_SYM), ))?; - let module_data_len = - LittleEndian::read_u64(&obj_bin[(native_data_symbol_data.offset + 8)..]) as usize; + let module_data_len = LittleEndian::read_u64( + &obj_bin[(native_data_symbol_data.offset + + offset_of!(SerializedModule, module_data_len))..], + ) as usize; Ok(RawModuleAndData { obj_bin, diff --git a/lucet-module/src/version_info.rs b/lucet-module/src/version_info.rs new file mode 100644 index 
000000000..e0cd70578 --- /dev/null +++ b/lucet-module/src/version_info.rs @@ -0,0 +1,92 @@ +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; +use std::cmp::min; +use std::fmt; +use std::io; + +/// VersionInfo is information about a Lucet module to allow the Lucet runtime to determine if or +/// how the module can be loaded, if so requested. The information here describes implementation +/// details in runtime support for `lucetc`-produced modules, and nothing higher level. +#[repr(C)] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VersionInfo { + major: u16, + minor: u16, + patch: u16, + reserved: u16, + version_hash: [u8; 8], +} + +impl fmt::Display for VersionInfo { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}.{}.{}", self.major, self.minor, self.patch)?; + if u64::from_ne_bytes(self.version_hash) != 0 { + write!( + fmt, + "-{}", + std::str::from_utf8(&self.version_hash).unwrap_or("INVALID") + )?; + } + Ok(()) + } +} + +impl VersionInfo { + pub fn write_to<W: WriteBytesExt>(&self, w: &mut W) -> io::Result<()> { + w.write_u16::<LittleEndian>(self.major)?; + w.write_u16::<LittleEndian>(self.minor)?; + w.write_u16::<LittleEndian>(self.patch)?; + w.write_u16::<LittleEndian>(self.reserved)?; + w.write(&self.version_hash).and_then(|written| { + if written != self.version_hash.len() { + Err(io::Error::new( + io::ErrorKind::Other, + "unable to write full version hash", + )) + } else { + Ok(()) + } + }) + } + + pub fn read_from<R: ReadBytesExt>(r: &mut R) -> io::Result<Self> { + let mut version_hash = [0u8; 8]; + Ok(VersionInfo { + major: r.read_u16::<LittleEndian>()?, + minor: r.read_u16::<LittleEndian>()?, + patch: r.read_u16::<LittleEndian>()?, + reserved: r.read_u16::<LittleEndian>()?, + version_hash: { + r.read_exact(&mut version_hash)?; + version_hash + }, + }) + } + + pub fn valid(&self) -> bool { + self.reserved == 0x8000 + } + + pub fn current(current_hash: &'static [u8]) -> Self { + let mut version_hash = [0u8; 8]; + + for i in 0..min(version_hash.len(), current_hash.len()) { + version_hash[i] = current_hash[i]; + } + + // The reasoning for this is as follows: + //
`SerializedModule`, in versions before version information was introduced, began with a + // pointer - `module_data_ptr`. This pointer would be relocated to somewhere in user space + // for the embedder of `lucet-runtime`. On x86_64, hopefully, that's userland code in some + // OS, meaning the pointer will be a pointer to user memory, and will be below + // 0x8000_0000_0000_0000. By setting `reserved` to `0x8000`, we set what would be the + // highest bit in `module_data_ptr` in an old `lucet-runtime` and guarantee a segmentation + // fault when loading these newer modules with version information. + VersionInfo { + major: env!("CARGO_PKG_VERSION_MAJOR").parse().unwrap(), + minor: env!("CARGO_PKG_VERSION_MINOR").parse().unwrap(), + patch: env!("CARGO_PKG_VERSION_PATCH").parse().unwrap(), + reserved: 0x8000u16, + version_hash, + } + } +} diff --git a/lucet-runtime/lucet-runtime-internals/build.rs b/lucet-runtime/lucet-runtime-internals/build.rs index 41323408c..9c5e5418a 100644 --- a/lucet-runtime/lucet-runtime-internals/build.rs +++ b/lucet-runtime/lucet-runtime-internals/build.rs @@ -1,3 +1,7 @@ +use std::env; +use std::fs::File; +use std::path::Path; + use cc; fn main() { @@ -14,4 +18,28 @@ fn main() { cc::Build::new() .file("src/context/tests/c_child.c") .compile("context_tests_c_child"); + + let commit_file_path = Path::new(&env::var("OUT_DIR").unwrap()).join("commit_hash"); + // in debug builds we only need the file to exist, but in release builds this will be used and + // requires mutability. + #[allow(unused_variables, unused_mut)] + let mut f = File::create(&commit_file_path).unwrap(); + + // This is about the closest not-additional-feature-flag way to detect release builds. + // In debug builds, leave the `commit_hash` file empty to allow looser version checking and + // avoid impacting development workflows too much.
+ #[cfg(not(debug_assertions))] + { + use std::io::Write; + use std::process::Command; + + let last_commit_hash = Command::new("git") + .args(&["log", "-n", "1", "--pretty=format:%H"]) + .output() + .ok(); + + if let Some(last_commit_hash) = last_commit_hash { + f.write_all(&last_commit_hash.stdout).unwrap(); + } + } } diff --git a/lucet-runtime/lucet-runtime-internals/src/module/dl.rs b/lucet-runtime/lucet-runtime-internals/src/module/dl.rs index 350a08640..98fb5bba6 100644 --- a/lucet-runtime/lucet-runtime-internals/src/module/dl.rs +++ b/lucet-runtime/lucet-runtime-internals/src/module/dl.rs @@ -4,7 +4,7 @@ use libc::c_void; use libloading::Library; use lucet_module::{ FunctionHandle, FunctionIndex, FunctionPointer, FunctionSpec, ModuleData, ModuleSignature, - PublicKey, SerializedModule, Signature, LUCET_MODULE_SYM, + PublicKey, SerializedModule, Signature, VersionInfo, LUCET_MODULE_SYM, }; use std::ffi::CStr; use std::mem::MaybeUninit; @@ -61,6 +61,21 @@ impl DlModule { let serialized_module: &SerializedModule = unsafe { serialized_module_ptr.as_ref().unwrap() }; + let version = serialized_module.version.clone(); + + let runtime_version = + VersionInfo::current(include_str!(concat!(env!("OUT_DIR"), "/commit_hash")).as_bytes()); + + if !version.valid() { + return Err(lucet_incorrect_module!("reserved bit is not set. This module is likely too old for this lucet-runtime to load.")); + } else if version != runtime_version { + return Err(lucet_incorrect_module!( + "version mismatch. module has version {}, while this runtime is version {}", + version, + runtime_version, + )); + } + // Deserialize the slice into ModuleData, which will hold refs into the loaded // shared object file in `module_data_slice`. 
Both of these get a 'static lifetime because // Rust doesn't have a safe way to describe that their lifetime matches the containing @@ -115,6 +130,7 @@ impl DlModule { lib, fbase, module: lucet_module::Module { + version, module_data, tables, function_manifest, diff --git a/lucet-runtime/tests/version_checks.rs b/lucet-runtime/tests/version_checks.rs new file mode 100644 index 000000000..fa5c9a30c --- /dev/null +++ b/lucet-runtime/tests/version_checks.rs @@ -0,0 +1,16 @@ +use lucet_runtime::{DlModule, Error}; + +#[test] +pub fn reject_old_modules() { + let err = DlModule::load("./tests/version_checks/old_module.so") + .err() + .unwrap(); + + if let Error::ModuleError(e) = err { + let msg = format!("{}", e); + assert!(msg.contains("reserved bit is not set")); + assert!(msg.contains("module is likely too old")); + } else { + panic!("unexpected error loading module: {}", err); + } +} diff --git a/lucet-runtime/tests/version_checks/old_module.so b/lucet-runtime/tests/version_checks/old_module.so new file mode 100755 index 0000000000000000000000000000000000000000..cf04b77a76ec93614780f072ddedf1258e4ec802 GIT binary patch literal 9424 zcmeHN&1(};5Pzvws}-9Tzp0uQJ@`S?gXlqJ6t+b*$up5ZkY!OJLXjtOHJWtj<`XNwu?%{n)m<_O5+qb*nnj^?K^b`@7e!?zngQ z&CvPwr{53mrRfE!kF=<*-r&$8`rV{&r4i7X2mSajLx9kq}MD zt`lSZa7Q9k$nQ4d9DkR@2TntzUnov%#9qQEHBfQ=!0xY5*^DsvpkmdaL!R*z@t0?I-Df$Jx}_r1glS zS+}0eSfkH^hL&dQ!Z;RMM*{!h@Ws>_vUv|00IW|^O#g}j_&wqcz!4un1K27*i9`T2 z94Q4L1uv)#HwYL63<3rLgMdN6AYc&qHwa)`c3w8IpTGYob0+s?EZbuBLiT-lXPl6E zY)fxS`pv>O3CBBK55K4pVLv_{4)uE*i?H7ydN6(;VnUu@e}wrVPB^=Czf_hb(ZHor z+P`ax@lKw}$z*eG|3E$*nO}o|LBJqj5HJWB1PlTO0fT@+z#w1{FbEg~{v!m?so;`h zEmC;oG?rTmu`Mzep3E+E=E^;n*v19S>ds$vDD)=6P=j7MLx;ak_OiI@Q}d{dJo$83 znlmt&<+}GF4G^u>^66QFrIM0<-VZ!Fo>s|$QYD#p{Jcu$Y9+r`40+(Gq~{hKR;py+ z4hJ+Y>!atSo45Ntr|4=+r+6_-0-s4Q0r3sw9+RG`b zeKl}v5r0vu^(1f%_)o~q07oB<*W)RykAB)#(s6F^M;{OTCe788g#V@5NfO6~Kl*s! 
zSU*}n*560I920#*94mvTX|A5|Btv(Q1op@JIOYbQfT8ec{zoJJ`0fTg;{FRi=ueTr zxuZUOkAv@D@SO|pH}FRtJ><(d!5_!vXd>d+oJ=&qALBW`P!g|pgoAT+Iu@h#{{#z5 B=8XUV literal 0 HcmV?d00001 diff --git a/lucetc/build.rs b/lucetc/build.rs new file mode 100644 index 000000000..96299ec26 --- /dev/null +++ b/lucetc/build.rs @@ -0,0 +1,29 @@ +use std::env; +use std::fs::File; +use std::path::Path; + +fn main() { + let commit_file_path = Path::new(&env::var("OUT_DIR").unwrap()).join("commit_hash"); + // in debug builds we only need the file to exist, but in release builds this will be used and + // requires mutability. + #[allow(unused_variables, unused_mut)] + let mut f = File::create(&commit_file_path).unwrap(); + + // This is about the closest not-additional-feature-flag way to detect release builds. + // In debug builds, leave the `commit_hash` file empty to allow looser version checking and + // avoid impacting development workflows too much. + #[cfg(not(debug_assertions))] + { + use std::io::Write; + use std::process::Command; + + let last_commit_hash = Command::new("git") + .args(&["log", "-n", "1", "--pretty=format:%H"]) + .output() + .ok(); + + if let Some(last_commit_hash) = last_commit_hash { + f.write_all(&last_commit_hash.stdout).unwrap(); + } + } +} diff --git a/lucetc/src/output.rs b/lucetc/src/output.rs index 08d65f997..5e49b6bfa 100644 --- a/lucetc/src/output.rs +++ b/lucetc/src/output.rs @@ -10,7 +10,7 @@ use cranelift_codegen::{ir, isa}; use cranelift_faerie::FaerieProduct; use faerie::{Artifact, Decl, Link}; use failure::{format_err, Error, ResultExt}; -use lucet_module::{FunctionSpec, LUCET_MODULE_SYM, MODULE_DATA_SYM}; +use lucet_module::{FunctionSpec, VersionInfo, LUCET_MODULE_SYM, MODULE_DATA_SYM}; use std::collections::HashMap; use std::fs::File; use std::io::{Cursor, Write}; @@ -136,10 +136,15 @@ fn write_module( function_manifest_len: usize, obj: &mut Artifact, ) -> Result<(), Error> { - let mut native_data = Cursor::new(Vec::with_capacity(NATIVE_POINTER_SIZE * 4)); + let 
mut native_data = Cursor::new(Vec::with_capacity(16 + NATIVE_POINTER_SIZE * 4)); obj.declare(LUCET_MODULE_SYM, Decl::data().global()) .context(format!("declaring {}", LUCET_MODULE_SYM))?; + let version = + VersionInfo::current(include_str!(concat!(env!("OUT_DIR"), "/commit_hash")).as_bytes()); + + version.write_to(&mut native_data)?; + write_relocated_slice( obj, &mut native_data, From 54ae27962e0826f3ad7cedffb70b9e48d3339826 Mon Sep 17 00:00:00 2001 From: Andy Wortman Date: Fri, 13 Sep 2019 12:45:38 -0700 Subject: [PATCH 2/2] review comments --- lucet-module/src/version_info.rs | 8 ++++++++ lucetc/src/output.rs | 7 ++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/lucet-module/src/version_info.rs b/lucet-module/src/version_info.rs index e0cd70578..2914a9d69 100644 --- a/lucet-module/src/version_info.rs +++ b/lucet-module/src/version_info.rs @@ -13,6 +13,14 @@ pub struct VersionInfo { minor: u16, patch: u16, reserved: u16, + /// `version_hash` is either all nulls or the first eight ASCII characters of the git commit + /// hash of wherever this Version is coming from. In the case of a compiled lucet module, this + /// hash will come from the git commit that the lucetc producing it came from. In a runtime + /// context, it will be the git commit of lucet-runtime built into the embedder. + /// + /// The version hash will typically be populated only in release builds, but may be blank even in + /// that case: if building from a packaged crate, or in a build environment that does not have + /// "git" installed, `lucetc` and `lucet-runtime` will fall back to an empty hash.
version_hash: [u8; 8], } diff --git a/lucetc/src/output.rs b/lucetc/src/output.rs index 5e49b6bfa..66b706171 100644 --- a/lucetc/src/output.rs +++ b/lucetc/src/output.rs @@ -1,7 +1,6 @@ use crate::error::LucetcErrorKind; use crate::function_manifest::{write_function_manifest, FUNCTION_MANIFEST_SYM}; use crate::name::Name; -use crate::pointer::NATIVE_POINTER_SIZE; use crate::stack_probe; use crate::table::{link_tables, TABLE_SYM}; use crate::traps::write_trap_tables; @@ -10,7 +9,9 @@ use cranelift_codegen::{ir, isa}; use cranelift_faerie::FaerieProduct; use faerie::{Artifact, Decl, Link}; use failure::{format_err, Error, ResultExt}; -use lucet_module::{FunctionSpec, VersionInfo, LUCET_MODULE_SYM, MODULE_DATA_SYM}; +use lucet_module::{ + FunctionSpec, SerializedModule, VersionInfo, LUCET_MODULE_SYM, MODULE_DATA_SYM, +}; use std::collections::HashMap; use std::fs::File; use std::io::{Cursor, Write}; @@ -136,7 +137,7 @@ fn write_module( function_manifest_len: usize, obj: &mut Artifact, ) -> Result<(), Error> { - let mut native_data = Cursor::new(Vec::with_capacity(16 + NATIVE_POINTER_SIZE * 4)); + let mut native_data = Cursor::new(Vec::with_capacity(std::mem::size_of::<SerializedModule>())); obj.declare(LUCET_MODULE_SYM, Decl::data().global()) .context(format!("declaring {}", LUCET_MODULE_SYM))?;