Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 110 additions & 88 deletions crates/fuzzing/src/generators/gc_ops/ops.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
//! Operations for the `gc` operations.

use crate::generators::gc_ops::types::StackType;
use crate::generators::gc_ops::{
limits::GcOpsLimits,
types::{CompositeType, RecGroupId, StructType, TypeId, Types},
};
use mutatis::{Context, Generate, mutators as m};
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
use std::collections::BTreeMap;
use wasm_encoder::{
CodeSection, ConstExpr, EntityType, ExportKind, ExportSection, Function, FunctionSection,
Expand Down Expand Up @@ -245,7 +245,8 @@ impl GcOps {
self.types.fixup(&self.limits);

let mut new_ops = Vec::with_capacity(self.ops.len());
let mut stack = 0;
let mut stack: Vec<StackType> = Vec::new();
let num_types = self.types.type_defs.len() as u32;

for mut op in self.ops.iter().copied() {
if self.limits.max_types == 0
Expand All @@ -268,24 +269,27 @@ impl GcOps {

op.fixup(&self.limits);

let mut temp = SmallVec::<[_; 4]>::new();

while stack < op.operands_len() {
temp.push(GcOp::Null());
stack += 1;
match &mut op {
GcOp::StructNew(t) | GcOp::TakeStructCall(t) | GcOp::TakeTypedStructCall(t) => {
if num_types > 0 {
*t = *t % num_types;
}
Comment on lines +272 to +276
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this fixing-up of ops should go in GcOp::fixup. We can add num_types as a parameter there. And if num_types == 0, we should probably just remove this op, no? How do we struct.new or call a function that takes a concrete struct reference if we don't define any types?

}
_ => {}
}

temp.push(op);
stack = stack - op.operands_len() + op.results_len();
op.operand_types().iter().for_each(|ty| {
StackType::fixup_stack_type(*ty, &mut stack, &mut new_ops, num_types);
});

new_ops.extend(temp);
// Finally, emit the op itself (updates stack abstractly)
StackType::emit(op, &mut stack, &mut new_ops, num_types);
}

// Insert drops to balance the final stack state
for _ in 0..stack {
// Balance any leftovers with drops (works for any type)
for _ in 0..stack.len() {
new_ops.push(GcOp::Drop());
}

self.ops = new_ops;
}

Expand All @@ -301,79 +305,68 @@ impl GcOps {
macro_rules! define_gc_ops {
(
$(
$op:ident $( ( $($limit_var:ident : $limit:expr => $ty:ty),* ) )? : $params:expr => $results:expr ,
$op:ident
$( ( $($limit_var:ident : $limit:expr => $ty:ty),* ) )?
: ($operand_req:expr, $params:expr) => ($result_type:expr, $results:expr) ,
)*
) => {
/// The operations for the `gc` operations.
#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
pub(crate) enum GcOp {
/// The operations that can be performed by the `gc` function.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Stylistically, it is quite rare to have the doc comment below the #[...] attributes. Mind reverting this code motion?

pub enum GcOp {
$(
#[allow(missing_docs, reason = "macro-generated code")]
$op ( $( $($ty),* )? ),
)*
}

/// Names of the operations for testing purposes.
#[cfg(test)]
pub const OP_NAMES: &'static[&'static str] = &[
$(
stringify!($op),
)*
pub(crate) const OP_NAMES: &'static [&'static str] = &[
$( stringify!($op), )*
];

impl GcOp {
#[cfg(test)]
pub fn name(&self) -> &'static str {
match self {
$(
Self::$op (..) => stringify!($op),
)*
}
pub(crate) fn name(&self) -> &'static str {
match self { $( Self::$op(..) => stringify!($op), )* }
}

pub fn operands_len(&self) -> usize {
match self {
$(
Self::$op (..) => $params,
)*
}
pub(crate) fn operands_len(&self) -> usize {
match self { $( Self::$op(..) => $params, )* }
}

pub fn results_len(&self) -> usize {
#[allow(unreachable_patterns, reason = "macro-generated code")]
pub(crate) fn operand_types(&self) -> Vec<StackType> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To avoid a ton of temporary allocations, and reuse a single allocation instead, let's have this method take a mutable vector as an out parameter:

Suggested change
pub(crate) fn operand_types(&self) -> Vec<StackType> {
pub(crate) fn operand_types(&self, types: &mut Vec<StackType>) {

match self {
// special-cases
Self::TakeTypedStructCall(t) => vec![StackType::Struct(Some(*t))],
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’d like to remove these “special cases” that require an index. The macro expansion doesn’t accept it and I’d rather not complicate the macro further. If you know a clean way to handle this, please share it with me. I’ll address it in the next PR anyway.

$(
Self::$op (..) => $results,
)*
Self::$op(..) => match $operand_req {
None => vec![],
Some(req) => if $params == 0 { vec![] } else { vec![req; $params] },
}
),*
}
}
}

$(
#[allow(non_snake_case, reason = "macro-generated code")]
fn $op(
_ctx: &mut mutatis::Context,
_limits: &GcOpsLimits,
stack: usize,
) -> mutatis::Result<(GcOp, usize)> {
#[allow(unused_comparisons, reason = "macro-generated code")]
{
debug_assert!(stack >= $params);
}
pub(crate) fn results_len(&self) -> usize {
match self { $( Self::$op(..) => $results, )* }
}

let op = GcOp::$op(
$($({
let limit_fn = $limit as fn(&GcOpsLimits) -> $ty;
let limit = (limit_fn)(_limits);
debug_assert!(limit > 0);
m::range(0..=limit - 1).generate(_ctx)?
}),*)?
);
let new_stack = stack - $params + $results;
Ok((op, new_stack))
#[allow(unreachable_patterns, reason = "macro-generated code")]
pub(crate) fn result_types(&self) -> Vec<StackType> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And let's also do an out parameter here as well.

match self {
// special-cases
Self::StructNew(t) => vec![StackType::Struct(Some(*t))],
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same "special case" here

$(
Self::$op(..) => match $result_type {
None => vec![],
Some(ty) => if $results == 0 { vec![] } else { vec![ty; $results] },
}
),*
}
}
)*

impl GcOp {
fn fixup(&mut self, limits: &GcOpsLimits) {
pub(crate) fn fixup(&mut self, limits: &GcOpsLimits) {
match self {
$(
Self::$op( $( $( $limit_var ),* )? ) => {
Expand All @@ -396,13 +389,13 @@ macro_rules! define_gc_ops {
let mut valid_choices: Vec<
fn(&mut Context, &GcOpsLimits, usize) -> mutatis::Result<(GcOp, usize)>
> = vec![];

$(
#[allow(unused_comparisons, reason = "macro-generated code")]
if stack >= $params $($(
&& {
let limit_fn = $limit as fn(&GcOpsLimits) -> $ty;
let limit = (limit_fn)(&ops.limits);
limit > 0
(limit_fn)(&ops.limits) > 0
}
)*)? {
valid_choices.push($op);
Expand All @@ -412,36 +405,69 @@ macro_rules! define_gc_ops {
let f = *ctx.rng()
.choose(&valid_choices)
.expect("should always have a valid op choice");

(f)(ctx, &ops.limits, stack)
}
}

$(
#[allow(non_snake_case, reason = "macro-generated code")]
fn $op(
_ctx: &mut mutatis::Context,
_limits: &GcOpsLimits,
stack: usize,
) -> mutatis::Result<(GcOp, usize)> {
Comment on lines +414 to +418
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this still be taking a stack: usize and returning a new usize? Shouldn't it be taking a stack: &mut Vec<StackType> now instead?

Copy link
Contributor Author

@khagankhan khagankhan Dec 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am back!!!

Does this also mean that when we generate a new op, we should also be checking type compatibility, not just maintaining stack depth?

@fitzgen

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this also mean that when we generate a new op, we should also be checking type compatibility, not just maintaining stack depth?

Yes, we either have to do that (if we continue the existing approach) or else we switch to a new approach and instead just generate a random sequence of arbitrary ops and then rely on the fixup pass to make it valid. The latter might be easier long term, so that there is only one code path we have to maintain.

But also, backing up, we shouldn't really need to generate op sequences from scratch. The whole point of using a mutation-based paradigm is that we can generate arbitrary inputs via a series of mutations over time. So, instead of generating whole op sequences, we should be able to get away with something like

impl Generate<GcOps> for GcOpsMutator {
    fn generate(&mut self, ctx: &mut Context) -> mutatis::Result<GcOps> {
        let mut ops = GcOps::default();
        for _ in 0..N {
            self.mutate(ctx, &mut ops)?;
        }
        Ok(ops)
    }
}

(And we should probably build the generic version of this into mutatis directly eventually)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Awesome! That was the answer I needed. After addressing this I will push

Thanks a lot

#[allow(unused_comparisons, reason = "macro-generated code")]
{ debug_assert!(stack >= $params); }

let op = GcOp::$op(
$($({
let limit_fn = $limit as fn(&GcOpsLimits) -> $ty;
let limit = (limit_fn)(_limits);
debug_assert!(limit > 0);
m::range(0..=limit - 1).generate(_ctx)?
}),*)?
);
Ok((op, stack - $params + $results))
}
)*
};
}

define_gc_ops! {
Gc : 0 => 3,
Gc : (None, 0) => (Some(StackType::ExternRef), 3),

MakeRefs : 0 => 3,
TakeRefs : 3 => 0,
MakeRefs : (None, 0) => (Some(StackType::ExternRef), 3),
TakeRefs : (Some(StackType::ExternRef), 3) => (None, 0),

// Add one to make sure that out of bounds table accesses are possible, but still rare.
TableGet(elem_index: |ops| ops.table_size + 1 => u32) : 0 => 1,
TableSet(elem_index: |ops| ops.table_size + 1 => u32) : 1 => 0,

GlobalGet(global_index: |ops| ops.num_globals => u32) : 0 => 1,
GlobalSet(global_index: |ops| ops.num_globals => u32) : 1 => 0,

LocalGet(local_index: |ops| ops.num_params => u32) : 0 => 1,
LocalSet(local_index: |ops| ops.num_params => u32) : 1 => 0,

StructNew(type_index: |ops| ops.max_types => u32) : 0 => 0,
TakeStructCall(type_index: |ops| ops.max_types => u32) : 1 => 0,
TakeTypedStructCall(type_index: |ops| ops.max_types => u32) : 1 => 0,

Drop : 1 => 0,

Null : 0 => 1,
TableGet(elem_index: |ops| ops.table_size + 1 => u32)
: (None, 0) => (Some(StackType::ExternRef), 1),
TableSet(elem_index: |ops| ops.table_size + 1 => u32)
: (Some(StackType::ExternRef), 1) => (None, 0),

GlobalGet(global_index: |ops| ops.num_globals => u32)
: (None, 0) => (Some(StackType::ExternRef), 1),
GlobalSet(global_index: |ops| ops.num_globals => u32)
: (Some(StackType::ExternRef), 1) => (None, 0),

LocalGet(local_index: |ops| ops.num_params => u32)
: (None, 0) => (Some(StackType::ExternRef), 1),
LocalSet(local_index: |ops| ops.num_params => u32)
: (Some(StackType::ExternRef), 1) => (None, 0),

// Handled specially in result_types()
StructNew(type_index: |ops| ops.max_types => u32)
: (None, 0) => (Some(StackType::Anything), 1),
TakeStructCall(type_index: |ops| ops.max_types => u32)
: (Some(StackType::Struct(None)), 1) => (None, 0),
// Handled specially in operand_types()
// StackType::Anythng is just a placeholder
TakeTypedStructCall(type_index: |ops| ops.max_types => u32)
: (Some(StackType::Anything), 1) => (None, 0),

Drop : (Some(StackType::Anything), 1) => (None, 0),

Null : (None, 0) => (Some(StackType::ExternRef), 1),
}

impl GcOp {
Expand Down Expand Up @@ -497,16 +523,12 @@ impl GcOp {
}
Self::StructNew(x) => {
func.instruction(&Instruction::StructNew(x + struct_type_base));
func.instruction(&Instruction::Call(take_structref_idx));
}
Self::TakeStructCall(x) => {
func.instruction(&Instruction::StructNew(x + struct_type_base));
Self::TakeStructCall(_x) => {
func.instruction(&Instruction::Call(take_structref_idx));
}
Self::TakeTypedStructCall(x) => {
let s = struct_type_base + x;
let f = typed_first_func_index + x;
func.instruction(&Instruction::StructNew(s));
func.instruction(&Instruction::Call(f));
}
}
Expand Down
82 changes: 81 additions & 1 deletion crates/fuzzing/src/generators/gc_ops/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ fn test_wat_string() -> mutatis::Result<()> {
global.set 0
global.get 0
struct.new 5
call 3
drop
drop
drop
drop
Expand Down Expand Up @@ -297,3 +297,83 @@ fn emits_empty_rec_groups_and_validates() -> mutatis::Result<()> {

Ok(())
}

#[test]
fn fixup_check_types_and_indexes() -> mutatis::Result<()> {
let _ = env_logger::try_init();

// Create GcOps with 5 types so that 7 % 5 = 2
let mut ops = test_ops(5, 5, 5);
ops.limits.max_types = 5;

// We create max types 5 and out ouf bounds for the type index
ops.ops = vec![
GcOp::TakeTypedStructCall(27),
GcOp::GlobalSet(0),
GcOp::StructNew(24),
GcOp::LocalSet(0),
];

// Call fixup to resolve dependencies
// this should fix the types by inserting missing types
// also put the indexes in the correct bounds
ops.fixup();

// Verify that fixup()
// The expected sequence should be:
// 1. StructNew(_) - inserted by fixup to satisfy TakeTypedStructCall(_)
// 2. TakeTypedStructCall(_) - now has Struct(_) on stack
// 3. Null() - inserted by fixup to satisfy GlobalSet(0)
// 4. GlobalSet(0) - now has ExternRef on stack
// 5. StructNew(_) - produces Struct(_)
// 6. Null() - inserted by fixup to satisfy LocalSet(0)
// 7. LocalSet(0) - now has ExternRef on stack
// 8. Drop() - inserted by fixup to consume ExternRef before Drop()

// This is the expected sequence in wat format:
// loop ;; label = @1
// struct.new 7
// call 6
// ref.null extern
// global.set 0
// struct.new 9
// ref.null extern
// local.set 0
// drop
// br 0 (;@1;)
// end

// Find the index of TakeTypedStructCall(_) after fixup
let take_call_idx = ops
.ops
.iter()
.position(|op| matches!(op, GcOp::TakeTypedStructCall(_)))
.expect("TakeTypedStructCall(_) should be present after fixup");

// Verify that StructNew(_) appears before TakeTypedStructCall(_)
let struct_new_2_before = ops
.ops
.iter()
.take(take_call_idx)
.any(|op| matches!(op, GcOp::StructNew(_)));

assert!(
struct_new_2_before,
"fixup should insert StructNew(_) before TakeTypedStructCall(_) to satisfy the dependency"
);

// Verify the sequence validates correctly
let wasm = ops.to_wasm_binary();
let wat = wasmprinter::print_bytes(&wasm).unwrap();
log::debug!("{wat}");
let feats = wasmparser::WasmFeatures::default();
feats.reference_types();
feats.gc();
let mut validator = wasmparser::Validator::new_with_features(feats);
assert!(
validator.validate_all(&wasm).is_ok(),
"GC validation should pass after fixup"
);

Ok(())
}
Loading